diff --git a/.ci/azure/ci_utils/onnxruntime/version b/.ci/azure/ci_utils/onnxruntime/version index 3abd49542da1e3..27e0d15e9f8667 100644 --- a/.ci/azure/ci_utils/onnxruntime/version +++ b/.ci/azure/ci_utils/onnxruntime/version @@ -1 +1 @@ -rel-1.7.1 +rel-1.8.1 diff --git a/.ci/azure/linux.yml b/.ci/azure/linux.yml index 7b1ee18d792d74..592614b677ddf7 100644 --- a/.ci/azure/linux.yml +++ b/.ci/azure/linux.yml @@ -16,13 +16,12 @@ jobs: timeoutInMinutes: 90 pool: - name: LIN_VMSS_VENV_F16S_WU2 + name: LIN_VMSS_VENV_F16S_U20_WU2 variables: system.debug: true VSTS_HTTP_RETRY: 5 VSTS_HTTP_TIMEOUT: 200 - WORKERS_NUMBER: 16 BUILD_TYPE: Release REPO_DIR: $(Build.Repository.LocalPath) OPENVINO_CONTRIB_REPO_DIR: $(REPO_DIR)/../openvino_contrib @@ -43,6 +42,7 @@ jobs: echo Python info ; which python ; python --version echo Java info ; which java ; java -version echo gcc info ; which gcc ; gcc --version + echo cmake info ; which cmake ; cmake --version lsb_release env cat /proc/cpuinfo @@ -74,22 +74,26 @@ jobs: submodules: recursive path: openvino_contrib - - checkout: testdata - clean: true - lfs: true - path: testdata - - script: | - sudo apt --assume-yes install libusb-1.0-0-dev - # For opencv-python: setuptools and upgrade - sudo apt-get install python3-setuptools patchelf + set -e + $(REPO_DIR)/install_build_dependencies.sh + # Move jdk into contrib + sudo apt --assume-yes install openjdk-11-jdk + # For opencv-python: python3-setuptools and pip upgrade python3 -m pip install --upgrade pip python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/requirements.txt python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/wheel/requirements-dev.txt # For running Python API tests python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/src/requirements-dev.txt + # For running PaddlePaddle frontend unit tests + python3 -m pip install -r $(REPO_DIR)/ngraph/test/frontend/paddlepaddle/requirements_dev.txt + # For running ONNX frontend unit tests + python3 -m pip install -r $(REPO_DIR)/ngraph/test/requirements_test_onnx.txt + # For MO unit tests + python3 -m pip install -r $(REPO_DIR)/model-optimizer/requirements.txt + python3 -m pip install -r $(REPO_DIR)/model-optimizer/requirements_dev.txt # Speed up build - wget https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-linux.zip + wget https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-linux.zip unzip ninja-linux.zip sudo cp -v ninja /usr/local/bin/ # Speed up tests @@ -97,6 +101,11 @@ jobs: workingDirectory: $(WORK_DIR) displayName: 'Install dependencies' + - checkout: testdata + clean: true + lfs: true + path: testdata + - task: CMake@1 inputs: # CMake must get Python 3.x version by default @@ -105,12 +114,14 @@ jobs: -DVERBOSE_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PYTHON=ON - -DPYTHON_EXECUTABLE=/usr/bin/python3.6 + -DPYTHON_EXECUTABLE=/usr/bin/python3.8 -DENABLE_WHEEL=ON -DENABLE_TESTS=ON -DNGRAPH_ONNX_IMPORT_ENABLE=ON + -DNGRAPH_ONNX_FRONTEND_ENABLE=ON -DENABLE_FASTER_BUILD=ON -DENABLE_STRICT_DEPENDENCIES=OFF + -DENABLE_REQUIREMENTS_INSTALL=OFF -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)/modules $(REPO_DIR) workingDirectory: $(BUILD_DIR) @@ -133,8 +144,10 @@ jobs: displayName: 'List install files' - script: | + set -e mkdir $(INSTALL_DIR)/opencv/ - cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -DCOMPONENT=tests -P cmake_install.cmake && cp -R $(REPO_DIR)/inference-engine/temp/opencv_4.5.2_ubuntu18/opencv/* $(INSTALL_DIR)/opencv/ + cmake 
-DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -DCOMPONENT=tests -P cmake_install.cmake + cp -R $(REPO_DIR)/inference-engine/temp/opencv_4.5.2_ubuntu20/opencv/* $(INSTALL_DIR)/opencv/ workingDirectory: $(BUILD_DIR) displayName: 'Install tests' @@ -149,11 +162,26 @@ jobs: workingDirectory: $(BUILD_SAMPLES_DIR) displayName: 'Build c samples' + - script: rm -fr $(BUILD_DIR) + displayName: 'Clean build dir' + continueOnError: false + + - script: | + export MO_ROOT=$(INSTALL_DIR)/deployment_tools/model_optimizer + . $(SETUPVARS) -pyver 3.8 && python3 -m pytest -s $(INSTALL_DIR)/deployment_tools/model_optimizer/unit_tests --junitxml=TEST-ModelOptimizer.xml + displayName: 'Model Optimizer UT' + continueOnError: false + - script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU* --gtest_output=xml:TEST-NGraphUT.xml + workingDirectory: $(INSTALL_TEST_DIR) displayName: 'nGraph UT' continueOnError: false - # . $(SETUPVARS) && python3 $(WORK_DIR)/gtest-parallel/gtest_parallel.py $(INSTALL_TEST_DIR)/InferenceEngineUnitTests --workers=$(WORKERS_NUMBER) --dump_json_test_results=InferenceEngineUnitTests.json --gtest_filter=*smoke* -- --gtest_print_time=1 + - script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/paddlepaddle_tests --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU* --gtest_output=xml:TEST-PaddlePaddle.xml + displayName: 'PaddlePaddle Frontend UT' + continueOnError: false + + # . $(SETUPVARS) && python3 $(WORK_DIR)/gtest-parallel/gtest_parallel.py $(INSTALL_TEST_DIR)/InferenceEngineUnitTests --workers=16 --dump_json_test_results=InferenceEngineUnitTests.json --gtest_filter=*smoke* -- --gtest_print_time=1 - script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/InferenceEngineUnitTests --gtest_print_time=1 --gtest_output=xml:TEST-InferenceEngineUnitTests.xml displayName: 'IE UT old' continueOnError: false @@ -201,10 +229,9 @@ jobs: export DATA_PATH=$(MODELS_PATH) export MODELS_PATH=$(MODELS_PATH) cd $(REPO_DIR)/inference-engine/ie_bridges/python/tests - . $(SETUPVARS) -pyver 3.6 && pytest pytest --junitxml=TEST-PythonAPI.xml + . 
$(SETUPVARS) -pyver 3.8 && python3 -m pytest --junitxml=TEST-PythonAPI.xml displayName: 'Python API Tests' continueOnError: false - enabled: false - task: PublishTestResults@2 condition: always() diff --git a/.ci/azure/linux_conditional_compilation.yml b/.ci/azure/linux_conditional_compilation.yml index 6d2d33574b7295..a4063d2c9031f0 100644 --- a/.ci/azure/linux_conditional_compilation.yml +++ b/.ci/azure/linux_conditional_compilation.yml @@ -4,20 +4,18 @@ jobs: timeoutInMinutes: 90 pool: - name: LIN_VMSS_VENV_F16S_WU2 + name: LIN_VMSS_VENV_F16S_U20_WU2 variables: system.debug: true VSTS_HTTP_RETRY: 5 VSTS_HTTP_TIMEOUT: 200 - WORKERS_NUMBER: 16 BUILD_TYPE: Release REPO_DIR: $(Build.Repository.LocalPath) OPENVINO_CONTRIB_REPO_DIR: $(REPO_DIR)/../openvino_contrib MODELS_PATH: $(REPO_DIR)/../testdata WORK_DIR: $(Pipeline.Workspace)/_w BUILD_DIR: $(WORK_DIR)/build - BIN_DIR: $(REPO_DIR)/bin/intel64/$(BUILD_TYPE) INSTALL_DIR: $(WORK_DIR)/install_pkg SETUPVARS: $(INSTALL_DIR)/bin/setupvars.sh @@ -30,6 +28,7 @@ jobs: echo Python info ; which python ; python --version echo Java info ; which java ; java -version echo gcc info ; which gcc ; gcc --version + echo cmake info ; which cmake ; cmake --version lsb_release env cat /proc/cpuinfo @@ -53,10 +52,11 @@ jobs: path: openvino - script: | - sudo apt --assume-yes install libusb-1.0-0-dev + set -e + $(REPO_DIR)/install_build_dependencies.sh python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/requirements.txt # Speed up build - wget https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-linux.zip + wget https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-linux.zip unzip ninja-linux.zip sudo cp -v ninja /usr/local/bin/ workingDirectory: $(WORK_DIR) @@ -76,12 +76,14 @@ jobs: - script: ninja workingDirectory: $(BUILD_DIR) - displayName: 'Build' + displayName: 'Build LinCC' - script: ls -alR $(REPO_DIR)/bin/ - displayName: 'List files' + displayName: 'List bin files' - script: cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P cmake_install.cmake workingDirectory: $(BUILD_DIR) displayName: 'Install' + - script: ls -alR $(INSTALL_DIR) + displayName: 'List install files' diff --git a/.ci/azure/linux_ngraph_onnx.yml b/.ci/azure/linux_ngraph_onnx.yml index 28326c890536d5..c6071fc127ffeb 100644 --- a/.ci/azure/linux_ngraph_onnx.yml +++ b/.ci/azure/linux_ngraph_onnx.yml @@ -20,13 +20,12 @@ jobs: timeoutInMinutes: 90 pool: - name: LIN_VMSS_VENV_ONNX_WU2 + name: LIN_VMSS_VENV_ONNX_U20_WU2 variables: system.debug: true VSTS_HTTP_RETRY: 5 VSTS_HTTP_TIMEOUT: 200 - WORKERS_NUMBER: 8 REPO_DIR: $(Build.Repository.LocalPath) WORK_DIR: $(Pipeline.Workspace)/_w MODELS_DIR: /mount/cinfsshare/onnxtestdata @@ -43,6 +42,7 @@ jobs: echo Python info ; which python ; python --version echo Java info ; which java ; java -version echo gcc info ; which gcc ; gcc --version + echo cmake info ; which cmake ; cmake --version lsb_release env cat /proc/cpuinfo @@ -68,16 +68,23 @@ jobs: submodules: recursive path: openvino - - script: docker build --tag=openvino-onnx-ci-image --file=.ci/openvino-onnx/Dockerfile --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg PROTOBUF_LITE=$(PROTOBUF_LITE) . 
+ - script: | + set -e + sudo apt --assume-yes install git-lfs uidmap + curl -fsSL https://get.docker.com -o get-docker.sh + sudo sh get-docker.sh + workingDirectory: $(WORK_DIR) + displayName: 'Install dependencies' + + - script: sudo docker build --tag=openvino-onnx-ci-image --file=.ci/openvino-onnx/Dockerfile --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg PROTOBUF_LITE=$(PROTOBUF_LITE) . displayName: 'Docker build $(BUILD_TYPE) protobuf-lite: $(PROTOBUF_LITE)' - script: ngraph/python/tests/test_onnx/model_zoo_preprocess.sh -d $(TMP_DIR) -o -s "$(ONNX_MODEL_ZOO_SHA)" displayName: 'Get models' condition: ne(variables['BUILD_TYPE'], 'Debug') - - script: sudo fallocate -l 48G /swapfile ; sudo mkswap /swapfile ; sudo swapon /swapfile ; df ; free -h + - script: sudo fallocate -l 64G /swapfile ; sudo mkswap /swapfile ; sudo swapon /swapfile ; df ; free -h displayName: 'Create swap' - - script: | - docker run --name openvino-onnx-ci-container --volume $(TMP_DIR)/model_zoo/onnx_model_zoo_$(ONNX_MODEL_ZOO_SHA):/root/.onnx/model_zoo/onnx_model_zoo --volume $(MODELS_DIR)/msft:/root/.onnx/model_zoo/MSFT openvino-onnx-ci-image /bin/bash -c "$(TOX_COMMAND)" + - script: sudo docker run --name openvino-onnx-ci-container --volume $(TMP_DIR)/model_zoo/onnx_model_zoo_$(ONNX_MODEL_ZOO_SHA):/root/.onnx/model_zoo/onnx_model_zoo --volume $(MODELS_DIR)/msft:/root/.onnx/model_zoo/MSFT openvino-onnx-ci-image /bin/bash -c "$(TOX_COMMAND)" displayName: 'Docker run $(BUILD_TYPE) protobuf-lite: $(PROTOBUF_LITE)' diff --git a/.ci/azure/linux_onnxruntime.yml b/.ci/azure/linux_onnxruntime.yml index fce8fdddcc4f91..0229c37c0b09d0 100644 --- a/.ci/azure/linux_onnxruntime.yml +++ b/.ci/azure/linux_onnxruntime.yml @@ -3,23 +3,23 @@ jobs: timeoutInMinutes: 90 pool: - name: LIN_VMSS_VENV_ONNX_WU2 + name: LIN_VMSS_VENV_ONNX_U20_WU2 variables: system.debug: true VSTS_HTTP_RETRY: 5 VSTS_HTTP_TIMEOUT: 200 - WORKERS_NUMBER: 8 BUILD_TYPE: Release REPO_DIR: $(Build.Repository.LocalPath) ONNXRUNTIME_REPO_DIR: $(REPO_DIR)/../onnxruntime WORK_DIR: $(Pipeline.Workspace)/_w MODELS_DIR: /mount/cinfsshare/onnxtestdata TMP_DIR: /mnt/tmp - INSTALL_DIR: $(WORK_DIR)/install_pkg + INSTALL_DIR: $(WORK_DIR)/install_pkg/openvino BUILD_DIR: $(WORK_DIR)/build ONNXRUNTIME_UTILS: $(REPO_DIR)/.ci/azure/ci_utils/onnxruntime ONNXRUNTIME_BUILD_DIR: $(ONNXRUNTIME_REPO_DIR)/build + steps: - script: | curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01" @@ -29,6 +29,7 @@ jobs: echo Python info ; which python ; python --version echo Java info ; which java ; java -version echo gcc info ; which gcc ; gcc --version + echo cmake info ; which cmake ; cmake --version lsb_release env cat /proc/cpuinfo @@ -60,15 +61,14 @@ jobs: displayName: 'Clone onnxruntime' - script: | - sudo apt --assume-yes install libusb-1.0-0-dev - # For opencv-python: setuptools and upgrade - sudo apt-get install python3-setuptools + set -e + $(REPO_DIR)/install_build_dependencies.sh python3 -m pip install --upgrade pip python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/requirements.txt # For running Python API tests python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/src/requirements-dev.txt # Speed up build - wget https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-linux.zip + wget https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-linux.zip unzip ninja-linux.zip sudo cp -v ninja /usr/local/bin/ # Speed up tests @@ -83,7 +83,7 @@ jobs: -GNinja 
-DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PYTHON=ON - -DPYTHON_EXECUTABLE=/usr/bin/python3.6 + -DPYTHON_EXECUTABLE=/usr/bin/python3.8 -DENABLE_VPU=OFF -DENABLE_GNA=OFF -DENABLE_OPENCV=OFF @@ -95,16 +95,17 @@ jobs: -DENABLE_SAMPLES=OFF -DENABLE_SPEECH_DEMO=OFF -DNGRAPH_ONNX_IMPORT_ENABLE=ON + -DNGRAPH_ONNX_FRONTEND_ENABLE=ON -DNGRAPH_DEBUG_ENABLE=OFF $(REPO_DIR) workingDirectory: $(BUILD_DIR) - script: ninja workingDirectory: $(BUILD_DIR) - displayName: 'Build Lin' + displayName: 'Build Lin ONNX' - script: ls -alR $(REPO_DIR)/bin/ - displayName: 'List files' + displayName: 'List bin files' - script: cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P cmake_install.cmake workingDirectory: $(BUILD_DIR) @@ -112,10 +113,9 @@ jobs: - script: | source $(INSTALL_DIR)/bin/setupvars.sh - echo "2021.2" > $(INSTALL_DIR)/deployment_tools/inference_engine/version.txt CXXFLAGS="-Wno-error=deprecated-declarations" ./build.sh --config RelWithDebInfo --use_openvino CPU_FP32 --build_shared_lib --parallel --skip_tests --build_dir $(ONNXRUNTIME_BUILD_DIR) workingDirectory: $(ONNXRUNTIME_REPO_DIR) - displayName: 'Build ONNX Runtime' + displayName: 'Build Lin ONNX Runtime' - script: | source $(INSTALL_DIR)/bin/setupvars.sh diff --git a/.ci/azure/mac.yml b/.ci/azure/mac.yml index 04d4c16ea23344..b07ff48f78ccc3 100644 --- a/.ci/azure/mac.yml +++ b/.ci/azure/mac.yml @@ -22,7 +22,6 @@ jobs: system.debug: true VSTS_HTTP_RETRY: 5 VSTS_HTTP_TIMEOUT: 200 - WORKERS_NUMBER: 3 BUILD_TYPE: Release REPO_DIR: $(Build.Repository.LocalPath) OPENVINO_CONTRIB_REPO_DIR: $(REPO_DIR)/../openvino_contrib @@ -76,6 +75,7 @@ jobs: - script: | brew install cython brew install automake + python3 -m pip install -r $(REPO_DIR)/ngraph/test/requirements_test_onnx.txt # Speed up build brew install ninja # Speed up tests @@ -87,10 +87,7 @@ jobs: export PATH="/usr/local/opt/cython/bin:$PATH" export CC=gcc export CXX=g++ - # Disable errors with Ninja - export CXXFLAGS="-Wno-error=unused-command-line-argument" - export CFLAGS="-Wno-error=unused-command-line-argument" - cmake -GNinja -DVERBOSE_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PYTHON=ON -DENABLE_TESTS=ON -DENABLE_STRICT_DEPENDENCIES=OFF -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)/modules $(REPO_DIR) + cmake -GNinja -DVERBOSE_BUILD=ON -DENABLE_REQUIREMENTS_INSTALL=OFF -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PYTHON=ON -DENABLE_TESTS=ON -DENABLE_STRICT_DEPENDENCIES=OFF -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)/modules $(REPO_DIR) workingDirectory: $(BUILD_DIR) displayName: 'CMake' @@ -106,6 +103,7 @@ jobs: displayName: 'Install' - script: $(BIN_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU*:IE_CPU.onnx_model_sigmoid:IE_CPU/GRUSequenceOp.onnx_model_gru* --gtest_output=xml:TEST-NGraphUT.xml + workingDirectory: $(BIN_DIR) displayName: 'nGraph UT' continueOnError: false diff --git a/.ci/azure/windows.yml b/.ci/azure/windows.yml index c368776c8f4e4a..e5ec0486f9bcd2 100644 --- a/.ci/azure/windows.yml +++ b/.ci/azure/windows.yml @@ -22,7 +22,6 @@ jobs: system.debug: true VSTS_HTTP_RETRY: 5 VSTS_HTTP_TIMEOUT: 200 - WORKERS_NUMBER: 8 BUILD_TYPE: Release REPO_DIR: $(Build.Repository.LocalPath) OPENVINO_CONTRIB_REPO_DIR: $(REPO_DIR)\..\openvino_contrib @@ -42,6 +41,7 @@ jobs: - script: | powershell -command "Invoke-RestMethod -Headers @{\"Metadata\"=\"true\"} -Method GET -Uri http://169.254.169.254/metadata/instance/compute?api-version=2019-06-01 | format-custom" where python3 + python3 --version where python python --version where java @@ 
-83,7 +83,18 @@ jobs: path: testdata - script: | - certutil -urlcache -split -f https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-win.zip ninja-win.zip + python -m pip install --upgrade pip + rem For running Python API tests + python -m pip install -r $(REPO_DIR)\inference-engine\ie_bridges\python\src\requirements-dev.txt + rem For running PaddlePaddle frontend unit tests + python -m pip install -r $(REPO_DIR)\ngraph\test\frontend\paddlepaddle\requirements_dev.txt + rem For running ONNX frontend unit tests + python -m pip install -r $(REPO_DIR)\ngraph\test\requirements_test_onnx.txt + rem For MO unit tests + python -m pip install -r $(REPO_DIR)\model-optimizer\requirements.txt + python -m pip install -r $(REPO_DIR)\model-optimizer\requirements_dev.txt + rem Speed up build + certutil -urlcache -split -f https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-win.zip ninja-win.zip powershell -command "Expand-Archive -Force ninja-win.zip" git clone https://github.com/google/gtest-parallel.git workingDirectory: $(WORK_DIR) @@ -91,7 +102,7 @@ jobs: - script: | set PATH=$(WORK_DIR)\ninja-win;%PATH% - call "$(MSVS_VARS_PATH)" && cmake -GNinja -DENABLE_FASTER_BUILD=ON -DENABLE_TEMPLATE_PLUGIN=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_TESTS=ON -DENABLE_STRICT_DEPENDENCIES=OFF -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)\modules -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(REPO_DIR) + call "$(MSVS_VARS_PATH)" && cmake -GNinja -DENABLE_REQUIREMENTS_INSTALL=OFF -DENABLE_FASTER_BUILD=ON -DENABLE_TEMPLATE_PLUGIN=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_TESTS=ON -DENABLE_STRICT_DEPENDENCIES=OFF -DENABLE_PYTHON=ON -DPYTHON_EXECUTABLE="C:\hostedtoolcache\windows\Python\3.7.6\x64\python.exe" -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)\modules -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(REPO_DIR) workingDirectory: $(BUILD_DIR) displayName: 'CMake' @@ -129,10 +140,19 @@ jobs: workingDirectory: $(BUILD_SAMPLES_DIR) displayName: 'Build c samples' + - script: rd /Q /S $(BUILD_DIR) + displayName: 'Clean build dir' + continueOnError: false + - script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU* --gtest_output=xml:TEST-NGraphUT.xml + workingDirectory: $(INSTALL_TEST_DIR) displayName: 'nGraph UT' continueOnError: false + - script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\paddlepaddle_tests --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU* --gtest_output=xml:TEST-PaddlePaddle.xml + displayName: 'PaddlePaddle Frontend UT' + continueOnError: false + - script: | set PATH=$(IB_DIR);%PATH% call $(SETUPVARS) && "$(IB_TESTCONSOLE)" $(INSTALL_TEST_DIR)\InferenceEngineUnitTests.exe --gtest_output=xml:TEST-InferenceEngineUnitTests-IB.xml diff --git a/.ci/openvino-onnx/Dockerfile b/.ci/openvino-onnx/Dockerfile index 9b0f48cf66cc3e..315598225627e0 100644 --- a/.ci/openvino-onnx/Dockerfile +++ b/.ci/openvino-onnx/Dockerfile @@ -69,6 +69,7 @@ RUN cmake .. 
\ -DENABLE_PYTHON=ON \ -DPYTHON_EXECUTABLE=/usr/bin/python3 \ -DNGRAPH_ONNX_IMPORT_ENABLE=ON \ + -DNGRAPH_ONNX_FRONTEND_ENABLE=ON \ -DNGRAPH_DEBUG_ENABLE=OFF \ -DCMAKE_INSTALL_PREFIX=/openvino/dist \ -DNGRAPH_USE_PROTOBUF_LITE=${PROTOBUF_LITE} diff --git a/.github/workflows/code_style.yml b/.github/workflows/code_style.yml index 607fe2cb64ae1a..7969cf13aa15e9 100644 --- a/.github/workflows/code_style.yml +++ b/.github/workflows/code_style.yml @@ -25,7 +25,7 @@ jobs: run: | mkdir build cd build - cmake -DENABLE_PYTHON=ON -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT .. + cmake -DENABLE_PYTHON=ON -DENABLE_TESTS=ON -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT .. - name: Check code style run: cmake --build build --target clang_format_check_all diff --git a/.gitmodules b/.gitmodules index d3f72b54c4f9c5..8aae5ad4da3d6d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -40,7 +40,7 @@ ignore = dirty [submodule "thirdparty/onnx"] path = thirdparty/onnx/onnx - url = https://github.com/openvinotoolkit/onnx.git + url = https://github.com/onnx/onnx.git [submodule "thirdparty/protobuf"] path = thirdparty/protobuf/protobuf url = https://github.com/protocolbuffers/protobuf.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 6019fe63d931f1..61a96ae9f4c816 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,50 +42,6 @@ endforeach() # Build # -function(build_ngraph) - function(ngraph_set option value) - if(NOT DEFINED ${option}) - set(${option} ${value} CACHE BOOL "" FORCE) - endif() - endfunction() - - if(ENABLE_TESTS AND NOT ANDROID) - ngraph_set(NGRAPH_UNIT_TEST_ENABLE ON) - else() - ngraph_set(NGRAPH_UNIT_TEST_ENABLE OFF) - endif() - - if(NOT (ANDROID OR WINDOWS_STORE OR (MSVC AND (ARM OR AARCH64)) )) - ngraph_set(NGRAPH_ONNX_IMPORT_ENABLE ON) - ngraph_set(NGRAPH_PDPD_FRONTEND_ENABLE ON) - else() - ngraph_set(NGRAPH_ONNX_IMPORT_ENABLE OFF) - ngraph_set(NGRAPH_PDPD_FRONTEND_ENABLE OFF) - endif() - - if(CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$") - ie_add_compiler_flags(-Wno-error=uninitialized -Wno-error=literal-conversion) - elseif(UNIX) - ie_add_compiler_flags(-Wno-error=maybe-uninitialized -Wno-error=return-type) - endif() - - # WA for GCC 7.0 - if (UNIX) - ie_add_compiler_flags(-Wno-error=return-type -Wno-undef) - elseif(WIN32) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4308 /wd4146 /wd4703 /wd4244 /wd4819") - endif() - - if(ENABLE_LTO) - set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE ON) - endif() - - ie_cpack_add_component(ngraph REQUIRED) - ie_cpack_add_component(ngraph_dev REQUIRED DEPENDS ngraph) - - add_subdirectory(ngraph) -endfunction() - function(openvino_developer_export_targets) cmake_parse_arguments(EXPORT "" "COMPONENT" "TARGETS" ${ARGN}) @@ -118,9 +74,15 @@ function(openvino_developer_export_targets) "A list of OpenVINO exported components" FORCE) endfunction() +ie_cpack_add_component(ngraph REQUIRED) +ie_cpack_add_component(ngraph_dev REQUIRED DEPENDS ngraph) + +# add target with processed tests model zoo +include(cmake/test_model_zoo.cmake) + add_subdirectory(thirdparty) add_subdirectory(openvino) -build_ngraph() +add_subdirectory(ngraph) add_subdirectory(inference-engine) # for Template plugin diff --git a/cmake/coverage.cmake b/cmake/coverage.cmake index 60c137337b3173..4d8976e0a80beb 100644 --- a/cmake/coverage.cmake +++ b/cmake/coverage.cmake @@ -92,9 +92,15 @@ ie_coverage_genhtml(INFO_FILE "ngraph" if(NGRAPH_ONNX_IMPORT_ENABLE) ie_coverage_extract(INPUT "openvino" OUTPUT "onnx_importer" - PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx_common*" 
- "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx_editor*" - "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx_import*") + PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx/onnx_common*" + "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx/onnx_import*") ie_coverage_genhtml(INFO_FILE "onnx_importer" PREFIX "${OV_COVERAGE_BASE_DIRECTORY}") endif() + +if(NGRAPH_ONNX_FRONTEND_ENABLE) + ie_coverage_extract(INPUT "openvino" OUTPUT "onnx_ngraph_frontend" + PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx/frontend*") + ie_coverage_genhtml(INFO_FILE "onnx_ngraph_frontend" + PREFIX "${OV_COVERAGE_BASE_DIRECTORY}") +endif() diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake index e84a7cdc7180cc..82f98b4c515ff3 100644 --- a/cmake/dependencies.cmake +++ b/cmake/dependencies.cmake @@ -7,10 +7,6 @@ cmake_policy(SET CMP0054 NEW) # TODO: fix it set_temp_directory(TEMP "${IE_MAIN_SOURCE_DIR}") -if(CMAKE_CROSSCOMPILING) - set(CMAKE_STAGING_PREFIX "${TEMP}") -endif() - if(ENABLE_SAME_BRANCH_FOR_MODELS) branchName(MODELS_BRANCH) else() @@ -315,25 +311,25 @@ if(ENABLE_SPEECH_DEMO) if(DEFINED IE_PATH_TO_DEPS) if(WIN32 AND X86_64) RESOLVE_DEPENDENCY(SPEECH_LIBS_AND_DEMOS - ARCHIVE_WIN "speech_demo_1.0.0.774_windows.zip" + ARCHIVE_WIN "speech_demo_1.0.0.780_windows.zip" VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+.[0-9]+).*" - TARGET_PATH "${TEMP}/speech_demo_1.0.0.774" - SHA256 "67b25170be5e89a4f0e90e8b39623b60c9a15b965c30329385e295fcd2edc856") + TARGET_PATH "${TEMP}/speech_demo_1.0.0.780" + SHA256 "957bd274a1f6dc1d83a46879c7ef3b3b06f17d11af85cc45c18919051d145abd") debug_message(STATUS "speech_libs_and_demos=" ${SPEECH_LIBS_AND_DEMOS}) elseif(LINUX AND X86_64) if(LINUX_OS_NAME STREQUAL "CentOS 7" OR CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9") RESOLVE_DEPENDENCY(SPEECH_LIBS_AND_DEMOS - ARCHIVE_LIN "speech_demo_1.0.0.774_centos.tgz" + ARCHIVE_LIN "speech_demo_1.0.0.780_centos.tgz" VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+.[0-9]+).*" - TARGET_PATH "${TEMP}/speech_demo_1.0.0.774" - SHA256 "5ec3b7be9ae05376aefae5bd5fd4a39b12c274e82817fd3218120b8e8fc8ff5a") + TARGET_PATH "${TEMP}/speech_demo_1.0.0.780" + SHA256 "6d8d1111d0e662fe71d71cd3debad2995f6fb6fe5df3b92196dae06ff7abdf44") debug_message(STATUS "speech_libs_and_demos=" ${SPEECH_LIBS_AND_DEMOS}) else() RESOLVE_DEPENDENCY(SPEECH_LIBS_AND_DEMOS - ARCHIVE_LIN "speech_demo_1.0.0.774_linux.tgz" + ARCHIVE_LIN "speech_demo_1.0.0.780_linux.tgz" VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+.[0-9]+).*" - TARGET_PATH "${TEMP}/speech_demo_1.0.0.774" - SHA256 "f0bbd0a6218b0365e7cfb1f860b34e4ace7e0d47dd60b369cdea8a480329810f") + TARGET_PATH "${TEMP}/speech_demo_1.0.0.780" + SHA256 "0ec6f1e47c00d781dc918af5d3055ab474ff47b9978dd6fe2add73e3339b0763") debug_message(STATUS "speech_libs_and_demos=" ${SPEECH_LIBS_AND_DEMOS}) endif() else() diff --git a/cmake/developer_package/IEDevScriptsConfig.cmake b/cmake/developer_package/IEDevScriptsConfig.cmake index 46423aa61c72e7..a2b83ed9726bcd 100644 --- a/cmake/developer_package/IEDevScriptsConfig.cmake +++ b/cmake/developer_package/IEDevScriptsConfig.cmake @@ -187,8 +187,8 @@ set_property(GLOBAL PROPERTY USE_FOLDERS ON) # Enable CMAKE__COMPILER_ID AppleClang set(CMAKE_POLICY_DEFAULT_CMP0025 NEW) -set(CMAKE_WARN_DEPRECATED OFF) -set(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION ON) +set(CMAKE_WARN_DEPRECATED OFF CACHE BOOL "Don't warn about obsolete cmake versions in 3rdparty") +set(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION ON CACHE BOOL "Warn about absolute paths in destination") # LTO @@ -254,8 +254,10 @@ 
endfunction() function(ie_check_pip_package name message_type) find_package(PythonInterp 3 REQUIRED) + get_filename_component(PYTHON_EXEC_DIR ${PYTHON_EXECUTABLE} DIRECTORY) execute_process( COMMAND ${PYTHON_EXECUTABLE} -m pip show ${name} + WORKING_DIRECTORY ${PYTHON_EXEC_DIR} RESULT_VARIABLE PIP_EXIT_CODE OUTPUT_QUIET ) diff --git a/cmake/developer_package/compile_flags/os_flags.cmake b/cmake/developer_package/compile_flags/os_flags.cmake index 072f2a0dcee22a..d2c51130a9567d 100644 --- a/cmake/developer_package/compile_flags/os_flags.cmake +++ b/cmake/developer_package/compile_flags/os_flags.cmake @@ -3,6 +3,7 @@ # include(ProcessorCount) +include(CheckCXXCompilerFlag) # # Disables deprecated warnings generation @@ -67,13 +68,13 @@ function(ie_sse42_optimization_flags flags) if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") # No such option for MSVC 2019 elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") - set(${flags} /arch:SSE4.2 /QxSSE4.2 PARENT_SCOPE) + set(${flags} /QxSSE4.2 PARENT_SCOPE) else() message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}") endif() else() if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") - set(${flags} -msse4.2 -xSSE4.2 PARENT_SCOPE) + set(${flags} -xSSE4.2 PARENT_SCOPE) else() set(${flags} -msse4.2 PARENT_SCOPE) endif() @@ -94,7 +95,7 @@ function(ie_avx2_optimization_flags flags) endif() else() if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") - set(${flags} -march=core-avx2 -xCORE-AVX2 -mtune=core-avx2 PARENT_SCOPE) + set(${flags} -xCORE-AVX2 PARENT_SCOPE) else() set(${flags} -mavx2 -mfma PARENT_SCOPE) endif() @@ -151,6 +152,24 @@ function(ie_arm_neon_optimization_flags flags) endif() endfunction() +# +# Disables all warnings for 3rd party targets +# +function(ov_disable_all_warnings) + foreach(target IN LISTS ARGN) + if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + target_compile_options(${target} PRIVATE /WX-) + elseif(CMAKE_COMPILER_IS_GNUCXX OR OV_COMPILER_IS_CLANG) + target_compile_options(${target} PRIVATE -w) + elseif(UNIX AND CMAKE_CXX_COMPILER_ID STREQUAL "Intel") + # 193: zero used for undefined preprocessing identifier "XXX" + # 1011: missing return statement at end of non-void function "XXX" + # 2415: variable "xxx" of static storage duration was declared but never referenced + target_compile_options(${target} PRIVATE -diag-disable=warn,193,1011,2415) + endif() + endforeach() +endfunction() + # # Enables Link Time Optimization compilation # @@ -285,23 +304,27 @@ else() ie_add_compiler_flags(-Wreturn-type) ie_add_compiler_flags(-Wunused-variable) - # Disable noisy warnings - if (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") ie_add_compiler_flags(-Wswitch) elseif(UNIX) ie_add_compiler_flags(-Wuninitialized -Winit-self) if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - ie_add_compiler_flags(-Wno-error=switch) + ie_add_compiler_flags(-Winconsistent-missing-override + -Wstring-plus-int) else() ie_add_compiler_flags(-Wmaybe-uninitialized) + check_cxx_compiler_flag("-Wsuggest-override" SUGGEST_OVERRIDE_SUPPORTED) + if(SUGGEST_OVERRIDE_SUPPORTED) + set(CMAKE_CXX_FLAGS "-Wsuggest-override ${CMAKE_CXX_FLAGS}") + endif() endif() endif() + # Disable noisy warnings + if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") - ie_add_compiler_flags(-diag-disable=remark) - # noisy warnings from Intel Compiler 19.1.1.217 20200306 - ie_add_compiler_flags(-diag-disable=2196) + # 177: function "XXX" was declared but never referenced + ie_add_compiler_flags(-diag-disable=remark,177,2196) endif() # Linker flags @@ -309,10 +332,32 @@ else() if(APPLE) set(CMAKE_SHARED_LINKER_FLAGS 
"${CMAKE_SHARED_LINKER_FLAGS} -Wl,-dead_strip") set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -Wl,-dead_strip") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-dead_strip") elseif(LINUX) set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--gc-sections -Wl,--exclude-libs,ALL") set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -Wl,--gc-sections -Wl,--exclude-libs,ALL") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gc-sections -Wl,--exclude-libs,ALL") endif() endif() + +# Links provided libraries and include their INTERFACE_INCLUDE_DIRECTORIES as SYSTEM +function(link_system_libraries TARGET_NAME) + set(MODE PRIVATE) + + foreach(arg IN LISTS ARGN) + if(arg MATCHES "(PRIVATE|PUBLIC|INTERFACE)") + set(MODE ${arg}) + else() + if(TARGET "${arg}") + target_include_directories(${TARGET_NAME} + SYSTEM ${MODE} + $ + $ + ) + endif() + + target_link_libraries(${TARGET_NAME} + ${MODE} + ${arg} + ) + endif() + endforeach() +endfunction() diff --git a/cmake/developer_package/compile_flags/sanitizer.cmake b/cmake/developer_package/compile_flags/sanitizer.cmake index dbf351965079a3..298f42437997f9 100644 --- a/cmake/developer_package/compile_flags/sanitizer.cmake +++ b/cmake/developer_package/compile_flags/sanitizer.cmake @@ -6,7 +6,7 @@ include(CheckCXXCompilerFlag) if (ENABLE_SANITIZER) set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize=address") - CHECK_CXX_COMPILER_FLAG("-fsanitize-recover=address" SANITIZE_RECOVER_ADDRESS_SUPPORTED) + check_cxx_compiler_flag("-fsanitize-recover=address" SANITIZE_RECOVER_ADDRESS_SUPPORTED) if (SANITIZE_RECOVER_ADDRESS_SUPPORTED) set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize-recover=address") endif() @@ -18,7 +18,9 @@ if (ENABLE_UB_SANITIZER) # TODO: Remove -fno-sanitize=null as thirdparty/ocl/clhpp_headers UBSAN compatibility resolved: # https://github.com/KhronosGroup/OpenCL-CLHPP/issues/17 set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize=undefined -fno-sanitize=null") - CHECK_CXX_COMPILER_FLAG("-fsanitize-recover=undefined" SANITIZE_RECOVER_UNDEFINED_SUPPORTED) + # TODO: Remove -Wno-maybe-uninitialized after CVS-61143 fix + set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -Wno-maybe-uninitialized") + check_cxx_compiler_flag("-fsanitize-recover=undefined" SANITIZE_RECOVER_UNDEFINED_SUPPORTED) if (SANITIZE_RECOVER_UNDEFINED_SUPPORTED) set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize-recover=undefined") endif() @@ -33,17 +35,18 @@ endif() # common sanitizer options if (DEFINED SANITIZER_COMPILER_FLAGS) - # ensure sumbols are present - set(SANITIZER_COMPILER_FLAGS "-g -fno-omit-frame-pointer") + # ensure symbols are present + set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -g -fno-omit-frame-pointer") + if(NOT OV_COMPILER_IS_CLANG) + # GPU plugin tests compilation is slow with -fvar-tracking-assignments on GCC. + # Clang has no var-tracking-assignments. 
+ set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fno-var-tracking-assignments") + endif() # prevent unloading libraries at runtime, so sanitizer can resolve their symbols set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -Wl,-z,nodelete") - if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fuse-ld=gold") - elseif(CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$" AND NOT WIN32) - if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.0) - set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fuse-ld=lld") - endif() + if(OV_COMPILER_IS_CLANG AND NOT WIN32 AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.0) + set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fuse-ld=lld") endif() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SANITIZER_COMPILER_FLAGS}") diff --git a/cmake/developer_package/compile_flags/sdl.cmake b/cmake/developer_package/compile_flags/sdl.cmake index 10a1e86ad6d48f..7690a9031d864a 100644 --- a/cmake/developer_package/compile_flags/sdl.cmake +++ b/cmake/developer_package/compile_flags/sdl.cmake @@ -23,7 +23,7 @@ if (CMAKE_BUILD_TYPE STREQUAL "Release") if (NOT ENABLE_SANITIZER) set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} -s") endif() - elseif(CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$") + elseif(OV_COMPILER_IS_CLANG) set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} -fstack-protector-all") elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") if (NOT ENABLE_SANITIZER) diff --git a/cmake/developer_package/features.cmake b/cmake/developer_package/features.cmake index 487dea8c7e382a..03c94ed41f7569 100644 --- a/cmake/developer_package/features.cmake +++ b/cmake/developer_package/features.cmake @@ -44,13 +44,9 @@ ie_option (BUILD_SHARED_LIBS "Build as a shared library" ON) ie_dependent_option (ENABLE_FASTER_BUILD "Enable build features (PCH, UNITY) to speed up build time" OFF "CMAKE_VERSION VERSION_GREATER_EQUAL 3.16" OFF) -if(NOT DEFINED ENABLE_CPPLINT) - ie_dependent_option (ENABLE_CPPLINT "Enable cpplint checks during the build" ON "UNIX;NOT ANDROID" OFF) -endif() +ie_dependent_option (ENABLE_CPPLINT "Enable cpplint checks during the build" ON "UNIX;NOT ANDROID" OFF) -if(NOT DEFINED ENABLE_CPPLINT_REPORT) - ie_dependent_option (ENABLE_CPPLINT_REPORT "Build cpplint report instead of failing the build" OFF "ENABLE_CPPLINT" OFF) -endif() +ie_dependent_option (ENABLE_CPPLINT_REPORT "Build cpplint report instead of failing the build" OFF "ENABLE_CPPLINT" OFF) ie_dependent_option (ENABLE_CLANG_FORMAT "Enable clang-format checks during the build" ON "UNIX;NOT ANDROID" OFF) @@ -58,7 +54,7 @@ ie_option (VERBOSE_BUILD "shows extra information about build" OFF) ie_option (ENABLE_UNSAFE_LOCATIONS "skip check for MD5 for dependency" OFF) -ie_dependent_option (ENABLE_FUZZING "instrument build for fuzzing" OFF "CMAKE_CXX_COMPILER_ID MATCHES ^(Apple)?Clang$; NOT WIN32" OFF) +ie_dependent_option (ENABLE_FUZZING "instrument build for fuzzing" OFF "OV_COMPILER_IS_CLANG; NOT WIN32" OFF) # # Check features diff --git a/cmake/developer_package/packaging.cmake b/cmake/developer_package/packaging.cmake index 4cb21210d4a166..4095a16157c068 100644 --- a/cmake/developer_package/packaging.cmake +++ b/cmake/developer_package/packaging.cmake @@ -53,7 +53,9 @@ macro(ie_cpack) set(CPACK_PACKAGE_VENDOR "Intel Corporation") set(CPACK_VERBATIM_VARIABLES ON) set(CPACK_COMPONENTS_ALL ${ARGN}) - set(CPACK_STRIP_FILES ON) + if (NOT DEFINED CPACK_STRIP_FILES) + set(CPACK_STRIP_FILES ON) + endif() set(CPACK_THREADS 8) string(REPLACE "/" "_" CPACK_PACKAGE_VERSION 
"${CI_BUILD_NUMBER}") diff --git a/cmake/developer_package/target_flags.cmake b/cmake/developer_package/target_flags.cmake index 181c4dd4187e1b..d4fd9837647005 100644 --- a/cmake/developer_package/target_flags.cmake +++ b/cmake/developer_package/target_flags.cmake @@ -55,3 +55,9 @@ endif() if(UNIX AND NOT APPLE) set(LINUX ON) endif() + +if(CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$") + set(OV_COMPILER_IS_CLANG ON) +else() + set(OV_COMPILER_IS_CLANG OFF) +endif() diff --git a/cmake/developer_package/version.cmake b/cmake/developer_package/version.cmake index 054bc10c78ed9a..8e6433034d0754 100644 --- a/cmake/developer_package/version.cmake +++ b/cmake/developer_package/version.cmake @@ -42,7 +42,7 @@ macro(ie_parse_ci_build_number) return() endif() - set(ie_version_hpp "${OpenVINO_SOURCE_DIR}/inference-engine/include/ie_version.hpp") + set(ie_version_hpp "${OpenVINO_SOURCE_DIR}/inference-engine/include/ie/ie_version.hpp") if(NOT EXISTS ${ie_version_hpp}) message(FATAL_ERROR "File ie_version.hpp with IE_VERSION definitions is not found") endif() diff --git a/cmake/features.cmake b/cmake/features.cmake index 1f0c198913cc23..26bf48f3824f8b 100644 --- a/cmake/features.cmake +++ b/cmake/features.cmake @@ -38,8 +38,6 @@ ie_dependent_option (ENABLE_PYTHON "enables ie python bridge build" OFF "PYTHONL find_package(PythonInterp 3 QUIET) ie_dependent_option (ENABLE_DOCS "Build docs using Doxygen" OFF "PYTHONINTERP_FOUND" OFF) -ie_option (ENABLE_SYSTEM_PUGIXML "use the system copy of pugixml" OFF) - # # Inference Engine specific options # @@ -100,7 +98,7 @@ ie_dependent_option (ENABLE_FUNCTIONAL_TESTS "functional tests" ON "ENABLE_TESTS ie_dependent_option (ENABLE_SAMPLES "console samples are part of inference engine package" ON "NOT MINGW" OFF) -ie_dependent_option (ENABLE_SPEECH_DEMO "enable speech demo integration" ON "NOT APPLE;NOT ANDROID;X86 OR X86_64" OFF) +ie_dependent_option (ENABLE_SPEECH_DEMO "enable speech demo integration" ON "NOT APPLE;NOT ANDROID;X86_64" OFF) ie_option (ENABLE_OPENCV "enables OpenCV" ON) @@ -112,7 +110,33 @@ ie_dependent_option(ENABLE_TBB_RELEASE_ONLY "Only Release TBB libraries are link ie_option (ENABLE_SYSTEM_PUGIXML "use the system copy of pugixml" OFF) -ie_option (ENABLE_CPU_DEBUG_CAPS "enable CPU debug capabilities at runtime" OFF) +ie_option (ENABLE_DEBUG_CAPS "enable OpenVINO debug capabilities at runtime" OFF) + +ie_dependent_option (ENABLE_GPU_DEBUG_CAPS "enable GPU debug capabilities at runtime" ON "ENABLE_DEBUG_CAPS" OFF) + +ie_dependent_option (ENABLE_CPU_DEBUG_CAPS "enable CPU debug capabilities at runtime" ON "ENABLE_DEBUG_CAPS" OFF) + +if(ANDROID OR WINDOWS_STORE OR (MSVC AND (ARM OR AARCH64))) + set(protoc_available OFF) +else() + set(protoc_available ON) +endif() + +ie_dependent_option(NGRAPH_ONNX_IMPORT_ENABLE "Enable ONNX importer" ON "protoc_available" OFF) +ie_dependent_option(NGRAPH_ONNX_FRONTEND_ENABLE "Enable ONNX FrontEnd" OFF "NGRAPH_ONNX_IMPORT_ENABLE" OFF) +ie_dependent_option(NGRAPH_PDPD_FRONTEND_ENABLE "Enable PaddlePaddle FrontEnd" ON "protoc_available" OFF) +ie_dependent_option(NGRAPH_USE_PROTOBUF_LITE "Compiles and links with protobuf-lite" ON + "NGRAPH_ONNX_IMPORT_ENABLE" OFF) +ie_dependent_option(NGRAPH_USE_SYSTEM_PROTOBUF "Use system protobuf" OFF + "NGRAPH_ONNX_IMPORT_ENABLE OR NGRAPH_PDPD_FRONTEND_ENABLE" OFF) +ie_dependent_option(NGRAPH_UNIT_TEST_ENABLE "Enables ngraph unit tests" ON "ENABLE_TESTS;NOT ANDROID" OFF) +ie_dependent_option(NGRAPH_UNIT_TEST_BACKENDS_ENABLE "Control the building of unit tests using backends" ON + 
"NGRAPH_UNIT_TEST_ENABLE" OFF) +option(NGRAPH_DEBUG_ENABLE "Enable output for NGRAPH_DEBUG statements" OFF) +option(ENABLE_REQUIREMENTS_INSTALL "Dynamic dependencies install" ON) + +# WA for ngraph python build on Windows debug +list(REMOVE_ITEM IE_OPTIONS NGRAPH_UNIT_TEST_ENABLE NGRAPH_UNIT_TEST_BACKENDS_ENABLE) # # Process featues diff --git a/cmake/templates/InferenceEngineConfig.cmake.in b/cmake/templates/InferenceEngineConfig.cmake.in index 261edbf3d730f3..43408483f9af6e 100644 --- a/cmake/templates/InferenceEngineConfig.cmake.in +++ b/cmake/templates/InferenceEngineConfig.cmake.in @@ -73,6 +73,10 @@ function(_ie_target_no_deprecation_error) else() set(flags "-Wno-error=deprecated-declarations") endif() + if(CMAKE_CROSSCOMPILING) + set_target_properties(${ARGV} PROPERTIES + INTERFACE_LINK_OPTIONS "-Wl,--allow-shlib-undefined") + endif() set_target_properties(${ARGV} PROPERTIES INTERFACE_COMPILE_OPTIONS ${flags}) endif() diff --git a/cmake/templates/InferenceEngineDeveloperPackageConfig.cmake.in b/cmake/templates/InferenceEngineDeveloperPackageConfig.cmake.in index 4aca14b72bd46d..72af5ca89cadac 100644 --- a/cmake/templates/InferenceEngineDeveloperPackageConfig.cmake.in +++ b/cmake/templates/InferenceEngineDeveloperPackageConfig.cmake.in @@ -13,7 +13,7 @@ set_and_check(IE_MAIN_SOURCE_DIR "@IE_MAIN_SOURCE_DIR@") # HDDL # Variables to export in plugin's projects -set(ie_options "@IE_OPTIONS@;CMAKE_BUILD_TYPE;CMAKE_SKIP_RPATH;") +set(ie_options "@IE_OPTIONS@;CMAKE_BUILD_TYPE;CMAKE_SKIP_RPATH") list(APPEND ie_options CMAKE_CXX_COMPILER_LAUNCHER CMAKE_C_COMPILER_LAUNCHER) file(TO_CMAKE_PATH "${CMAKE_CURRENT_LIST_DIR}" cache_path) @@ -73,6 +73,9 @@ if(NOT MSVC) ie_add_compiler_flags(-Wno-error=unused-variable) if(CMAKE_COMPILER_IS_GNUCXX) ie_add_compiler_flags(-Wno-error=unused-but-set-variable) + if(SUGGEST_OVERRIDE_SUPPORTED) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-suggest-override") + endif() endif() endif() diff --git a/cmake/test_model_zoo.cmake b/cmake/test_model_zoo.cmake new file mode 100644 index 00000000000000..c3f158626cdabe --- /dev/null +++ b/cmake/test_model_zoo.cmake @@ -0,0 +1,131 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +function(ov_model_convert SRC DST OUT) + set(onnx_gen_script ${OpenVINO_SOURCE_DIR}/ngraph/test/models/onnx/onnx_prototxt_converter.py) + + file(GLOB_RECURSE prototxt_models RELATIVE "${SRC}" "${SRC}/*.prototxt") + file(GLOB_RECURSE xml_models RELATIVE "${SRC}" "${SRC}/*.xml") + file(GLOB_RECURSE bin_models RELATIVE "${SRC}" "${SRC}/*.bin") + file(GLOB_RECURSE onnx_models RELATIVE "${SRC}" "${SRC}/*.onnx") + file(GLOB_RECURSE data_models RELATIVE "${SRC}" "${SRC}/*.data") + + foreach(in_file IN LISTS prototxt_models xml_models bin_models onnx_models data_models) + get_filename_component(ext "${in_file}" EXT) + get_filename_component(rel_dir "${in_file}" DIRECTORY) + get_filename_component(name_we "${in_file}" NAME_WE) + set(model_source_dir "${SRC}/${rel_dir}") + + if(NOT NGRAPH_ONNX_IMPORT_ENABLE AND ext MATCHES "^\\.(onnx|prototxt)$") + # don't copy / process ONNX / prototxt files + continue() + endif() + + if(ext STREQUAL ".prototxt") + # convert model + set(rel_out_name "${name_we}.onnx") + if(rel_dir) + set(rel_out_name "${rel_dir}/${rel_out_name}") + endif() + else() + # copy as is + set(rel_out_name "${in_file}") + endif() + + set(full_out_name "${DST}/${rel_out_name}") + file(MAKE_DIRECTORY "${DST}/${rel_dir}") + + if(ext STREQUAL ".prototxt") + # convert .prototxt models to .onnx binary + 
add_custom_command(OUTPUT ${full_out_name} + COMMAND ${PYTHON_EXECUTABLE} ${onnx_gen_script} + "${SRC}/${in_file}" ${full_out_name} + DEPENDS ${onnx_gen_script} "${SRC}/${in_file}" + COMMENT "Generate ${rel_out_name}" + WORKING_DIRECTORY "${model_source_dir}") + else() + add_custom_command(OUTPUT ${full_out_name} + COMMAND "${CMAKE_COMMAND}" -E copy_if_different + "${SRC}/${in_file}" ${full_out_name} + DEPENDS ${onnx_gen_script} "${SRC}/${in_file}" + COMMENT "Copy ${rel_out_name}" + WORKING_DIRECTORY "${model_source_dir}") + endif() + list(APPEND files "${full_out_name}") + endforeach() + + set(${OUT} ${files} PARENT_SCOPE) +endfunction() + +ov_model_convert("${CMAKE_CURRENT_SOURCE_DIR}/ngraph/test" + "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/ngraph" + onnx_out_files) + +set(rel_path "inference-engine/tests/functional/inference_engine/onnx_reader") +ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}" + "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/onnx_reader" + ie_onnx_out_files) + +set(rel_path "inference-engine/tests/functional/inference_engine/ir_serialization") +ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}" + "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/ir_serialization" + ie_serialize_out_files) + +set(rel_path "inference-engine/tests/unit/frontends/onnx_import/models") +ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}" + "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/onnx_import" + ie_onnx_import_out_files) + +if(ENABLE_TESTS) + if(NGRAPH_ONNX_IMPORT_ENABLE AND ENABLE_REQUIREMENTS_INSTALL) + find_package(PythonInterp 3 REQUIRED) + + get_filename_component(PYTHON_EXEC_DIR ${PYTHON_EXECUTABLE} DIRECTORY) + execute_process(COMMAND "${PYTHON_EXECUTABLE}" -m pip --version + WORKING_DIRECTORY ${PYTHON_EXEC_DIR} + RESULT_VARIABLE pip3_exit_code + OUTPUT_VARIABLE pip3_version) + + if(NOT pip3_exit_code EQUAL 0) + message(FATAL_ERROR "Failed to extract pip module version") + endif() + + if(pip3_version MATCHES ".* ([0-9]+)+\.([0-9]+)([\.0-9 ]).*") + set(pip3_version ${CMAKE_MATCH_1}.${CMAKE_MATCH_2}) + else() + message(FATAL_ERROR "Failed to parse ${pip3_version}") + endif() + + message(STATUS "pip version is ${pip3_version}") + set(args --quiet) + if(pip3_version VERSION_GREATER 20.2.2) + list(APPEND args --use-feature=2020-resolver) + endif() + + set(reqs "${OpenVINO_SOURCE_DIR}/ngraph/test/requirements_test_onnx.txt") + add_custom_target(test_pip_prerequsites ALL + "${PYTHON_EXECUTABLE}" -m pip install ${args} -r ${reqs} + COMMENT "Install requirements_test.txt" + VERBATIM + SOURCES ${reqs}) + endif() + + add_custom_target(test_model_zoo DEPENDS ${onnx_out_files} + ${ie_onnx_out_files} + ${ie_serialize_out_files} + ${ie_onnx_import_out_files}) + + if(TARGET test_pip_prerequsites) + add_dependencies(test_model_zoo test_pip_prerequsites) + endif() + + if (NGRAPH_PDPD_FRONTEND_ENABLE) + add_dependencies(test_model_zoo paddlepaddle_test_models) + endif() + + install(DIRECTORY "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo" + DESTINATION tests COMPONENT tests EXCLUDE_FROM_ALL) + + set(TEST_MODEL_ZOO "./test_model_zoo" CACHE PATH "Path to test model zoo") +endif() diff --git a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_Object_Detection_API_Models.md b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_Object_Detection_API_Models.md index 6feec5f627a82e..eabe4840eb885a 100644 --- a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_Object_Detection_API_Models.md +++ 
b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_Object_Detection_API_Models.md @@ -1,6 +1,7 @@ # Converting TensorFlow* Object Detection API Models {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_Object_Detection_API_Models} > **NOTES**: +> * Starting with the 2022.1 release, the Model Optimizer can convert the TensorFlow\* Object Detection API Faster and Mask RCNNs topologies differently. By default, the Model Optimizer adds operation "Proposal" to the generated IR. This operation needs an additional input to the model with name "image_info" which should be fed with several values describing the pre-processing applied to the input image (refer to the [Proposal](../../../../ops/detection/Proposal_4.md) operation specification for more information). However, this input is redundant for the models trained and inferred with equal size images. Model Optimizer can generate IR for such models and insert operation [DetectionOutput](../../../../ops/detection/DetectionOutput_1.md) instead of `Proposal`. The `DetectionOutput` operation does not require additional model input "image_info" and moreover, for some models the produced inference results are closer to the original TensorFlow\* model. In order to trigger new behaviour the attribute "operation_to_add" in the corresponding JSON transformation configuration file should be set to value "DetectionOutput" instead of default one "Proposal". > * Starting with the 2021.1 release, the Model Optimizer converts the TensorFlow\* Object Detection API SSDs, Faster and Mask RCNNs topologies keeping shape-calculating sub-graphs by default, so topologies can be re-shaped in the Inference Engine using dedicated reshape API. Refer to [Using Shape Inference](../../../../IE_DG/ShapeInference.md) for more information on how to use this feature. It is possible to change the both spatial dimensions of the input image and batch size. > * To generate IRs for SSD topologies, the Model Optimizer creates a number of `PriorBoxClustered` layers instead of a constant node with prior boxes calculated for the particular input image size. This change allows you to reshape the topology in the Inference Engine using dedicated Inference Engine API. The reshaping is supported for all SSD topologies except FPNs which contain hardcoded shapes for some operations preventing from changing topology input shape. @@ -29,14 +30,16 @@ To convert a TensorFlow\* Object Detection API model, go to the `/d * `faster_rcnn_support_api_v1.13.json` --- for Faster R-CNN topologies trained using the TensorFlow\* Object Detection API version 1.13.X * `faster_rcnn_support_api_v1.14.json` --- for Faster R-CNN topologies trained using the TensorFlow\* Object Detection API version 1.14.0 up to 1.14.X inclusively * `faster_rcnn_support_api_v1.15.json` --- for Faster R-CNN topologies trained using the TensorFlow\* Object Detection API version 1.15.0 up to 2.0 - * `faster_rcnn_support_api_v2.0.json` --- for Faster R-CNN topologies trained using the TensorFlow\* Object Detection API version 2.0 or higher + * `faster_rcnn_support_api_v2.0.json` --- for Faster R-CNN topologies trained using the TensorFlow\* Object Detection API version 2.0 up to 2.3.X inclusively + * `faster_rcnn_support_api_v2.4.json` --- for Faster R-CNN topologies trained using the TensorFlow\* Object Detection API version 2.4 or higher * `mask_rcnn_support.json` --- for Mask R-CNN topologies from the TF 1.X models zoo trained with TensorFlow\* version 1.9.0 or lower. 
* `mask_rcnn_support_api_v1.7.json` --- for Mask R-CNN topologies trained using the TensorFlow\* Object Detection API version 1.7.0 up to 1.9.X inclusively * `mask_rcnn_support_api_v1.11.json` --- for Mask R-CNN topologies trained using the TensorFlow\* Object Detection API version 1.11.0 up to 1.12.X inclusively * `mask_rcnn_support_api_v1.13.json` --- for Mask R-CNN topologies trained using the TensorFlow\* Object Detection API version 1.13.0 up to 1.13.X inclusively * `mask_rcnn_support_api_v1.14.json` --- for Mask R-CNN topologies trained using the TensorFlow\* Object Detection API version 1.14.0 up to 1.14.X inclusively * `mask_rcnn_support_api_v1.15.json` --- for Mask R-CNN topologies trained using the TensorFlow\* Object Detection API version 1.15.0 up to 2.0 - * `mask_rcnn_support_api_v2.0.json` --- for Mask R-CNN topologies trained using the TensorFlow\* Object Detection API version 2.0 or higher + * `mask_rcnn_support_api_v2.0.json` --- for Mask R-CNN topologies trained using the TensorFlow\* Object Detection API version 2.0 up to 2.3.X inclusively + * `mask_rcnn_support_api_v2.4.json` --- for Mask R-CNN topologies trained using the TensorFlow\* Object Detection API version 2.4 or higher * `rfcn_support.json` --- for RFCN topology from the models zoo trained with TensorFlow\* version up to 1.9.X inclusively * `rfcn_support_api_v1.10.json` --- for RFCN topology from the models zoo frozen with TensorFlow\* version 1.10.0 up to 1.12.X inclusively * `rfcn_support_api_v1.13.json` --- for RFCN topology from the models zoo frozen with TensorFlow\* version 1.13.X diff --git a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_RetinaNet_From_Tensorflow.md b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_RetinaNet_From_Tensorflow.md new file mode 100644 index 00000000000000..f02d50499fd857 --- /dev/null +++ b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_RetinaNet_From_Tensorflow.md @@ -0,0 +1,15 @@ +# Converting RetinaNet Model from TensorFlow* to the Intermediate Representation {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_RetinaNet_From_Tensorflow} + +This tutorial explains how to convert RetinaNet model to the Intermediate Representation (IR). + +[Public RetinaNet model](https://github.com/fizyr/keras-retinanet) does not contain pretrained TensorFlow\* weights. +To convert this model to the TensorFlow\* format, you can use [Reproduce Keras* to TensorFlow* Conversion tutorial](https://docs.openvinotoolkit.org/latest/omz_models_model_retinanet_tf.html). + +After you convert the model to TensorFlow* format, run the Model Optimizer command below: +```sh +python mo.py --input "input_1[1 1333 1333 3]" --input_model retinanet_resnet50_coco_best_v2.1.0.pb --data_type FP32 --transformations_config ./extensions/front/tf/retinanet.json +``` + +Where `transformations_config` command-line parameter specifies the configuration json file containing model conversion hints for the Model Optimizer. +The json file contains some parameters that need to be changed if you train the model yourself. It also contains information on how to match endpoints +to replace the subgraph nodes. After the model is converted to IR, the output nodes will be replaced with DetectionOutput layer. 
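As a quick sanity check of the IR produced by the RetinaNet conversion command above, the sketch below loads it with the Inference Engine Python API available in this release line. It is only an illustration: the IR file names and the NCHW input layout are assumptions based on the default Model Optimizer output for that command, not something this patch adds.

```python
# Hypothetical smoke test for the converted RetinaNet IR
# (file names assumed from the mo.py command above).
import numpy as np
from openvino.inference_engine import IECore

ie = IECore()
net = ie.read_network(model="retinanet_resnet50_coco_best_v2.1.0.xml",
                      weights="retinanet_resnet50_coco_best_v2.1.0.bin")
exec_net = ie.load_network(network=net, device_name="CPU")

input_name = next(iter(net.input_info))
# MO converts the TensorFlow NHWC input [1, 1333, 1333, 3] to NCHW,
# so the network expects [1, 3, 1333, 1333].
dummy_image = np.zeros((1, 3, 1333, 1333), dtype=np.float32)

results = exec_net.infer(inputs={input_name: dummy_image})
for output_name, value in results.items():
    print(output_name, value.shape)
```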
diff --git a/docs/doxygen/doxygen-ignore.txt b/docs/doxygen/doxygen-ignore.txt index b1f27a4972cb0d..9dc1d61cdb324a 100644 --- a/docs/doxygen/doxygen-ignore.txt +++ b/docs/doxygen/doxygen-ignore.txt @@ -16,8 +16,8 @@ openvino/docs/optimization_guide/dldt_optimization_guide.md openvino/docs/IE_DG/ShapeInference.md build/docs/openvino_docs.xml openvino/docs/install_guides/installing-openvino-linux-ivad-vpu.md -inference-engine/include/ie_parallel.hpp -inference-engine/include/ie_plugin_config.hpp -inference-engine/include/vpu/myriad_config.hpp -inference-engine/include/vpu/vpu_config.hpp -inference-engine/include/vpu/vpu_plugin_config.hpp \ No newline at end of file +inference-engine/include/ie/ie_parallel.hpp +inference-engine/include/ie/ie_plugin_config.hpp +inference-engine/include/ie/vpu/myriad_config.hpp +inference-engine/include/ie/vpu/vpu_config.hpp +inference-engine/include/ie/vpu/vpu_plugin_config.hpp diff --git a/docs/doxygen/ie_docs.xml b/docs/doxygen/ie_docs.xml index 19a87a1e11e97c..99e91e53ed572f 100644 --- a/docs/doxygen/ie_docs.xml +++ b/docs/doxygen/ie_docs.xml @@ -34,6 +34,7 @@ limitations under the License. + @@ -176,6 +177,7 @@ limitations under the License. + @@ -219,6 +221,7 @@ limitations under the License. + diff --git a/docs/ops/arithmetic/Acosh_3.md b/docs/ops/arithmetic/Acosh_3.md index 79fde27fbd3c20..9f858924d4e01e 100644 --- a/docs/ops/arithmetic/Acosh_3.md +++ b/docs/ops/arithmetic/Acosh_3.md @@ -6,32 +6,28 @@ **Short description**: *Acosh* performs element-wise hyperbolic inverse cosine (arccosh) operation with given tensor. -**Attributes**: +**Detailed description**: Operation takes one input tensor and performs the element-wise hyperbolic inverse cosine operation on a given input tensor, based on the following mathematical formula: - No attributes available. +\f[ +a_{i} = acosh(a_{i}) +\f] + +**Attributes**: *Acosh* operation has no attributes. **Inputs** -* **1**: A tensor of type *T*. **Required.** +* **1**: A tensor of type *T* and arbitrary shape. **Required.** **Outputs** -* **1**: The result of element-wise acosh operation. A tensor of type *T*. +* **1**: The result of element-wise *Acosh* operation. A tensor of type *T* and the same shape as the input tensor. **Types** -* *T*: any floating-point type. - -*Acosh* does the following with the input tensor *a*: - -\f[ -a_{i} = acosh(a_{i}) -\f] +* *T*: any numeric type. **Examples** -*Example 1* - ```xml diff --git a/docs/ops/arithmetic/Erf_1.md b/docs/ops/arithmetic/Erf_1.md index 6b445dafad29bb..52d2d0301cb679 100644 --- a/docs/ops/arithmetic/Erf_1.md +++ b/docs/ops/arithmetic/Erf_1.md @@ -4,34 +4,32 @@ **Category**: Arithmetic unary operation -**Short description**: *Erf* calculates the Gauss error function element-wise with given tensor. +**Short description**: *Erf* performs element-wise Gauss error function (erf) on a given input tensor. **Detailed Description** -For each element from the input tensor calculates corresponding element in the output tensor with the following formula: +*Erf* performs element-wise erf operation on a given input tensor, based on the following mathematical formula: + \f[ erf(x) = \pi^{-1} \int_{-x}^{x} e^{-t^2} dt \f] -**Attributes**: - - No attributes available. +**Attributes**: *Erf* operation has no attributes. **Inputs** -* **1**: A tensor of type *T*. **Required.** +* **1**: A tensor of type *T* and arbitrary shape. **Required.** **Outputs** -* **1**: The result of element-wise operation. A tensor of type *T*. 
+* **1**: The result of element-wise *Erf* function applied to the input tensor. A tensor of type *T* and the same shape as the input tensor. **Types** -* *T*: any supported floating-point type. +* *T*: any supported numeric type. -**Examples** -*Example 1* +**Example** ```xml diff --git a/docs/ops/arithmetic/Sign_1.md b/docs/ops/arithmetic/Sign_1.md index e68cc51f97f7c7..1aa87097e62136 100644 --- a/docs/ops/arithmetic/Sign_1.md +++ b/docs/ops/arithmetic/Sign_1.md @@ -4,33 +4,30 @@ **Category**: Arithmetic unary operation -**Short description**: *Sign* performs element-wise sign operation with given tensor. +**Short description**: *Sign* performs element-wise sign operation on a given input tensor. -**Attributes**: +**Detailed description**: *Sign* performs element-wise sign operation on a given input tensor, based on the following mathematical formula: - No attributes available. +\f[ +a_{i} = sign(a_{i}) +\f] + +**Attributes**: *Sign* operation has no attributes. **Inputs** -* **1**: An tensor of type *T*. **Required.** +* **1**: A tensor of type *T* and arbitrary shape. **Required.** **Outputs** -* **1**: The result of element-wise sign operation. A tensor of type *T* with mapped elements of the input tensor to -1 (if it is negative), 0 (if it is zero), or 1 (if it is positive). +* **1**: The result of element-wise *Sign* operation. A tensor of type *T* with mapped elements of the input tensor to -1 (if it is negative), 0 (if it is zero), or 1 (if it is positive). **Types** * *T*: any numeric type. -*Sign* does the following with the input tensor *a*: - -\f[ -a_{i} = sign(a_{i}) -\f] - -**Examples** -*Example 1* +**Example** ```xml diff --git a/docs/ops/comparison/LessEqual_1.md b/docs/ops/comparison/LessEqual_1.md index 4144095bed41df..a8b7c8101816dd 100644 --- a/docs/ops/comparison/LessEqual_1.md +++ b/docs/ops/comparison/LessEqual_1.md @@ -4,7 +4,16 @@ **Category**: Comparison binary operation -**Short description**: *LessEqual* performs element-wise comparison operation with two given tensors applying multi-directional broadcast rules. +**Short description**: *LessEqual* performs element-wise comparison operation with two given tensors applying broadcast rules specified in the *auto_broadcast* attribute. + +**Detailed description** +Before performing arithmetic operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value. + +After broadcasting *LessEqual* does the following with the input tensors *a* and *b*: + +\f[ +o_{i} = a_{i} <= b_{i} +\f] **Attributes**: @@ -12,40 +21,33 @@ * **Description**: specifies rules used for auto-broadcasting of input tensors. * **Range of values**: - * *none* - no auto-broadcasting is allowed, all input shapes should match - * *numpy* - numpy broadcasting rules, aligned with ONNX Broadcasting. Description is available in ONNX docs. + * *none* - no auto-broadcasting is allowed, all input shapes should match, + * *numpy* - numpy broadcasting rules, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md), + * *pdpd* - PaddlePaddle-style implicit broadcasting, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md). * **Type**: string * **Default value**: "numpy" * **Required**: *no* **Inputs** -* **1**: A tensor of type *T*. **Required.** -* **2**: A tensor of type *T*. **Required.** +* **1**: A tensor of type *T* and arbitrary shape. 
**Required.** +* **2**: A tensor of type *T* and arbitrary shape. **Required.** **Outputs** -* **1**: The result of element-wise comparison operation. A tensor of type boolean. +* **1**: The result of element-wise comparison operation applied to the input tensors. A tensor of type **boolean** and shape equal to broadcasted shape of two inputs. **Types** * *T*: arbitrary supported type. -**Detailed description** -Before performing arithmetic operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value. - -After broadcasting *LessEqual* does the following with the input tensors *a* and *b*: - -\f[ -o_{i} = a_{i} <= b_{i} -\f] - **Examples** -*Example 1* +*Example 1: no broadcast* ```xml + 256 @@ -65,9 +67,10 @@ o_{i} = a_{i} <= b_{i} ``` -*Example 2: broadcast* +*Example 2: numpy broadcast* ```xml + 8 diff --git a/docs/ops/comparison/Less_1.md b/docs/ops/comparison/Less_1.md index 79a154a6c57166..dcf210d6579226 100644 --- a/docs/ops/comparison/Less_1.md +++ b/docs/ops/comparison/Less_1.md @@ -6,6 +6,16 @@ **Short description**: *Less* performs element-wise comparison operation with two given tensors applying multi-directional broadcast rules. +**Detailed description** +Before performing arithmetic operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value. + +After broadcasting *Less* does the following with the input tensors *a* and *b*: + +\f[ +o_{i} = a_{i} < b_{i} +\f] + + **Attributes**: * *auto_broadcast* @@ -13,8 +23,9 @@ * **Description**: specifies rules used for auto-broadcasting of input tensors. * **Range of values**: * *none* - no auto-broadcasting is allowed, all input shapes should match - * *numpy* - numpy broadcasting rules, aligned with ONNX Broadcasting. Description is available in ONNX docs. - * **Type**: string + * *numpy* - numpy broadcasting rules, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md) + * *pdpd* - PaddlePaddle-style implicit broadcasting, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md) + * **Type**: `string` * **Default value**: "numpy" * **Required**: *no* @@ -31,15 +42,6 @@ * *T*: arbitrary supported type. -**Detailed description** -Before performing arithmetic operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value. - -After broadcasting *Less* does the following with the input tensors *a* and *b*: - -\f[ -o_{i} = a_{i} < b_{i} -\f] - **Examples** *Example 1* diff --git a/docs/ops/condition/If_8.md b/docs/ops/condition/If_8.md new file mode 100644 index 00000000000000..7de2449b1eada1 --- /dev/null +++ b/docs/ops/condition/If_8.md @@ -0,0 +1,226 @@ +## If {#openvino_docs_ops_infrastructure_If_8} + +**Versioned name**: *If-8* + +**Category**: Infrastructure + +**Short description**: *If* operation contains two internal networks(subgraphs) such as `then_body` and `else_body`, +and performs one of them depending on `cond` value. If `cond` is `True`, `then_body` is executed. If `cond` is `False`, +the operation executes the `else_body` subgraph. + +**Detailed description** + +*If* must not contain empty subgraphs. Each of them must have at least one operation `Result`. 
+Also the number of outputs from *If* always must be greater than zero and equal to the number of outputs from each subgraph. + +**If attributes**: + +* **Subgraphs**: + + `then_body`/`else_body` are subgraphs that are executed depending on the `cond` value. + The subgraph is described operation by operation as a typical IR network. + The subgraph has inputs (`Parameter` operations) and outputs (`Result` operations). + + * **Subgraph's inputs** - inputs to the subgraph which associated with *If* inputs via *port_map*. + The subgraph can have any number of inputs (even zero). + + * **Subgraph's outputs** - outputs from the subgraph which associated with *If* outputs via *port_map*. + The subgraph must contain at least one output. Each *If* output is associated with one output from the subgraph. + Therefore the number of `then_body` outputs is equal to the number of outputs from *If* and + the number of `else_body` outputs. + The type of the subgraph output and the type of the associated output from *If* must be equal. + + +* **Port maps**: + + *port_map* is a set of rules to map input or output data tensors of *If* operation onto the subgraph data tensors. + The `port_map` entries can be `input` and `output`. Each entry describes a corresponding mapping rule. + *If* has two *port_maps*: `then_port_map` for `then_body` and `else_port_map` for `else_body`. + + * **Port map attributes**: + + * *external_port_id* + * **Description**: *external_port_id* is a port ID of *If* operation. + * **Range of values**: IDs of the *If* inputs and outputs + * **Type**: `unsigned int` + * **Default value**: None + * **Required**: *yes* + + * *internal_layer_id* + + * **Description**: *internal_layer_id* is a `Parameter` or `Result` operation ID inside + the subgraph to map to. + * **Range of values**: IDs of the `Parameter` or `Result` operations in the subgraph + * **Type**: `unsigned int` + * **Default value**: None + * **Required**: *yes* + +**If Inputs** + + +* **cond**: A scalar or 1D tensor with 1 element of `boolean` type specifying which subgraph to execute. +`True` value means to execute the `then_body`, `False` - `else_body`. *Required*. + +* **Multiple other inputs**: Tensors of different types and shapes. *Optional*. + +**If Outputs** + +* **Multiple outputs**: Results of execution of one of the subgraph. Tensors of any type and shape. + + +**Body Inputs** + +* **Multiple inputs**: Tensors of different types and shapes. *Optional*. + + +**Body Outputs** + +* **Multiple outputs**: Results of execution of the subgraph. Tensors of any type and shape. + + +**Examples** + +*Example 1: a typical If structure* +```xml + + + + + 2 + 4 + + + 2 + 4 + + + 2 + 4 + + + + + 2 + 4 + + + + + + + + + + + + + + + + + + + 2 + 4 + + + + + + + + 2 + 4 + + + + + + + + 2 + 4 + + + 2 + 4 + + + + + 2 + 4 + + + + + + + 2 + 4 + + + + + + + + + + + + + + + + + 2 + 4 + + + + + + + + 2 + 4 + + + + + + + + 2 + 4 + + + 2 + 4 + + + + + 2 + 4 + + + + + + + 2 + 4 + + + + + + + + + + + +``` diff --git a/docs/ops/condition/Select_1.md b/docs/ops/condition/Select_1.md index 8f51624961078e..56e5fde8eab790 100644 --- a/docs/ops/condition/Select_1.md +++ b/docs/ops/condition/Select_1.md @@ -17,26 +17,31 @@ * **Description**: specifies rules used for auto-broadcasting of input tensors. * **Range of values**: - * *none* - no auto-broadcasting is allowed, all input shapes should match - * *numpy* - numpy broadcasting rules, aligned with ONNX Broadcasting. Description is available in ONNX docs. 
- * **Type**: string + * *none* - no auto-broadcasting is allowed, all input shapes must match + * *numpy* - numpy broadcasting rules, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md) + * *pdpd* - PaddlePaddle-style implicit broadcasting, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md) + * **Type**: `string` * **Default value**: "numpy" * **Required**: *no* **Inputs**: -* **1**: `cond` tensor with selection mask of type `boolean`. The tensor can be 0D. +* **1**: `cond` - tensor of type *T_COND* and arbitrary shape with selection mask. **Required**. -* **2**: `then` the tensor with elements to take where the corresponding element in `cond` is true. Arbitrary type that should match type of `else` input tensor. +* **2**: `then` - tensor of type *T* and arbitrary shape with elements to take where the corresponding element in `cond` is `true`. **Required**. -* **3**: `else` the tensor with elements to take where the corresponding element in `cond` is false. Arbitrary type that should match type of `then` input tensor. +* **3**: `else` - tensor of type *T* and arbitrary shape with elements to take where the corresponding element in `cond` is `false`. **Required**. **Outputs**: * **1**: blended output tensor that is tailored from values of inputs tensors `then` and `else` based on `cond` and broadcasting rules. It has the same type of elements as `then` and `else`. +**Types** + +* *T_COND*: `boolean` type. +* *T*: any supported numeric type. **Example** diff --git a/docs/ops/detection/PriorBoxClustered_1.md b/docs/ops/detection/PriorBoxClustered_1.md index 4f3f380252ed42..3049f851949359 100644 --- a/docs/ops/detection/PriorBoxClustered_1.md +++ b/docs/ops/detection/PriorBoxClustered_1.md @@ -6,31 +6,67 @@ **Short description**: *PriorBoxClustered* operation generates prior boxes of specified sizes normalized to the input image size. +**Detailed description** + +Let +\f[ +W \equiv image\_width, \quad H \equiv image\_height. +\f] + +Then calculations of *PriorBoxClustered* can be written as + \f[ + center_x=(w+offset)*step + \f] + \f[ + center_y=(h+offset)*step + \f] + \f[ + w \subset \left( 0, W \right ) + \f] + \f[ + h \subset \left( 0, H \right ) + \f] +For each \f$s = \overline{0, W - 1}\f$ calculates the prior boxes coordinates: + \f[ + xmin = \frac{center_x - \frac{width_s}{2}}{W} + \f] + \f[ + ymin = \frac{center_y - \frac{height_s}{2}}{H} + \f] + \f[ + xmax = \frac{center_x - \frac{width_s}{2}}{W} + \f] + \f[ + ymax = \frac{center_y - \frac{height_s}{2}}{H} + \f] +If *clip* is defined, the coordinates of prior boxes are recalculated with the formula: +\f$coordinate = \min(\max(coordinate,0), 1)\f$ + **Attributes** * *width (height)* * **Description**: *width (height)* specifies desired boxes widths (heights) in pixels. * **Range of values**: floating-point positive numbers - * **Type**: float[] + * **Type**: `float[]` * **Default value**: 1.0 * **Required**: *no* * *clip* - * **Description**: *clip* is a flag that denotes if each value in the output tensor should be clipped within [0,1]. + * **Description**: *clip* is a flag that denotes if each value in the output tensor should be clipped within `[0,1]`. 
* **Range of values**: * false or 0 - clipping is not performed - * true or 1 - each value in the output tensor is within [0,1] - * **Type**: boolean + * true or 1 - each value in the output tensor is within `[0,1]` + * **Type**: `boolean` * **Default value**: true * **Required**: *no* * *step (step_w, step_h)* - * **Description**: *step (step_w, step_h)* is a distance between box centers. For example, *step* equal 85 means that the distance between neighborhood prior boxes centers is 85. If both *step_h* and *step_w* are 0 then they are updated with value of *step*. If after that they are still 0 then they are calculated as input image width(height) divided with first input width(height). + * **Description**: *step (step_w, step_h)* is a distance between box centers. For example, *step* equal 85 means that the distance between neighborhood prior boxes centers is 85. If both *step_h* and *step_w* are 0 then they are updated with value of *step*. If after that they are still 0 then they are calculated as input image width(height) divided with first input width(height). * **Range of values**: floating-point positive number - * **Type**: float + * **Type**: `float` * **Default value**: 0.0 * **Required**: *no* @@ -38,72 +74,31 @@ * **Description**: *offset* is a shift of box respectively to top left corner. For example, *offset* equal 85 means that the shift of neighborhood prior boxes centers is 85. * **Range of values**: floating-point positive number - * **Type**: float + * **Type**: `float` * **Required**: *yes* * *variance* - * **Description**: *variance* denotes a variance of adjusting bounding boxes. + * **Description**: *variance* denotes a variance of adjusting bounding boxes. The attribute could be 0, 1 or 4 elements. * **Range of values**: floating-point positive numbers - * **Type**: float[] + * **Type**: `float[]` * **Default value**: [] * **Required**: *no* -* *img_h (img_w)* - - * **Description**: *img_h (img_w)* specifies height (width) of input image. These attributes are taken from the second input `image_size` height(width) unless provided explicitly as the value for this attributes. - * **Range of values**: floating-point positive number - * **Type**: float - * **Default value**: 0 - * **Required**: *no* - **Inputs**: -* **1**: `output_size` - 1D tensor with two integer elements `[height, width]`. Specifies the spatial size of generated grid with boxes. **Required.** +* **1**: `output_size` - 1D tensor of type *T_INT* with two elements `[height, width]`. Specifies the spatial size of generated grid with boxes. Required. -* **2**: `image_size` - 1D tensor with two integer elements `[image_height, image_width]` that specifies shape of the image for which boxes are generated. **Optional.** +* **2**: `image_size` - 1D tensor of type *T_INT* with two elements `[image_height, image_width]` that specifies shape of the image for which boxes are generated. Optional. **Outputs**: -* **1**: 2D tensor of shape `[2, 4 * height * width * priors_per_point]` with box coordinates. The `priors_per_point` is the number of boxes generated per each grid element. The number depends on layer attribute values. +* **1**: 2D tensor of shape `[2, 4 * height * width * priors_per_point]` and type *T_OUT* with box coordinates. The `priors_per_point` is the number of boxes generated per each grid element. The number depends on layer attribute values. -**Detailed description** +**Types** -*PriorBoxClustered* computes coordinates of prior boxes by following: -1. 
Calculates the *center_x* and *center_y* of prior box: - \f[ - W \equiv Width \quad Of \quad Image - \f] - \f[ - H \equiv Height \quad Of \quad Image - \f] - \f[ - center_x=(w+offset)*step - \f] - \f[ - center_y=(h+offset)*step - \f] - \f[ - w \subset \left( 0, W \right ) - \f] - \f[ - h \subset \left( 0, H \right ) - \f] -2. For each \f$s \subset \left( 0, W \right )\f$ calculates the prior boxes coordinates: - \f[ - xmin = \frac{center_x - \frac{width_s}{2}}{W} - \f] - \f[ - ymin = \frac{center_y - \frac{height_s}{2}}{H} - \f] - \f[ - xmax = \frac{center_x - \frac{width_s}{2}}{W} - \f] - \f[ - ymax = \frac{center_y - \frac{height_s}{2}}{H} - \f] -If *clip* is defined, the coordinates of prior boxes are recalculated with the formula: -\f$coordinate = \min(\max(coordinate,0), 1)\f$ +* *T_INT*: any supported integer type. +* *T_OUT*: supported floating-point type. **Example** diff --git a/docs/ops/generation/RandomUniform_8.md b/docs/ops/generation/RandomUniform_8.md new file mode 100644 index 00000000000000..4269c82bc6a8aa --- /dev/null +++ b/docs/ops/generation/RandomUniform_8.md @@ -0,0 +1,231 @@ +## RandomUniform {#openvino_docs_ops_generation_RandomUniform_8} + +**Versioned name**: *RandomUniform-8* + +**Category**: Generation + +**Short description**: *RandomUniform* operation generates a sequence of random values from a uniform distribution. + +**Detailed description**: + +*RandomUniform* operation generates random numbers from a uniform distribution in the range `[*minval*, *maxval*)`. +The generation algorithm is based on underlying random integer generator that uses Philox algorithm. Philox algorithm +is a counter-based pseudo-random generator, which produces uint32 values. Single invocation of Philox algorithm returns +four result random values, depending on the given *key* and *counter* values. *Key* and *counter* are initialized +with *seed* and *seed2* attributes respectively. + +\f[ +key = seed\\ +counter = seed2 +\f] + +Link to the original paper [Parallel Random Numbers: As Easy as 1, 2, 3](https://www.thesalmons.org/john/random123/papers/random123sc11.pdf) + +The result of Philox is calculated by applying a fixed number of *key* and *counter* updating so-called "rounds". +This implementation uses 4x32_10 version of Philox algorithm, where number of rounds = 10. + +Suppose we have *n* which determines *n*-th 4 elements of random sequence. +In each round *key*, *counter* and *n* are splitted to pairs of uint32 values: + +\f[ +R = cast\_to\_uint32(value)\\ +L = cast\_to\_uint32(value >> 32), +\f] +where *cast\_to\_uint32* - static cast to uint32, *value* - uint64 input value, *L*, *R* - uint32 +result values, >> - bitwise right shift. + +Then *n* and *counter* are updated with the following formula: + +\f[ +L'= mullo(R, M)\\ +R' = mulhi(R, M) {\oplus} k {\oplus} L \\ +mulhi(a, b) = floor((a {\times} b) / 2^{32}) \\ +mullo(a, b) = (a {\times} b) \mod 2^{32} +\f] +where `{\oplus}` - bitwise xor, *k* = `R_{key}` for updating counter, *k* = `L_{key}` for updating *n*, +*M* = `0xD2511F53` for updating *n*, *M* = `0xCD9E8D57` for updating *counter*. + +After each round *key* is raised by summing with another pair of const values: +\f[ +L += 0x9E3779B9 \\ +R += 0xBB67AE85 +\f] +Values *L'_{n}*, *R'_{n}*, *L'_{counter}*, *R'_{counter}* are resulting four random numbers. + +Float values between [0..1) are obtained from 32-bit integers by the following rules. + +Float16 is formatted as follows: *sign*(1 bit) *exponent*(5 bits) *mantissa*(10 bits). 
The value is interpreted +using following formula: +\f[ +(-1)^{sign} * 1, mantissa * 2 ^{exponent - 15} +\f] + +so to obtain float16 values *sign*, *exponent* and *mantissa* are set as follows: +``` +sign = 0 +exponent = 15 - representation of a zero exponent. +mantissa = 10 right bits from generated uint32 random value. +``` + +So the resulting float16 value is: +``` +x_uint16 = x // Truncate the upper 16 bits. +val = ((exponent << 10) | x_uint16 & 0x3ffu) - 1.0, +``` +where x is uint32 generated random value. + +Float32 is formatted as follows: *sign*(1 bit) *exponent*(8 bits) *mantissa*(23 bits). The value is interpreted +using following formula: +\f[ +(-1)^{sign} * 1, mantissa * 2 ^{exponent - 127} +\f] + +so to obtain float values *sign*, *exponent* and *mantissa* are set as follows: +``` +sign = 0 +exponent = 127 - representation of a zero exponent. +mantissa = 23 right bits from generated uint32 random value. +``` + +So the resulting float value is: +``` +val = ((exponent << 23) | x & 0x7fffffu) - 1.0, +``` +where x is uint32 generated random value. + +Double is formatted as follows: *sign*(1 bit) *exponent*(11 bits) *mantissa*(52 bits). The value is interpreted +using following formula: +\f[ +(-1)^{sign} * 1, mantissa * 2 ^{exponent - 1023} +\f] + +so to obtain double values *sign*, *exponent* and *mantissa* are set as follows: +``` +sign = 0 +exponent = 1023 - representation of a zero exponent. +mantissa = 52 right bits from two concatinated uint32 values from random integer generator. +``` + +So the resulting double is obtained as follows: +``` +mantissa_h = x0 & 0xfffffu; // upper 20 bits of mantissa +mantissa_l = x1; // lower 32 bits of mantissa +mantissa = (mantissa_h << 32) | mantissa_l; +val = ((exponent << 52) | mantissa) - 1.0, +``` +where x0, x1 are uint32 generated random values. + +To obtain a value in a specified range each value is processed with the following formulas: + +For float values: +\f[ +result = x * (maxval - minval) + minval, +\f] +where *x* is random float or double value between [0..1). + +For integer values: +\f[ +result = x \mod (maxval - minval) + minval, +\f] +where *x* is uint32 random value. + + +Example 1. *RandomUniform* output with `seed` = 150, `seed2` = 10, `output_type` = f32: + +``` +input_shape = [ 3, 3 ] +output = [[0.7011236 0.30539632 0.93931055] + [0.9456035 0.11694777 0.50770056] + [0.5197197 0.22727466 0.991374 ]] +``` + +Example 2. *RandomUniform* output with `seed` = 80, `seed2` = 100, `output_type` = double: + +``` +input_shape = [ 2, 2 ] + +minval = 2 + +maxval = 10 + +output = [[5.65927959 4.23122376] + [2.67008206 2.36423758]] +``` + +Example 3. *RandomUniform* output with `seed` = 80, `seed2` = 100, `output_type` = i32: + +``` +input_shape = [ 2, 3 ] + +minval = 50 + +maxval = 100 + +output = [[65 70 56] + [59 82 92]] +``` + +**Attributes**: + +* *output_type* + + * **Description**: the type of the output. Determines generation algorithm and affects resulting values. + Output numbers generated for different values of *output_type* may not be equal. + * **Range of values**: "i32", "i64", "f16", "bf16", "f32", "f64". + * **Type**: string + * **Required**: *Yes* + +* *seed* + + * **Description**: global seed value. + * **Range of values**: positive integers + * **Type**: `int` + * **Required**: *Yes* + +* *seed2* + + * **Description**: operational seed value. 
+ * **Range of values**: positive integers + * **Type**: `int` + * **Required**: *Yes* + +**Inputs**: + +* **1**: `shape` - 1D tensor of type *T_SHAPE* describing output shape. **Required.** + +* **2**: `minval` - scalar or 1D tensor with 1 element with type specified by the attribute *output_type*, + defines the lower bound on the range of random values to generate (inclusive). **Required.** + +* **3**: `maxval` - scalar or 1D tensor with 1 element with type specified by the attribute *output_type*, + defines the upper bound on the range of random values to generate (exclusive). **Required.** + + +**Outputs**: + +* **1**: A tensor with type specified by the attribute *output_type* and shape defined by `shape` input tensor. + +**Types** + +* *T_SHAPE*: `int32` or `int64`. + +*Example 1: IR example.* + +```xml + + + + + 3 + + + + + + + 2 + 3 + 10 + + + +``` diff --git a/docs/ops/logical/LogicalAnd_1.md b/docs/ops/logical/LogicalAnd_1.md index 4f39b236fefbb7..a653d1abbc29f2 100644 --- a/docs/ops/logical/LogicalAnd_1.md +++ b/docs/ops/logical/LogicalAnd_1.md @@ -6,39 +6,40 @@ **Short description**: *LogicalAnd* performs element-wise logical AND operation with two given tensors applying multi-directional broadcast rules. +**Detailed description**: Before performing logical operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value. + +After broadcasting *LogicalAnd* does the following with the input tensors *a* and *b*: + +\f[ +o_{i} = a_{i} \wedge b_{i} +\f] + **Attributes**: * *auto_broadcast* * **Description**: specifies rules used for auto-broadcasting of input tensors. * **Range of values**: - * *none* - no auto-broadcasting is allowed, all input shapes should match - * *numpy* - numpy broadcasting rules, aligned with ONNX Broadcasting. Description is available in ONNX docs. + * *none* - no auto-broadcasting is allowed, all input shapes must match, + * *numpy* - numpy broadcasting rules, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md), + * *pdpd* - PaddlePaddle-style implicit broadcasting, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md). * **Type**: string * **Default value**: "numpy" * **Required**: *no* **Inputs** -* **1**: A tensor of type *T*. **Required.** -* **2**: A tensor of type *T*. **Required.** +* **1**: A tensor of type *T* and arbitrary shape. **Required.** +* **2**: A tensor of type *T* and arbitrary shape. **Required.** **Outputs** -* **1**: The result of element-wise logical AND operation. A tensor of type boolean. +* **1**: The result of element-wise *LogicalAnd* operation. A tensor of type boolean. **Types** * *T*: boolean type. -**Detailed description** -Before performing logical operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value. - -After broadcasting *LogicalAnd* does the following with the input tensors *a* and *b*: - -\f[ -o_{i} = a_{i} and b_{i} -\f] **Examples** diff --git a/docs/ops/normalization/GRN_1.md b/docs/ops/normalization/GRN_1.md index 0ea7cf3c30b300..de796e681a78f7 100644 --- a/docs/ops/normalization/GRN_1.md +++ b/docs/ops/normalization/GRN_1.md @@ -8,7 +8,7 @@ **Detailed description**: -*GRN* computes the L2 norm by channels for input tensor with shape `[N, C, ...]`. 
*GRN* does the following with the input tensor: +*GRN* computes the L2 norm across channels for input tensor with shape `[N, C, ...]`. *GRN* does the following with the input tensor: output[i0, i1, ..., iN] = x[i0, i1, ..., iN] / sqrt(sum[j = 0..C-1](x[i0, j, ..., iN]**2) + bias) @@ -16,23 +16,27 @@ * *bias* - * **Description**: *bias* is added to the variance. - * **Range of values**: a non-negative floating-point value + * **Description**: *bias* is added to the sum of squares. + * **Range of values**: a positive floating-point number * **Type**: `float` * **Required**: *yes* **Inputs** -* **1**: Input tensor with element of any floating-point type and `2 <= rank <=4`. **Required.** +* **1**: `data` - A tensor of type *T* and `2 <= rank <= 4`. **Required.** **Outputs** -* **1**: Output tensor of the same type and shape as the input tensor. +* **1**: The result of *GRN* function applied to `data` input tensor. Normalized tensor of the same type and shape as the data input. + +**Types** + +* *T*: arbitrary supported floating-point type. **Example** ```xml - + diff --git a/docs/ops/normalization/MVN_1.md b/docs/ops/normalization/MVN_1.md index ef8a37204dd82f..a82c9a9ca40531 100644 --- a/docs/ops/normalization/MVN_1.md +++ b/docs/ops/normalization/MVN_1.md @@ -4,57 +4,89 @@ **Category**: *Normalization* -**Short description**: [Reference](http://caffe.berkeleyvision.org/tutorial/layers/mvn.html) +**Short description**: Calculates mean-variance normalization of the input tensor. Supports two normalization techniques: [Instance/Contrast Normalization](https://arxiv.org/abs/1607.08022) and [Layer Normalization](https://arxiv.org/abs/1607.06450). **Detailed description** -*MVN* subtracts mean value from the input blob: +Based on `across_channels` attribute mean value is calculated using one of formulas below: + +1. if `true` mean value is calculated using Layer Normalization: +\f[ +\mu_{n} = \frac{\sum_{c}^{C}\sum_{h}^{H}\sum_{w}^{W} i_{nchw}}{C * H * W} +\f] +2. if `false` mean value is calculated using Instance/Contrast Normalization: \f[ -o_{i} = i_{i} - \frac{\sum{i_{k}}}{C * H * W} +\mu_{nc} = \frac{\sum_{h}^{H}\sum_{w}^{W} i_{nchw}}{H * W} \f] -If *normalize_variance* is set to 1, the output blob is divided by variance: + +where \f$i_{nchw}\f$ is an input tensor parametrized by \f$n\f$ batches, \f$c\f$ channels and \f$h,w\f$ spatial dimesnions. + +If `reduction_axes` attribute is provided mean value is calculated based on formula: \f[ -o_{i}=\frac{o_{i}}{\sum \sqrt {o_{k}^2}+\epsilon} +\mu_{n} = ReduceMean(i_{k}, reduction_axes) \f] +Afterwards *MVN* subtracts mean value from the input blob. + +If *normalize_variance* is set to `true`, the output blob is divided by variance: +\f[ +o_{i}=\frac{o_{i}}{\sqrt {\sum {\sigma_{k}^2}+\epsilon}} +\f] + +where \f$\sigma_{k}^2\f$ is the variance calculated based on mean value, \f$\epsilon\f$ is a value added to the variance for numerical stability and corresponds to `epsilon` attribute. + **Attributes** * *across_channels* - * **Description**: *across_channels* is a flag that specifies whether mean values are shared across channels. For example, *across_channels* equal to `false` means that mean values are not shared across channels. + * **Description**: *across_channels* is a flag that specifies whether mean values are shared across channels. 
If `true` mean values and variance are calculated for each sample across all channels and spatial dimensions (Layer Normalization), otherwise calculation is done for each sample and for each channel across spatial dimensions (Instance/Contrast Normalization). * **Range of values**: * `false` - do not share mean values across channels * `true` - share mean values across channels * **Type**: `boolean` - * **Default value**: `false` - * **Required**: *no* + * **Required**: *yes* + +* *reduction_axes* + + * **Description**: 1D tensor of unique elements and type *T_IND* which specifies indices of dimensions in `data` that define normalization slices. Negative value means counting dimensions from the back. + * **Range of values**: allowed range of axes is `[-r; r-1]` where `r = rank(data)`, the order cannot be sorted + * **Type**: `int` + * **Required**: *yes* * *normalize_variance* * **Description**: *normalize_variance* is a flag that specifies whether to perform variance normalization. * **Range of values**: - * `false` -- do not normalize variance - * `true` -- normalize variance + * `false` - do not normalize variance + * `true` - normalize variance * **Type**: `boolean` - * **Default value**: `false` - * **Required**: *no* + * **Required**: *yes* * *eps* * **Description**: *eps* is the number to be added to the variance to avoid division by zero when normalizing the value. For example, *epsilon* equal to 0.001 means that 0.001 is added to the variance. * **Range of values**: a positive floating-point number - * **Type**: `float` + * **Type**: `double` * **Required**: *yes* +* **Note** Important: it is necessary to use only one of `across_channels` or `reduction_axes` attributes, they cannot be defined together. + **Inputs** -* **1**: 4D or 5D input tensor of any floating-point type. **Required.** +* **1**: `data` - input tensor of type *T* and arbitrary shape. **Required.** **Outputs** -* **1**: normalized tensor of the same type and shape as input tensor. +* **1**: normalized tensor of type *T* and shape as input tensor. -**Example** +**Types** + +* *T*: any floating point type. +* *T_IND*: `int64` or `int32`. + +**Examples** + +*Example: with `across_channels` attribute* ```xml @@ -77,3 +109,27 @@ o_{i}=\frac{o_{i}}{\sum \sqrt {o_{k}^2}+\epsilon} ``` + +*Example: with `reduction_axes` attribute* + +```xml + + + + + 6 + 12 + 10 + 24 + + + + + 6 + 12 + 10 + 24 + + + +``` diff --git a/docs/ops/normalization/MVN_6.md b/docs/ops/normalization/MVN_6.md index 9de691458c462d..f89cf60e92df7e 100644 --- a/docs/ops/normalization/MVN_6.md +++ b/docs/ops/normalization/MVN_6.md @@ -30,8 +30,8 @@ o_{i}=\frac{o_{i}}{\sqrt {\sum {o_{k}^2}}+\epsilon} * **Description**: *normalize_variance* is a flag that specifies whether to perform variance normalization. * **Range of values**: - * `false` -- Do not normalize variance - * `true` -- Normalize variance + * `false` - do not normalize variance + * `true` - normalize variance * **Type**: `boolean` * **Required**: *yes* @@ -46,14 +46,14 @@ o_{i}=\frac{o_{i}}{\sqrt {\sum {o_{k}^2}}+\epsilon} * **Description**: Choose where to add epsilon. * **Range of values**: - * `inside_sqrt` -- Add epsilon inside sqrt - * `outside_sqrt` -- Add epsilon outside of sqrt + * `inside_sqrt` - add epsilon inside sqrt + * `outside_sqrt` - add epsilon outside of sqrt * **Type**: `string` * **Required**: *yes* **Inputs** -* **1**: `data` - Input tensor to be normalized. Type *T*. **Required.** +* **1**: `data` - Input tensor to be normalized of type *T* and arbitrary shape. 
**Required.** * **2**: `axes` - 1D tensor which specifies indices of dimensions in `data` that define normalization slices. Allowed range of axes is `[-r; r-1]` where `r = rank(data)`, the order can be not sorted. Negative value means counting dimensions from the back. Type *T_IND*. **Required.** @@ -63,8 +63,7 @@ o_{i}=\frac{o_{i}}{\sqrt {\sum {o_{k}^2}}+\epsilon} **Types** -* *T*: any floating-point type. - +* *T*: any floating point type. * *T_IND*: `int64` or `int32`. **Example** diff --git a/docs/ops/opset8.md b/docs/ops/opset8.md index 02e97eab4e42f6..4c71a0bb2fa7fc 100644 --- a/docs/ops/opset8.md +++ b/docs/ops/opset8.md @@ -79,6 +79,7 @@ declared in `namespace opset8`. * [HSigmoid](activation/HSigmoid_5.md) * [HSwish](activation/HSwish_4.md) * [IDFT](signals/IDFT_7.md) +* [If](condition/If_8.md) * [Interpolate](image/Interpolate_4.md) * [Less](comparison/Less_1.md) * [LessEqual](comparison/LessEqual_1.md) @@ -114,6 +115,7 @@ declared in `namespace opset8`. * [PriorBox](detection/PriorBox_1.md) * [Proposal](detection/Proposal_4.md) * [PSROIPooling](detection/PSROIPooling_1.md) +* [RandomUniform](generation/RandomUniform_8.md) * [Range](generation/Range_4.md) * [ReLU](activation/ReLU_1.md) * [ReadValue](infrastructure/ReadValue_3.md) diff --git a/docs/template_extension/cpu_kernel.cpp b/docs/template_extension/cpu_kernel.cpp index aa2486589cbff2..b1d426b15825ce 100644 --- a/docs/template_extension/cpu_kernel.cpp +++ b/docs/template_extension/cpu_kernel.cpp @@ -102,6 +102,7 @@ InferenceEngine::StatusCode OpImplementation::init(InferenceEngine::LayerConfig& IE_THROW() << "Operation supports only FP32 precisions!"; } } catch (InferenceEngine::Exception& ex) { + error = ex.what(); if (resp) { strncpy(resp->msg, error.c_str(), sizeof(resp->msg) - 1); resp->msg[sizeof(resp->msg) - 1] = 0; diff --git a/docs/template_extension/fft_kernel.cpp b/docs/template_extension/fft_kernel.cpp index 12554a70c75406..3fcf71a8f641b1 100644 --- a/docs/template_extension/fft_kernel.cpp +++ b/docs/template_extension/fft_kernel.cpp @@ -66,6 +66,7 @@ InferenceEngine::StatusCode FFTImpl::init(InferenceEngine::LayerConfig& config, IE_THROW() << "Operation supports only FP32 precisions!"; } } catch (InferenceEngine::Exception& ex) { + error = ex.what(); if (resp) { strncpy(resp->msg, error.c_str(), sizeof(resp->msg) - 1); resp->msg[sizeof(resp->msg) - 1] = 0; diff --git a/docs/template_plugin/tests/functional/op_reference/acosh.cpp b/docs/template_plugin/tests/functional/op_reference/acosh.cpp new file mode 100644 index 00000000000000..e854c98b7e0f7a --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/acosh.cpp @@ -0,0 +1,81 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include + +#include "base_reference_test.hpp" + +using namespace ngraph; + +namespace reference_tests { +namespace { + +struct AcoshParams { + Tensor input; + Tensor expected; +}; + +struct Builder : ParamsBuilder { + REFERENCE_TESTS_ADD_SET_PARAM(Builder, input); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, expected); +}; + +class ReferenceAcoshLayerTest : public testing::TestWithParam, public CommonReferenceTest { +public: + void SetUp() override { + auto params = GetParam(); + function = CreateFunction(params.input.shape, params.input.type); + inputData = {params.input.data}; + refOutData = {params.expected.data}; + } + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + auto param = obj.param; + 
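+        // Compose a human-readable test case name from the input shape and element type; gtest uses it when instantiating the parameterized suite.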
std::ostringstream result; + result << "shape=" << param.input.shape << "_"; + result << "type=" << param.input.type; + return result.str(); + } + +private: + static std::shared_ptr CreateFunction(const Shape& shape, const element::Type& type) { + const auto in = std::make_shared(type, shape); + const auto acosh = std::make_shared(in); + return std::make_shared(NodeVector {acosh}, ParameterVector {in}); + } +}; + +TEST_P(ReferenceAcoshLayerTest, AcoshWithHardcodedRefs) { + Exec(); +} + +} // namespace + +INSTANTIATE_TEST_SUITE_P( + smoke_Acosh_With_Hardcoded_Refs, ReferenceAcoshLayerTest, + ::testing::Values(Builder {} + .input({{8}, element::f16, std::vector {1.f, 2.f, 3.f, 4.f, 5.f, 10.f, 100.f, 1000.f}}) + .expected({{8}, element::f16, std::vector {0., 1.317, 1.763, 2.063, 2.292, 2.993, 5.298, 7.6012}}), + Builder {} + .input({{8}, element::f32, std::vector {1.f, 2.f, 3.f, 4.f, 5.f, 10.f, 100.f, 1000.f}}) + .expected({{8}, element::f32, std::vector {0., 1.317, 1.763, 2.063, 2.292, 2.993, 5.298, 7.6012}}), + Builder {} + .input({{8}, element::i32, std::vector {1, 2, 3, 4, 5, 10, 100, 1000}}) + .expected({{8}, element::i32, std::vector {0, 1, 2, 2, 2, 3, 5, 8}}), + Builder {} + .input({{8}, element::i64, std::vector {1, 2, 3, 4, 5, 10, 100, 1000}}) + .expected({{8}, element::i64, std::vector {0, 1, 2, 2, 2, 3, 5, 8}}), + Builder {} + .input({{8}, element::u32, std::vector {1, 2, 3, 4, 5, 10, 100, 1000}}) + .expected({{8}, element::u32, std::vector {0, 1, 2, 2, 2, 3, 5, 8}}), + Builder {} + .input({{8}, element::u64, std::vector {1, 2, 3, 4, 5, 10, 100, 1000}}) + .expected({{8}, element::u64, std::vector {0, 1, 2, 2, 2, 3, 5, 8}})), + ReferenceAcoshLayerTest::getTestCaseName); +} // namespace reference_tests diff --git a/docs/template_plugin/tests/functional/op_reference/base_reference_test.cpp b/docs/template_plugin/tests/functional/op_reference/base_reference_test.cpp index 51af4d2ea1a221..f2d2cf68aa39a2 100644 --- a/docs/template_plugin/tests/functional/op_reference/base_reference_test.cpp +++ b/docs/template_plugin/tests/functional/op_reference/base_reference_test.cpp @@ -9,6 +9,8 @@ using namespace InferenceEngine; +namespace reference_tests { + CommonReferenceTest::CommonReferenceTest(): targetDevice("TEMPLATE") { core = PluginCache::get().ie(targetDevice); } @@ -171,3 +173,5 @@ void CommonReferenceTest::ValidateBlobs(const InferenceEngine::Blob::Ptr& refBlo FAIL() << "Comparator for " << precision << " precision isn't supported"; } } + +} // namespace reference_tests diff --git a/docs/template_plugin/tests/functional/op_reference/base_reference_test.hpp b/docs/template_plugin/tests/functional/op_reference/base_reference_test.hpp index 6e3fd942a9e722..de08533405e566 100644 --- a/docs/template_plugin/tests/functional/op_reference/base_reference_test.hpp +++ b/docs/template_plugin/tests/functional/op_reference/base_reference_test.hpp @@ -5,8 +5,12 @@ #include #include #include +#include +#include #include +namespace reference_tests { + class CommonReferenceTest { public: CommonReferenceTest(); @@ -51,3 +55,55 @@ InferenceEngine::Blob::Ptr CreateBlob(const ngraph::element::Type& element_type, return blob; } +/// +/// Class which should help to build data for single input +/// +struct Tensor { + Tensor() = default; + + Tensor(const ngraph::Shape& shape, ngraph::element::Type type, const InferenceEngine::Blob::Ptr& data): shape {shape}, type {type}, data {data} {} + + template + Tensor(const ngraph::Shape& shape, ngraph::element::Type type, const std::vector& data_elements) + : Tensor 
{shape, type, CreateBlob(type, data_elements)} {} + + ngraph::Shape shape; + ngraph::element::Type type; + InferenceEngine::Blob::Ptr data; +}; + +/// +/// Class which should helps build test parameters. +/// +/// e.g.: +/// struct Params { +/// Tensor i,o; +/// int mul; +/// }; +/// struct TestParamsBuilder : ParamsBuilder +/// REFERENCE_TESTS_ADD_SET_PARAM(TestParamsBuilder, i); +/// REFERENCE_TESTS_ADD_SET_PARAM(TestParamsBuilder, o); +/// REFERENCE_TESTS_ADD_SET_PARAM(TestParamsBuilder, mul); +/// }; +/// +/// const Params p = TestParamsBuilder{} +/// .i(Tensor{{0}, i32, {1}}) +/// .o(Tensor{{0}, i32, {1}}) +/// .mul(10); +template +class ParamsBuilder { +protected: + Params params; + +public: + operator Params() const { + return params; + } +}; +#define REFERENCE_TESTS_ADD_SET_PARAM(builder_type, param_to_set) \ + builder_type& param_to_set(decltype(params.param_to_set) t) { \ + params.param_to_set = std::move(t); \ + return *this; \ + } + +} // namespace reference_tests diff --git a/docs/template_plugin/tests/functional/op_reference/comparison.hpp b/docs/template_plugin/tests/functional/op_reference/comparison.hpp new file mode 100644 index 00000000000000..0d520b73ba29a8 --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/comparison.hpp @@ -0,0 +1,63 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include + +#include "base_reference_test.hpp" +#include "ngraph_functions/builders.hpp" + +namespace reference_tests { +namespace ComparisonOpsRefTestDefinitions { + +struct RefComparisonParams { + ngraph::helpers::ComparisonTypes compType; + Tensor input1; + Tensor input2; + Tensor expected; +}; + +struct Builder : ParamsBuilder { + REFERENCE_TESTS_ADD_SET_PARAM(Builder, compType); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, input1); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, input2); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, expected); +}; + +class ReferenceComparisonLayerTest : public testing::TestWithParam, public CommonReferenceTest { +public: + void SetUp() override { + const auto& params = GetParam(); + function = CreateFunction(params.compType, params.input1.shape, params.input2.shape, params.input1.type, params.expected.type); + inputData = {params.input1.data, params.input2.data}; + refOutData = {params.expected.data}; + } + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + const auto& param = obj.param; + std::ostringstream result; + result << "comparisonType=" << param.compType << "_"; + result << "inpt_shape1=" << param.input1.shape << "_"; + result << "inpt_shape2=" << param.input2.shape << "_"; + result << "iType=" << param.input1.type << "_"; + result << "oType=" << param.expected.type; + return result.str(); + } + +private: + static std::shared_ptr CreateFunction(ngraph::helpers::ComparisonTypes comp_op_type, const ngraph::PartialShape& input_shape1, + const ngraph::PartialShape& input_shape2, const ngraph::element::Type& input_type, + const ngraph::element::Type& expected_output_type) { + const auto in = std::make_shared(input_type, input_shape1); + const auto in2 = std::make_shared(input_type, input_shape2); + const auto comp = ngraph::builder::makeComparison(in, in2, comp_op_type); + return std::make_shared(ngraph::NodeVector {comp}, ngraph::ParameterVector {in, in2}); + } +}; +} // namespace ComparisonOpsRefTestDefinitions +} // namespace reference_tests \ No newline at end of file diff --git 
a/docs/template_plugin/tests/functional/op_reference/convert.cpp b/docs/template_plugin/tests/functional/op_reference/convert.cpp index fb32fda4cbbfd8..b8e6f5846f7408 100644 --- a/docs/template_plugin/tests/functional/op_reference/convert.cpp +++ b/docs/template_plugin/tests/functional/op_reference/convert.cpp @@ -12,6 +12,7 @@ #include "base_reference_test.hpp" +using namespace reference_tests; using namespace ngraph; using namespace InferenceEngine; diff --git a/docs/template_plugin/tests/functional/op_reference/equal.cpp b/docs/template_plugin/tests/functional/op_reference/equal.cpp new file mode 100644 index 00000000000000..d80ec3271fb6d8 --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/equal.cpp @@ -0,0 +1,84 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include + +#include "comparison.hpp" + +using namespace ngraph; +using namespace InferenceEngine; +using ComparisonTypes = ngraph::helpers::ComparisonTypes; + + +namespace reference_tests { +namespace ComparisonOpsRefTestDefinitions { +namespace { + +TEST_P(ReferenceComparisonLayerTest, EqualCompareWithHardcodedRefs) { + Exec(); +} + +template +std::vector generateComparisonParams(const element::Type& type) { + using T = typename element_type_traits::value_type; + std::vector compParams { + // 1D // 2D // 3D // 4D + Builder {} + .compType(ComparisonTypes::EQUAL) + .input1({{2, 2}, type, std::vector {0, 12, 23, 0}}) + .input2({{2, 2}, type, std::vector {0, 12, 23, 0}}) + .expected({{2, 2}, element::boolean, std::vector {1, 1, 1, 1}}), + Builder {} + .compType(ComparisonTypes::EQUAL) + .input1({{2, 3}, type, std::vector {0, 6, 45, 1, 21, 21}}) + .input2({{2, 3}, type, std::vector {1, 18, 23, 1, 19, 21}}) + .expected({{2, 3}, element::boolean, std::vector {0, 0, 0, 1, 0, 1}}), + Builder {} + .compType(ComparisonTypes::EQUAL) + .input1({{1}, type, std::vector {53}}) + .input2({{1}, type, std::vector {53}}) + .expected({{1}, element::boolean, std::vector {1}}), + Builder {} + .compType(ComparisonTypes::EQUAL) + .input1({{2, 4}, type, std::vector {0, 12, 23, 0, 1, 5, 11, 8}}) + .input2({{2, 4}, type, std::vector {0, 12, 23, 0, 10, 5, 11, 8}}) + .expected({{2, 4}, element::boolean, std::vector {1, 1, 1, 1, 0, 1, 1, 1}}), + Builder {} + .compType(ComparisonTypes::EQUAL) + .input1({{3, 1, 2}, type, std::vector {2, 1, 4, 1, 3, 1}}) + .input2({{1, 2, 1}, type, std::vector {1, 1}}) + .expected({{3, 2, 2}, element::boolean, std::vector {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}}), + Builder {} + .compType(ComparisonTypes::EQUAL) + .input1({{2, 1, 2, 1}, type, std::vector {2, 1, 4, 1}}) + .input2({{1, 2, 1}, type, std::vector {1, 1}}) + .expected({{2, 1, 2, 1}, element::boolean, std::vector {0, 1, 0, 1}})}; + return compParams; +} + +std::vector generateComparisonCombinedParams() { + const std::vector> compTypeParams { + generateComparisonParams(element::f32), + generateComparisonParams(element::f16), + generateComparisonParams(element::i32), + generateComparisonParams(element::u32), + generateComparisonParams(element::boolean)}; + std::vector combinedParams; + + for (const auto& params : compTypeParams) { + combinedParams.insert(combinedParams.end(), params.begin(), params.end()); + } + return combinedParams; +} + +INSTANTIATE_TEST_SUITE_P(smoke_Comparison_With_Hardcoded_Refs, ReferenceComparisonLayerTest, ::testing::ValuesIn(generateComparisonCombinedParams()), + ReferenceComparisonLayerTest::getTestCaseName); +} // namespace +} // 
namespace ComparisonOpsRefTestDefinitions +} // namespace reference_tests \ No newline at end of file diff --git a/docs/template_plugin/tests/functional/op_reference/erf.cpp b/docs/template_plugin/tests/functional/op_reference/erf.cpp new file mode 100644 index 00000000000000..bd888a8e03c90f --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/erf.cpp @@ -0,0 +1,94 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include +#include + +#include "base_reference_test.hpp" + +using namespace reference_tests; +using namespace ngraph; +using namespace InferenceEngine; + +struct ErfParams { + template + ErfParams(const ngraph::PartialShape& shape, const ngraph::element::Type& iType, const std::vector& iValues) + : pshape(shape), inType(iType), outType(iType), inputData(CreateBlob(iType, iValues)) { + std::vector oValues; + std::vector output; + for (auto element : iValues) + output.push_back(static_cast(element)); + + std::transform(output.begin(), output.end(), output.begin(), [](double input) -> double { + return std::erf(input); + }); + + if (std::is_integral()) { + std::transform(output.begin(), output.end(), output.begin(), [](double input) -> double { + return std::round(input); + }); + } + + for (auto element : output) + oValues.push_back(static_cast(element)); + refData = CreateBlob(outType, oValues); + } + ngraph::PartialShape pshape; + ngraph::element::Type inType; + ngraph::element::Type outType; + InferenceEngine::Blob::Ptr inputData; + InferenceEngine::Blob::Ptr refData; +}; + +class ReferenceErfLayerTest : public testing::TestWithParam, public CommonReferenceTest { +public: + void SetUp() override { + auto params = GetParam(); + function = CreateFunction(params.pshape, params.inType, params.outType); + inputData = {params.inputData}; + refOutData = {params.refData}; + } + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + auto param = obj.param; + std::ostringstream result; + result << "shape=" << param.pshape << "_"; + result << "iType=" << param.inType << "_"; + result << "oType=" << param.outType; + return result.str(); + } + +private: + static std::shared_ptr CreateFunction(const PartialShape& input_shape, const element::Type& input_type, + const element::Type& expected_output_type) { + const auto in = std::make_shared(input_type, input_shape); + const auto erf = std::make_shared(in); + return std::make_shared(NodeVector {erf}, ParameterVector {in}); + } +}; + +TEST_P(ReferenceErfLayerTest, CompareWithRefs) { + Exec(); +} + +INSTANTIATE_TEST_SUITE_P( + smoke_Erf_With_Hardcoded_Refs, ReferenceErfLayerTest, + ::testing::Values(ErfParams(ngraph::PartialShape {2, 5}, ngraph::element::f32, + std::vector {-INFINITY, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f, INFINITY}), + ErfParams(ngraph::PartialShape {2, 5}, ngraph::element::f16, + std::vector {-INFINITY, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f, INFINITY}), + ErfParams(ngraph::PartialShape {2, 3}, ngraph::element::i32, + std::vector {std::numeric_limits::min(), -2, -1, 1, 2, std::numeric_limits::max()}), + ErfParams(ngraph::PartialShape {2, 3}, ngraph::element::u32, + std::vector {std::numeric_limits::min(), 0, 1, 2, 3, std::numeric_limits::max()}), + ErfParams(ngraph::PartialShape {2, 3}, ngraph::element::i64, + std::vector {std::numeric_limits::min(), -2, -1, 1, 2, std::numeric_limits::max()}), + ErfParams(ngraph::PartialShape {2, 3}, ngraph::element::u64, + std::vector 
{std::numeric_limits::min(), 0, 1, 2, 3, std::numeric_limits::max()})), + ReferenceErfLayerTest::getTestCaseName); diff --git a/docs/template_plugin/tests/functional/op_reference/grn.cpp b/docs/template_plugin/tests/functional/op_reference/grn.cpp new file mode 100644 index 00000000000000..e7fc0c79f6b82b --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/grn.cpp @@ -0,0 +1,119 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include + +#include "base_reference_test.hpp" + +using namespace reference_tests; +using namespace ngraph; +using namespace InferenceEngine; + +namespace { +struct GrnParams { + template + GrnParams(const float bias, const PartialShape& shape, const element::Type& iType, const std::vector& iValues, + const std::vector& oValues) + : bias(bias), pshape(shape), inType(iType), outType(iType), inputData(CreateBlob(iType, iValues)), refData(CreateBlob(iType, oValues)) {} + float bias; + PartialShape pshape; + element::Type inType; + element::Type outType; + Blob::Ptr inputData; + Blob::Ptr refData; +}; + +class ReferenceGrnLayerTest : public testing::TestWithParam, public CommonReferenceTest { +public: + void SetUp() override { + auto params = GetParam(); + function = CreateFunction(params.bias, params.pshape, params.inType); + inputData = {params.inputData}; + refOutData = {params.refData}; + } + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + auto param = obj.param; + std::ostringstream result; + result << "bias=" << param.bias << "_"; + result << "shape=" << param.pshape << "_"; + result << "iType=" << param.inType << "_"; + result << "oType=" << param.outType; + return result.str(); + } + +private: + static std::shared_ptr CreateFunction(float bias, const PartialShape& input_shape, const element::Type& input_type) { + const auto in = std::make_shared(input_type, input_shape); + const auto grn = std::make_shared(in, bias); + return std::make_shared(NodeVector {grn}, ParameterVector {in}); + } +}; + +TEST_P(ReferenceGrnLayerTest, CompareWithHardcodedRefs) { + Exec(); +} + +template +std::vector generateGrnParams(const element::Type& type) { + using T = typename element_type_traits::value_type; + std::vector grnParams { + // bias 1e-6 // 2D // 3D // 4D + GrnParams(1e-6, PartialShape {3, 4}, type, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + std::vector {0.182574, 0.365148, 0.547723, 0.730297, 0.379049, 0.454859, 0.530669, 0.606478, 0.426162, 0.473514, 0.520865, 0.568217}), + GrnParams(1e-6, PartialShape {2, 3, 4}, type, + std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}, + std::vector {0.0966737, 0.169031, 0.224231, 0.267261, 0.483368, 0.507093, 0.523205, 0.534522, 0.870063, 0.845154, 0.822179, 0.801784, + 0.433574, 0.441836, 0.449215, 0.455842, 0.566982, 0.568075, 0.569005, 0.569803, 0.700389, 0.694314, 0.688796, 0.683763}), + GrnParams(1e-6, PartialShape {1, 2, 3, 4}, type, + std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}, + std::vector {0.0766965, 0.141421, 0.196116, 0.242536, 0.282166, 0.316228, 0.345705, 0.371391, 0.393919, 0.413803, 0.431455, 0.447214, + 0.997055, 0.989949, 0.980581, 0.970143, 0.959365, 0.948683, 0.938343, 0.928477, 0.919145, 0.910366, 0.902134, 0.894427}), + GrnParams(1e-6, PartialShape {2, 2, 3, 4}, type, + std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 
22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48}, + std::vector {0.0766965, 0.141421, 0.196116, 0.242536, 0.282166, 0.316228, 0.345705, 0.371391, 0.393919, 0.413803, 0.431455, 0.447214, + 0.997055, 0.989949, 0.980581, 0.970143, 0.959365, 0.948683, 0.938343, 0.928477, 0.919145, 0.910366, 0.902134, 0.894427, + 0.559857, 0.564684, 0.56921, 0.573462, 0.577465, 0.581238, 0.584802, 0.588172, 0.591364, 0.594391, 0.597266, 0.6, + 0.828589, 0.825307, 0.822192, 0.819232, 0.816416, 0.813733, 0.811176, 0.808736, 0.806405, 0.804176, 0.802043, 0.8}), + // bias 100.25 // 2D // 3D // 4D + GrnParams(100.25, PartialShape {3, 4}, type, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + std::vector {0.0876216, 0.175243, 0.262865, 0.350486, 0.301923, 0.362308, 0.422693, 0.483077, 0.385076, 0.427863, 0.470649, 0.513435}), + GrnParams(100.25, PartialShape {2, 3, 4}, type, + std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}, + std::vector {0.0694629, 0.129032, 0.179525, 0.222137, 0.347314, 0.387097, 0.418891, 0.444273, 0.625166, 0.645161, 0.658258, 0.66641, + 0.41125, 0.421303, 0.430287, 0.438356, 0.537789, 0.541675, 0.54503, 0.547945, 0.664327, 0.662047, 0.659774, 0.657534}), + GrnParams(100.25, PartialShape {1, 2, 3, 4}, type, + std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}, + std::vector {0.0608299, 0.115422, 0.164091, 0.207321, 0.245662, 0.279675, 0.309889, 0.336786, 0.360795, 0.38229, 0.401596, 0.418994, + 0.790789, 0.807954, 0.820457, 0.829283, 0.835252, 0.839026, 0.841128, 0.841965, 0.841854, 0.841037, 0.839701, 0.837989f}), + GrnParams(100.25, PartialShape {2, 2, 3, 4}, type, + std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48}, + std::vector {0.0608299, 0.115422, 0.164091, 0.207321, 0.245662, 0.279675, 0.309889, 0.336786, 0.360795, 0.38229, 0.401596, 0.418994, + 0.790789, 0.807954, 0.820457, 0.829283, 0.835252, 0.839026, 0.841128, 0.841965, 0.841854, 0.841037, 0.839701, 0.837989, + 0.546293, 0.551788, 0.556938, 0.561772, 0.566319, 0.570601, 0.574641, 0.578458, 0.582069, 0.585489, 0.588734, 0.591816, + 0.808514, 0.80646, 0.804466, 0.802532, 0.800658, 0.798842, 0.797083, 0.795379, 0.79373, 0.792133, 0.790586, 0.789088})}; + return grnParams; +} + +std::vector generateGrnCombinedParams() { + const std::vector> grnTypeParams {generateGrnParams(element::bf16), + generateGrnParams(element::f16), + generateGrnParams(element::f32)}; + std::vector combinedParams; + std::for_each(grnTypeParams.begin(), grnTypeParams.end(), [&](std::vector params) { + combinedParams.insert(combinedParams.end(), params.begin(), params.end()); + }); + return combinedParams; +} + +INSTANTIATE_TEST_SUITE_P(smoke_GRN_With_Hardcoded_Refs, ReferenceGrnLayerTest, ::testing::ValuesIn(generateGrnCombinedParams()), + ReferenceGrnLayerTest::getTestCaseName); +} // namespace diff --git a/docs/template_plugin/tests/functional/op_reference/less.cpp b/docs/template_plugin/tests/functional/op_reference/less.cpp new file mode 100644 index 00000000000000..5d01cdfab64198 --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/less.cpp @@ -0,0 +1,82 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include + +#include "comparison.hpp" + 
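+// Reuses the shared comparison fixture and parameter builder from comparison.hpp; only the comparison type (LESS) and the reference data differ here.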
+using namespace ngraph; +using namespace InferenceEngine; +using ComparisonTypes = ngraph::helpers::ComparisonTypes; + +namespace reference_tests { +namespace ComparisonOpsRefTestDefinitions { +namespace { +TEST_P(ReferenceComparisonLayerTest, LessCompareWithHardcodedRefs) { + Exec(); +} + +template +std::vector generateComparisonParams(const element::Type& type) { + using T = typename element_type_traits::value_type; + std::vector compParams { + // 1D // 2D // 3D // 4D + Builder {} + .compType(ComparisonTypes::LESS) + .input1({{2, 2}, type, std::vector {0, 12, 23, 0}}) + .input2({{2, 2}, type, std::vector {0, 12, 23, 0}}) + .expected({{2, 2}, element::boolean, std::vector {0, 0, 0, 0}}), + Builder {} + .compType(ComparisonTypes::LESS) + .input1({{2, 3}, type, std::vector {0, 6, 45, 1, 21, 21}}) + .input2({{2, 3}, type, std::vector {1, 18, 23, 1, 19, 21}}) + .expected({{2, 3}, element::boolean, std::vector {1, 1, 0, 0, 0, 0}}), + Builder {} + .compType(ComparisonTypes::LESS) + .input1({{1}, type, std::vector {53}}) + .input2({{1}, type, std::vector {53}}) + .expected({{1}, element::boolean, std::vector {0}}), + Builder {} + .compType(ComparisonTypes::LESS) + .input1({{2, 4}, type, std::vector {0, 12, 23, 0, 1, 5, 11, 8}}) + .input2({{2, 4}, type, std::vector {0, 12, 23, 0, 10, 5, 11, 8}}) + .expected({{2, 4}, element::boolean, std::vector {0, 0, 0, 0, 1, 0, 0, 0}}), + Builder {} + .compType(ComparisonTypes::LESS) + .input1({{3, 1, 2}, type, std::vector {2, 1, 4, 1, 3, 1}}) + .input2({{1, 2, 1}, type, std::vector {1, 1}}) + .expected({{3, 2, 2}, element::boolean, std::vector {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}), + Builder {} + .compType(ComparisonTypes::LESS) + .input1({{2, 1, 2, 1}, type, std::vector {2, 1, 4, 1}}) + .input2({{1, 2, 1}, type, std::vector {1, 1}}) + .expected({{2, 1, 2, 1}, element::boolean, std::vector {0, 0, 0, 0}})}; + return compParams; +} + +std::vector generateComparisonCombinedParams() { + const std::vector> compTypeParams { + generateComparisonParams(element::f32), + generateComparisonParams(element::f16), + generateComparisonParams(element::i32), + generateComparisonParams(element::u32), + generateComparisonParams(element::boolean)}; + std::vector combinedParams; + + for (const auto& params : compTypeParams) { + combinedParams.insert(combinedParams.end(), params.begin(), params.end()); + } + return combinedParams; +} + +} // namespace +INSTANTIATE_TEST_SUITE_P(smoke_Comparison_With_Hardcoded_Refs, ReferenceComparisonLayerTest, ::testing::ValuesIn(generateComparisonCombinedParams()), + ReferenceComparisonLayerTest::getTestCaseName); +} // namespace ComparisonOpsRefTestDefinitions +} // namespace reference_tests \ No newline at end of file diff --git a/docs/template_plugin/tests/functional/op_reference/less_eq.cpp b/docs/template_plugin/tests/functional/op_reference/less_eq.cpp new file mode 100644 index 00000000000000..f530867f847f5d --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/less_eq.cpp @@ -0,0 +1,82 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include + +#include "comparison.hpp" + +using namespace ngraph; +using namespace InferenceEngine; +using ComparisonTypes = ngraph::helpers::ComparisonTypes; + +namespace reference_tests { +namespace ComparisonOpsRefTestDefinitions { +namespace { +TEST_P(ReferenceComparisonLayerTest, LessEqualCompareWithHardcodedRefs) { + Exec(); +} + +template +std::vector generateComparisonParams(const element::Type& 
type) { + using T = typename element_type_traits::value_type; + std::vector compParams { + // 1D // 2D // 3D // 4D + Builder {} + .compType(ComparisonTypes::LESS_EQUAL) + .input1({{2, 2}, type, std::vector {0, 12, 23, 0}}) + .input2({{2, 2}, type, std::vector {0, 12, 23, 0}}) + .expected({{2, 2}, element::boolean, std::vector {1, 1, 1, 1}}), + Builder {} + .compType(ComparisonTypes::LESS_EQUAL) + .input1({{2, 3}, type, std::vector {0, 6, 45, 1, 21, 21}}) + .input2({{2, 3}, type, std::vector {1, 18, 23, 1, 19, 21}}) + .expected({{2, 3}, element::boolean, std::vector {1, 1, 0, 1, 0, 1}}), + Builder {} + .compType(ComparisonTypes::LESS_EQUAL) + .input1({{1}, type, std::vector {53}}) + .input2({{1}, type, std::vector {53}}) + .expected({{1}, element::boolean, std::vector {1}}), + Builder {} + .compType(ComparisonTypes::LESS_EQUAL) + .input1({{2, 4}, type, std::vector {0, 12, 23, 0, 1, 5, 11, 8}}) + .input2({{2, 4}, type, std::vector {0, 12, 23, 0, 10, 5, 11, 8}}) + .expected({{2, 4}, element::boolean, std::vector {1, 1, 1, 1, 1, 1, 1, 1}}), + Builder {} + .compType(ComparisonTypes::LESS_EQUAL) + .input1({{3, 1, 2}, type, std::vector {2, 1, 4, 1, 3, 1}}) + .input2({{1, 2, 1}, type, std::vector {1, 1}}) + .expected({{3, 2, 2}, element::boolean, std::vector {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}}), + Builder {} + .compType(ComparisonTypes::LESS_EQUAL) + .input1({{2, 1, 2, 1}, type, std::vector {2, 1, 4, 1}}) + .input2({{1, 2, 1}, type, std::vector {1, 1}}) + .expected({{2, 1, 2, 1}, element::boolean, std::vector {0, 1, 0, 1}})}; + return compParams; +} + +std::vector generateComparisonCombinedParams() { + const std::vector> compTypeParams { + generateComparisonParams(element::f32), + generateComparisonParams(element::f16), + generateComparisonParams(element::i32), + generateComparisonParams(element::u32), + generateComparisonParams(element::boolean)}; + std::vector combinedParams; + + for (const auto& params : compTypeParams) { + combinedParams.insert(combinedParams.end(), params.begin(), params.end()); + } + return combinedParams; +} + +} // namespace +INSTANTIATE_TEST_SUITE_P(smoke_Comparison_With_Hardcoded_Refs, ReferenceComparisonLayerTest, ::testing::ValuesIn(generateComparisonCombinedParams()), + ReferenceComparisonLayerTest::getTestCaseName); +} // namespace ComparisonOpsRefTestDefinitions +} // namespace reference_tests \ No newline at end of file diff --git a/docs/template_plugin/tests/functional/op_reference/logical_and.cpp b/docs/template_plugin/tests/functional/op_reference/logical_and.cpp new file mode 100644 index 00000000000000..0313874533e97b --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/logical_and.cpp @@ -0,0 +1,83 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include + +#include "base_reference_test.hpp" + +using namespace reference_tests; +using namespace ngraph; +using namespace InferenceEngine; + + +struct LogicalAndParams { + template + LogicalAndParams(const ngraph::PartialShape& input_shape1, const ngraph::PartialShape& input_shape2 , + const std::vector& iValues1, const std::vector& iValues2, const std::vector& oValues) + : pshape1(input_shape1), pshape2(input_shape2), inType(ngraph::element::boolean), outType(ngraph::element::boolean), + inputData1(CreateBlob(ngraph::element::boolean, iValues1)), inputData2(CreateBlob(ngraph::element::boolean, iValues2)), + refData(CreateBlob(ngraph::element::boolean, oValues)) {} + ngraph::PartialShape pshape1; 
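+    // Second input shape; kept separate from pshape1 so broadcasting cases (e.g. {2, 1, 2, 1} with {1, 1, 2, 1}) can be expressed.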
+ ngraph::PartialShape pshape2; + ngraph::element::Type inType; + ngraph::element::Type outType; + InferenceEngine::Blob::Ptr inputData1; + InferenceEngine::Blob::Ptr inputData2; + InferenceEngine::Blob::Ptr refData; +}; + +class ReferenceLogicalAndLayerTest : public testing::TestWithParam, public CommonReferenceTest { +public: + void SetUp() override { + auto params = GetParam(); + function = CreateFunction(params.pshape1, params.pshape2, params.inType); + inputData = {params.inputData1, params.inputData2}; + refOutData = {params.refData}; + } + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + auto param = obj.param; + std::ostringstream result; + result << "input_shape1=" << param.pshape1 << "_"; + result << "input_shape2=" << param.pshape2 << "_"; + result << "iType=" << param.inType << "_"; + result << "oType=" << param.outType; + return result.str(); + } + +private: + static std::shared_ptr CreateFunction(const PartialShape& input_shape1, + const PartialShape& input_shape2, const element::Type& input_type) { + const auto in = std::make_shared(input_type, input_shape1); + const auto in2 = std::make_shared(input_type, input_shape2); + const auto logical_and = std::make_shared(in, in2); + return std::make_shared(NodeVector {logical_and}, ParameterVector {in, in2}); + } +}; + +TEST_P(ReferenceLogicalAndLayerTest, CompareWithHardcodedRefs) { + Exec(); +} + +INSTANTIATE_TEST_SUITE_P( + smoke_LogicalAnd_With_Hardcoded_Refs, ReferenceLogicalAndLayerTest, + ::testing::Values( + LogicalAndParams(ngraph::PartialShape {2, 2}, ngraph::PartialShape {2, 2}, + std::vector {true, false, true, false}, + std::vector {false, true, true, false}, + std::vector {false, false, true, false}), + LogicalAndParams(ngraph::PartialShape {2, 1, 2, 1}, ngraph::PartialShape {1, 1, 2, 1}, + std::vector {true, false, true, false}, + std::vector {true, false}, + std::vector {true, false, true, false}), + LogicalAndParams(ngraph::PartialShape {3, 4}, ngraph::PartialShape {3, 4}, + std::vector {true, true, true, true, true, false, true, false, false, true, true, true}, + std::vector {true, true, true, true, true, false, true, false, false, true, true, false}, + std::vector {true, true, true, true, true, false, true, false, false, true, true, false})), + ReferenceLogicalAndLayerTest::getTestCaseName); diff --git a/docs/template_plugin/tests/functional/op_reference/mvn.cpp b/docs/template_plugin/tests/functional/op_reference/mvn.cpp new file mode 100644 index 00000000000000..5321164807b852 --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/mvn.cpp @@ -0,0 +1,254 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include + +#include "base_reference_test.hpp" + +using namespace ngraph; +using namespace InferenceEngine; +using namespace reference_tests; + +// ------------------------------ V0 ------------------------------ + +struct MVN1Params { + MVN1Params(const Tensor& paramInput, const ngraph::AxisSet& paramReductionAxes, const bool paramAcrossChannels, const bool paramNormalizeVariance, + const double paramEps, const Tensor& paramExpected) + : input(paramInput), + reductionAxes(paramReductionAxes), + acrossChannels(paramAcrossChannels), + normalizeVariance(paramNormalizeVariance), + eps(paramEps), + expected(paramExpected) {} + Tensor input; + ngraph::AxisSet reductionAxes; + bool acrossChannels; + bool normalizeVariance; + double eps; + Tensor expected; +}; + +class 
ReferenceMVN1LayerTest : public testing::TestWithParam, public CommonReferenceTest { +public: + void SetUp() override { + auto params = GetParam(); + function = CreateFunction(params.input, params.reductionAxes, params.acrossChannels, params.normalizeVariance, params.eps); + inputData = {params.input.data}; + refOutData = {params.expected.data}; + } + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + auto param = obj.param; + std::ostringstream result; + result << "shape=" << param.input.shape; + result << "_iType=" << param.input.type; + if (!param.reductionAxes.empty()) { + result << "_reductionAccess=" << CommonTestUtils::vec2str(param.reductionAxes.to_vector()); + } else { + result << "_acrossChannels=" << (param.acrossChannels ? "TRUE" : "FALSE"); + } + result << "_normalizeVariance=" << (param.normalizeVariance ? "TRUE" : "FALSE"); + result << "_eps=" << param.eps; + return result.str(); + } + +private: + static std::shared_ptr CreateFunction(const Tensor& input, const ngraph::AxisSet& reductionAxes, const bool acrossChannels, + const bool normalizeVariance, const double eps) { + const auto in = std::make_shared(input.type, input.shape); + auto mvn = std::make_shared(in, acrossChannels, normalizeVariance, eps); + if (!reductionAxes.empty()) { + mvn = std::make_shared(in, reductionAxes, normalizeVariance, eps); + } + return std::make_shared(NodeVector {mvn}, ParameterVector {in}); + } +}; + +TEST_P(ReferenceMVN1LayerTest, CompareWithHardcodedRefs) { + Exec(); +} + +const ngraph::AxisSet emptyReductionAxes {}; + +INSTANTIATE_TEST_SUITE_P( + smoke_MVN1_With_Hardcoded_Refs, ReferenceMVN1LayerTest, + ::testing::Values( + // across_channels=false, variance=false + MVN1Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}}, + emptyReductionAxes, + false, + false, + 1e-9, + Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector {-4, -3, -2, -1, 0, 1, 2, 3, 4, -4, -3, -2, -1, 0, + 1, 2, 3, 4, -4, -3, -2, -1, 0, 1, 2, 3, 4}}), + // across_channels=true, variance=false + MVN1Params( + Tensor {{1, 3, 2, 2}, ngraph::element::f32, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3}}, + emptyReductionAxes, + true, + false, + 1e-9, + Tensor {{1, 3, 2, 2}, ngraph::element::f32, std::vector {-3.25, -2.25, -1.25, -0.25, 0.75, 1.75, 2.75, 3.75, 4.75, -3.25, -2.25, -1.25}}), + // across_channels=false, variance=true + MVN1Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}}, + emptyReductionAxes, + false, + true, + 1e-9, + Tensor {{1, 3, 3, 3}, + ngraph::element::f32, + std::vector {-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934, + -1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934, + -1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934}}), + // across_channels=true, variance=true + MVN1Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}}, + emptyReductionAxes, + true, + true, + 1e-9, + Tensor {{1, 3, 3, 3}, + ngraph::element::f32, + std::vector {-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934, + -1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934, + -1.5491934, -1.161895, -0.7745967, 
-0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934}}), + // reductionAxes, variance=false + MVN1Params( + Tensor {{1, 3, 2, 2}, ngraph::element::f32, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3}}, + {1, 2, 3}, + false, + false, + 1e-9, + Tensor {{1, 3, 2, 2}, ngraph::element::f32, std::vector {-3.25, -2.25, -1.25, -0.25, 0.75, 1.75, 2.75, 3.75, 4.75, -3.25, -2.25, -1.25}}), + // reductionAxes, variance=true + MVN1Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}}, + {2, 3}, + false, + true, + 1e-9, + Tensor {{1, 3, 3, 3}, + ngraph::element::f32, + std::vector {-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934, + -1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934, + -1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934}})), + ReferenceMVN1LayerTest::getTestCaseName); + +// ------------------------------ V6 ------------------------------ + +struct MVN6Params { + MVN6Params(const Tensor& paramInput, const Tensor& paramReductionAxes, const bool paramNormalizeVariance, const double paramEps, + const ngraph::op::MVNEpsMode mode, const Tensor& paramExpected) + : input(paramInput), + reductionAxes(paramReductionAxes), + normalizeVariance(paramNormalizeVariance), + eps(paramEps), + epsMode(mode), + expected(paramExpected) {} + Tensor input; + Tensor reductionAxes; + bool normalizeVariance; + double eps; + ngraph::op::MVNEpsMode epsMode; + Tensor expected; +}; + +class ReferenceMVN6LayerTest : public testing::TestWithParam, public CommonReferenceTest { +public: + void SetUp() override { + auto params = GetParam(); + function = CreateFunction(params.input, params.reductionAxes, params.normalizeVariance, params.eps, params.epsMode); + inputData = {params.input.data}; + refOutData = {params.expected.data}; + } + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + auto param = obj.param; + std::ostringstream result; + result << "shape=" << param.input.shape; + result << "_iType=" << param.input.type; + result << "_reductionAccess=" << CommonTestUtils::vec2str(param.reductionAxes.shape); + result << "_normalizeVariance=" << (param.normalizeVariance ? 
"TRUE" : "FALSE"); + result << "_eps=" << param.eps; + result << "_eps_mode=" << param.epsMode; + return result.str(); + } + +private: + static std::shared_ptr CreateFunction(const Tensor& input, const Tensor& reductionAxes, const bool normalizeVariance, const double eps, + const ngraph::op::MVNEpsMode epsMode) { + std::vector dataVector(reductionAxes.shape[0]); + const auto in = std::make_shared(input.type, input.shape); + auto mRef = as(reductionAxes.data); + IE_ASSERT(mRef); + const auto refLockMemory = mRef->rmap(); + const auto refBuffer = refLockMemory.as(); + for (size_t i = 0; i < dataVector.size(); ++i) { + dataVector[i] = refBuffer[i]; + } + const auto axes = std::make_shared(reductionAxes.type, reductionAxes.shape, dataVector); + auto mvn = std::make_shared(in, axes, normalizeVariance, eps, epsMode); + return std::make_shared(NodeVector {mvn}, ParameterVector {in}); + } +}; + +TEST_P(ReferenceMVN6LayerTest, CompareWithHardcodedRefs) { + Exec(); +} + +INSTANTIATE_TEST_SUITE_P( + smoke_MVN6_With_Hardcoded_Refs, ReferenceMVN6LayerTest, + ::testing::Values( + // variance=false, OUTSIDE_SQRT + MVN6Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}}, + Tensor {Shape {2}, ngraph::element::i64, std::vector {2, 3}}, + false, + 1e-9, + ngraph::op::MVNEpsMode::OUTSIDE_SQRT, + Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector {-4, -3, -2, -1, 0, 1, 2, 3, 4, -4, -3, -2, -1, 0, + 1, 2, 3, 4, -4, -3, -2, -1, 0, 1, 2, 3, 4}}), + // variance=true, OUTSIDE_SQRT + MVN6Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}}, + Tensor {Shape {2}, ngraph::element::i64, std::vector {2, 3}}, + true, + 1e-9, + ngraph::op::MVNEpsMode::OUTSIDE_SQRT, + Tensor {{1, 3, 3, 3}, + ngraph::element::f32, + std::vector {-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934, + -1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934, + -1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934}}), + // variance=true, INSIDE_SQRT + MVN6Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}}, + Tensor {Shape {2}, ngraph::element::i64, std::vector {2, 3}}, + true, + 1e-9, + ngraph::op::MVNEpsMode::INSIDE_SQRT, + Tensor {{1, 3, 3, 3}, + ngraph::element::f32, + std::vector {-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934, + -1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934, + -1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934}}), + // variance=true, another reductionAxes, OUTSIDE_SQRT + MVN6Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector({1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9})}, + Tensor {Shape {3}, ngraph::element::i64, std::vector({1, 2, 3})}, + true, + 1e-9, + ngraph::op::MVNEpsMode::OUTSIDE_SQRT, + Tensor {{1, 3, 3, 3}, + ngraph::element::f32, + std::vector {-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934, + -1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934, + -1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 
1.161895, 1.5491934}})), + ReferenceMVN6LayerTest::getTestCaseName); diff --git a/docs/template_plugin/tests/functional/op_reference/roi_pooling.cpp b/docs/template_plugin/tests/functional/op_reference/roi_pooling.cpp new file mode 100644 index 00000000000000..9baedeb3404d5e --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/roi_pooling.cpp @@ -0,0 +1,226 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include + +#include "base_reference_test.hpp" + +using namespace ngraph; +using namespace InferenceEngine; +using namespace reference_tests; + +struct ROIPoolingParams { + template + ROIPoolingParams(const size_t iH, const size_t iW, const size_t ch, const size_t rois, + const size_t oH, const size_t oW, const float sS, const std::string mode, + const ngraph::element::Type& type, const std::vector& inputValues, + const std::vector& proposalValues, const std::vector& outputValues) + : inputH(iH), inputW(iW), channelCount(ch), roiCount(rois), outputH(oH), outputW(oW), spatialScale(sS), + poolingMode(mode), dataType(type), featureMap(CreateBlob(type, inputValues)), + proposal(CreateBlob(type, proposalValues)), refData(CreateBlob(type, outputValues)) {} + size_t inputH; + size_t inputW; + size_t channelCount; + size_t roiCount; + size_t outputH; + size_t outputW; + float spatialScale; + std::string poolingMode; + ngraph::element::Type dataType; + InferenceEngine::Blob::Ptr featureMap; + InferenceEngine::Blob::Ptr proposal; + InferenceEngine::Blob::Ptr refData; + +public: + template + inline static std::vector increasinglyFilledBlob(size_t size) { + std::vector inputValues; + T one = 1; + for (size_t i = 0; i < size; i++) { + inputValues.push_back(one * i / 10); + } + return inputValues; + } + template + inline static std::vector equallyFilledBlob(size_t size, T value) { + std::vector inputValues; + for (size_t i = 0; i < size; i++) { + inputValues.push_back(value); + } + return inputValues; + } +}; + +class ReferenceRoiPoolingLayerTest : public testing::TestWithParam, public CommonReferenceTest { +public: + void SetUp() override { + auto params = GetParam(); + function = CreateFunction(params.inputH, params.inputW, params.channelCount, params.roiCount, + params.outputH, params.outputW, params.spatialScale, params.poolingMode, params.dataType); + inputData = {params.featureMap, params.proposal}; + refOutData = {params.refData}; + } + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + auto param = obj.param; + std::ostringstream result; + result << "IS=" << param.inputH << "," << param.inputW << "_"; + result << "OS=" << param.outputH << "," << param.outputW << "_"; + result << "Ch=" << param.channelCount << "_"; + result << "Rois=" << param.roiCount << "_"; + result << "Ss=" << param.spatialScale << "_"; + result << "Mode=" << param.poolingMode << "_"; + result << "Prec=" << param.dataType << "_"; + result << std::to_string(obj.index); + return result.str(); + } + +private: + static std::shared_ptr CreateFunction(const size_t i_h, const size_t i_w, const size_t ch, const size_t roi_count, + const size_t o_h, const size_t o_w, const float spat_scale, const std::string mode, + const ngraph::element::Type& type) { + Shape feat_map_shape{1, ch, i_h, i_w}; + Shape rois_shape{roi_count, 5}; + Shape pooled_shape{o_h, o_w}; + Shape output_shape{roi_count, ch, o_h, o_w}; + + const auto feat_map = std::make_shared(type, feat_map_shape); + const auto rois = 
std::make_shared(type, rois_shape); + const auto roi_pooling = std::make_shared(feat_map, rois, pooled_shape, spat_scale, mode); + return std::make_shared(roi_pooling, ParameterVector{feat_map, rois}); + } +}; + +TEST_P(ReferenceRoiPoolingLayerTest, CompareWithHardcodedRefs) { + Exec(); +} + +INSTANTIATE_TEST_SUITE_P( + smoke_ROIPooling_With_Hardcoded_Refs, ReferenceRoiPoolingLayerTest, + ::testing::Values( + // fp32 + // roi_pooling_1x1_max + ROIPoolingParams(6, 6, // iH, iW + 3, 3, // channels, rois + 1, 1, // oH, oW + 1.f, "max", // scale, mode + element::f32, ROIPoolingParams::increasinglyFilledBlob(3 * 6 * 6), + std::vector {0, 1, 1, 2, 3, 0, 1, 1, 2, 3, 0, 1, 1, 2, 3}, + std::vector {2.0f, 5.6f, 9.2f, 2.0f, 5.6f, 9.2f, 2.0f, 5.6f, 9.2f}), + // roi_pooling_2x2_max + ROIPoolingParams(6, 6, // iH, iW + 1, 3, // channels, rois + 2, 2, // oH, oW + 1.f, "max", // scale, mode + element::f32, ROIPoolingParams::increasinglyFilledBlob(1 * 6 * 6), + std::vector {0, 1, 1, 3, 3, 0, 1, 2, 2, 4, 0, 0, 1, 4, 5}, + std::vector {1.4f, 1.5f, 2.0f, 2.1f, 1.9f, 2.0f, 2.5f, 2.6f, 2.0f, 2.2f, 3.2f, 3.4f}), + // roi_pooling_1x1_bilinear + ROIPoolingParams(6, 6, // iH, iW + 3, 2, // channels, rois + 1, 1, // oH, oW + 1.f, "bilinear", // scale, mode + element::f32, ROIPoolingParams::increasinglyFilledBlob(3 * 6 * 6), + std::vector {0, 0.2, 0.2, 0.4, 0.4, 0, 0.2, 0.2, 0.6, 0.6}, + std::vector {1.05f, 4.65f, 8.25f, 1.4f, 5.0f, 8.6f}), + // roi_pooling_2x2_bilinear + ROIPoolingParams(8, 8, // iH, iW + 1, 3, // channels, rois + 2, 2, // oH, oW + 1.f, "bilinear", // scale, mode + element::f32, ROIPoolingParams::increasinglyFilledBlob(1 * 8 * 8), + std::vector {0.f, 0.15f, 0.2f, 0.75f, 0.8f, + 0.f, 0.15f, 0.2f, 0.75f, 0.8f, + 0.f, 0.15f, 0.2f, 0.75f, 0.8f}, + std::vector {1.225f, 1.645f, 4.585f, 5.005f, + 1.225f, 1.645f, 4.585f, 5.005f, + 1.225f, 1.645f, 4.585f, 5.005f}), + // roi_pooling_2x2_bilinear_border_proposal + ROIPoolingParams(50, 50, // iH, iW + 1, 1, // channels, rois + 4, 4, // oH, oW + 1.f, "bilinear", // scale, mode + element::f32, ROIPoolingParams::equallyFilledBlob(1 * 50 * 50, 1), + std::vector {0.f, 0.f, 0.248046786f, 0.471333951f, 1.f}, + std::vector(16, 1.f)), + + // bf16 + // roi_pooling_1x1_max + ROIPoolingParams(6, 6, // iH, iW + 3, 3, // channels, rois + 1, 1, // oH, oW + 1.f, "max", // scale, mode + element::bf16, ROIPoolingParams::increasinglyFilledBlob(3 * 6 * 6), + std::vector {0, 1, 1, 2, 3, 0, 1, 1, 2, 3, 0, 1, 1, 2, 3}, + std::vector {2.0f, 5.6f, 9.2f, 2.0f, 5.6f, 9.2f, 2.0f, 5.6f, 9.2f}), + // roi_pooling_2x2_max + ROIPoolingParams(6, 6, // iH, iW + 1, 3, // channels, rois + 2, 2, // oH, oW + 1.f, "max", // scale, mode + element::bf16, ROIPoolingParams::increasinglyFilledBlob(1 * 6 * 6), + std::vector {0, 1, 1, 3, 3, 0, 1, 2, 2, 4, 0, 0, 1, 4, 5}, + std::vector {1.4f, 1.5f, 2.0f, 2.1f, 1.9f, 2.0f, 2.5f, 2.6f, 2.0f, 2.2f, 3.2f, 3.4f}), + // roi_pooling_1x1_bilinear + ROIPoolingParams(6, 6, // iH, iW + 3, 2, // channels, rois + 1, 1, // oH, oW + 1.f, "bilinear", // scale, mode + element::bf16, ROIPoolingParams::increasinglyFilledBlob(3 * 6 * 6), + std::vector {0, 0.2, 0.2, 0.4, 0.4, 0, 0.2, 0.2, 0.6, 0.6}, + std::vector {1.05f, 4.65f, 8.25f, 1.4f, 5.0f, 8.6f}), + // roi_pooling_2x2_bilinear + ROIPoolingParams(8, 8, // iH, iW + 1, 3, // channels, rois + 2, 2, // oH, oW + 1.f, "bilinear", // scale, mode + element::bf16, ROIPoolingParams::increasinglyFilledBlob(1 * 8 * 8), + std::vector {0.f, 0.15f, 0.2f, 0.75f, 0.8f, + 0.f, 0.15f, 0.2f, 0.75f, 0.8f, + 0.f, 0.15f, 0.2f, 0.75f, 0.8f}, + 
std::vector {1.225f, 1.645f, 4.585f, 4.937f, + 1.225f, 1.645f, 4.585f, 4.937f, + 1.225f, 1.645f, 4.585f, 4.937f}), + // fp16 + // roi_pooling_1x1_max + ROIPoolingParams(6, 6, // iH, iW + 3, 3, // channels, rois + 1, 1, // oH, oW + 1.f, "max", // scale, mode + element::f16, ROIPoolingParams::increasinglyFilledBlob(3 * 6 * 6), + std::vector {0, 1, 1, 2, 3, 0, 1, 1, 2, 3, 0, 1, 1, 2, 3}, + std::vector {2.0f, 5.6f, 9.2f, 2.0f, 5.6f, 9.2f, 2.0f, 5.6f, 9.2f}), + // roi_pooling_2x2_max + ROIPoolingParams(6, 6, // iH, iW + 1, 3, // channels, rois + 2, 2, // oH, oW + 1.f, "max", // scale, mode + element::f16, ROIPoolingParams::increasinglyFilledBlob(1 * 6 * 6), + std::vector {0, 1, 1, 3, 3, 0, 1, 2, 2, 4, 0, 0, 1, 4, 5}, + std::vector {1.4f, 1.5f, 2.0f, 2.1f, 1.9f, 2.0f, 2.5f, 2.6f, 2.0f, 2.2f, 3.2f, 3.4f}), + // roi_pooling_1x1_bilinear + ROIPoolingParams(6, 6, // iH, iW + 3, 2, // channels, rois + 1, 1, // oH, oW + 1.f, "bilinear", // scale, mode + element::f16, ROIPoolingParams::increasinglyFilledBlob(3 * 6 * 6), + std::vector {0, 0.2, 0.2, 0.4, 0.4, 0, 0.2, 0.2, 0.6, 0.6}, + std::vector {1.05f, 4.65f, 8.25f, 1.4f, 5.0f, 8.6f}), + // roi_pooling_2x2_bilinear + ROIPoolingParams(8, 8, // iH, iW + 1, 3, // channels, rois + 2, 2, // oH, oW + 1.f, "bilinear", // scale, mode + element::f16, ROIPoolingParams::increasinglyFilledBlob(1 * 8 * 8), + std::vector {0.f, 0.15f, 0.2f, 0.75f, 0.8f, + 0.f, 0.15f, 0.2f, 0.75f, 0.8f, + 0.f, 0.15f, 0.2f, 0.75f, 0.8f}, + std::vector {1.225f, 1.645f, 4.585f, 5.005f, + 1.225f, 1.645f, 4.585f, 5.005f, + 1.225f, 1.645f, 4.585f, 5.005f})), + ReferenceRoiPoolingLayerTest::getTestCaseName); diff --git a/docs/template_plugin/tests/functional/op_reference/select.cpp b/docs/template_plugin/tests/functional/op_reference/select.cpp new file mode 100644 index 00000000000000..0cbc242c61b202 --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/select.cpp @@ -0,0 +1,140 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include + +#include "base_reference_test.hpp" + +using namespace reference_tests; +using namespace ngraph; +using namespace InferenceEngine; + +struct SelectParams { + template + SelectParams(const element::Type& data_type, const op::AutoBroadcastSpec& broadcast, const PartialShape& select_input_pshape, + const std::vector& select_input, const PartialShape& if_input_pshape, const std::vector& if_input, + const PartialShape& else_input_pshape, const std::vector& else_input, const std::vector& expected_output) + : data_type(data_type), + broadcast(broadcast), + select_input_pshape(select_input_pshape), + select_input(CreateBlob(element::boolean, select_input)), + if_input_pshape(if_input_pshape), + if_input(CreateBlob(data_type, if_input)), + else_input_pshape(else_input_pshape), + else_input(CreateBlob(data_type, else_input)), + expected_output(CreateBlob(data_type, expected_output)) {} + + element::Type data_type; + op::AutoBroadcastSpec broadcast; + PartialShape select_input_pshape; + Blob::Ptr select_input; + PartialShape if_input_pshape; + Blob::Ptr if_input; + PartialShape else_input_pshape; + Blob::Ptr else_input; + Blob::Ptr expected_output; +}; + +class ReferenceSelectLayerTest : public testing::TestWithParam, public CommonReferenceTest { +public: + void SetUp() override { + auto params = GetParam(); + function = CreateFunction(params.data_type, params.broadcast, params.select_input_pshape, params.if_input_pshape, params.else_input_pshape); + 
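+        // Input blobs must follow the Parameter order used in CreateFunction: condition, then-branch, else-branch.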
inputData = {params.select_input, params.if_input, params.else_input}; + refOutData = {params.expected_output}; + } + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + auto param = obj.param; + std::ostringstream result; + result << "data_type=" << param.data_type << "_"; + result << "broadcast=" << param.broadcast.m_type << "_"; + result << "select_shape=" << param.select_input_pshape << "_"; + result << "if_shape=" << param.if_input_pshape << "_"; + result << "else_shape=" << param.else_input_pshape; + return result.str(); + } + +private: + static std::shared_ptr CreateFunction(const element::Type& data_type, const op::AutoBroadcastSpec& broadcast, + const PartialShape& select_pshape, const PartialShape& if_pshape, const PartialShape& else_pshape) { + auto A = std::make_shared(element::boolean, select_pshape); + auto B = std::make_shared(data_type, if_pshape); + auto C = std::make_shared(data_type, else_pshape); + return std::make_shared(std::make_shared(A, B, C, broadcast), ParameterVector {A, B, C}); + } +}; + +TEST_P(ReferenceSelectLayerTest, CompareWithHardcodedRefs) { + Exec(); +} + +INSTANTIATE_TEST_SUITE_P(smoke_Select_With_Hardcoded_Refs, ReferenceSelectLayerTest, + ::testing::Values( + // fp32, no brodcasting + SelectParams(element::f32, // if/else/output data type + op::AutoBroadcastType::NONE, // broadcasting type + PartialShape {2, 2, 2}, // select shape + std::vector {0, 1, 1, 0, 0, 1, 0, 1}, // select data + PartialShape {2, 2, 2}, // if shape + std::vector {1, 2, 3, 4, 5, 6, 7, 8}, // if data + PartialShape {2, 2, 2}, // else shape + std::vector {11, 12, 13, 14, 15, 16, 17, 18}, // else data + std::vector {11, 2, 3, 14, 15, 6, 17, 8}), // expected output data + // i32, no brodcasting + SelectParams(element::i32, // if/else/output data type + op::AutoBroadcastType::NONE, // broadcasting type + PartialShape {2, 2, 2}, // select shape + std::vector {0, 1, 1, 0, 0, 1, 0, 1}, // select data + PartialShape {2, 2, 2}, // if shape + std::vector {1, 2, 3, 4, 5, 6, 7, 8}, // if data + PartialShape {2, 2, 2}, // else shape + std::vector {11, 12, 13, 14, 15, 16, 17, 18}, // else data + std::vector {11, 2, 3, 14, 15, 6, 17, 8}), // expected output data + // fp32, numpy brodcasting + SelectParams(element::f32, // if/else/output data type + op::AutoBroadcastType::NUMPY, // broadcasting type + PartialShape {4}, // select shape + std::vector {0, 1, 1, 0}, // select data + PartialShape {4}, // if shape + std::vector {1, 2, 3, 4}, // if data + PartialShape {2, 4}, // else shape + std::vector {11, 12, 13, 14, 15, 16, 17, 18}, // else data + std::vector {11, 2, 3, 14, 15, 2, 3, 18}), // expected output data + // i32, numpy brodcasting + SelectParams(element::i32, // if/else/output data type + op::AutoBroadcastType::NUMPY, // broadcasting type + PartialShape {4}, // select shape + std::vector {0, 1, 1, 0}, // select data + PartialShape {4}, // if shape + std::vector {1, 2, 3, 4}, // if data + PartialShape {2, 4}, // else shape + std::vector {11, 12, 13, 14, 15, 16, 17, 18}, // else data + std::vector {11, 2, 3, 14, 15, 2, 3, 18}), // expected output data + // fp32, pdpd brodcasting + SelectParams(element::f32, // if/else/output data type + {op::AutoBroadcastType::PDPD, -1}, // broadcasting type + PartialShape {2, 4}, // select shape + std::vector {0, 0, 0, 0, 0, 1, 1, 1}, // select data + PartialShape {2, 4}, // if shape + std::vector {1, 2, 3, 4, 5, 6, 7, 8}, // if data + PartialShape {4}, // else shape + std::vector {11, 12, 13, 14}, // else data + std::vector {11, 12, 
13, 14, 11, 6, 7, 8}), // expected output data + // i32, pdpd brodcasting + SelectParams(element::i32, // if/else/output data type + {op::AutoBroadcastType::PDPD, -1}, // broadcasting type + PartialShape {2, 4}, // select shape + std::vector {0, 0, 0, 0, 0, 1, 1, 1}, // select data + PartialShape {2, 4}, // if shape + std::vector {1, 2, 3, 4, 5, 6, 7, 8}, // if data + PartialShape {4}, // else shape + std::vector {11, 12, 13, 14}, // else data + std::vector {11, 12, 13, 14, 11, 6, 7, 8})), // expected output data + ReferenceSelectLayerTest::getTestCaseName); diff --git a/docs/template_plugin/tests/functional/op_reference/sign.cpp b/docs/template_plugin/tests/functional/op_reference/sign.cpp new file mode 100644 index 00000000000000..ca1505cea1368e --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/sign.cpp @@ -0,0 +1,81 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include + +#include "base_reference_test.hpp" + +using namespace reference_tests; +using namespace ngraph; +using namespace InferenceEngine; + +struct SignParams { + template + SignParams(const PartialShape& shape, const element::Type& iType, const element::Type& oType, const std::vector& iValues, + const std::vector& oValues) + : pshape(shape), inType(iType), outType(oType), inputData(CreateBlob(iType, iValues)), refData(CreateBlob(oType, oValues)) {} + PartialShape pshape; + element::Type inType; + element::Type outType; + Blob::Ptr inputData; + Blob::Ptr refData; +}; + +class ReferenceSignLayerTest : public testing::TestWithParam, public CommonReferenceTest { +public: + void SetUp() override { + auto params = GetParam(); + function = CreateFunction(params.pshape, params.inType); + inputData = {params.inputData}; + refOutData = {params.refData}; + } + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + auto param = obj.param; + std::ostringstream result; + result << "shape=" << param.pshape << "_"; + result << "iType=" << param.inType << "_"; + result << "oType=" << param.outType; + return result.str(); + } + +private: + static std::shared_ptr CreateFunction(const PartialShape& input_shape, const element::Type& input_type) { + const auto in = std::make_shared(input_type, input_shape); + const auto sign = std::make_shared(in); + return std::make_shared(NodeVector {sign}, ParameterVector {in}); + } +}; + +TEST_P(ReferenceSignLayerTest, CompareWithHardcodedRefs) { + Exec(); +} + +INSTANTIATE_TEST_SUITE_P( + smoke_Sign_With_Hardcoded_Refs, ReferenceSignLayerTest, + ::testing::Values( + SignParams(PartialShape {6}, element::f32, element::f32, + std::vector {1, -2, 0, -4.8f, 4.8f, -0.0f}, + std::vector {1, -1, 0, -1, 1, 0}), + SignParams(PartialShape {6}, element::f16, element::f16, + std::vector {1, -2, 0, -4.8f, 4.8f, -0.0f}, + std::vector {1, -1, 0, -1, 1, 0}), + SignParams(PartialShape {6}, element::u64, element::u64, + std::vector {1, 2, 0, 4, 4, 0}, + std::vector {1, 1, 0, 1, 1, 0}), + SignParams(PartialShape {6}, element::u32, element::u32, + std::vector {1, 2, 0, 4, 4, 0}, + std::vector {1, 1, 0, 1, 1, 0}), + SignParams(PartialShape {6}, element::i32, element::i32, + std::vector {1, -2, 0, -4, 4, -0}, + std::vector {1, -1, 0, -1, 1, 0}), + SignParams(PartialShape {6}, element::i64, element::i64, + std::vector {1, -2, 0, -4, 4, -0}, + std::vector {1, -1, 0, -1, 1, 0})), + ReferenceSignLayerTest::getTestCaseName); diff --git a/inference-engine/cmake/ie_parallel.cmake 
b/inference-engine/cmake/ie_parallel.cmake index d33a73a5fa760d..eb844d25b76e02 100644 --- a/inference-engine/cmake/ie_parallel.cmake +++ b/inference-engine/cmake/ie_parallel.cmake @@ -29,6 +29,7 @@ function(set_ie_threading_interface_for TARGET_NAME) set(TBB_IMPORTED_TARGETS ${TBB_IMPORTED_TARGETS} PARENT_SCOPE) set(TBB_VERSION ${TBB_VERSION} PARENT_SCOPE) if (NOT TBB_FOUND) + set(THREADING "SEQ" PARENT_SCOPE) ext_message(WARNING "TBB was not found by the configured TBB_DIR/TBBROOT path.\ SEQ method will be used.") endif () @@ -95,6 +96,7 @@ function(set_ie_threading_interface_for TARGET_NAME) set(IE_THREAD_DEFINE "IE_THREAD_TBB") ie_target_link_libraries(${TARGET_NAME} ${LINK_TYPE} ${TBB_IMPORTED_TARGETS}) else () + set(THREADING "SEQ" PARENT_SCOPE) ext_message(WARNING "TBB was not found by the configured TBB_DIR path.\ SEQ method will be used for ${TARGET_NAME}") endif () @@ -133,6 +135,7 @@ function(set_ie_threading_interface_for TARGET_NAME) if (NOT OMP_LIBRARIES_RELEASE) ext_message(WARNING "Intel OpenMP not found. Intel OpenMP support will be disabled. ${IE_THREAD_DEFINE} is defined") + set(THREADING "SEQ" PARENT_SCOPE) else () set(IE_THREAD_DEFINE "IE_THREAD_OMP") diff --git a/inference-engine/cmake/vpu_dependencies.cmake b/inference-engine/cmake/vpu_dependencies.cmake index d134c29171802c..e6ec3799a3ccf8 100644 --- a/inference-engine/cmake/vpu_dependencies.cmake +++ b/inference-engine/cmake/vpu_dependencies.cmake @@ -6,14 +6,14 @@ include_guard(GLOBAL) set(VPU_SUPPORTED_FIRMWARES usb-ma2x8x pcie-ma2x8x) set(VPU_SUPPORTED_FIRMWARES_HASH - "420b300d193f7fcfe7e3f9bbec6c247d65b784a500b5cd2effb7cb1ec6e1b209" - "bfe3caf270b168b9de18ef88f04bde3907d7d12a679f1fa7cc580423c35db637") + "54a732b5fb17a0124652bc5113fa628c718a5af40621bca309471cb5ffd9271b" + "5750b2831c77ef54b8e243d3840c5ed1c9509681d55aee7e369d558cef628735") # # Default packages # -set(FIRMWARE_PACKAGE_VERSION 1688) +set(FIRMWARE_PACKAGE_VERSION 1717) set(VPU_CLC_MA2X8X_VERSION "movi-cltools-20.09.2") # diff --git a/inference-engine/ie_bridges/c/src/CMakeLists.txt b/inference-engine/ie_bridges/c/src/CMakeLists.txt index 69760a52de96a6..a0e1b3469c937a 100644 --- a/inference-engine/ie_bridges/c/src/CMakeLists.txt +++ b/inference-engine/ie_bridges/c/src/CMakeLists.txt @@ -14,7 +14,7 @@ add_library(${TARGET_NAME} SHARED ${HEADERS} ${SOURCES}) target_link_libraries(${TARGET_NAME} PRIVATE inference_engine) target_include_directories(${TARGET_NAME} PUBLIC - $ + $ $) add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME}) @@ -40,5 +40,5 @@ install(TARGETS ${TARGET_NAME} EXPORT InferenceEngineTargets LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core_c) install(DIRECTORY ${InferenceEngine_C_API_SOURCE_DIR}/include/ - DESTINATION ${IE_CPACK_IE_DIR}/include + DESTINATION ${IE_CPACK_IE_DIR}/include/ie COMPONENT core_c_dev) diff --git a/inference-engine/ie_bridges/python/CMakeLists.txt b/inference-engine/ie_bridges/python/CMakeLists.txt index 7b93a4291a2d3a..a88b1017a124f4 100644 --- a/inference-engine/ie_bridges/python/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/CMakeLists.txt @@ -43,12 +43,14 @@ else() endif() if(ENABLE_CONDA_FOLDER) + set(PYTHON_COMPONENT conda_${PYTHON_VERSION}) if(WIN32) set(PYTHON_BRIDGE_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/$/python_api/Conda/${PYTHON_VERSION}/openvino) else() set(PYTHON_BRIDGE_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/python_api/Conda/${PYTHON_VERSION}/openvino) endif() else() + set(PYTHON_COMPONENT ${PYTHON_VERSION}) if(WIN32) 
set(PYTHON_BRIDGE_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/$/python_api/${PYTHON_VERSION}/openvino) else() @@ -56,6 +58,13 @@ else() endif() endif() +function(ov_python_disable_intel_warnings target) + if(UNIX AND CMAKE_CXX_COMPILER_ID STREQUAL "Intel") + # 1292: unknown attribute "fallthrough" + target_compile_options(${target} PRIVATE -diag-disable=1292) + endif() +endfunction() + set (PYTHON_BRIDGE_SRC_ROOT ${CMAKE_CURRENT_SOURCE_DIR}) add_subdirectory (src/openvino/inference_engine) add_subdirectory (src/openvino/offline_transformations) @@ -74,19 +83,19 @@ endif() # install -ie_cpack_add_component(${PYTHON_VERSION}) +ie_cpack_add_component(${PYTHON_COMPONENT}) install(FILES requirements.txt DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION} - COMPONENT ${PYTHON_VERSION}) + COMPONENT ${PYTHON_COMPONENT}) install(FILES requirements.txt DESTINATION ${PYTHON_BRIDGE_CPACK_PATH} - COMPONENT ${PYTHON_VERSION}) + COMPONENT ${PYTHON_COMPONENT}) install(PROGRAMS src/openvino/__init__.py DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino - COMPONENT ${PYTHON_VERSION}) + COMPONENT ${PYTHON_COMPONENT}) # install Python samples @@ -96,4 +105,4 @@ install(DIRECTORY sample/ DESTINATION ${IE_CPACK_IE_DIR}/samples/python COMPONENT python_samples) -ie_cpack(${PYTHON_VERSION} python_samples) +ie_cpack(${PYTHON_COMPONENT} python_samples) diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt b/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt index a236db836d60ae..cfab4f2d907f28 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt @@ -20,13 +20,15 @@ set_source_files_properties(${PYX_SOURCES} PROPERTIES CYTHON_IS_CXX ON) # create target cython_add_module(${TARGET_NAME} ${SOURCES}) -set(INSTALLED_TARGETS ${TARGET_NAME}) +ov_python_disable_intel_warnings(${TARGET_NAME}) +set(INSTALLED_TARGETS ${TARGET_NAME}) list(REMOVE_ITEM PYX_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/ie_api.pyx") foreach(PYX_FILE IN LISTS PYX_SOURCES) get_filename_component(PYX_NAME "${PYX_FILE}" NAME_WE) cython_add_module(${PYX_NAME} ${PYX_FILE}) + ov_python_disable_intel_warnings(${PYX_NAME}) add_dependencies(${TARGET_NAME} ${PYX_NAME}) target_include_directories(${PYX_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}") target_link_libraries(${PYX_NAME} PRIVATE ${InferenceEngine_LIBRARIES}) @@ -70,12 +72,12 @@ add_custom_command(TARGET ${TARGET_NAME} # install install(TARGETS ${INSTALLED_TARGETS} - RUNTIME DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/inference_engine COMPONENT ${PYTHON_VERSION} - LIBRARY DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/inference_engine COMPONENT ${PYTHON_VERSION}) + RUNTIME DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/inference_engine COMPONENT ${PYTHON_COMPONENT} + LIBRARY DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/inference_engine COMPONENT ${PYTHON_COMPONENT}) install(PROGRAMS __init__.py DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/inference_engine - COMPONENT ${PYTHON_VERSION}) + COMPONENT ${PYTHON_COMPONENT}) add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME} EXCLUDE_PATTERNS ".*\\.cxx;.*\\.pxd;.*\\.pyx") diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx 
b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx index 66269fba630679..4a10b37fa2847a 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx @@ -284,7 +284,9 @@ cdef class IECore: # If the parameter is not specified, the default configuration is handled automatically. # @return Instance of IECore class def __cinit__(self, xml_config_file: str = ""): - self.impl = C.IECore(xml_config_file.encode()) + cdef string c_xml_config_file = xml_config_file.encode() + with nogil: + self.impl = C.IECore(c_xml_config_file) ## Get a `namedtuple` object with versions of the plugin specified # @param device_name: Name of the the registered plugin @@ -326,12 +328,15 @@ cdef class IECore: cdef string weights_ cdef string model_ cdef IENetwork net = IENetwork() + cdef size_t bin_size if init_from_buffer: model_ = bytes(model) - net.impl = self.impl.readNetwork(model_, weights, len(weights)) + bin_buffer = weights + bin_size = len(weights) + with nogil: + net.impl = self.impl.readNetwork(model_, bin_buffer, bin_size) else: weights_ = "".encode() - model = os.fspath(model) if not os.path.isfile(model): raise Exception(f"Path to the model {model} doesn't exist or it's a directory") @@ -342,8 +347,8 @@ cdef class IECore: if not os.path.isfile(weights): raise Exception(f"Path to the weights {weights} doesn't exist or it's a directory") weights_ = weights.encode() - - net.impl = self.impl.readNetwork(model_, weights_) + with nogil: + net.impl = self.impl.readNetwork(model_, weights_) return net ## Loads a network that was read from the Intermediate Representation (IR) to the plugin with specified device name @@ -367,16 +372,22 @@ cdef class IECore: cpdef ExecutableNetwork load_network(self, network: [IENetwork, str], str device_name, config=None, int num_requests=1): cdef ExecutableNetwork exec_net = ExecutableNetwork() cdef map[string, string] c_config + cdef string c_device_name + cdef string c_network_path if num_requests < 0: raise ValueError(f"Incorrect number of requests specified: {num_requests}. Expected positive integer number " "or zero for auto detection") if config: c_config = dict_to_c_map(config) exec_net.ie_core_impl = self.impl + c_device_name = device_name.encode() if isinstance(network, str): - exec_net.impl = move(self.impl.loadNetworkFromFile((network).encode(), device_name.encode(), c_config, num_requests)) + c_network_path = network.encode() + with nogil: + exec_net.impl = move(self.impl.loadNetworkFromFile(c_network_path, c_device_name, c_config, num_requests)) else: - exec_net.impl = move(self.impl.loadNetwork((network).impl, device_name.encode(), c_config, num_requests)) + with nogil: + exec_net.impl = move(self.impl.loadNetwork((network).impl, c_device_name, c_config, num_requests)) return exec_net ## Creates an executable network from a previously exported network @@ -534,7 +545,9 @@ cdef class IECore: # If there are more than one device of a specific type, they all are listed followed by a dot and a number. @property def available_devices(self): - cdef vector[string] c_devices = self.impl.getAvailableDevices() + cdef vector[string] c_devices + with nogil: + c_devices = self.impl.getAvailableDevices() return [d.decode() for d in c_devices] ## This structure stores info about pre-processing of network inputs (scale, mean image, ...) 
@@ -897,15 +910,19 @@ cdef class ExecutableNetwork: ## A tuple of `InferRequest` instances @property def requests(self): + cdef size_t c_infer_requests_size + with nogil: + c_infer_requests_size = deref(self.impl).infer_requests.size() if len(self._infer_requests) == 0: - for i in range(deref(self.impl).infer_requests.size()): + for i in range(c_infer_requests_size): infer_request = InferRequest() - infer_request.impl = &(deref(self.impl).infer_requests[i]) + with nogil: + infer_request.impl = &(deref(self.impl).infer_requests[i]) infer_request._inputs_list = list(self.input_info.keys()) infer_request._outputs_list = list(self.outputs.keys()) self._infer_requests.append(infer_request) - if len(self._infer_requests) != deref(self.impl).infer_requests.size(): + if len(self._infer_requests) != c_infer_requests_size: raise Exception("Mismatch of infer requests number!") return self._infer_requests @@ -1022,16 +1039,26 @@ cdef class ExecutableNetwork: # If not specified, `timeout` value is set to -1 by default. # @return Request status code: OK or RESULT_NOT_READY cpdef wait(self, num_requests=None, timeout=None): + cdef int status_code + cdef int64_t c_timeout + cdef int c_num_requests if num_requests is None: num_requests = len(self.requests) + c_num_requests = num_requests if timeout is None: timeout = WaitMode.RESULT_READY - return deref(self.impl).wait( num_requests, timeout) + c_timeout = timeout + with nogil: + status_code = deref(self.impl).wait(c_num_requests, c_timeout) + return status_code ## Get idle request ID # @return Request index cpdef get_idle_request_id(self): - return deref(self.impl).getIdleRequestId() + cdef int request_id + with nogil: + request_id = deref(self.impl).getIdleRequestId() + return request_id ctypedef extern void (*cb_type)(void*, int) with gil @@ -1177,8 +1204,8 @@ cdef class InferRequest: cpdef infer(self, inputs=None): if inputs is not None: self._fill_inputs(inputs) - - deref(self.impl).infer() + with nogil: + deref(self.impl).infer() ## Starts asynchronous inference of the infer request and fill outputs array # @@ -1197,7 +1224,8 @@ cdef class InferRequest: self._fill_inputs(inputs) if self._py_callback_used: self._py_callback_called.clear() - deref(self.impl).infer_async() + with nogil: + deref(self.impl).infer_async() ## Waits for the result to become available. Blocks until specified timeout elapses or the result # becomes available, whichever comes first. @@ -1213,9 +1241,14 @@ cdef class InferRequest: # # Usage example: See `async_infer()` method of the the `InferRequest` class. cpdef wait(self, timeout=None): + cdef int status + cdef int64_t c_timeout + cdef int c_wait_mode if self._py_callback_used: # check request status to avoid blocking for idle requests - status = deref(self.impl).wait(WaitMode.STATUS_ONLY) + c_wait_mode = WaitMode.STATUS_ONLY + with nogil: + status = deref(self.impl).wait(c_wait_mode) if status != StatusCode.RESULT_NOT_READY: return status if not self._py_callback_called.is_set(): @@ -1230,8 +1263,10 @@ cdef class InferRequest: if timeout is None: timeout = WaitMode.RESULT_READY - - return deref(self.impl).wait( timeout) + c_timeout = timeout + with nogil: + status = deref(self.impl).wait(c_timeout) + return status ## Queries performance measures per layer to get feedback of what is the most time consuming layer. 
# @@ -1392,7 +1427,8 @@ cdef class IENetwork: weights_ = weights.encode() self.impl = C.IENetwork(model_, weights_) else: - self.impl = C.IENetwork() + with nogil: + self.impl = C.IENetwork() free(bin_buffer) free(xml_buffer) @@ -1405,7 +1441,9 @@ cdef class IENetwork: ## A dictionary that maps input layer names to InputInfoPtr objects. @property def input_info(self): - cdef map[string, C.InputInfo.Ptr] c_inputs = self.impl.getInputsInfo() + cdef map[string, C.InputInfo.Ptr] c_inputs + with nogil: + c_inputs = self.impl.getInputsInfo() inputs = {} cdef InputInfoPtr input_info_ptr for input in c_inputs: @@ -1438,7 +1476,9 @@ cdef class IENetwork: ## A dictionary that maps output layer names to DataPtr objects @property def outputs(self): - cdef map[string, C.DataPtr] c_outputs = self.impl.getOutputs() + cdef map[string, C.DataPtr] c_outputs + with nogil: + c_outputs = self.impl.getOutputs() outputs = {} cdef DataPtr data_ptr for output in c_outputs: diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd index 6f7fd9180896a9..699af3324529c3 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd @@ -160,21 +160,21 @@ cdef extern from "ie_api_impl.hpp" namespace "InferenceEnginePython": void exportNetwork(const string & model_file) except + object getMetric(const string & metric_name) except + object getConfig(const string & metric_name) except + - int wait(int num_requests, int64_t timeout) - int getIdleRequestId() + int wait(int num_requests, int64_t timeout) nogil + int getIdleRequestId() nogil shared_ptr[CExecutableNetwork] getPluginLink() except + cdef cppclass IENetwork: - IENetwork() except + + IENetwork() nogil except + IENetwork(object) except + IENetwork(const string &, const string &) except + string name size_t batch_size string precision map[string, vector[size_t]] inputs - const map[string, InputInfo.Ptr] getInputsInfo() except + + const map[string, InputInfo.Ptr] getInputsInfo() nogil except + const map[string, DataPtr] getInputs() except + - map[string, DataPtr] getOutputs() except + + map[string, DataPtr] getOutputs() nogil except + void addOutput(string &, size_t) except + void setAffinity(map[string, string] & types_affinity_map, map[string, string] & layers_affinity_map) except + void setBatch(size_t size) except + @@ -195,23 +195,23 @@ cdef extern from "ie_api_impl.hpp" namespace "InferenceEnginePython": void setBlob(const string &blob_name, const CBlob.Ptr &blob_ptr, CPreProcessInfo& info) except + const CPreProcessInfo& getPreProcess(const string& blob_name) except + map[string, ProfileInfo] getPerformanceCounts() except + - void infer() except + - void infer_async() except + - int wait(int64_t timeout) except + + void infer() nogil except + + void infer_async() nogil except + + int wait(int64_t timeout) nogil except + void setBatch(int size) except + void setCyCallback(void (*)(void*, int), void *) except + vector[CVariableState] queryState() except + cdef cppclass IECore: - IECore() except + - IECore(const string & xml_config_file) except + + IECore() nogil except + + IECore(const string & xml_config_file) nogil except + map[string, Version] getVersions(const string & deviceName) except + - IENetwork readNetwork(const string& modelPath, const string& binPath) except + - IENetwork readNetwork(const string& 
modelPath,uint8_t*bin, size_t bin_size) except + + IENetwork readNetwork(const string& modelPath, const string& binPath) nogil except + + IENetwork readNetwork(const string& modelPath,uint8_t*bin, size_t bin_size) nogil except + unique_ptr[IEExecNetwork] loadNetwork(IENetwork network, const string deviceName, - const map[string, string] & config, int num_requests) except + + const map[string, string] & config, int num_requests) nogil except + unique_ptr[IEExecNetwork] loadNetworkFromFile(const string & modelPath, const string & deviceName, - const map[string, string] & config, int num_requests) except + + const map[string, string] & config, int num_requests) nogil except + unique_ptr[IEExecNetwork] importNetwork(const string & modelFIle, const string & deviceName, const map[string, string] & config, int num_requests) except + map[string, string] queryNetwork(IENetwork network, const string deviceName, @@ -221,7 +221,7 @@ cdef extern from "ie_api_impl.hpp" namespace "InferenceEnginePython": void unregisterPlugin(const string & deviceName) except + void registerPlugins(const string & xmlConfigFile) except + void addExtension(const string & ext_lib_path, const string & deviceName) except + - vector[string] getAvailableDevices() except + + vector[string] getAvailableDevices() nogil except + object getMetric(const string & deviceName, const string & name) except + object getConfig(const string & deviceName, const string & name) except + diff --git a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/CMakeLists.txt b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/CMakeLists.txt index 27c9e7bf898257..512b1662be525c 100644 --- a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/CMakeLists.txt @@ -20,7 +20,9 @@ set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/offline_transformations_ # create target cython_add_module(${TARGET_NAME} ${SOURCES}) + add_dependencies(${TARGET_NAME} ie_api) +ov_python_disable_intel_warnings(${TARGET_NAME}) if(COMMAND ie_add_vs_version_file) ie_add_vs_version_file(NAME ${TARGET_NAME} @@ -54,12 +56,12 @@ add_custom_command(TARGET ${TARGET_NAME} # install # TODO: use ${PYTHON_VERSION}_dev component below -# ie_cpack_add_component(${PYTHON_VERSION}_dev DEPENDS ${PYTHON_VERSION}) +# ie_cpack_add_component(${PYTHON_VERSION}_dev DEPENDS ${PYTHON_COMPONENT}) install(TARGETS ${TARGET_NAME} - RUNTIME DESTINATION python/${PYTHON_VERSION}/openvino/offline_transformations COMPONENT ${PYTHON_VERSION} - LIBRARY DESTINATION python/${PYTHON_VERSION}/openvino/offline_transformations COMPONENT ${PYTHON_VERSION}) + RUNTIME DESTINATION python/${PYTHON_VERSION}/openvino/offline_transformations COMPONENT ${PYTHON_COMPONENT} + LIBRARY DESTINATION python/${PYTHON_VERSION}/openvino/offline_transformations COMPONENT ${PYTHON_COMPONENT}) install(PROGRAMS __init__.py DESTINATION python/${PYTHON_VERSION}/openvino/offline_transformations - COMPONENT ${PYTHON_VERSION}) + COMPONENT ${PYTHON_COMPONENT}) diff --git a/inference-engine/ie_bridges/python/src/openvino/test_utils/CMakeLists.txt b/inference-engine/ie_bridges/python/src/openvino/test_utils/CMakeLists.txt index 8367f941d9f793..9d3e1e0ffc082d 100644 --- a/inference-engine/ie_bridges/python/src/openvino/test_utils/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/src/openvino/test_utils/CMakeLists.txt @@ -20,7 +20,9 @@ 
set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/test_utils_api.pyx # create target cython_add_module(${TARGET_NAME} ${SOURCES}) + add_dependencies(${TARGET_NAME} ie_api) +ov_python_disable_intel_warnings(${TARGET_NAME}) if(COMMAND ie_add_vs_version_file) ie_add_vs_version_file(NAME ${TARGET_NAME} diff --git a/inference-engine/ie_bridges/python/tests/conftest.py b/inference-engine/ie_bridges/python/tests/conftest.py index fd327147c33238..e697b58ee63daf 100644 --- a/inference-engine/ie_bridges/python/tests/conftest.py +++ b/inference-engine/ie_bridges/python/tests/conftest.py @@ -21,11 +21,6 @@ def model_onnx_path(): test_onnx = os.path.join(path_to_repo, "models", "test_model", 'test_model.onnx') return test_onnx -def model_prototxt_path(): - path_to_repo = os.environ["MODELS_PATH"] - test_prototxt = os.path.join(path_to_repo, "models", "test_model", 'test_model.prototxt') - return test_prototxt - def image_path(): path_to_repo = os.environ["DATA_PATH"] path_to_img = os.path.join(path_to_repo, 'validation_set', '224x224', 'dog.bmp') diff --git a/inference-engine/ie_bridges/python/tests/test_IECore.py b/inference-engine/ie_bridges/python/tests/test_IECore.py index 41d28f1c41b1f6..ed15b12d9b9df3 100644 --- a/inference-engine/ie_bridges/python/tests/test_IECore.py +++ b/inference-engine/ie_bridges/python/tests/test_IECore.py @@ -5,14 +5,16 @@ import pytest from sys import platform from pathlib import Path +from threading import Thread +from time import sleep, time +from queue import Queue from openvino.inference_engine import IENetwork, IECore, ExecutableNetwork -from conftest import model_path, plugins_path, model_onnx_path, model_prototxt_path +from conftest import model_path, plugins_path, model_onnx_path test_net_xml, test_net_bin = model_path() test_net_onnx = model_onnx_path() -test_net_prototxt = model_prototxt_path() plugins_xml, plugins_win_xml, plugins_osx_xml = plugins_path() @@ -201,18 +203,6 @@ def test_read_network_from_onnx_as_path(): assert isinstance(net, IENetwork) -def test_read_network_from_prototxt(): - ie = IECore() - net = ie.read_network(model=test_net_prototxt) - assert isinstance(net, IENetwork) - - -def test_read_network_from_prototxt_as_path(): - ie = IECore() - net = ie.read_network(model=Path(test_net_prototxt)) - assert isinstance(net, IENetwork) - - def test_incorrect_xml(): ie = IECore() with pytest.raises(Exception) as e: @@ -253,3 +243,37 @@ def test_net_from_buffer_valid(): o_net2 = ref_net.outputs assert ii_net.keys() == ii_net2.keys() assert o_net.keys() == o_net2.keys() + + +@pytest.mark.skipif(os.environ.get("TEST_DEVICE","CPU") != "GPU", reason=f"Device dependent test") +def test_load_network_release_gil(device): + running = True + message_queue = Queue() + def detect_long_gil_holds(): + sleep_time = 0.01 + latency_alert_threshold = 0.1 + # Send a message to indicate the thread is running and ready to detect GIL locks + message_queue.put("ready to detect") + while running: + start_sleep = time() + sleep(sleep_time) + elapsed = time() - start_sleep + if elapsed > latency_alert_threshold: + # Send a message to the testing thread that a long GIL lock occurred + message_queue.put(latency_alert_threshold) + ie = IECore() + net = ie.read_network(model=test_net_xml, weights=test_net_bin) + # Wait for the GIL lock detector to be up and running + gil_hold_detection_thread = Thread(daemon=True, target=detect_long_gil_holds) + gil_hold_detection_thread.start() + # Wait to make sure the thread is started and checking for GIL holds + sleep(0.1) + assert 
message_queue.get(timeout=5) == "ready to detect" + # Run the function that should unlock the GIL + exec_net = ie.load_network(net, device) + # Ensure resources are closed + running = False + gil_hold_detection_thread.join(timeout=5) + # Assert there were never any long gil locks + assert message_queue.qsize() == 0, \ + f"More than 0 GIL locks occured! Latency: {message_queue.get()})" diff --git a/inference-engine/ie_bridges/python/wheel/CMakeLists.txt b/inference-engine/ie_bridges/python/wheel/CMakeLists.txt index 681954f2766d77..1b1931c08a493d 100644 --- a/inference-engine/ie_bridges/python/wheel/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/wheel/CMakeLists.txt @@ -56,13 +56,13 @@ endif() add_custom_command(TARGET ie_wheel PRE_BUILD - COMMAND ${CMAKE_COMMAND} -E rm -rf "${CMAKE_CURRENT_BINARY_DIR}/site-packages" + COMMAND ${CMAKE_COMMAND} -E remove_directory "${CMAKE_CURRENT_BINARY_DIR}/site-packages" COMMAND ${PYTHON_EXECUTABLE} ${SETUP_PY} clean bdist_wheel --dist-dir ${CMAKE_BINARY_DIR}/wheels --build=${WHEEL_BUILD} --plat-name=${WHEEL_PLATFORM} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E rm "${CMAKE_CURRENT_SOURCE_DIR}/.env" + COMMAND ${CMAKE_COMMAND} -E remove_directory "${CMAKE_CURRENT_SOURCE_DIR}/.env" WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" COMMENT "Building Python wheel ${WHEEL_PACKAGE_NAME}" VERBATIM diff --git a/inference-engine/include/cldnn/cldnn_config.hpp b/inference-engine/include/ie/cldnn/cldnn_config.hpp similarity index 100% rename from inference-engine/include/cldnn/cldnn_config.hpp rename to inference-engine/include/ie/cldnn/cldnn_config.hpp diff --git a/inference-engine/include/cpp/ie_cnn_network.h b/inference-engine/include/ie/cpp/ie_cnn_network.h similarity index 99% rename from inference-engine/include/cpp/ie_cnn_network.h rename to inference-engine/include/ie/cpp/ie_cnn_network.h index 1fe5d2173f2267..ef73b67e504e57 100644 --- a/inference-engine/include/cpp/ie_cnn_network.h +++ b/inference-engine/include/ie/cpp/ie_cnn_network.h @@ -20,12 +20,7 @@ #include "ie_common.h" #include "ie_data.h" #include "ie_extension.h" - -namespace ngraph { - -class Function; - -} // namespace ngraph +#include namespace InferenceEngine { diff --git a/inference-engine/include/cpp/ie_executable_network.hpp b/inference-engine/include/ie/cpp/ie_executable_network.hpp similarity index 100% rename from inference-engine/include/cpp/ie_executable_network.hpp rename to inference-engine/include/ie/cpp/ie_executable_network.hpp diff --git a/inference-engine/include/cpp/ie_infer_request.hpp b/inference-engine/include/ie/cpp/ie_infer_request.hpp similarity index 100% rename from inference-engine/include/cpp/ie_infer_request.hpp rename to inference-engine/include/ie/cpp/ie_infer_request.hpp diff --git a/inference-engine/include/cpp/ie_memory_state.hpp b/inference-engine/include/ie/cpp/ie_memory_state.hpp similarity index 100% rename from inference-engine/include/cpp/ie_memory_state.hpp rename to inference-engine/include/ie/cpp/ie_memory_state.hpp diff --git a/inference-engine/include/details/ie_blob_iterator.hpp b/inference-engine/include/ie/details/ie_blob_iterator.hpp similarity index 100% rename from inference-engine/include/details/ie_blob_iterator.hpp rename to inference-engine/include/ie/details/ie_blob_iterator.hpp diff --git a/inference-engine/include/details/ie_exception.hpp b/inference-engine/include/ie/details/ie_exception.hpp similarity index 100% rename from inference-engine/include/details/ie_exception.hpp rename to 
inference-engine/include/ie/details/ie_exception.hpp diff --git a/inference-engine/include/details/ie_pre_allocator.hpp b/inference-engine/include/ie/details/ie_pre_allocator.hpp similarity index 100% rename from inference-engine/include/details/ie_pre_allocator.hpp rename to inference-engine/include/ie/details/ie_pre_allocator.hpp diff --git a/inference-engine/include/details/ie_so_loader.h b/inference-engine/include/ie/details/ie_so_loader.h similarity index 100% rename from inference-engine/include/details/ie_so_loader.h rename to inference-engine/include/ie/details/ie_so_loader.h diff --git a/inference-engine/include/details/ie_so_pointer.hpp b/inference-engine/include/ie/details/ie_so_pointer.hpp similarity index 100% rename from inference-engine/include/details/ie_so_pointer.hpp rename to inference-engine/include/ie/details/ie_so_pointer.hpp diff --git a/inference-engine/include/gna/gna_config.hpp b/inference-engine/include/ie/gna/gna_config.hpp similarity index 100% rename from inference-engine/include/gna/gna_config.hpp rename to inference-engine/include/ie/gna/gna_config.hpp diff --git a/inference-engine/include/gpu/details/gpu_context_helpers.hpp b/inference-engine/include/ie/gpu/details/gpu_context_helpers.hpp similarity index 100% rename from inference-engine/include/gpu/details/gpu_context_helpers.hpp rename to inference-engine/include/ie/gpu/details/gpu_context_helpers.hpp diff --git a/inference-engine/include/gpu/gpu_config.hpp b/inference-engine/include/ie/gpu/gpu_config.hpp similarity index 100% rename from inference-engine/include/gpu/gpu_config.hpp rename to inference-engine/include/ie/gpu/gpu_config.hpp diff --git a/inference-engine/include/gpu/gpu_context_api_dx.hpp b/inference-engine/include/ie/gpu/gpu_context_api_dx.hpp similarity index 100% rename from inference-engine/include/gpu/gpu_context_api_dx.hpp rename to inference-engine/include/ie/gpu/gpu_context_api_dx.hpp diff --git a/inference-engine/include/gpu/gpu_context_api_ocl.hpp b/inference-engine/include/ie/gpu/gpu_context_api_ocl.hpp similarity index 100% rename from inference-engine/include/gpu/gpu_context_api_ocl.hpp rename to inference-engine/include/ie/gpu/gpu_context_api_ocl.hpp diff --git a/inference-engine/include/gpu/gpu_context_api_va.hpp b/inference-engine/include/ie/gpu/gpu_context_api_va.hpp similarity index 100% rename from inference-engine/include/gpu/gpu_context_api_va.hpp rename to inference-engine/include/ie/gpu/gpu_context_api_va.hpp diff --git a/inference-engine/include/gpu/gpu_ocl_wrapper.hpp b/inference-engine/include/ie/gpu/gpu_ocl_wrapper.hpp similarity index 97% rename from inference-engine/include/gpu/gpu_ocl_wrapper.hpp rename to inference-engine/include/ie/gpu/gpu_ocl_wrapper.hpp index 85ca2521a76346..496f0974ad51e1 100644 --- a/inference-engine/include/gpu/gpu_ocl_wrapper.hpp +++ b/inference-engine/include/ie/gpu/gpu_ocl_wrapper.hpp @@ -39,7 +39,7 @@ # pragma GCC system_header #endif -#include +#include #ifdef __GNUC__ # pragma GCC diagnostic pop diff --git a/inference-engine/include/gpu/gpu_params.hpp b/inference-engine/include/ie/gpu/gpu_params.hpp similarity index 100% rename from inference-engine/include/gpu/gpu_params.hpp rename to inference-engine/include/ie/gpu/gpu_params.hpp diff --git a/inference-engine/include/hetero/hetero_plugin_config.hpp b/inference-engine/include/ie/hetero/hetero_plugin_config.hpp similarity index 100% rename from inference-engine/include/hetero/hetero_plugin_config.hpp rename to inference-engine/include/ie/hetero/hetero_plugin_config.hpp diff --git 
a/inference-engine/include/ie_allocator.hpp b/inference-engine/include/ie/ie_allocator.hpp similarity index 100% rename from inference-engine/include/ie_allocator.hpp rename to inference-engine/include/ie/ie_allocator.hpp diff --git a/inference-engine/include/ie_api.h b/inference-engine/include/ie/ie_api.h similarity index 100% rename from inference-engine/include/ie_api.h rename to inference-engine/include/ie/ie_api.h diff --git a/inference-engine/include/ie_blob.h b/inference-engine/include/ie/ie_blob.h similarity index 100% rename from inference-engine/include/ie_blob.h rename to inference-engine/include/ie/ie_blob.h diff --git a/inference-engine/include/ie_common.h b/inference-engine/include/ie/ie_common.h similarity index 100% rename from inference-engine/include/ie_common.h rename to inference-engine/include/ie/ie_common.h diff --git a/inference-engine/include/ie_compound_blob.h b/inference-engine/include/ie/ie_compound_blob.h similarity index 100% rename from inference-engine/include/ie_compound_blob.h rename to inference-engine/include/ie/ie_compound_blob.h diff --git a/inference-engine/include/ie_core.hpp b/inference-engine/include/ie/ie_core.hpp similarity index 100% rename from inference-engine/include/ie_core.hpp rename to inference-engine/include/ie/ie_core.hpp diff --git a/inference-engine/include/ie_data.h b/inference-engine/include/ie/ie_data.h similarity index 100% rename from inference-engine/include/ie_data.h rename to inference-engine/include/ie/ie_data.h diff --git a/inference-engine/include/ie_extension.h b/inference-engine/include/ie/ie_extension.h similarity index 99% rename from inference-engine/include/ie_extension.h rename to inference-engine/include/ie/ie_extension.h index 8014d658d804c7..97184fd5ba4b25 100644 --- a/inference-engine/include/ie_extension.h +++ b/inference-engine/include/ie/ie_extension.h @@ -14,6 +14,7 @@ #include #include +#include #include "ie_iextension.h" #include "details/ie_so_pointer.hpp" diff --git a/inference-engine/include/ie_icnn_network.hpp b/inference-engine/include/ie/ie_icnn_network.hpp similarity index 99% rename from inference-engine/include/ie_icnn_network.hpp rename to inference-engine/include/ie/ie_icnn_network.hpp index ec640691eccb91..62ef93824eeeea 100644 --- a/inference-engine/include/ie_icnn_network.hpp +++ b/inference-engine/include/ie/ie_icnn_network.hpp @@ -18,11 +18,7 @@ #include "ie_data.h" #include "ie_input_info.hpp" -namespace ngraph { - -class Function; - -} // namespace ngraph +#include namespace InferenceEngine { diff --git a/inference-engine/include/ie_iexecutable_network.hpp b/inference-engine/include/ie/ie_iexecutable_network.hpp similarity index 100% rename from inference-engine/include/ie_iexecutable_network.hpp rename to inference-engine/include/ie/ie_iexecutable_network.hpp diff --git a/inference-engine/include/ie_iextension.h b/inference-engine/include/ie/ie_iextension.h similarity index 98% rename from inference-engine/include/ie_iextension.h rename to inference-engine/include/ie/ie_iextension.h index d001b999081928..be327c1537648c 100644 --- a/inference-engine/include/ie_iextension.h +++ b/inference-engine/include/ie/ie_iextension.h @@ -19,6 +19,7 @@ #include "ie_layouts.h" #include "ie_blob.h" #include "ie_version.hpp" +#include /** * @def INFERENCE_EXTENSION_API(TYPE) @@ -30,13 +31,6 @@ #define INFERENCE_EXTENSION_API(TYPE) INFERENCE_ENGINE_API(TYPE) #endif -namespace ngraph { - -class OpSet; -class Node; - -} // namespace ngraph - namespace InferenceEngine { /** diff --git 
a/inference-engine/include/ie_iinfer_request.hpp b/inference-engine/include/ie/ie_iinfer_request.hpp similarity index 100% rename from inference-engine/include/ie_iinfer_request.hpp rename to inference-engine/include/ie/ie_iinfer_request.hpp diff --git a/inference-engine/include/ie_input_info.hpp b/inference-engine/include/ie/ie_input_info.hpp similarity index 100% rename from inference-engine/include/ie_input_info.hpp rename to inference-engine/include/ie/ie_input_info.hpp diff --git a/inference-engine/include/ie_layouts.h b/inference-engine/include/ie/ie_layouts.h similarity index 97% rename from inference-engine/include/ie_layouts.h rename to inference-engine/include/ie/ie_layouts.h index 31c42e1d02ad87..42fe8fbca2cc94 100644 --- a/inference-engine/include/ie_layouts.h +++ b/inference-engine/include/ie/ie_layouts.h @@ -304,6 +304,14 @@ class INFERENCE_ENGINE_API_CLASS(TensorDesc) { */ static Layout getLayoutByDims(const SizeVector& dims); + /** + * @brief Returns the standard layout for the specified tensor rank + * + * @param rank of the requested layout + * @return the standard memory layout + */ + static Layout getLayoutByRank(size_t rank); + private: /** * Memory layout diff --git a/inference-engine/include/ie_locked_memory.hpp b/inference-engine/include/ie/ie_locked_memory.hpp similarity index 100% rename from inference-engine/include/ie_locked_memory.hpp rename to inference-engine/include/ie/ie_locked_memory.hpp diff --git a/inference-engine/include/ie_parallel.hpp b/inference-engine/include/ie/ie_parallel.hpp similarity index 100% rename from inference-engine/include/ie_parallel.hpp rename to inference-engine/include/ie/ie_parallel.hpp diff --git a/inference-engine/include/ie_parameter.hpp b/inference-engine/include/ie/ie_parameter.hpp similarity index 99% rename from inference-engine/include/ie_parameter.hpp rename to inference-engine/include/ie/ie_parameter.hpp index 4aa6760d474874..425673f45b00b1 100644 --- a/inference-engine/include/ie_parameter.hpp +++ b/inference-engine/include/ie/ie_parameter.hpp @@ -21,12 +21,6 @@ #include "ie_blob.h" -namespace ngraph { - -class Variant; - -} // namespace ngraph - namespace InferenceEngine { /** diff --git a/inference-engine/include/ie_plugin_config.hpp b/inference-engine/include/ie/ie_plugin_config.hpp similarity index 100% rename from inference-engine/include/ie_plugin_config.hpp rename to inference-engine/include/ie/ie_plugin_config.hpp diff --git a/inference-engine/include/ie_precision.hpp b/inference-engine/include/ie/ie_precision.hpp similarity index 100% rename from inference-engine/include/ie_precision.hpp rename to inference-engine/include/ie/ie_precision.hpp diff --git a/inference-engine/include/ie_preprocess.hpp b/inference-engine/include/ie/ie_preprocess.hpp similarity index 100% rename from inference-engine/include/ie_preprocess.hpp rename to inference-engine/include/ie/ie_preprocess.hpp diff --git a/inference-engine/include/ie_remote_context.hpp b/inference-engine/include/ie/ie_remote_context.hpp similarity index 100% rename from inference-engine/include/ie_remote_context.hpp rename to inference-engine/include/ie/ie_remote_context.hpp diff --git a/inference-engine/include/ie_transformations.hpp b/inference-engine/include/ie/ie_transformations.hpp similarity index 100% rename from inference-engine/include/ie_transformations.hpp rename to inference-engine/include/ie/ie_transformations.hpp diff --git a/inference-engine/include/ie_version.hpp b/inference-engine/include/ie/ie_version.hpp similarity index 100% rename from 
inference-engine/include/ie_version.hpp rename to inference-engine/include/ie/ie_version.hpp diff --git a/inference-engine/include/inference_engine.hpp b/inference-engine/include/ie/inference_engine.hpp similarity index 100% rename from inference-engine/include/inference_engine.hpp rename to inference-engine/include/ie/inference_engine.hpp diff --git a/inference-engine/include/multi-device/multi_device_config.hpp b/inference-engine/include/ie/multi-device/multi_device_config.hpp similarity index 100% rename from inference-engine/include/multi-device/multi_device_config.hpp rename to inference-engine/include/ie/multi-device/multi_device_config.hpp diff --git a/inference-engine/include/vpu/hddl_config.hpp b/inference-engine/include/ie/vpu/hddl_config.hpp similarity index 100% rename from inference-engine/include/vpu/hddl_config.hpp rename to inference-engine/include/ie/vpu/hddl_config.hpp diff --git a/inference-engine/include/vpu/hddl_plugin_config.hpp b/inference-engine/include/ie/vpu/hddl_plugin_config.hpp similarity index 100% rename from inference-engine/include/vpu/hddl_plugin_config.hpp rename to inference-engine/include/ie/vpu/hddl_plugin_config.hpp diff --git a/inference-engine/include/vpu/myriad_config.hpp b/inference-engine/include/ie/vpu/myriad_config.hpp similarity index 100% rename from inference-engine/include/vpu/myriad_config.hpp rename to inference-engine/include/ie/vpu/myriad_config.hpp diff --git a/inference-engine/include/vpu/myriad_plugin_config.hpp b/inference-engine/include/ie/vpu/myriad_plugin_config.hpp similarity index 100% rename from inference-engine/include/vpu/myriad_plugin_config.hpp rename to inference-engine/include/ie/vpu/myriad_plugin_config.hpp diff --git a/inference-engine/include/vpu/vpu_config.hpp b/inference-engine/include/ie/vpu/vpu_config.hpp similarity index 100% rename from inference-engine/include/vpu/vpu_config.hpp rename to inference-engine/include/ie/vpu/vpu_config.hpp diff --git a/inference-engine/include/vpu/vpu_plugin_config.hpp b/inference-engine/include/ie/vpu/vpu_plugin_config.hpp similarity index 100% rename from inference-engine/include/vpu/vpu_plugin_config.hpp rename to inference-engine/include/ie/vpu/vpu_plugin_config.hpp diff --git a/inference-engine/samples/CMakeLists.txt b/inference-engine/samples/CMakeLists.txt index aef11e16f47bf8..7036e778fcf8f3 100644 --- a/inference-engine/samples/CMakeLists.txt +++ b/inference-engine/samples/CMakeLists.txt @@ -56,35 +56,30 @@ set (CMAKE_RUNTIME_OUTPUT_DIRECTORY ${IE_MAIN_SAMPLES_DIR}/${BIN_FOLDER}) if (WIN32) set_property (DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS _CRT_SECURE_NO_WARNINGS) - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_SCL_SECURE_NO_WARNINGS -DNOMINMAX") - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc") #no asynchronous structured exception handling + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_SCL_SECURE_NO_WARNINGS") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc") # no asynchronous structured exception handling set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /LARGEADDRESSAWARE") if (TREAT_WARNING_AS_ERROR) - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /WX") #treating warnings as errors + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /WX") # treating warnings as errors endif () if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qdiag-disable:177") endif() + # disable some noisy warnings if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4251 /wd4275 /wd4267 /wd4819") #disable some warnings + set 
(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4251 /wd4275 /wd4267 /wd4819") endif() else() + # treating warnings as errors if(TREAT_WARNING_AS_ERROR) - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror") #treating warnings as errors + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror") endif() - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") - set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall") - if (APPLE) - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=unused-command-line-argument") - elseif(UNIX) - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wuninitialized -Winit-self") - if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wmaybe-uninitialized") - endif() + if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -diag-disable:177") endif() endif() @@ -104,9 +99,6 @@ if(NOT DEFINED CMAKE_CXX_STANDARD) set (CMAKE_CXX_STANDARD 11) set (CMAKE_CXX_EXTENSIONS OFF) set (CMAKE_CXX_STANDARD_REQUIRED ON) - if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set (CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS}") - endif() endif() #################################### @@ -135,10 +127,6 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/cnpy") add_subdirectory(thirdparty/cnpy EXCLUDE_FROM_ALL) endif() -if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") -endif() - if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/common/utils") add_subdirectory(common/utils) endif() diff --git a/inference-engine/samples/benchmark_app/README.md b/inference-engine/samples/benchmark_app/README.md index 7c61bc570d518e..2d5076a60c613c 100644 --- a/inference-engine/samples/benchmark_app/README.md +++ b/inference-engine/samples/benchmark_app/README.md @@ -95,6 +95,7 @@ Options: -layout Optional. Prompts how network layouts should be treated by application. For example, "input1[NCHW],input2[NC]" or "[NCHW]" in case of one input size. -cache_dir "" Optional. Enables caching of loaded models to specified directory. -load_from_file Optional. Loads model from file directly without ReadNetwork. + -latency_percentile Optional. Defines the percentile to be reported in latency metric. The valid range is [1, 100]. The default value is 50 (median). CPU-specific performance options: -nstreams "" Optional. Number of streams to use for inference on the CPU, GPU or MYRIAD devices diff --git a/inference-engine/samples/benchmark_app/benchmark_app.hpp b/inference-engine/samples/benchmark_app/benchmark_app.hpp index af18c908e31b96..a369c2f1055ce1 100644 --- a/inference-engine/samples/benchmark_app/benchmark_app.hpp +++ b/inference-engine/samples/benchmark_app/benchmark_app.hpp @@ -56,6 +56,10 @@ static const char infer_num_streams_message[] = "Optional. Number of streams to "Also, using nstreams>1 is inherently throughput-oriented option, " "while for the best-latency estimations the number of streams should be set to 1."; +/// @brief message for latency percentile settings +static const char infer_latency_percentile_message[] = + "Optional. Defines the percentile to be reported in latency metric. The valid range is [1, 100]. The default value is 50 (median)."; + /// @brief message for enforcing of BF16 execution where it is possible static const char enforce_bf16_message[] = "Optional. 
By default floating point operations execution in bfloat16 precision are enforced " "if supported by platform.\n" @@ -189,6 +193,9 @@ DEFINE_uint32(nthreads, 0, infer_num_threads_message); /// @brief Number of streams to use for inference on the CPU (also affects Hetero cases) DEFINE_string(nstreams, "", infer_num_streams_message); +/// @brief The percentile which will be reported in latency metric +DEFINE_uint32(latency_percentile, 50, infer_latency_percentile_message); + /// @brief Enforces bf16 execution with bfloat16 precision on systems having this capability DEFINE_bool(enforcebf16, false, enforce_bf16_message); @@ -278,6 +285,7 @@ static void showUsage() { std::cout << " -layout " << layout_message << std::endl; std::cout << " -cache_dir \"\" " << cache_dir_message << std::endl; std::cout << " -load_from_file " << load_from_file_message << std::endl; + std::cout << " -latency_percentile " << infer_latency_percentile_message << std::endl; std::cout << std::endl << " device-specific performance options:" << std::endl; std::cout << " -nstreams \"\" " << infer_num_streams_message << std::endl; std::cout << " -nthreads \"\" " << infer_num_threads_message << std::endl; diff --git a/inference-engine/samples/benchmark_app/main.cpp b/inference-engine/samples/benchmark_app/main.cpp index 2a5252ba443a85..8df3bc2f8e400b 100644 --- a/inference-engine/samples/benchmark_app/main.cpp +++ b/inference-engine/samples/benchmark_app/main.cpp @@ -52,6 +52,10 @@ bool ParseAndCheckCommandLine(int argc, char* argv[]) { throw std::logic_error("Model is required but not set. Please set -m option."); } + if (FLAGS_latency_percentile > 100 || FLAGS_latency_percentile < 1) { + showUsage(); + throw std::logic_error("The percentile value is incorrect. The applicable values range is [1, 100]."); + } if (FLAGS_api != "async" && FLAGS_api != "sync") { throw std::logic_error("Incorrect API. Please set -api option to `sync` or `async` value."); } @@ -100,11 +104,10 @@ static void next_step(const std::string additional_info = "") { } template -T getMedianValue(const std::vector& vec) { +T getMedianValue(const std::vector& vec, std::size_t percentile) { std::vector sortedVec(vec); std::sort(sortedVec.begin(), sortedVec.end()); - return (sortedVec.size() % 2 != 0) ? sortedVec[sortedVec.size() / 2ULL] - : (sortedVec[sortedVec.size() / 2ULL] + sortedVec[sortedVec.size() / 2ULL - 1ULL]) / static_cast(2.0); + return sortedVec[(sortedVec.size() / 100) * percentile]; } /** @@ -624,7 +627,7 @@ int main(int argc, char* argv[]) { // wait the latest inference executions inferRequestsQueue.waitAll(); - double latency = getMedianValue(inferRequestsQueue.getLatencies()); + double latency = getMedianValue(inferRequestsQueue.getLatencies(), FLAGS_latency_percentile); double totalDuration = inferRequestsQueue.getDurationInMilliseconds(); double fps = (FLAGS_api == "sync") ? 
batchSize * 1000.0 / latency : batchSize * 1000.0 * iteration / totalDuration; @@ -634,8 +637,14 @@ int main(int argc, char* argv[]) { {"total number of iterations", std::to_string(iteration)}, }); if (device_name.find("MULTI") == std::string::npos) { + std::string latency_label; + if (FLAGS_latency_percentile == 50) { + latency_label = "latency (ms)"; + } else { + latency_label = "latency (" + std::to_string(FLAGS_latency_percentile) + " percentile) (ms)"; + } statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, { - {"latency (ms)", double_to_string(latency)}, + {latency_label, double_to_string(latency)}, }); } statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"throughput", double_to_string(fps)}}); @@ -684,8 +693,15 @@ int main(int argc, char* argv[]) { std::cout << "Count: " << iteration << " iterations" << std::endl; std::cout << "Duration: " << double_to_string(totalDuration) << " ms" << std::endl; - if (device_name.find("MULTI") == std::string::npos) - std::cout << "Latency: " << double_to_string(latency) << " ms" << std::endl; + if (device_name.find("MULTI") == std::string::npos) { + std::cout << "Latency"; + if (FLAGS_latency_percentile == 50) { + std::cout << ": "; + } else { + std::cout << " (" << FLAGS_latency_percentile << " percentile): "; + } + std::cout << double_to_string(latency) << " ms" << std::endl; + } std::cout << "Throughput: " << double_to_string(fps) << " FPS" << std::endl; } catch (const std::exception& ex) { slog::err << ex.what() << slog::endl; diff --git a/inference-engine/samples/hello_reshape_ssd/reshape_ssd_extension.hpp b/inference-engine/samples/hello_reshape_ssd/reshape_ssd_extension.hpp index ac92f7c2aa4d4a..1e6ae59bf6fa0b 100644 --- a/inference-engine/samples/hello_reshape_ssd/reshape_ssd_extension.hpp +++ b/inference-engine/samples/hello_reshape_ssd/reshape_ssd_extension.hpp @@ -109,7 +109,7 @@ class CustomReluOp : public ngraph::op::Op { return std::make_shared(new_args.at(0)); } - bool visit_attributes(ngraph::AttributeVisitor& visitor) override { + bool visit_attributes(ngraph::AttributeVisitor&) override { return true; } }; diff --git a/inference-engine/samples/speech_sample/fileutils.hpp b/inference-engine/samples/speech_sample/fileutils.hpp index 0cf5adc1922bde..b437c0a7af32e5 100644 --- a/inference-engine/samples/speech_sample/fileutils.hpp +++ b/inference-engine/samples/speech_sample/fileutils.hpp @@ -30,7 +30,7 @@ class ArkFile : public BaseFile { * @param ptrNumMemoryBytes pointer to specific number of memory bytes * @return none. */ - virtual void GetFileInfo(const char* fileName, uint32_t numArrayToFindSize, uint32_t* ptrNumArrays, uint32_t* ptrNumMemoryBytes); + void GetFileInfo(const char* fileName, uint32_t numArrayToFindSize, uint32_t* ptrNumArrays, uint32_t* ptrNumMemoryBytes) override; /** * @brief Load Kaldi ARK speech feature vector file @@ -43,8 +43,8 @@ class ArkFile : public BaseFile { * @param ptrNumBytesPerElement pointer to number bytes per element (size of float by default) * @return none. 
*/ - virtual void LoadFile(const char* fileName, uint32_t arrayIndex, std::string& ptrName, std::vector& memory, uint32_t* ptrNumRows, - uint32_t* ptrNumColumns, uint32_t* ptrNumBytesPerElement); + void LoadFile(const char* fileName, uint32_t arrayIndex, std::string& ptrName, std::vector& memory, uint32_t* ptrNumRows, uint32_t* ptrNumColumns, + uint32_t* ptrNumBytesPerElement) override; /** * @brief Save Kaldi ARK speech feature vector file @@ -56,7 +56,7 @@ class ArkFile : public BaseFile { * @param numColumns number of columns * @return none. */ - virtual void SaveFile(const char* fileName, bool shouldAppend, std::string name, void* ptrMemory, uint32_t numRows, uint32_t numColumns); + void SaveFile(const char* fileName, bool shouldAppend, std::string name, void* ptrMemory, uint32_t numRows, uint32_t numColumns) override; }; /// @brief Responsible to work with .npz files @@ -70,7 +70,7 @@ class NumpyFile : public BaseFile { * @param ptrNumMemoryBytes pointer to specific number of memory bytes * @return none. */ - virtual void GetFileInfo(const char* fileName, uint32_t numArrayToFindSize, uint32_t* ptrNumArrays, uint32_t* ptrNumMemoryBytes); + void GetFileInfo(const char* fileName, uint32_t numArrayToFindSize, uint32_t* ptrNumArrays, uint32_t* ptrNumMemoryBytes) override; /** * @brief Load Numpy* uncompressed NPZ speech feature vector file @@ -83,8 +83,8 @@ class NumpyFile : public BaseFile { * @param ptrNumBytesPerElement pointer to number bytes per element (size of float by default) * @return none. */ - virtual void LoadFile(const char* fileName, uint32_t arrayIndex, std::string& ptrName, std::vector& memory, uint32_t* ptrNumRows, - uint32_t* ptrNumColumns, uint32_t* ptrNumBytesPerElement); + void LoadFile(const char* fileName, uint32_t arrayIndex, std::string& ptrName, std::vector& memory, uint32_t* ptrNumRows, uint32_t* ptrNumColumns, + uint32_t* ptrNumBytesPerElement) override; /** * @brief Save Numpy* uncompressed NPZ speech feature vector file @@ -96,5 +96,5 @@ class NumpyFile : public BaseFile { * @param numColumns number of columns * @return none. 
*/ - virtual void SaveFile(const char* fileName, bool shouldAppend, std::string name, void* ptrMemory, uint32_t numRows, uint32_t numColumns); + void SaveFile(const char* fileName, bool shouldAppend, std::string name, void* ptrMemory, uint32_t numRows, uint32_t numColumns) override; }; diff --git a/inference-engine/src/auto_plugin/auto_plugin.cpp b/inference-engine/src/auto_plugin/auto_plugin.cpp index 94b6a8a8b71794..75e80faa2b45e4 100644 --- a/inference-engine/src/auto_plugin/auto_plugin.cpp +++ b/inference-engine/src/auto_plugin/auto_plugin.cpp @@ -274,31 +274,108 @@ DeviceName AutoInferencePlugin::SelectDevice(const std::vector& meta } std::vector CPU; - std::vector GPU; + std::vector dGPU; + std::vector iGPU; + std::vector MYRIAD; + std::vector VPUX; for (auto& item : metaDevices) { if (item.find("CPU") == 0) { CPU.push_back(item); continue; } + if (item.find("MYRIAD") == 0) { + MYRIAD.push_back(item); + continue; + } + if (item.find("VPUX") == 0) { + VPUX.push_back(item); + continue; + } if (item.find("GPU") == 0) { - GPU.push_back(item); + auto gpuFullDeviceName = GetCore()->GetMetric(item, METRIC_KEY(FULL_DEVICE_NAME)).as(); + if (gpuFullDeviceName.find("iGPU") != std::string::npos) { + iGPU.push_back(item); + } else if (gpuFullDeviceName.find("dGPU") != std::string::npos) { + dGPU.push_back(item); + } continue; } } - if (CPU.empty() && GPU.empty()) { + if (CPU.empty() && dGPU.empty() && iGPU.empty() && MYRIAD.empty() && VPUX.empty()) { IE_THROW(NotFound) << "No available device found"; } - // Sort GPU by name: GPU.2 > GPU.1 > GPU.0 > GPU, so we always choose the GPU[0] as best device - std::sort(GPU.begin(), GPU.end(), [](const DeviceName& a, const DeviceName& b)->bool{return b < a;}); + // Priority of selecting device: dGPU > VPUX > iGPU > MYRIAD > CPU + if (!dGPU.empty()) { + for (auto&& item : dGPU) { + std::vector capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); + auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision); + if (supportNetwork != capability.end()) { + return item; + } + } + } else if (!VPUX.empty()) { + for (auto&& item : VPUX) { + std::vector capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); + auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision); + if (supportNetwork != capability.end()) { + return item; + } + } + } else if (!iGPU.empty()) { + for (auto&& item : iGPU) { + std::vector capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); + auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision); + if (supportNetwork != capability.end()) { + return item; + } + } + } else if (!MYRIAD.empty()) { + for (auto&& item : MYRIAD) { + std::vector capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); + auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision); + if (supportNetwork != capability.end()) { + return item; + } + } + } - for (auto&& item : GPU) { - std::vector capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); - auto res = std::find(capability.begin(), capability.end(), networkPrecision); - if (res != capability.end()) { - return item; + // If network is FP32 but there is no device support FP32, offload FP32 network to device support FP16. 
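A minimal illustration of the capability check that the fallback branch below (and each of the device-class loops above) repeats for every candidate device. The helper name firstSupporting and its getCapabilities callable are hypothetical and this sketch is not part of the patch; it only restates the shared lookup against the OPTIMIZATION_CAPABILITIES metric in isolation.

#include <algorithm>
#include <string>
#include <vector>

// Hypothetical helper: the device lists are already ordered by priority
// (dGPU > VPUX > iGPU > MYRIAD, per the comment above), so the first device
// whose OPTIMIZATION_CAPABILITIES metric contains the requested precision wins.
template <typename GetCapabilities>
std::string firstSupporting(const std::vector<std::string>& devices,
                            const std::string& precision,
                            GetCapabilities getCapabilities) {
    for (const auto& device : devices) {
        const std::vector<std::string> caps = getCapabilities(device);
        if (std::find(caps.begin(), caps.end(), precision) != caps.end())
            return device;
    }
    return {};  // empty string: no device in this class supports the precision
}

With a helper along these lines, the selection in this hunk amounts to scanning dGPU, VPUX, iGPU and MYRIAD for networkPrecision, rescanning them for "FP16" when the network is FP32 and nothing matched (the branch that follows), and finally falling back to CPU.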
+ if (networkPrecision == "FP32") { + if (!dGPU.empty()) { + for (auto&& item : dGPU) { + std::vector capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); + auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16"); + if (supportNetwork != capability.end()) { + return item; + } + } + } else if (!VPUX.empty()) { + for (auto&& item : VPUX) { + std::vector capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); + auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16"); + if (supportNetwork != capability.end()) { + return item; + } + } + } else if (!iGPU.empty()) { + for (auto&& item : iGPU) { + std::vector capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); + auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16"); + if (supportNetwork != capability.end()) { + return item; + } + } + } else if (!MYRIAD.empty()) { + for (auto&& item : MYRIAD) { + std::vector capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); + auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16"); + if (supportNetwork != capability.end()) { + return item; + } + } } } diff --git a/inference-engine/src/cldnn_engine/CMakeLists.txt b/inference-engine/src/cldnn_engine/CMakeLists.txt index e292228c73f664..46dfd5e9fce858 100644 --- a/inference-engine/src/cldnn_engine/CMakeLists.txt +++ b/inference-engine/src/cldnn_engine/CMakeLists.txt @@ -12,7 +12,7 @@ if(CMAKE_COMPILER_IS_GNUCC) endif() endif() -if(GPU_DEBUG_CONFIG) +if(ENABLE_GPU_DEBUG_CAPS) add_definitions(-DGPU_DEBUG_CONFIG=1) endif() diff --git a/inference-engine/src/cldnn_engine/cldnn_engine.cpp b/inference-engine/src/cldnn_engine/cldnn_engine.cpp index 72a34dd855af48..206c50c93c857a 100644 --- a/inference-engine/src/cldnn_engine/cldnn_engine.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_engine.cpp @@ -70,9 +70,12 @@ #include #include #include -#include +#include #include +#include +#include #include +#include #include #include @@ -83,6 +86,7 @@ #include "gpu/gpu_config.hpp" #include "cldnn/runtime/device_query.hpp" +#include "cldnn/runtime/debug_configuration.hpp" #ifdef __linux__ # include @@ -150,10 +154,12 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::TransformNetwork"); auto nGraphFunc = clonedNetwork.getFunction(); + using const_node_ptr = const std::shared_ptr; + bool enableInt8; { ngraph::pass::Manager manager; - enableInt8 = config.enableInt8 && ngraph::pass::low_precision::LowPrecisionTransformer::isFunctionQuantized(nGraphFunc); + enableInt8 = config.enableInt8 && ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(nGraphFunc); if (enableInt8) { manager.register_pass( std::vector{ ngraph::element::i8, ngraph::element::u8, ngraph::element::i4, ngraph::element::u4 }); @@ -207,8 +213,6 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc auto pass_config = manager.get_pass_config(); - using const_node_ptr = const std::shared_ptr; - // SpaceToDepth/DepthToSpace node implementation supports only equal input/output tensors with rank <= 5 pass_config->set_callback( @@ -390,28 +394,78 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc if (!config.enable_fp16_for_quantized_models) { manager.register_pass(precisions_array {{ ngraph::element::f16, ngraph::element::f32 }}); } - auto lptPrerequisites = 
manager.register_pass(); - const std::vector supportedTypes = { ngraph::element::i8, ngraph::element::u8 }; - lptPrerequisites->add_matcher(supportedTypes); - lptPrerequisites->add_matcher(supportedTypes); - lptPrerequisites->add_matcher(); - manager.run_passes(nGraphFunc); - auto params = LayerTransformation::Params(true, // updatePrecisions - LayerTransformation::QuantizedTensorAlignment::UpdateLevel, // quantizedTensorAlignmentOnActivations - LayerTransformation::QuantizedTensorAlignment::None, // quantizedTensorAlignmentOnWeights - true); // supportAsymmetricQuantization - LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params) - .add(LayerTransformation::Params(params) - .setSupportAsymmetricQuantization(false) - .setSupport3DTensorOnActivations(false)) - .add(LayerTransformation::Params(params) - .setSupportAsymmetricQuantization(false) - .setDeconvolutionSpecificChannelsRatio(true)) - // INT8 StridedSlice not supported - .remove()); - - transformer.transform(nGraphFunc); + auto supportedPrecisions = std::vector({ + OperationPrecisionRestriction::create({ + {0, {ngraph::element::u8, ngraph::element::i8}}, + {1, {ngraph::element::i8}}, + }), + OperationPrecisionRestriction::create({ + {0, {ngraph::element::u8, ngraph::element::i8}}, + {1, {ngraph::element::i8}} + }), + OperationPrecisionRestriction::create({ + {0, {ngraph::element::u8, ngraph::element::i8}}, + {1, {ngraph::element::i8}} + }), + OperationPrecisionRestriction::create({}) + }); + + auto perTensorQuantization = std::vector({ + OperationPerTensorQuantizationRestriction::create({0}), + OperationPerTensorQuantizationRestriction::create({0}), + }); + + ngraph::pass::Manager lptManager; + + auto lptPassConfig = lptManager.get_pass_config(); + lptPassConfig->disable(); + lptPassConfig->set_callback([](const_node_ptr& node) -> bool { + if (const auto mulitply = std::dynamic_pointer_cast(node)) { + return !MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolution(mulitply); + } + return false; + }); + lptPassConfig->set_callback([](const_node_ptr& node) -> bool { + auto fillStaticChannel = [](const ngraph::PartialShape& shape, size_t& channel) -> bool { + const auto rank = shape.rank(); + if (rank.is_dynamic()) { + return false; + } + if (rank.get_length() < 2ul) { + return false; + } + const auto dimension = shape[1]; + if (dimension.is_dynamic()) { + return false; + } + channel = dimension.get_length(); + return true; + }; + + size_t inputChannels; + if (!fillStaticChannel(node->get_input_partial_shape(0), inputChannels)) { + return true; + } + + size_t outputChannels; + if (!fillStaticChannel(node->get_output_partial_shape(0), outputChannels)) { + return true; + } + + + if ((inputChannels % 4 != 0) || (outputChannels % 16 != 0)) { + return true; + } + + return LayerTransformation::isAsymmetricQuantization(node) || WeightableLayerTransformation::isAsymmetricOnWeights(node); + }); + lptPassConfig->set_callback([](const_node_ptr& node) -> bool { + return MatMulTransformation::is3DTensorOnActivations(node); + }); + + lptManager.register_pass(supportedPrecisions, perTensorQuantization); + lptManager.run_passes(nGraphFunc); } { @@ -436,6 +490,11 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc manager.run_passes(nGraphFunc); } } + + GPU_DEBUG_GET_INSTANCE(debug_config); + GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { + clonedNetwork.serialize(debug_config->dump_graphs + "/transformed_func.xml"); + } return clonedNetwork; } diff --git 
a/inference-engine/src/cldnn_engine/cldnn_graph.cpp b/inference-engine/src/cldnn_engine/cldnn_graph.cpp index 2b333a38ee9d20..53cefa30cf76e0 100644 --- a/inference-engine/src/cldnn_engine/cldnn_graph.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_graph.cpp @@ -88,11 +88,11 @@ void CLDNNGraph::Build() { std::shared_ptr CLDNNGraph::BuildNetwork(std::shared_ptr program) { OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::BuildNetwork"); - auto network = std::make_shared(*program, m_stream_id); + auto network = std::make_shared(program, m_stream_id); if (!m_config.graph_dumps_dir.empty() && m_stream_id == 0) { static int net_id = 0; - auto steps_info = network->get_optimization_steps_info(); + auto steps_info = network->get_optimizer_passes_info(); size_t step_idx = 0; for (auto& step : steps_info) { CNNNetwork net(GetExecGraphInfoByPrimitivesInfo(step.second, true)); diff --git a/inference-engine/src/cldnn_engine/cldnn_primitives_list.hpp b/inference-engine/src/cldnn_engine/cldnn_primitives_list.hpp index e562447189b6c7..0c0ddf7e637050 100644 --- a/inference-engine/src/cldnn_engine/cldnn_primitives_list.hpp +++ b/inference-engine/src/cldnn_engine/cldnn_primitives_list.hpp @@ -194,16 +194,17 @@ REGISTER_FACTORY(v5, LSTMSequence); //REGISTER_FACTORY(v5, NonMaxSuppression); Supported via v5 -> v5 internal conversion REGISTER_FACTORY(v5, Round); REGISTER_FACTORY(v5, GatherND); +REGISTER_FACTORY(v5, Loop); // ----------------------------- Unsupported v5 ops ----------------------------- // // REGISTER_FACTORY(v5, BatchNormInference); // REGISTER_FACTORY(v5, GRUSequence); -// REGISTER_FACTORY(v5, Loop); // REGISTER_FACTORY(v5, RNNSequence); // ------------------------------ Supported v6 ops ------------------------------ // REGISTER_FACTORY(v6, CTCGreedyDecoderSeqLen); REGISTER_FACTORY(v6, MVN); +REGISTER_FACTORY(v6, GatherElements); // ------------------------------ Supported v7 ops ------------------------------ // REGISTER_FACTORY(v7, Gather); diff --git a/inference-engine/src/cldnn_engine/cldnn_program.cpp b/inference-engine/src/cldnn_engine/cldnn_program.cpp index 275aeca31ca789..7386501f0b1671 100644 --- a/inference-engine/src/cldnn_engine/cldnn_program.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_program.cpp @@ -6,7 +6,6 @@ #include "ngraph/ops.hpp" #include "ngraph_ops/nms_ie_internal.hpp" #include "cldnn_itt.h" -#include "cldnn/runtime/debug_configuration.hpp" using namespace InferenceEngine; using namespace InferenceEngine::details; @@ -178,16 +177,11 @@ std::shared_ptr Program::BuildProgram(const std::vectordump_graphs.empty()) { - options.set_option(cldnn::build_option::graph_dumps_dir(debug_config->dump_graphs)); - } - options.set_option(cldnn::build_option::optimize_data(true)); options.set_option(cldnn::build_option::tuning_config(m_config.tuningConfig)); @@ -199,7 +193,7 @@ std::shared_ptr Program::BuildProgram(const std::vector(*m_engine, *m_topology, options); + auto program = cldnn::program::build_program(*m_engine, *m_topology, options); CleanupBuild(); return program; diff --git a/inference-engine/src/cldnn_engine/ops/gather_elements.cpp b/inference-engine/src/cldnn_engine/ops/gather_elements.cpp new file mode 100644 index 00000000000000..d61382807506c1 --- /dev/null +++ b/inference-engine/src/cldnn_engine/ops/gather_elements.cpp @@ -0,0 +1,66 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "cldnn_program.h" +#include "cldnn_common_utils.h" + +#include "ngraph/op/gather_elements.hpp" +#include 
"ngraph/op/constant.hpp" + +#include "cldnn/primitives/gather_elements.hpp" + +namespace CLDNNPlugin { + +static cldnn::gather_elements::gather_elements_axis GetGatherAxis(int axis, unsigned rank) { + if (axis < 0) + axis += rank; + if (axis < 0 || axis >= rank) + IE_THROW() << "GatherElements axis is not correspond to number of dimensions"; + + // Difference in dimension ordering between IE and clDNN, + // reverse spatial dimensions after batch and feature. + unsigned cldnn_axis = axis; + if (axis >= 2) { + auto spatial_axis = axis - 2; + // Default and minimum number of dimensions is 4 + auto spatial_size = std::max(rank, 4u) - 2; + cldnn_axis = spatial_size - spatial_axis - 1 + 2; + } + + switch (cldnn_axis) { + case 0: return cldnn::gather_elements::gather_elements_axis::along_b; + case 1: return cldnn::gather_elements::gather_elements_axis::along_f; + case 2: return cldnn::gather_elements::gather_elements_axis::along_x; + case 3: return cldnn::gather_elements::gather_elements_axis::along_y; + case 4: return cldnn::gather_elements::gather_elements_axis::along_z; + case 5: return cldnn::gather_elements::gather_elements_axis::along_w; + default: IE_THROW() << "Unsupported GatherElements axis: " << axis; + } + return cldnn::gather_elements::gather_elements_axis::along_f; // shouldn't get here +} + +void CreateGatherElementsOp(Program& p, const std::shared_ptr& op) { + p.ValidateInputs(op, {2}); + auto inputPrimitives = p.GetInputPrimitiveIDs(op); + std::string layerName = layer_type_name_ID(op); + + size_t rank = op->get_input_shape(0).size(); + int32_t axis = static_cast(op->get_axis()); + + auto outLayout = DefaultFormatForDims(op->get_output_shape(0).size()); + + auto primitive = cldnn::gather_elements(layerName, + inputPrimitives[0], + inputPrimitives[1], + outLayout, + CldnnTensorFromIEDims(op->get_output_shape(0)), + GetGatherAxis(axis, rank)); + + p.AddPrimitive(primitive); + p.AddPrimitiveToProfiler(op); +} + +REGISTER_FACTORY_IMPL(v6, GatherElements); + +} // namespace CLDNNPlugin diff --git a/inference-engine/src/cldnn_engine/ops/loop.cpp b/inference-engine/src/cldnn_engine/ops/loop.cpp new file mode 100644 index 00000000000000..1ac452265b8820 --- /dev/null +++ b/inference-engine/src/cldnn_engine/ops/loop.cpp @@ -0,0 +1,227 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/////////////////////////////////////////////////////////////////////////////////////////////////// +#include "cldnn_program.h" +#include "cldnn_common_utils.h" +#include "cldnn_engine.h" + +#include + +#include "ngraph/op/loop.hpp" +#include "ngraph/op/constant.hpp" +#include "ngraph/op/util/sub_graph_base.hpp" +#include "transformations/utils/utils.hpp" +#include "ie_ngraph_utils.hpp" + +#include "cldnn/primitives/loop.hpp" +#include "cldnn/primitives/mutable_data.hpp" +#include "cldnn/primitives/data.hpp" +#include "cldnn/primitives/reorder.hpp" +#include "cldnn/graph/topology.hpp" + +#include +#include + +using Loop = ngraph::op::v5::Loop; + +namespace CLDNNPlugin { + +template +static DATA_TYPE CreateScalarData(Program &p, const cldnn::primitive_id& id, int64_t num) { + auto mem = p.GetEngine().allocate_memory({ cldnn::data_types::i64, cldnn::format::bfyx, { 1, 1, 1, 1 } }); + cldnn::mem_lock ptr{mem, p.GetEngine().get_program_stream()}; + *ptr.begin() = num; + return {id, mem}; +} + +static cldnn::mutable_data CreateAdditionalOutputData(Program &p, const std::shared_ptr& op, + const cldnn::primitive_id& id, const cldnn::primitive_id& input, + const int32_t 
output_idx) { + const auto precision = DataTypeFromPrecision(op->get_output_element_type(output_idx)); + const auto format = DefaultFormatForDims(op->get_output_shape(output_idx).size()); + const auto tensor = CldnnTensorFromIEDims(op->get_output_shape(output_idx)); + cldnn::layout output_layout = cldnn::layout(precision, format, tensor); + auto mem = p.GetEngine().allocate_memory(output_layout); + auto md = cldnn::mutable_data(id, {input}, mem); // cldnn::data cannot set dependency + return md; +} + +static void UpdateBackedge(std::vector& back_edges, + const cldnn::primitive_id& old_primitive_id, const cldnn::primitive_id& new_primitive_id) { + for (auto& back_edge : back_edges) { + if (back_edge.from == old_primitive_id) { + back_edge.from = new_primitive_id; + } + } +} + +static std::string GetExternalInputName(const int64_t body_parameter_index, + const std::shared_ptr& op) { + const auto& loop_input_descs = op->get_input_descriptions(); + for (const auto& loop_input_desc : loop_input_descs) { + if (loop_input_desc->m_body_parameter_index == body_parameter_index) { + auto external_node = op->get_input_node_shared_ptr(loop_input_desc->m_input_index); + return layer_type_name_ID(external_node); + } + } + return {""}; +} + +void CreateLoopOp(Program& p, const std::shared_ptr& op) { + const std::string layerName = layer_type_name_ID(op); + auto inputPrimitives = p.GetInputPrimitiveIDs(op); + const auto& loop_input_descs = op->get_input_descriptions(); + const auto& loop_output_descs = op->get_output_descriptions(); + const auto& body_inputs = op->get_function()->get_parameters(); + const auto& body_outputs = op->get_function()->get_results(); + + InferenceEngine::CNNNetwork body_network(op->get_function()); + auto networkInputs = body_network.getInputsInfo(); + auto networkOutputs = body_network.getOutputsInfo(); + + // Set special body ports: current_iteration input , execution condition output + auto special_body_ports = op->get_special_body_ports(); + + std::string body_current_iteration_id; + if (special_body_ports.current_iteration_input_idx >= 0) { + auto current_iteration_input = body_inputs.at(special_body_ports.current_iteration_input_idx); + body_current_iteration_id = layer_type_name_ID(current_iteration_input); + std::string input_name = ngraph::op::util::create_ie_output_name(current_iteration_input); + const auto networkInput = networkInputs.at(input_name); + auto precision = InferenceEngine::details::convertPrecision(current_iteration_input->get_element_type()); + networkInput->setPrecision(precision); + } + + cldnn::primitive_id body_execution_condition_id; + if (special_body_ports.body_condition_output_idx >= 0) { + auto body_condition_output = body_outputs.at(special_body_ports.body_condition_output_idx)->get_input_node_shared_ptr(0); + body_execution_condition_id = layer_type_name_ID(body_condition_output); + std::string output_name = ngraph::op::util::create_ie_output_name(body_condition_output); + const auto networkOutput = networkOutputs.at(output_name); + networkOutput->setPrecision(InferenceEngine::Precision::I64); + } + + // get body topology from ngraph function + Program body_program(body_network, p.GetEnginePtr(), p.GetConfig(), true); + auto body_topology = *body_program.GetTopology(); + + // setup input_primitive_maps/ output_primitive_maps and back_edges + std::vector input_primitive_maps; + std::vector output_primitive_maps; + std::vector back_edges; + + // set input mapping & back edges + for (const auto& loop_input_desc : loop_input_descs) { + const 
cldnn::primitive_id& external_id = inputPrimitives.at(loop_input_desc->m_input_index); + auto& body_input = body_inputs.at(loop_input_desc->m_body_parameter_index); + cldnn::primitive_id internal_id = layer_type_name_ID(body_input); + + // set input mapping + if (const auto& sliceInfo = + std::dynamic_pointer_cast(loop_input_desc)) { + // sliced input + input_primitive_maps.emplace_back(external_id, internal_id, sliceInfo->m_axis, + sliceInfo->m_start, sliceInfo->m_end, sliceInfo->m_stride); + } else { + // input without slicing + input_primitive_maps.emplace_back(external_id, internal_id); + } + + // set back edges + if (const auto& mergedInput = + std::dynamic_pointer_cast(loop_input_desc)) { + // backedge + const auto& to = body_inputs.at(mergedInput->m_body_parameter_index); + const auto& from = body_outputs.at(mergedInput->m_body_value_index); + + cldnn::primitive_id to_id = layer_type_name_ID(to); + cldnn::primitive_id from_id = layer_type_name_ID(from); + + // reset output data type because the data types of the outputs of the + // body topology are always FP32 regardless of ngraph data type + { + const auto from_prim = body_topology.at(from_id); + const auto& to_ngraph_type = to->get_element_type(); + const auto to_cldnn_type = DataTypeFromPrecision(to_ngraph_type); + from_prim->output_data_type = to_cldnn_type; + } + back_edges.emplace_back(from_id, to_id); + } + } + + // set trip count, initial execution condition, num iteration primitives + // they should be mutable_data to prevent from being optimized out + const cldnn::primitive_id trip_count_id = layer_type_name_ID(op->get_input_node_shared_ptr(0)); + const cldnn::primitive_id execution_condition_id = layer_type_name_ID(op->get_input_node_shared_ptr(1)); + const int64_t num_iterations = op->get_num_iterations(); + if (num_iterations < 0) { + IE_THROW() << "loop's num_iteration cannot be negative"; + } + const cldnn::primitive_id num_iteration_id = layerName + "_numIteration"; + { + cldnn::mutable_data num_iteration = CreateScalarData(p, num_iteration_id, 0); + p.primitivesToIRLayersMap[num_iteration_id] = { op->get_friendly_name() }; + p.primitiveIDs[num_iteration_id] = num_iteration_id; + p.AddPrimitive(num_iteration); + p.AddInnerPrimitiveToProfiler(num_iteration_id, layerName, op); + } + + // set output mapping + for (const auto& loop_output_desc : loop_output_descs) { + const uint64_t output_idx = loop_output_desc->m_output_index; + + // Add additional mutable_data for multiple outputs + // primitive ID should be . if output_idx > 0 + // otherwise primitive ID should be equals to TI primitive ID + const std::string layerNameWithIndex = layerName + "." 
+ std::to_string(output_idx); + std::string external_id; + if (output_idx > 0) { + cldnn::mutable_data output_data = CreateAdditionalOutputData(p, op, layerNameWithIndex, layerName, output_idx); + p.AddPrimitive(output_data); + p.AddInnerPrimitiveToProfiler(layerNameWithIndex, layerName, op); + p.primitiveIDs[layerNameWithIndex] = layerNameWithIndex; + external_id = layerNameWithIndex; + } else { + p.primitiveIDs[layerNameWithIndex] = layerName; + p.primitiveIDs[layerName] = layerName; + external_id = layerName; + } + const auto& body_output = body_outputs.at(loop_output_desc->m_body_value_index); + cldnn::primitive_id internal_id = layer_type_name_ID(body_output); + + // update primitive_map + if (const auto& concatOutput = + std::dynamic_pointer_cast(loop_output_desc)) { + // output which requires concatenation + output_primitive_maps.emplace_back(external_id, internal_id, concatOutput->m_axis, + concatOutput->m_start, concatOutput->m_end, concatOutput->m_stride); + } + if (std::dynamic_pointer_cast(loop_output_desc)) { + // output which requires no concatenation + output_primitive_maps.emplace_back(external_id, internal_id); + } + } + + const cldnn::loop loopPrimitive( + layerName, /* layer name of this primitive (output id) */ + inputPrimitives, /* inputs of this layer */ + body_topology, /* body network */ + trip_count_id, /* trip_count data in outer network, always same as num_iterations in TI */ + execution_condition_id, /* initial_execution_condition data in outer network, always true in TI */ + num_iteration_id, /* actual number of iteration data in body network */ + input_primitive_maps, /* input mappings connecting outer network and inner network */ + output_primitive_maps, /* output mappings connecting outer network and inner network */ + back_edges, /* back edge mapping */ + num_iterations, /* max iteration, i.e. 
length of iteration axis */ + body_current_iteration_id, + body_execution_condition_id); + + p.AddPrimitive(loopPrimitive); + p.AddPrimitiveToProfiler(op); +} + +REGISTER_FACTORY_IMPL(v5, Loop); + +} // namespace CLDNNPlugin diff --git a/inference-engine/src/cldnn_engine/ops/matmul.cpp b/inference-engine/src/cldnn_engine/ops/matmul.cpp index a8818c9e6f67ee..3d09fc7fd4e5e6 100644 --- a/inference-engine/src/cldnn_engine/ops/matmul.cpp +++ b/inference-engine/src/cldnn_engine/ops/matmul.cpp @@ -62,6 +62,8 @@ void CreateMatMulOp(Program& p, const std::shared_ptr& o bool is_fc = IsNodeOnConstPath(op->get_input_node_shared_ptr(1)); is_fc &= std::count_if(shape_b.begin(), shape_b.end(), [](size_t x) { return x != 1; }) <= 2; + // TODO: This conditions can be relaxed with proper handling in FC path + is_fc &= shape_b.size() > 1 && shape_a.size() > 1; if (is_fc) { ngraph::Shape shape_a_aligned, shape_b_aligned; @@ -73,10 +75,10 @@ void CreateMatMulOp(Program& p, const std::shared_ptr& o auto inputName = inputPrimitives[0]; auto weightsName = inputPrimitives[1]; + // Weights normalization if (!op->get_transpose_b()) { - ngraph::Shape output_shape = shape_b; - std::vector transpose_order(output_shape.size()); + std::vector transpose_order(shape_b.size()); std::iota(transpose_order.begin(), transpose_order.end(), 0); std::swap(*(transpose_order.end() - 1), *(transpose_order.end() - 2)); @@ -95,8 +97,7 @@ void CreateMatMulOp(Program& p, const std::shared_ptr& o // Input normalization if (op->get_transpose_a()) { - ngraph::Shape output_shape = shape_a; - std::vector transpose_order(output_shape.size()); + std::vector transpose_order(shape_a.size()); std::iota(transpose_order.begin(), transpose_order.end(), 0); std::swap(*(transpose_order.end() - 1), *(transpose_order.end() - 2)); @@ -131,16 +132,20 @@ void CreateMatMulOp(Program& p, const std::shared_ptr& o if (reshape_fc) { inputName = reshape_to_2d(shape_a, inputName, shape_a.back(), "_cldnn_reshape_in"); + } + + if (shape_b.size() != 2) { weightsName = reshape_to_2d(shape_b, weightsName, K, "_cldnn_reshape_weights"); } + auto input_rank = reshape_fc ? 2 : shape_a.size(); auto fcPrim = cldnn::fully_connected(layerName, inputName, weightsName, "", DataTypeFromPrecision(op->get_output_element_type(0)), cldnn::padding(), - op->get_output_shape(0).size()); + input_rank); p.AddPrimitive(fcPrim); @@ -196,7 +201,29 @@ void CreateMatMulOp(Program& p, const std::shared_ptr& o auto reshapeName = layerName + "_cldnn_in" + std::to_string(i) + "_reshape"; // Extend input dimensions by prepending ones - inputDims.insert(inputDims.begin(), outDimsN - inputDimsN, 1ul); + if (inputDimsN == 1) { + // One-dimensional tensors unsqueezing is applied for each input independently. + // The axes inserted in this step are not included in the output shape. + // * If rank of the **first** input is equal to 1, it is always unsqueezed to 2D tensor **row vector** (regardless of `transpose_a`) + // by adding axes with size 1 at ROW_INDEX_DIM, to the **left** of the shape. For example `[S]` will be reshaped to `[1, S]`. + // * If rank of the **second** input is equal to 1, it is always unsqueezed to 2D tensor **column vector** (regardless of `transpose_b`) + // by adding axes with size 1 at COL_INDEX_DIM, to the **right** of the shape. For example `[S]` will be reshaped to `[S, 1]`. 
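+                    // Illustrative note (editor's sketch, not part of the original spec comment):
+                    // e.g. a 1D first input of shape [S] is unsqueezed to the row vector [1, S];
+                    // if transpose_a is set, the swap below turns it into [S, 1], so that the gemm
+                    // primitive, which applies the transpose itself, still ends up operating on the
+                    // expected [1, S] row vector. The second input is handled symmetrically ([S] ->
+                    // [S, 1], swapped to [1, S] when transpose_b is set).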
+ bool transpose = false; + if (i == 0) { + transpose = op->get_transpose_a(); + inputDims.insert(inputDims.begin(), 1); + } else { + transpose = op->get_transpose_b(); + inputDims.insert(inputDims.end(), 1); + } + // Specs says that shapes must be unsqueezed regardless of tranpose flag, but primitive implementation always respects transposes + // so we have to swap dimensions correspondingly to have consistent shapes. + if (transpose) { + std::swap(inputDims[0], inputDims[1]); + } + } + if (inputDimsN < outDimsN) + inputDims.insert(inputDims.begin(), outDimsN - inputDimsN, 1ul); auto targetShape = gemmSpecificTensor(inputDims); diff --git a/inference-engine/src/gna_plugin/backend/dnn_types.h b/inference-engine/src/gna_plugin/backend/dnn_types.h index d08d9346d35c89..0b00b41ec830d7 100644 --- a/inference-engine/src/gna_plugin/backend/dnn_types.h +++ b/inference-engine/src/gna_plugin/backend/dnn_types.h @@ -227,7 +227,7 @@ OvGnaType OvGnaTypeIntFromBytes(T bytesPerElement) { return r->second; } -static std::string OvGnaTypeToString(OvGnaType type) { +inline std::string OvGnaTypeToString(OvGnaType type) { static const std::map typeToString = { {OvGnaTypeInt8, "OvGnaTypeInt8"}, {OvGnaTypeInt16, "OvGnaTypeInt16"}, @@ -241,7 +241,7 @@ static std::string OvGnaTypeToString(OvGnaType type) { return r->second; } -static std::string OvGnaModeToString(OvGnaMode mode) { +inline std::string OvGnaModeToString(OvGnaMode mode) { static const std::map modeToString = { {OvGnaModeDefault, "OvGnaModeDefault"}, {OvGnaModeDisabled, "OvGnaModeDisabled"}, diff --git a/inference-engine/src/gna_plugin/backend/gna_limitations.hpp b/inference-engine/src/gna_plugin/backend/gna_limitations.hpp index 90af04519291a6..6a3af8e428bde6 100644 --- a/inference-engine/src/gna_plugin/backend/gna_limitations.hpp +++ b/inference-engine/src/gna_plugin/backend/gna_limitations.hpp @@ -24,6 +24,10 @@ constexpr uint32_t noOfInputsLowPrecDivisor = 16; constexpr uint32_t affineMaxBatchSize = 8; +constexpr uint32_t maxPoolMaxWindowSize = 6; + +constexpr uint32_t copyMaxGrouping = 8; + namespace Cnn2D { struct RangeLimit { uint32_t min; @@ -87,6 +91,8 @@ class Validator { static void ThrowIfNotEmpty(const std::string prefix, const std::string error); public: + Validator() = default; + void ValidateCnn2D(std::string name, const uint32_t inHeight, const uint32_t inWidth, const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN, const uint32_t strideH, const uint32_t strideW, OvGnaType inPrecision) const; diff --git a/inference-engine/src/gna_plugin/gna_graph_patterns.hpp b/inference-engine/src/gna_plugin/gna_graph_patterns.hpp index eed44b0ef35324..4c4ceb85d14802 100644 --- a/inference-engine/src/gna_plugin/gna_graph_patterns.hpp +++ b/inference-engine/src/gna_plugin/gna_graph_patterns.hpp @@ -65,9 +65,11 @@ inline std::pair Fin if (parent->outData.size() != 1 || InferenceEngine::getInputTo(parent->outData[0]).size() != 1) { return std::make_pair(nullptr, nullptr); } - auto parent_dims = parent->outData[0]->getDims(); - // Check if the previous layer has all dimensions except one to be equal to 1 - if (std::count_if(std::begin(parent_dims), std::end(parent_dims), [](size_t dim) { return dim != 1; }) > 1) { + // Check if reshape is expected for this pattern: + // the previous layer has number of channels > 1 and one of height/width dimensions is also > 1 + if (GetDataDimSize(parent->outData[0], InferenceEngine::DataDimName::C) != 1 && + (GetDataDimSize(parent->outData[0], InferenceEngine::DataDimName::H) != 1 || + 
GetDataDimSize(parent->outData[0], InferenceEngine::DataDimName::W) != 1)) { return std::make_pair(nullptr, nullptr); } } diff --git a/inference-engine/src/gna_plugin/gna_plugin.cpp b/inference-engine/src/gna_plugin/gna_plugin.cpp index c40b97209e1075..bb3451c0aa7129 100644 --- a/inference-engine/src/gna_plugin/gna_plugin.cpp +++ b/inference-engine/src/gna_plugin/gna_plugin.cpp @@ -63,6 +63,7 @@ #include "transformations/swap_input_matmul_gna.hpp" #include "transformations/convert_matmul_to_pointwise_convolution.hpp" #include "transformations/split_convolution_with_large_buffer_size.hpp" +#include "transformations/decompose_2d_conv.hpp" #include "transformations/convert_padded2valid_conv.hpp" #include @@ -470,7 +471,6 @@ void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork & networ auto data = input.second->getInputData(); for (auto && nextToInputLayer : getInputTo(data)) { if (!LayerInfo(nextToInputLayer.second).isFakeQuantize()) { - inputIdx++; continue; } // replacing scale factor from this fq layer @@ -493,6 +493,9 @@ void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork & networ scaleInput = (fqLayer.getLevels() - 1) / (2 * maxAbsVal); } + IE_ASSERT(config.inputScaleFactors.size() > inputIdx); + IE_ASSERT(inputsDesc->inputScaleFactors.size() > inputIdx); + if (!config.inputScaleFactors.empty()) { gnalog() << "Scale factor calculated during model quantization (" << scaleInput << ") will be used instead of user input (" << inputsDesc->inputScaleFactors[inputIdx] << ").\n"; @@ -505,9 +508,9 @@ void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork & networ config.inputScaleFactors[inputIdx] = scaleInput; inputsDesc->inputScaleFactors[inputIdx] = scaleInput; - - inputIdx++; } + + inputIdx++; } } @@ -671,6 +674,11 @@ void GNAPlugin::AddDebugProperties(const InferenceEngine::CNNLayerPtr layer, void GNAPlugin::LoadNetwork(CNNNetwork & _network) { OV_ITT_SCOPED_TASK(itt::domains::GNAPlugin, "LoadNetwork"); std::shared_ptr convertedNetwork; + + if (!gnaFlags->sw_fp32) { + InitGNADevice(); + } + if (_network.getFunction()) { CNNNetwork clonedNetwork = InferenceEngine::cloneNetwork(_network); const auto& graph = clonedNetwork.getFunction(); @@ -680,6 +688,11 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { manager.register_pass(); manager.register_pass(); manager.register_pass(); + if (config.gnaCompileTarget == InferenceEngine::GNAConfigParams::GNA_TARGET_2_0) { + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + } // TODO enable this transformation for networks with convolutions if (!ngraph::op::util::has_op_with_type(graph)) { manager.register_pass(); @@ -752,12 +765,14 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { passes->registerPass(); passes->registerPass(); + passes->registerPass(); + passes->registerPass(); + passes->registerPass(); passes->registerPass(); passes->registerPass(); - passes->registerPass(); passes->registerPass(); passes->registerPass(); passes->registerPass(); @@ -775,7 +790,6 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { #if GNA_LIB_VER == 2 passes->registerPass(); #endif - passes->registerPass(); passes->registerPass(); passIdx = passes->run(passIdx); }; @@ -867,15 +881,16 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { // fill in extra storage with memory layers graphCompiler.fillMemoryConnections(memoryPairs); - if (!graphCompiler.memory_connection.empty()) { + if (!graphCompiler.memory_connection.empty() && gnaFlags->gna_lib_async_threads_num != 1) 
{ + // TODO: check if updating the number of threads is needed for sw_fp32 gnaFlags->gna_lib_async_threads_num = 1; + if (!gnaFlags->sw_fp32) + InitGNADevice(); } if (gnaFlags->sw_fp32) { gnamem.reset(new gna_memory_type(memory::make_polymorph>())); graphCompiler.setGNAMemoryPtr(gnamem); - } else { - InitGNADevice(); } // keep inputs information and create input primitives diff --git a/inference-engine/src/gna_plugin/gna_plugin_config.cpp b/inference-engine/src/gna_plugin/gna_plugin_config.cpp index e18847e851c1c5..f5e28e10aed130 100644 --- a/inference-engine/src/gna_plugin/gna_plugin_config.cpp +++ b/inference-engine/src/gna_plugin/gna_plugin_config.cpp @@ -90,8 +90,8 @@ void Config::UpdateFromMap(const std::map& config) { } } auto scale_factor = InferenceEngine::CNNLayer::ie_parse_float(value); - if (fp32eq(scale_factor, 0.0f)) { - THROW_GNA_EXCEPTION << "input scale factor of 0.0f not supported"; + if (fp32eq(scale_factor, 0.0f) || std::isinf(scale_factor)) { + THROW_GNA_EXCEPTION << "input scale factor of 0.0f or +-inf not supported"; } // missing scale factors are set to be 1.0f if (inputScaleFactors.size() <= input_index) { diff --git a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp index ae731465025e05..f4e5fc7a9316d9 100644 --- a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp +++ b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp @@ -1530,16 +1530,7 @@ void SubstituteScaleShiftBroadCastPass::run() { continue; } - // only 3d scaleshift supported where number of c is arbitrary - auto lastD = reshape_batch ? dataDims[1] : dataDims.back(); - if (lastD != weightsElements) { - THROW_GNA_EXCEPTION << "Unsupported layer: " << l->name - << " should have last dim(" << lastD << ") equal to weights(" << weightsElements << ") length"; - } - if (dataDims.size() == 2) { - THROW_GNA_EXCEPTION << "For layer: " << l->name - << " weights size(" << weightsElements<< ") invalid: should match input size of(" << lastD << ")"; - } + // TODO: add broadcasting rules checks gnalog() << "Substitution ScaleShift broadcast for layer: " << l->name << "\n"; if (nElements % scaleShift->_weights->size()) { @@ -2186,7 +2177,7 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() { } if (isFQFuseAllowed) { - getInputTo(prevData).clear(); + getInputTo(prevData).erase(l->name); } // Connect all next layers after FQ to the layer that is before FQ @@ -2220,6 +2211,17 @@ void TransposeWeightsFromNCHWToNHWCPass::run() { } }; + auto transpInfoMatchWeightsSize = [](const std::vector &transpositionInfo, size_t weightsSize, const std::string &layerName) { + size_t totalElements = 0; + for (auto && transpositionInfoPart : transpositionInfo) { + totalElements += transpositionInfoPart.num_transpose_rows * transpositionInfoPart.num_transpose_columns; + } + if (totalElements != weightsSize) { + THROW_GNA_EXCEPTION << layerName << " weights elements from transposition info (" << totalElements + << ") don't match input dimensions (" << weightsSize << ")"; + } + }; + for (auto &&l : *pLayers) { if (LayerInfo(l).isScaleShift()) { std::vector transpositionInfo; @@ -2237,6 +2239,10 @@ void TransposeWeightsFromNCHWToNHWCPass::run() { } auto weightable = dynamic_cast(l.get()); IE_ASSERT(weightable != nullptr); + + size_t totalWeights = weightable->_weights->size(); + transpInfoMatchWeightsSize(transpositionInfo, totalWeights, l->name); + ConvertTensorFromNCHWToNHWC(weightable->precision.size(), 1, weightable->_weights->size(), 
weightable->_weights->cbuffer().as(), true, transpositionInfo); if (weightable->_biases) { @@ -2270,14 +2276,9 @@ void TransposeWeightsFromNCHWToNHWCPass::run() { // If we found a split it's not possible to rotate data THROW_GNA_EXCEPTION << l->name << " won't be transposed due to a split before it"; } - size_t totalColumns = 0; - for (auto && transpositionInfoPart : transpositionInfo) { - totalColumns += transpositionInfoPart.num_transpose_rows * transpositionInfoPart.num_transpose_columns; - } - if (weightsColumns != totalColumns) { - THROW_GNA_EXCEPTION << l->name << " weights columns from transposition info (" << totalColumns - << ") don't match input dimensions (" << weightsColumns << ")"; - } + + transpInfoMatchWeightsSize(transpositionInfo, weightsColumns, l->name); + ConvertTensorFromNCHWToNHWC(precision, weightsRows, weightsColumns, weightable->_weights->cbuffer().as(), true, transpositionInfo); gnalog() << l->name << " weights rows transposition info:\n"; @@ -2297,14 +2298,9 @@ void TransposeWeightsFromNCHWToNHWCPass::run() { // If we found a concat it's not possible to rotate data THROW_GNA_EXCEPTION << l->name << " won't be transposed due to a concat after it"; } - size_t totalRows = 0; - for (const auto& transpositionInfoPart : transpositionInfo) { - totalRows += transpositionInfoPart.num_transpose_rows * transpositionInfoPart.num_transpose_columns; - } - if (weightsRows != totalRows) { - THROW_GNA_EXCEPTION << l->name << " weights rows from transposition info (" << totalRows - << ") don't match output dimensions (" << weightsRows << ")"; - } + + transpInfoMatchWeightsSize(transpositionInfo, weightsRows, l->name); + ConvertTensorFromNCHWToNHWC(precision, weightsRows, weightsColumns, weightable->_weights->cbuffer().as(), false, transpositionInfo); gnalog() << l->name << " weights columns transposition info:\n"; diff --git a/inference-engine/src/gna_plugin/transformations/convert_padded2valid_conv.cpp b/inference-engine/src/gna_plugin/transformations/convert_padded2valid_conv.cpp index 1701993f387bf2..52d082e7f2ec4b 100644 --- a/inference-engine/src/gna_plugin/transformations/convert_padded2valid_conv.cpp +++ b/inference-engine/src/gna_plugin/transformations/convert_padded2valid_conv.cpp @@ -14,72 +14,26 @@ #include #include #include +#include "utils/transformation_helper.hpp" using namespace GNAPluginNS; NGRAPH_RTTI_DEFINITION(ConvertPadded2ValidConv, "ConvertPadded2ValidConv", 0); -struct ConvData { - size_t input_height; - size_t input_width; - size_t input_channel_count; - size_t filter_count; - size_t pads_begin_width; - size_t pads_begin_height; - size_t pads_end_width; - size_t pads_end_height; - ngraph::op::PadType padding_type; - ngraph::element::Type element_type; -}; - -static bool VerifyAndGetConvParams(std::shared_ptr conv, ConvData& conv_data) { +static bool VerifyAndGetConvData(std::shared_ptr conv, ConvData& conv_data) { const auto& input = conv->input_value(0); - // We support only 2D conv batch 1 - if (conv->get_dilations().size() != 2 || - conv->get_strides().size() != 2 || - input.get_shape()[0] != 1) { + // We support only batch 1 + if (input.get_shape()[0] != 1) { return false; } - conv_data.padding_type = conv->get_auto_pad(); - conv_data.input_channel_count = conv->input_value(0).get_shape()[1]; - conv_data.input_height = conv->input_value(0).get_shape()[2]; - conv_data.input_width = conv->input_value(0).get_shape()[3]; - conv_data.filter_count = conv->input_value(1).get_shape()[0]; - conv_data.pads_begin_height = conv->get_pads_begin()[0]; - 
conv_data.pads_begin_width = conv->get_pads_begin()[1]; - conv_data.pads_end_height = conv->get_pads_end()[0]; - conv_data.pads_end_width = conv->get_pads_end()[1]; - conv_data.element_type = conv->get_element_type(); + GetConvData(conv, conv_data); return conv_data.pads_begin_height || conv_data.pads_end_height || conv_data.pads_begin_width || conv_data.pads_end_width; } -static bool TransposeOrderMatches(std::shared_ptr transpose, std::vector order) { - if (!transpose) - return false; - const ngraph::Output& transpose_order = transpose->input_value(1); - auto transpose_order_dim = transpose_order.get_shape().size(); - - if (transpose_order_dim != 1 || transpose_order.get_shape()[0] != order.size()) - return false; - - auto const_with_order_values = std::dynamic_pointer_cast(transpose_order.get_node_shared_ptr()); - if (!const_with_order_values) - return false; - - const auto data = const_with_order_values->cast_vector(); - if (data.empty()) - return false; - - if (!std::equal(order.begin(), order.end(), data.begin())) - return false; - - return true; -} - static bool VerifyBias(std::shared_ptr bias, const size_t& filter_count) { auto add_const = std::dynamic_pointer_cast(bias->input_value(0).get_node_shared_ptr()); @@ -91,16 +45,6 @@ static bool VerifyBias(std::shared_ptr bias, const size_t& return (add_const && shape_size(add_const->get_shape()) == filter_count); } -static std::shared_ptr FlatCrop(ngraph::Output input, size_t offset, size_t size) { - return std::make_shared( - input, // data - ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {(size_t)0, offset}), // begin sice index - ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {(size_t)0, offset + size}), // end slice index - ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {(size_t)1, (size_t)1}), // strides - std::vector{1, 0}, // begin mask - std::vector{1, 0}); // end mask -} - static void InsertPadding(ngraph::OutputVector& input_rows_to_concat, size_t size, const std::shared_ptr& conv, const std::shared_ptr padding_const, size_t biggest_padding) { @@ -226,7 +170,7 @@ static bool Convert(std::shared_ptr leading_transpose, ConvData conv_data; - if (!VerifyAndGetConvParams(std::dynamic_pointer_cast(conv), conv_data)) + if (!VerifyAndGetConvData(std::dynamic_pointer_cast(conv), conv_data)) return false; // We are looking for Transpose(NHWC->NCHW) => Conv => Transpose(NCHW->NHWC) @@ -246,7 +190,7 @@ static bool Convert(std::shared_ptr leading_transpose, return true; } -std::function)> consumers_and_rank(const size_t expected_count, const ngraph::Dimension& expected_rank) { +static std::function)> consumers_and_rank(const size_t expected_count, const ngraph::Dimension& expected_rank) { return [=](ngraph::Output output) -> bool { return ngraph::pattern::consumers_count(expected_count) && ngraph::pattern::rank_equals(expected_rank); }; @@ -287,10 +231,8 @@ ConvertPadded2ValidConv::ConvertPadded2ValidConv() { ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { const auto& pattern_map = m.get_pattern_value_map(); - auto conv_output = conv->output(0).get_node_shared_ptr(); - IE_ASSERT(conv_output != nullptr); - - auto bias_node = std::dynamic_pointer_cast(conv_output); + auto bias_it = pattern_map.find(bias); + auto bias_node = (bias_it == std::end(pattern_map) ? 
nullptr : bias_it->second.get_node_shared_ptr()); return Convert(pattern_map.at(leading_transpose).get_node_shared_ptr(), pattern_map.at(conv).get_node_shared_ptr(), pattern_map.at(trailing_transpose).get_node_shared_ptr(), bias_node); diff --git a/inference-engine/src/gna_plugin/transformations/convert_padded2valid_conv.hpp b/inference-engine/src/gna_plugin/transformations/convert_padded2valid_conv.hpp index 9d8a0f10477800..55bef912b9c184 100644 --- a/inference-engine/src/gna_plugin/transformations/convert_padded2valid_conv.hpp +++ b/inference-engine/src/gna_plugin/transformations/convert_padded2valid_conv.hpp @@ -13,11 +13,11 @@ namespace GNAPluginNS { * wrapped with transposes, to a valid convolution with padding added before the leading transpose, * POT precessed models are supported (fake quantized layers omitted below for clarity): * - * Padding - * | + * Padding + * | * Transpose (NHWC -> NCHW) Transpose (NHWC -> NCHW) * | | - * Convolution with padding Convolution with padding + * Convolution with padding Valid convolution * | | * Broadcast Bias (optional) Broadcast Bias (optional) * | | diff --git a/inference-engine/src/gna_plugin/transformations/decompose_2d_conv.cpp b/inference-engine/src/gna_plugin/transformations/decompose_2d_conv.cpp new file mode 100644 index 00000000000000..ec4462782c58bd --- /dev/null +++ b/inference-engine/src/gna_plugin/transformations/decompose_2d_conv.cpp @@ -0,0 +1,583 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "transformations/decompose_2d_conv.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include "utils/transformation_helper.hpp" +#include "backend/gna_limitations.hpp" +#include "layers/gna_convolution_layer.hpp" + + +using namespace GNAPluginNS; + +NGRAPH_RTTI_DEFINITION(Decompose2DConv, "Decompose2DConv", 0); +NGRAPH_RTTI_DEFINITION(Decompose2DConvTransposedWithBias, "Decompose2DConvTransposedWithBias", 0); +NGRAPH_RTTI_DEFINITION(Decompose2DConvTransposedWithBiasAF, "Decompose2DConvTransposedWithBiasAF", 0); + +struct GraphData { + std::shared_ptrleading_transpose; + std::shared_ptrconv; + std::shared_ptrtrailing_transpose; + std::shared_ptraf; + std::shared_ptrmax_pool; + std::shared_ptrlast_op_in_sequence_for_replacement; + std::shared_ptrbias_const; + size_t conv_count; + size_t pool_size_width; + size_t pool_stride_width; + // TODO: currently 2D max pool is not supported + //size_t pool_size_height; + //size_t pool_stride_height; +}; + +static bool VerifyAndGetConvData(std::shared_ptr conv, ConvData& conv_data) { + const auto& input = conv->input_value(0); + const auto& filters = conv->input_value(1); + + // We support only batch == 1 + if (input.get_shape()[0] != 1) { + return false; + } + + size_t filter_height = filters.get_shape()[2]; + size_t filter_width = filters.get_shape()[3]; + + if (filter_width > GNALimitations::copyMaxGrouping || filter_height > GNALimitations::copyMaxGrouping) { + return false; + } + + GetConvData(conv, conv_data); + + IE_ASSERT(conv_data.output_channel_count == conv->get_output_shape(0)[1]); + + return true; +} + +static std::shared_ptr VerifyBiasAndReshapeConst(std::shared_ptr conv_bias, const ConvData& conv_data) { + auto add_const = std::dynamic_pointer_cast(conv_bias->input_value(1).get_node_shared_ptr()); + + if (add_const) { + auto bias_size = shape_size(add_const->get_shape()); + + // The add may be a normal add not conv bias, then we just go further + if (bias_size == conv_data.filter_count) 
{ + return ngraph::op::util::make_try_fold(add_const, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4}, ngraph::Shape{1, bias_size, 1, 1}), false); + } + } + // Bias size does not match (or dynamic bias), can't decompose such convolution + return nullptr; +} + +static bool VerifyMaxPool(GraphData& graph_data, std::shared_ptr max_pool) { + auto pool_filter = max_pool->get_kernel(); + auto pool_strides = max_pool->get_strides(); + + // Check Max Pool padding and limitations + if (max_pool->get_auto_pad() != ngraph::op::PadType::VALID || + pool_filter.size() != 2 || pool_strides.size() != 2 || + pool_filter[0] > GNALimitations::maxPoolMaxWindowSize) + return false; + + graph_data.pool_size_width = pool_filter[1]; + graph_data.pool_stride_width = pool_strides[1]; + return true; +} + +static size_t CalculateConvCount(const ConvData& conv_data) { + // Check if split of plane due to GNA HW limitations of 768 filter elements is possible + size_t conv_count = 1; + size_t total_factorized_conv_channel_count = (conv_data.input_channel_count * conv_data.filter_height * conv_data.filter_width); + while (total_factorized_conv_channel_count / conv_count > GNALimitations::convFilterMaxSize || + total_factorized_conv_channel_count % conv_count != 0 || conv_data.filter_channel_count % conv_count != 0) + conv_count++; + + return conv_count; +} + +static bool ShouldDecompose(GraphData& graph_data, const ConvData& conv_data) { + // Calculate the number of splits required + graph_data.conv_count = CalculateConvCount(conv_data); + + // Concat (copy) layer limitation allows to split up to a certain limit + // Currently we are able to split only convolutions without pooling in horizontal dimension + if (graph_data.conv_count > GNALimitations::copyMaxGrouping || + ((graph_data.pool_size_width > 1 || graph_data.pool_stride_width > 1) && graph_data.conv_count > 1)) + return false; + + // GNA supported features or handled otherwise - there is no need to decompose such convolution + if (graph_data.conv_count == 1 && (((conv_data.input_height == 1 || conv_data.input_width == 1) && + conv_data.filter_dilation_width == 1 && conv_data.filter_dilation_height == 1) || + GNAConvolutionLayer::isMappableFrom2DTo1D(conv_data.input_height, conv_data.input_width, conv_data.filter_width, conv_data.filter_stride_width))) + return false; + + return true; +} + +static std::vector> Split2DConvFilters(std::shared_ptr& filters, + const bool& vertical_permute, const bool& horizontal_permute, const size_t& split_channels) { + + if (!horizontal_permute && !vertical_permute && split_channels == 1) + return {filters}; + + std::vector > result; + ngraph::Shape reshape_shape; + auto flat_filters = filters->outputs(); + const auto filter_shape = filters->get_output_shape(0); + IE_ASSERT(filter_shape.size() == 4); + + if (split_channels > 1) { + const auto axis_node = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{}, {1}); + const auto split = std::make_shared(filters, axis_node, split_channels); + flat_filters = split->outputs(); + } + + for (size_t split_index = 0; split_index < split_channels; split_index++) { + ngraph::Output& flat_filter = flat_filters[split_index]; + if (horizontal_permute && !vertical_permute) { + result.push_back(std::make_shared(flat_filter, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4}, ngraph::Shape{0, 1, 3, 2}))); + } else { + result.push_back(flat_filter.get_node_shared_ptr()); + } + } + + if (vertical_permute && horizontal_permute) { + 
reshape_shape = ngraph::Shape{filter_shape[0], filter_shape[1] * filter_shape[2] * filter_shape[3] / split_channels, 1, 1}; + } else if (vertical_permute && !horizontal_permute) { + reshape_shape = ngraph::Shape{filter_shape[0], filter_shape[1] * filter_shape[2] / split_channels, 1, filter_shape[3]}; + } else if (!vertical_permute && horizontal_permute) { + reshape_shape = ngraph::Shape{filter_shape[0], filter_shape[1] * filter_shape[3] / split_channels, filter_shape[2], 1}; + } else { + reshape_shape = ngraph::Shape{filter_shape[0], filter_shape[1] / split_channels, filter_shape[2], filter_shape[3]}; + } + + for (auto &new_filter : result) + new_filter = ngraph::op::util::make_try_fold(new_filter, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4}, reshape_shape), false); + + return result; +} + +static ngraph::OutputVector SplitInput(const GraphData& graph_data, ConvData& conv_data) { + // We need to have proper input shape first + ngraph::OutputVector split_planes; + auto padded_input_plane = std::make_shared(graph_data.leading_transpose->input_value(0), + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, + ngraph::Shape{1, shape_size(graph_data.leading_transpose->input_value(0).get_shape())}), false); + copy_runtime_info(graph_data.conv, padded_input_plane); + + if (graph_data.conv_count > 1) { + // If we have split input plane and convolutions due to GNA limitation - we must sum their results at the end + conv_data.input_channel_count /= graph_data.conv_count; + + auto reshape_before_transpose = std::make_shared(padded_input_plane, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, + {shape_size(padded_input_plane->get_shape()) / graph_data.conv_count, graph_data.conv_count}), false); + + auto transpose_before_channel_wise_split = std::make_shared(reshape_before_transpose, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {1, 0})->output(0)); + + const auto axis_node = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{}, {0}); + const auto split = std::make_shared(transpose_before_channel_wise_split, axis_node, graph_data.conv_count); + split_planes = split->outputs(); + } else { + split_planes.push_back(padded_input_plane); + } + + return split_planes; +} + +static std::vector> SplitFilters(const GraphData& graph_data, ConvData& conv_data) { + // If the input plane exceeds GNA limits and we have split into several convolutions, then we need to split filter data as well; + // we also need to take filter height and potential dilation into account when modifying the filters + auto filter_values = std::dynamic_pointer_cast(graph_data.conv->input_value(1).get_node_shared_ptr()); + bool vertical_permute = (conv_data.filter_height > 1); + bool horizontal_permute = (conv_data.filter_dilation_width > 1); + std::vector> h_1_filters{}; + + h_1_filters = Split2DConvFilters(filter_values, vertical_permute, horizontal_permute, graph_data.conv_count); + + for (auto filter : h_1_filters) + copy_runtime_info(graph_data.conv, filter); + + return h_1_filters; +} + +static void TransformInput(const GraphData& graph_data, const ConvData& conv_data, ngraph::Output& split_input_plane) { + /* + * Padded row - NHWC order + * | + * Split in vertical dim (filter height) + * / | \ + * Concat + * | + * Transpose + */ + + // First we need to prepare flat (height = 1) slices of input data proper for flattened (height = 1) filter size + ngraph::OutputVector dilated_input_planes; + for (size_t 
filter_height = 0; filter_height < conv_data.filter_height; filter_height++) { + size_t offset = filter_height * conv_data.filter_dilation_height * conv_data.input_width * conv_data.input_channel_count; + auto slice = FlatCrop(split_input_plane, offset, conv_data.input_width * conv_data.input_channel_count * conv_data.output_height); + copy_runtime_info(graph_data.conv, slice); + dilated_input_planes.push_back(slice); + } + + // Interleaving dilated input planes + auto dilated_chunks_concat = std::make_shared(dilated_input_planes, 0); + + auto transposed_dilated_chunks = std::make_shared(dilated_chunks_concat, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {1, 0})->output(0)); + + // Flattening of interleaved input planes + auto flattened_dilated_transposed_input = std::make_shared(transposed_dilated_chunks, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, + {(size_t)1, conv_data.input_width * conv_data.input_channel_count * conv_data.output_height * conv_data.filter_height}), false); + + copy_runtime_info(graph_data.conv, {dilated_chunks_concat, flattened_dilated_transposed_input, transposed_dilated_chunks }); + split_input_plane = flattened_dilated_transposed_input; +} + +// Valid 1D (decomposed 2D) convolution wrapped with transposes NHWC => NCHW => conv => NCHW => NHWC +static std::shared_ptr Create1DConv(const GraphData& graph_data, const ConvData& conv_data, const ngraph::Output& input, + std::shared_ptr filters, const size_t conv_index, const size_t h_index) { + // Transpose NHWC => NCHW + std::shared_ptr nchw_input = std::make_shared(input, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 3, 1, 2})->output(0)); + + // 1D Convolution + auto conv = std::make_shared(nchw_input, filters, + ngraph::Strides{1, conv_data.filter_stride_width}, ngraph::CoordinateDiff{0, 0}, ngraph::CoordinateDiff{0, 0}, + ngraph::Strides{1, 1}, ngraph::op::PadType::VALID); + std::string conv_name = graph_data.conv->get_friendly_name() + "_H_" + std::to_string(h_index) + "_CH_" + std::to_string(0); + conv->set_friendly_name(conv_name); + + // Bias + std::shared_ptr last_conv_block_op = conv; + if (graph_data.bias_const && conv_index == 0) { + last_conv_block_op = std::make_shared(conv, graph_data.bias_const); + copy_runtime_info(graph_data.conv, last_conv_block_op); + } + + // Max pooling + if ((graph_data.max_pool && graph_data.pool_size_width > 1) || graph_data.pool_stride_width > 1) { + last_conv_block_op = std::make_shared(last_conv_block_op, + ngraph::Strides{1, graph_data.pool_stride_width}, ngraph::Shape{0, 0}, ngraph::Shape{0, 0}, + ngraph::Shape{1, graph_data.pool_size_width}, graph_data.max_pool->get_rounding_type(), ngraph::op::PadType::VALID); + } + + // Activation function + if (graph_data.af && graph_data.conv_count == 1) { + auto af_result = graph_data.af->copy_with_new_inputs({last_conv_block_op}); + copy_runtime_info(conv, af_result); + last_conv_block_op = af_result; + } + + // Transpose NCHW => NHWC + auto nhwc_output = std::make_shared(last_conv_block_op, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 2, 3, 1})->output(0)); + copy_runtime_info(graph_data.conv, {nchw_input, conv, nhwc_output}); + return nhwc_output; +} + +static std::shared_ptr CreateDecomposedConv(const GraphData& graph_data, ConvData& conv_data, + ngraph::Output& reduced_input_plane, const std::vector>& h_1_filters, const size_t conv_index) { + ngraph::OutputVector result_chunks; + std::shared_ptr last_op; 
+ bool horizontal_permute = (conv_data.filter_dilation_width > 1); + size_t h_1_filter_channel_count = (conv_data.input_channel_count * conv_data.filter_height); + + for (size_t output_height = 0; output_height < conv_data.output_height; output_height += conv_data.filter_stride_height) { + size_t offset = output_height * conv_data.input_width * h_1_filter_channel_count; + auto row = (conv_data.output_height == 1) ? reduced_input_plane : + FlatCrop(reduced_input_plane, offset, conv_data.input_width * h_1_filter_channel_count); + /* + * Padded row + * | + * ??? ??? + * | + * Split in vertical dim + * / | \ + * Concat + * | + * Permute + * | + * Transpose (NHWC => NCHW) + * | + * 1D Conv (Bias | MaxPooling) + * | + * Transpose (NCHW => NHWC) + */ + auto nhwc_conv_y_input = row; + + if (horizontal_permute) { + // Horizontal split - transform input accordingly + ngraph::OutputVector dilated_chunks; + std::shared_ptr dilated_chunks_concat = nhwc_conv_y_input.get_node_shared_ptr(); + + if (conv_data.filter_width > 1) { + for (size_t filter_width = 0; filter_width < conv_data.filter_width; filter_width++) { + size_t offset = filter_width * conv_data.filter_dilation_width * h_1_filter_channel_count; + auto slice = FlatCrop(row, offset, h_1_filter_channel_count * conv_data.output_width); + copy_runtime_info(graph_data.conv, slice); + dilated_chunks.push_back(slice); + } + + dilated_chunks_concat = std::make_shared(dilated_chunks, 0); + } + + auto transposed_dilated_chunks = std::make_shared(dilated_chunks_concat, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {1, 0})->output(0)); + + auto flattened_dilated_conv_input = std::make_shared(transposed_dilated_chunks, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4}, + ngraph::Shape{1, 1, conv_data.output_width, h_1_filter_channel_count * conv_data.filter_width}), false); + + copy_runtime_info(graph_data.conv, ngraph::NodeVector{flattened_dilated_conv_input, transposed_dilated_chunks, dilated_chunks_concat}); + + nhwc_conv_y_input = flattened_dilated_conv_input; + } else { + // If no horizontal split is done, only reshape is required before decomposed convolution + nhwc_conv_y_input = std::make_shared(nhwc_conv_y_input, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4}, + ngraph::Shape{1, 1, conv_data.input_width, h_1_filter_channel_count}), false); + } + + // Pointwise convolutions + // Valid 1D convolution wrapped with transposes NHWC => NCHW => Conv => NCHW => NHWC + // Activation function can be fused with convolution only if it isn't split + auto nhwc_y_output = Create1DConv(graph_data, conv_data, nhwc_conv_y_input, h_1_filters[conv_index], conv_index, output_height); + result_chunks.push_back(nhwc_y_output); + last_op = nhwc_y_output; + } + + // Horizontal dimemsion greater than 1 + if (result_chunks.size() > 1) { + // Concat in horizontal dimension + // In NHWC index of H is 1 + auto concatenated_sub_results = std::make_shared(result_chunks, 1); + copy_runtime_info(graph_data.conv, concatenated_sub_results); + last_op = concatenated_sub_results; + } + return last_op; +} + +static void Decompose(const GraphData& graph_data, ConvData& conv_data) { + std::vector> partial_conv_results; + + // Split input due to GNA filter element count limit + auto split_planes = SplitInput(graph_data, conv_data); + // Split filters due to GNA filter element count limit, 2D convolution shape, or dilations + auto h_1_filters = SplitFilters(graph_data, conv_data); + + // Do transformations in 
each of the splits created above + for (size_t conv_index = 0; conv_index < graph_data.conv_count; conv_index++) { + ngraph::Output& split_input_plane = split_planes[conv_index]; + + // Input data needs to be prepared before 2D convolution decomposition + if (conv_data.filter_height > 1) { + TransformInput(graph_data, conv_data, split_input_plane); + } + + auto flat_conv = CreateDecomposedConv(graph_data, conv_data, split_input_plane, h_1_filters, conv_index); + partial_conv_results.push_back(flat_conv); + } + + std::shared_ptr conv_result = partial_conv_results.front(); + for (size_t i = 1; i < partial_conv_results.size(); i++) { + auto add_result = std::make_shared(partial_conv_results[i], conv_result); + copy_runtime_info(graph_data.conv, add_result); + conv_result = add_result; + } + + // TODO: Max Pool 2D case + //if (graph_data.max_pool && (graph_data.pool_size_height > 1 || graph_data.pool_stride_height > 1)) { + //} + + // Activation function after trailing Transpose NCHW->NHWC + if (graph_data.af && graph_data.conv_count > 1) { + auto af_result = graph_data.af->copy_with_new_inputs({conv_result}); + copy_runtime_info(graph_data.conv, af_result); + conv_result = af_result; + } + // We need to put the same name as before for the Convolution layer, so its output can be used as network result + std::string conv_result_name = graph_data.last_op_in_sequence_for_replacement->get_friendly_name(); + replace_node(graph_data.last_op_in_sequence_for_replacement, conv_result); + conv_result->set_friendly_name(conv_result_name); +} + +static bool Convert(std::shared_ptr leading_transpose, + std::shared_ptr conv, + std::shared_ptr trailing_transpose, + std::shared_ptr bias, + std::shared_ptr max_pool, + std::shared_ptr af, + std::shared_ptr last_op_for_replacement) { + + GraphData graph_data{std::dynamic_pointer_cast(leading_transpose), + std::dynamic_pointer_cast(conv), + std::dynamic_pointer_cast(trailing_transpose), + std::dynamic_pointer_cast(af), + std::dynamic_pointer_cast(max_pool), + last_op_for_replacement, nullptr, 1, 1, 1}; + ConvData conv_data; + + if (!VerifyAndGetConvData(std::dynamic_pointer_cast(conv), conv_data)) + return false; + + // We are looking for Transpose(NHWC->NCHW) => Conv => Transpose(NCHW->NHWC) + // or similar cases, so required network must be in NHWC order like in TF + if (!TransposeOrderMatches(std::dynamic_pointer_cast(leading_transpose), {0, 3, 1, 2})) + return false; + + if (!TransposeOrderMatches(std::dynamic_pointer_cast(trailing_transpose), {0, 2, 3, 1})) + return false; + + if (bias && !(graph_data.bias_const = VerifyBiasAndReshapeConst(std::dynamic_pointer_cast(bias), conv_data))) + return false; + + if (max_pool && !VerifyMaxPool(graph_data, std::dynamic_pointer_cast(max_pool))) + return false; + + if (!ShouldDecompose(graph_data, conv_data)) + return false; + + // All checks applied - now we may start decomposition + Decompose(graph_data, conv_data); + + return true; +} + +static bool VerifyBias(std::shared_ptr conv, std::shared_ptr bias) { + auto add_const = std::dynamic_pointer_cast(bias->input_value(1).get_node_shared_ptr()); + + if (!add_const) { + add_const = std::dynamic_pointer_cast(bias->input_value(0).get_node_shared_ptr()); + } + + if (!add_const) { + auto bias_size = shape_size(add_const->get_shape()); + auto conv_filter_count = conv->input_value(1).get_shape()[0]; + if (bias_size == conv_filter_count) + return true; + } + return false; +} + +Decompose2DConv::Decompose2DConv() { + MATCHER_SCOPE(Decompose2DConv); + + auto const_input = 
ngraph::pattern::wrap_type(); + auto leading_transpose = ngraph::pattern::wrap_type({ngraph::pattern::any_input(), const_input}, + consumers_and_rank(1, 4)); + auto conv = ngraph::pattern::wrap_type( + {leading_transpose, ngraph::pattern::wrap_type(ngraph::pattern::rank_equals(4))}, + consumers_and_rank(1, 4)); + auto bias = ngraph::pattern::wrap_type({conv, const_input}, + ngraph::pattern::consumers_count(1)); + auto max_pool = ngraph::pattern::wrap_type({bias}, + ngraph::pattern::consumers_count(1)); + auto af1 = ngraph::pattern::wrap_type({bias}, ngraph::pattern::consumers_count(1)); + auto af2 = ngraph::pattern::wrap_type({max_pool}, ngraph::pattern::consumers_count(1)); + auto transpose_input = std::make_shared(ngraph::OutputVector{conv, bias, max_pool, af1, af2}); + auto trailing_transpose = ngraph::pattern::wrap_type({transpose_input, const_input}, + consumers_and_rank(1, 4)); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + const auto& pattern_map = m.get_pattern_value_map(); + auto bias_it = pattern_map.find(bias); + auto bias_node = (bias_it == std::end(pattern_map) ? nullptr : bias_it->second.get_node_shared_ptr()); + auto max_pool_it = pattern_map.find(max_pool); + auto max_pool_node = (max_pool_it == std::end(pattern_map) ? nullptr : max_pool_it->second.get_node_shared_ptr()); + auto af1_it = pattern_map.find(af1); + auto af2_it = pattern_map.find(af2); + auto af_node = (af1_it == std::end(pattern_map) ? + ((af2_it == std::end(pattern_map) ? nullptr : af2_it->second.get_node_shared_ptr())) : af1_it->second.get_node_shared_ptr()); + + return Convert(pattern_map.at(leading_transpose).get_node_shared_ptr(), pattern_map.at(conv).get_node_shared_ptr(), + pattern_map.at(trailing_transpose).get_node_shared_ptr(), bias_node, max_pool_node, af_node, + pattern_map.at(trailing_transpose).get_node_shared_ptr()); + }; + + auto m = std::make_shared(trailing_transpose, matcher_name); + this->register_matcher(m, callback); +} + +Decompose2DConvTransposedWithBias::Decompose2DConvTransposedWithBias() { + MATCHER_SCOPE(Decompose2DConvTransposedWithBias); + + auto const_input_i64 = ngraph::pattern::wrap_type(ngraph::pattern::type_matches(ngraph::element::i64)); + auto const_input = ngraph::pattern::wrap_type(); + auto leading_transpose = ngraph::pattern::wrap_type({ngraph::pattern::any_input(), const_input_i64}, + consumers_and_rank(1, 4)); + auto conv = ngraph::pattern::wrap_type( + {leading_transpose, ngraph::pattern::wrap_type(ngraph::pattern::rank_equals(4))}, + consumers_and_rank(1, 4)); + auto trailing_transpose = ngraph::pattern::wrap_type({conv, const_input_i64}, + consumers_and_rank(1, 4)); + auto bias = ngraph::pattern::wrap_type({trailing_transpose, const_input}, + ngraph::pattern::consumers_count(1)); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + const auto& pattern_map = m.get_pattern_value_map(); + if (!VerifyBias(pattern_map.at(conv).get_node_shared_ptr(), pattern_map.at(bias).get_node_shared_ptr())) + return false; + + return Convert(pattern_map.at(leading_transpose).get_node_shared_ptr(), pattern_map.at(conv).get_node_shared_ptr(), + pattern_map.at(trailing_transpose).get_node_shared_ptr(), pattern_map.at(bias).get_node_shared_ptr(), nullptr, nullptr, + pattern_map.at(bias).get_node_shared_ptr()); + }; + + auto m = std::make_shared(bias, matcher_name); + this->register_matcher(m, callback); +} + +Decompose2DConvTransposedWithBiasAF::Decompose2DConvTransposedWithBiasAF() { + 
MATCHER_SCOPE(Decompose2DConvTransposedWithBiasAF); + + auto const_input_i64 = ngraph::pattern::wrap_type(ngraph::pattern::type_matches(ngraph::element::i64)); + auto const_input = ngraph::pattern::wrap_type(); + auto leading_transpose = ngraph::pattern::wrap_type({ngraph::pattern::any_input(), const_input_i64}, + consumers_and_rank(1, 4)); + auto conv = ngraph::pattern::wrap_type( + {leading_transpose, ngraph::pattern::wrap_type(ngraph::pattern::rank_equals(4))}, + consumers_and_rank(1, 4)); + auto trailing_transpose = ngraph::pattern::wrap_type({conv, const_input_i64}, + consumers_and_rank(1, 4)); + auto bias = ngraph::pattern::wrap_type({trailing_transpose, const_input}, + ngraph::pattern::consumers_count(1)); + auto af = ngraph::pattern::wrap_type({bias}, + ngraph::pattern::consumers_count(1)); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + const auto& pattern_map = m.get_pattern_value_map(); + if (!VerifyBias(pattern_map.at(conv).get_node_shared_ptr(), pattern_map.at(bias).get_node_shared_ptr())) + return false; + + return Convert(pattern_map.at(leading_transpose).get_node_shared_ptr(), pattern_map.at(conv).get_node_shared_ptr(), + pattern_map.at(trailing_transpose).get_node_shared_ptr(), pattern_map.at(bias).get_node_shared_ptr(), + nullptr, pattern_map.at(af).get_node_shared_ptr(), pattern_map.at(af).get_node_shared_ptr()); + }; + + auto m = std::make_shared(af, matcher_name); + this->register_matcher(m, callback); +} diff --git a/inference-engine/src/gna_plugin/transformations/decompose_2d_conv.hpp b/inference-engine/src/gna_plugin/transformations/decompose_2d_conv.hpp new file mode 100644 index 00000000000000..4fbaf47ff7252b --- /dev/null +++ b/inference-engine/src/gna_plugin/transformations/decompose_2d_conv.hpp @@ -0,0 +1,80 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace GNAPluginNS { + +/** + * @brief Decompose a 2D convolution, wrapped with transposes, + * to a set of valid 1D convolutions with padding added in front of the set: + * + * Padding + * | + * Transpose (NHWC -> NCHW) Transpose (NHWC -> NCHW) + * | | + * Convolution with padding Valid convolution + * | | + * Broadcast Bias (optional) Broadcast Bias (optional) + * | | + * Max Pooling (optional) Max Pooling (optional) + * | | + * Activation Function (optional) Activation Function (optional) + * | | + * Transpose (NCHW -> NHWC) Transpose (NCHW -> NHWC) + * + */ +class Decompose2DConv : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + Decompose2DConv(); +}; + +/** + * @brief Decomopose a 2D convolution wrapped with transposes, with bias after trailing transpose, + * to a set of valid 1D convolutions with padding added in front of the set: + * + * Padding + * | + * Transpose (NHWC -> NCHW) Transpose (NHWC -> NCHW) + * | | + * Convolution with padding Valid convolution + * | | + * Transpose (NCHW -> NHWC) Transpose (NCHW -> NHWC) + * | | + * Broadcast Bias Broadcast Bias + * + */ +class Decompose2DConvTransposedWithBias : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + Decompose2DConvTransposedWithBias(); +}; + +/** + * @brief Decomopose a 2D convolution wrapped with transposes, with bias + * to a set of valid 1D convolutions with padding added in front of the set: + * + * Padding + * | + * Transpose (NHWC -> NCHW) Transpose (NHWC -> NCHW) + * | | + * Convolution with padding Valid convolution + * | | + * Transpose (NCHW -> NHWC) Transpose (NCHW -> NHWC) + * 
| | + * Broadcast Bias Broadcast Bias + * | | + * Activation Function Activation Function + * + */ +class Decompose2DConvTransposedWithBiasAF : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + Decompose2DConvTransposedWithBiasAF(); +}; + +} // namespace GNAPluginNS diff --git a/inference-engine/src/gna_plugin/transformations/utils/transformation_helper.cpp b/inference-engine/src/gna_plugin/transformations/utils/transformation_helper.cpp new file mode 100644 index 00000000000000..79fe863a18fbb2 --- /dev/null +++ b/inference-engine/src/gna_plugin/transformations/utils/transformation_helper.cpp @@ -0,0 +1,75 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + + +#include +#include +#include "transformation_helper.hpp" + + +namespace GNAPluginNS { + +void GetConvData(std::shared_ptr conv, ConvData& conv_data) { + conv_data.output_height = conv->get_output_shape(0)[2]; + conv_data.output_width = conv->get_output_shape(0)[3]; + conv_data.input_channel_count = conv->input_value(0).get_shape()[1]; + conv_data.input_height = conv->input_value(0).get_shape()[2]; + conv_data.input_width = conv->input_value(0).get_shape()[3]; + conv_data.filter_count = conv->input_value(1).get_shape()[0]; + conv_data.filter_channel_count = conv->input_value(1).get_shape()[1]; + conv_data.filter_height = conv->input_value(1).get_shape()[2]; + conv_data.filter_width = conv->input_value(1).get_shape()[3]; + conv_data.filter_dilation_height = conv->get_dilations()[0]; + conv_data.filter_dilation_width = conv->get_dilations()[1]; + conv_data.filter_stride_height = conv->get_strides()[0]; + conv_data.filter_stride_width = conv->get_strides()[1]; + conv_data.output_channel_count = conv_data.filter_count; + conv_data.pads_begin_height = conv->get_pads_begin()[0]; + conv_data.pads_begin_width = conv->get_pads_begin()[1]; + conv_data.pads_end_height = conv->get_pads_end()[0]; + conv_data.pads_end_width = conv->get_pads_end()[1]; + conv_data.padding_type = conv->get_auto_pad(); + conv_data.element_type = conv->get_element_type(); +} + +std::function)> consumers_and_rank(const size_t expected_count, const ngraph::Dimension& expected_rank) { + return [=](ngraph::Output output) -> bool { + return ngraph::pattern::consumers_count(expected_count)(output) && ngraph::pattern::rank_equals(expected_rank)(output); + }; +} + +bool TransposeOrderMatches(std::shared_ptr transpose, std::vector order) { + if (!transpose) + return false; + const ngraph::Output& transpose_order = transpose->input_value(1); + auto transpose_order_dim = transpose_order.get_shape().size(); + + if (transpose_order_dim != 1 || transpose_order.get_shape()[0] != order.size()) + return false; + + auto const_with_order_values = std::dynamic_pointer_cast(transpose_order.get_node_shared_ptr()); + if (!const_with_order_values) + return false; + + const auto data = const_with_order_values->cast_vector(); + if (data.empty()) + return false; + + if (!std::equal(order.begin(), order.end(), data.begin())) + return false; + + return true; +} + +std::shared_ptr FlatCrop(ngraph::Output input, size_t offset, size_t size) { + return std::make_shared( + input, // data + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {(size_t)0, offset}), // begin sice index + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {(size_t)0, offset + size}), // end slice index + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {(size_t)1, (size_t)1}), // strides + 
std::vector{1, 0}, // begin mask + std::vector{1, 0}); // end mask +} + +} // namespace GNAPluginNS diff --git a/inference-engine/src/gna_plugin/transformations/utils/transformation_helper.hpp b/inference-engine/src/gna_plugin/transformations/utils/transformation_helper.hpp new file mode 100644 index 00000000000000..14fca200f7b196 --- /dev/null +++ b/inference-engine/src/gna_plugin/transformations/utils/transformation_helper.hpp @@ -0,0 +1,64 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +namespace GNAPluginNS { + +struct ConvData { + size_t input_height; + size_t input_width; + size_t input_channel_count; + size_t filter_height; + size_t filter_width; + size_t filter_count; + size_t filter_channel_count; + size_t filter_dilation_height; + size_t filter_dilation_width; + size_t filter_stride_height; + size_t filter_stride_width; + size_t output_height; + size_t output_width; + size_t output_channel_count; + size_t pads_begin_width; + size_t pads_begin_height; + size_t pads_end_width; + size_t pads_end_height; + ngraph::op::PadType padding_type; + ngraph::element::Type element_type; +}; + +/** + * @brief gets all convolution related data into a struct for further processing + * @param conv convolution node to get data of + * @param conv_data convolution data structure to put data into + * @return void + */ +void GetConvData(std::shared_ptr conv, ConvData& conv_data); + +/** + * @brief ngraph matcher predicate fusing existing predicates for consumers count and rank of a layer + * @param expected_count expected consumers count for of node + * @param expected_rank expected node rank + * @return predicate function wrapper + */ +std::function)> consumers_and_rank(const size_t expected_count, const ngraph::Dimension& expected_rank); + +/** + * @brief checks whether transpose matches a given order + * @param transpose transpose layer + * @param order order of transposition to be compared with + * @return true if the order matches, false otherwise + */ +bool TransposeOrderMatches(std::shared_ptr transpose, std::vector order); + +/** + * @brief performs a crop of a flattened input tensor + * @param input input layer + * @param offset offset to start the crop at* + * @param size size of the crop + * @return pointer to the newly created slice + */ +std::shared_ptr FlatCrop(ngraph::Output input, size_t offset, size_t size); +} // namespace GNAPluginNS diff --git a/inference-engine/src/hetero_plugin/hetero_infer_request.cpp b/inference-engine/src/hetero_plugin/hetero_infer_request.cpp index 7171363e7830f2..2b8d2f4f261667 100644 --- a/inference-engine/src/hetero_plugin/hetero_infer_request.cpp +++ b/inference-engine/src/hetero_plugin/hetero_infer_request.cpp @@ -77,7 +77,7 @@ void HeteroInferRequest::SetBlob(const std::string& name, const InferenceEngine: if (findInputAndOutputBlobByName(name, foundInput, foundOutput)) { r->SetBlob(name, data, foundInput->getPreProcess()); } - } catch (const InferenceEngine::NotFound& ex) {} + } catch (const InferenceEngine::NotFound&) {} } } diff --git a/inference-engine/src/inference_engine/CMakeLists.txt b/inference-engine/src/inference_engine/CMakeLists.txt index aeb0386e85c878..8e8a82709031e3 100644 --- a/inference-engine/src/inference_engine/CMakeLists.txt +++ b/inference-engine/src/inference_engine/CMakeLists.txt @@ -77,7 +77,7 @@ endif() addVersionDefines(ie_version.cpp CI_BUILD_NUMBER) -set (PUBLIC_HEADERS_DIR "${IE_MAIN_SOURCE_DIR}/include") +set (PUBLIC_HEADERS_DIR "${IE_MAIN_SOURCE_DIR}/include/ie") 
file (GLOB_RECURSE PUBLIC_HEADERS ${PUBLIC_HEADERS_DIR}/*.hpp @@ -120,10 +120,12 @@ ie_faster_build(${TARGET_NAME}_obj ) target_compile_definitions(${TARGET_NAME}_obj PRIVATE IMPLEMENT_INFERENCE_ENGINE_API - $) + $ + $) target_include_directories(${TARGET_NAME}_obj SYSTEM PRIVATE $ $ + $ $) target_include_directories(${TARGET_NAME}_obj PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}" @@ -160,11 +162,12 @@ if (TBBBIND_2_4_FOUND) endif() target_link_libraries(${TARGET_NAME} PRIVATE pugixml::static openvino::itt ${CMAKE_DL_LIBS} Threads::Threads - ngraph inference_engine_transformations) + ngraph::frontend_manager::static inference_engine_transformations + PUBLIC ngraph) target_include_directories(${TARGET_NAME} INTERFACE $ - $ + $ PRIVATE $ $) @@ -200,7 +203,7 @@ if(WIN32) set_target_properties(${TARGET_NAME}_s PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}_s) endif() -target_link_libraries(${TARGET_NAME}_s PRIVATE openvino::itt ${CMAKE_DL_LIBS} ngraph +target_link_libraries(${TARGET_NAME}_s PRIVATE openvino::itt ${CMAKE_DL_LIBS} ngraph ngraph::frontend_manager::static inference_engine_transformations pugixml::static) target_compile_definitions(${TARGET_NAME}_s PUBLIC USE_STATIC_IE) @@ -214,7 +217,7 @@ set_target_properties(${TARGET_NAME} ${TARGET_NAME}_obj ${TARGET_NAME}_s # Export for build tree -export(TARGETS ${TARGET_NAME} NAMESPACE IE:: +export(TARGETS ngraph ${TARGET_NAME} NAMESPACE IE:: APPEND FILE "${CMAKE_BINARY_DIR}/InferenceEngineTargets.cmake") # Export for developer package @@ -227,7 +230,14 @@ list(APPEND core_components ngraph) list(APPEND PATH_VARS "IE_INCLUDE_DIR" "IE_NGRAPH_DIR" "IE_PARALLEL_CMAKE") -if((THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO") AND TBBROOT MATCHES ${TEMP}) +# define variables for InferenceEngineConfig.cmake +if(THREADING MATCHES "^(TBB|TBB_AUTO)$") + set(IE_TBB_DIR "${TBB_DIR}") + list(APPEND PATH_VARS "IE_TBB_DIR") +endif() + +# install only downloaded TBB, system one is not installed +if(THREADING MATCHES "^(TBB|TBB_AUTO)$" AND TBBROOT MATCHES ${TEMP}) ie_cpack_add_component(tbb REQUIRED) list(APPEND core_components tbb) @@ -247,8 +257,6 @@ if((THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO") AND TBBROOT MATCH COMPONENT tbb) set(IE_TBB_DIR_INSTALL "external/tbb/cmake") - set(IE_TBB_DIR "${TBB_DIR}") - list(APPEND PATH_VARS "IE_TBB_DIR") install(FILES "${TBB}/cmake/TBBConfig.cmake" "${TBB}/cmake/TBBConfigVersion.cmake" @@ -261,7 +269,7 @@ endif() ie_cpack_add_component(core REQUIRED DEPENDS ${core_components}) ie_cpack_add_component(core_dev REQUIRED core ngraph_dev) -install(DIRECTORY "${PUBLIC_HEADERS_DIR}" DESTINATION ${IE_CPACK_IE_DIR} +install(DIRECTORY "${PUBLIC_HEADERS_DIR}" DESTINATION ${IE_CPACK_IE_DIR}/include COMPONENT core_dev) install(TARGETS ${TARGET_NAME} EXPORT InferenceEngineTargets @@ -299,7 +307,7 @@ configure_package_config_file("${OpenVINO_SOURCE_DIR}/cmake/templates/InferenceE INSTALL_DESTINATION "${CMAKE_INSTALL_PREFIX}" PATH_VARS ${PATH_VARS}) -set(IE_INCLUDE_DIR "include") +set(IE_INCLUDE_DIR "include/ie") set(IE_NGRAPH_DIR "../ngraph/cmake") set(IE_TBB_DIR "${IE_TBB_DIR_INSTALL}") set(IE_PARALLEL_CMAKE "share/ie_parallel.cmake") diff --git a/inference-engine/src/inference_engine/cnn_network_ngraph_impl.cpp b/inference-engine/src/inference_engine/cnn_network_ngraph_impl.cpp index 1f05ca0098c3da..f53894e7d2d72f 100644 --- a/inference-engine/src/inference_engine/cnn_network_ngraph_impl.cpp +++ b/inference-engine/src/inference_engine/cnn_network_ngraph_impl.cpp @@ -35,6 +35,9 @@ #include +#include +#include + #include 
"ie_ngraph_utils.hpp" #include "exec_graph_info.hpp" #include "ie_itt.hpp" @@ -88,12 +91,12 @@ void CNNNetworkNGraphImpl::createDataForResult(const ::ngraph::Output<::ngraph:: void CNNNetworkNGraphImpl::validateFunctionNames() const { // nGraph function parameters and pre-Results operations should have unique names - std::unordered_set unique_names; + std::unordered_map> unique_names; for (const auto& param : _ngraph_function->get_parameters()) { if (unique_names.count(param->get_friendly_name())) { IE_THROW() << "Function contains several inputs with one friendly name!"; } - unique_names.insert(param->get_friendly_name()); + unique_names.insert({param->get_friendly_name(), param}); } for (const auto& result : _ngraph_function->get_results()) { const auto& parent = result->get_input_node_shared_ptr(0); @@ -101,10 +104,10 @@ void CNNNetworkNGraphImpl::validateFunctionNames() const { if (parent->get_output_size() > 1) { name += "." + std::to_string(result->get_input_source_output(0).get_index()); } - if (unique_names.count(name) && !ngraph::op::is_parameter(parent)) { - IE_THROW() << "Function contains several inputs and outputs with one friendly name!"; + if (unique_names.count(name) && !ngraph::op::is_parameter(parent) && parent != unique_names.at(name)) { + IE_THROW() << "Function contains several inputs and outputs with one friendly name: " << name; } - unique_names.insert(name); + unique_names.insert({name, parent}); } } @@ -364,13 +367,10 @@ CNNNetworkNGraphImpl::reshape(const std::map& bool parameter_replaced = false; for (size_t i = 0; i < params.size(); i++) { - const auto& param = params[i]; + auto& param = params[i]; if (inputShapes.find(param->get_friendly_name()) == inputShapes.end()) continue; - ::ngraph::PartialShape shape(inputShapes.at(param->get_friendly_name())); - auto newParam = std::make_shared<::ngraph::op::Parameter>(param->get_element_type(), shape); - newParam->set_friendly_name(param->get_friendly_name()); - _ngraph_function->replace_parameter(i, newParam); + param->set_partial_shape(inputShapes.at(param->get_friendly_name())); parameter_replaced = true; } if (parameter_replaced) @@ -392,6 +392,8 @@ CNNNetworkNGraphImpl::reshape(const std::map& ::ngraph::pass::Manager manager; // resolves dynamism by replacing dynamic operation with static version manager.register_pass<::ngraph::pass::ConvertNMS5ToLegacyMatcher>(false); + manager.register_pass<::ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE>(); + manager.register_pass<::ngraph::pass::ConvertMatrixNmsToMatrixNmsIE>(); manager.register_pass<::ngraph::pass::DisableConvertConstantFoldingOnConstPath>(); manager.register_pass<::ngraph::pass::ConstantFolding>(); // OneHotToLegacy changes output precision diff --git a/inference-engine/src/inference_engine/cpp/ie_infer_request.cpp b/inference-engine/src/inference_engine/cpp/ie_infer_request.cpp index 9e68666b7a36f6..f94a3b6ba1c162 100644 --- a/inference-engine/src/inference_engine/cpp/ie_infer_request.cpp +++ b/inference-engine/src/inference_engine/cpp/ie_infer_request.cpp @@ -127,7 +127,7 @@ void InferRequest::SetCompletionCallbackImpl(std::function { plugin.ImportNetwork(networkStream, config); networkIsImported = true; }); - } catch (const HeaderException& ex) { + } catch (const HeaderException&) { // For these exceptions just remove old cache and set that import didn't work cacheManager->removeCacheEntry(blobId); networkIsImported = false; diff --git a/inference-engine/src/inference_engine/ie_layouts.cpp b/inference-engine/src/inference_engine/ie_layouts.cpp index 
b566693c1552c4..a9308877e7d249 100644 --- a/inference-engine/src/inference_engine/ie_layouts.cpp +++ b/inference-engine/src/inference_engine/ie_layouts.cpp @@ -161,8 +161,8 @@ bool TensorDesc::operator!=(const TensorDesc& rhs) const { return !(*this == rhs); } -Layout TensorDesc::getLayoutByDims(const SizeVector& dims) { - switch (dims.size()) { +Layout TensorDesc::getLayoutByRank(size_t rank) { + switch (rank) { case 0: return Layout::SCALAR; case 1: @@ -180,6 +180,10 @@ Layout TensorDesc::getLayoutByDims(const SizeVector& dims) { } } +Layout TensorDesc::getLayoutByDims(const SizeVector& dims) { + return getLayoutByRank(dims.size()); +} + size_t TensorDesc::offset(const SizeVector& v) const { if (layout == Layout::ANY) IE_THROW() << "Cannot calculate offset for any format!"; diff --git a/inference-engine/src/inference_engine/ie_network_reader.cpp b/inference-engine/src/inference_engine/ie_network_reader.cpp index 6043303712dc02..7189a0a098aaa9 100644 --- a/inference-engine/src/inference_engine/ie_network_reader.cpp +++ b/inference-engine/src/inference_engine/ie_network_reader.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -226,6 +227,26 @@ CNNNetwork details::ReadNetwork(const std::string& modelPath, const std::string& return reader->read(modelStream, exts); } } + // Try to load with FrontEndManager + static ngraph::frontend::FrontEndManager manager; + ngraph::frontend::FrontEnd::Ptr FE; + ngraph::frontend::InputModel::Ptr inputModel; + if (!binPath.empty()) { +#if defined(ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32) + std::wstring weights_path = FileUtils::multiByteCharToWString(binPath.c_str()); +#else + std::string weights_path = binPath; +#endif + FE = manager.load_by_model(model_path, weights_path); + if (FE) inputModel = FE->load(model_path, weights_path); + } else { + FE = manager.load_by_model(model_path); + if (FE) inputModel = FE->load(model_path); + } + if (inputModel) { + auto ngFunc = FE->convert(inputModel); + return CNNNetwork(ngFunc); + } IE_THROW() << "Unknown model format! Cannot find reader for model format: " << fileExt << " and read the model: " << modelPath << ". Please check that reader library exists in your PATH."; } @@ -248,4 +269,4 @@ CNNNetwork details::ReadNetwork(const std::string& model, const Blob::CPtr& weig IE_THROW() << "Unknown model format! Cannot find reader for the model and read it. 
Please check that reader library exists in your PATH."; } -} // namespace InferenceEngine \ No newline at end of file +} // namespace InferenceEngine diff --git a/inference-engine/src/legacy_api/include/legacy/ie_layers.h b/inference-engine/src/legacy_api/include/legacy/ie_layers.h index e7ea32467a0654..64fa501966a97e 100644 --- a/inference-engine/src/legacy_api/include/legacy/ie_layers.h +++ b/inference-engine/src/legacy_api/include/legacy/ie_layers.h @@ -1025,7 +1025,8 @@ class INFERENCE_ENGINE_INTERNAL_CNNLAYER_CLASS(EltwiseLayer): public CNNLayer { Logical_OR, Logical_XOR, Logical_NOT, - Mean + Mean, + Abs, }; /** diff --git a/inference-engine/src/low_precision_transformations/CMakeLists.txt b/inference-engine/src/low_precision_transformations/CMakeLists.txt index c6306dbc08f067..7f9d34e7149c88 100644 --- a/inference-engine/src/low_precision_transformations/CMakeLists.txt +++ b/inference-engine/src/low_precision_transformations/CMakeLists.txt @@ -28,8 +28,6 @@ ie_faster_build(${TARGET_NAME} ie_add_vs_version_file(NAME ${TARGET_NAME} FILEDESCRIPTION "Inference Engine LP transformations library") -target_compile_definitions(${TARGET_NAME} PRIVATE inference_engine_transformations_EXPORTS) - target_link_libraries(${TARGET_NAME} PUBLIC inference_engine_transformations PRIVATE openvino::itt) diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/add.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/add.hpp index fa64037797a384..92caba9f382a5f 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/add.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/add.hpp @@ -11,12 +11,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API AddTransformation : public EltwiseBaseTransformation { +class LP_TRANSFORMATIONS_API AddTransformation : public EltwiseBaseTransformation { public: - AddTransformation(const Params& params) : EltwiseBaseTransformation(params) {} - ~AddTransformation() override {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + AddTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/align_quantization_intervals.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/align_quantization_intervals.hpp new file mode 100644 index 00000000000000..4293be82f15d23 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/align_quantization_intervals.hpp @@ -0,0 +1,25 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include "low_precision/lpt_visibility.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +class LP_TRANSFORMATIONS_API AlignQuantizationIntervals; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph + +class ngraph::pass::low_precision::AlignQuantizationIntervals : public ngraph::pass::FunctionPass { +public: + NGRAPH_RTTI_DECLARATION; + bool run_on_function(std::shared_ptr f) override; +}; diff --git 
a/inference-engine/src/low_precision_transformations/include/low_precision/align_quantization_parameters.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/align_quantization_parameters.hpp new file mode 100644 index 00000000000000..fc7f7d30e7f876 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/align_quantization_parameters.hpp @@ -0,0 +1,26 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include "low_precision/lpt_visibility.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +class LP_TRANSFORMATIONS_API AlignQuantizationParameters; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph + +class ngraph::pass::low_precision::AlignQuantizationParameters : public ngraph::pass::FunctionPass { +public: + NGRAPH_RTTI_DECLARATION; + bool run_on_function(std::shared_ptr f) override; +}; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/avg_pool.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/avg_pool.hpp index 823c8990110904..2d37f030ae30a0 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/avg_pool.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/avg_pool.hpp @@ -11,11 +11,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API AvgPoolTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API AvgPoolTransformation : public LayerTransformation { public: - AvgPoolTransformation(const Params& params); - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + AvgPoolTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/base_matcher_pass.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/base_matcher_pass.hpp new file mode 100644 index 00000000000000..4c637624e40f3d --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/base_matcher_pass.hpp @@ -0,0 +1,24 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once +#include +#include +#include "rt_info/attribute_parameters.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +class LP_TRANSFORMATIONS_API BaseMatcherPass; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph + +class LP_TRANSFORMATIONS_API ngraph::pass::low_precision::BaseMatcherPass : public ngraph::pass::MatcherPass { +public: + BaseMatcherPass(const AttributeParameters& params = AttributeParameters()); + AttributeParameters params; +}; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/clamp.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/clamp.hpp index 7698cf5b6da3ca..a3cf76a1284470 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/clamp.hpp +++ 
b/inference-engine/src/low_precision_transformations/include/low_precision/clamp.hpp @@ -12,11 +12,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API ClampTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API ClampTransformation : public LayerTransformation { public: - ClampTransformation(const Params& params); - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) const override; + NGRAPH_RTTI_DECLARATION; + ClampTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr op) const override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/common/dequantization_op.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/common/dequantization_op.hpp index e2fdc58f1b7e18..46b739959d6c28 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/common/dequantization_op.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/common/dequantization_op.hpp @@ -13,7 +13,7 @@ #include #include -#include "transformations_visibility.hpp" +#include "low_precision/lpt_visibility.hpp" #include "transformations/rt_info/dequantization_attribute.hpp" namespace ngraph { @@ -21,7 +21,7 @@ namespace pass { namespace low_precision { // template -// class TRANSFORMATIONS_API DequantizationOp : public BaseOp2 { +// class LP_TRANSFORMATIONS_API DequantizationOp : public BaseOp2 { // public: // template // DequantizationOp(Args&&... args) : BaseOp2(std::forward(args)...) 
{ @@ -63,7 +63,7 @@ void copyRuntimeInfo(const ngraph::Node& from, ngraph::Node& to) { } // namespace -class TRANSFORMATIONS_API DequantizationConvert : public ngraph::opset1::Convert { +class LP_TRANSFORMATIONS_API DequantizationConvert : public ngraph::opset1::Convert { public: DequantizationConvert(const ngraph::Output& arg, const ngraph::element::Type& destination_type) : ngraph::opset1::Convert(arg, destination_type) { @@ -77,7 +77,7 @@ class TRANSFORMATIONS_API DequantizationConvert : public ngraph::opset1::Convert } }; -class TRANSFORMATIONS_API DequantizationSubtract : public ngraph::opset1::Subtract { +class LP_TRANSFORMATIONS_API DequantizationSubtract : public ngraph::opset1::Subtract { public: DequantizationSubtract( const ngraph::Output& arg0, @@ -94,7 +94,7 @@ class TRANSFORMATIONS_API DequantizationSubtract : public ngraph::opset1::Subtra } }; -class TRANSFORMATIONS_API DequantizationMultiply : public ngraph::opset1::Multiply { +class LP_TRANSFORMATIONS_API DequantizationMultiply : public ngraph::opset1::Multiply { public: DequantizationMultiply( const Output& arg0, @@ -116,7 +116,7 @@ class TRANSFORMATIONS_API DequantizationMultiply : public ngraph::opset1::Multip } }; -class TRANSFORMATIONS_API DequantizationAdd : public ngraph::opset1::Add { +class LP_TRANSFORMATIONS_API DequantizationAdd : public ngraph::opset1::Add { public: DequantizationAdd( const ngraph::Output& arg0, diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/common/fake_quantize_dequantization.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/common/fake_quantize_dequantization.hpp index 67c522bb7e3fcf..a9fba5234d1846 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/common/fake_quantize_dequantization.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/common/fake_quantize_dequantization.hpp @@ -8,6 +8,7 @@ #include #include #include +#include namespace ngraph { namespace pass { @@ -15,7 +16,7 @@ namespace low_precision { typedef std::tuple, std::shared_ptr> FakeQuantizeDequantizationValues; -class FakeQuantizeDequantization { +class LP_TRANSFORMATIONS_API FakeQuantizeDequantization { public: FakeQuantizeDequantization(); diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/common/ie_lpt_exception.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/common/ie_lpt_exception.hpp index 1c4cd359f5114e..e59ec61c8f4349 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/common/ie_lpt_exception.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/common/ie_lpt_exception.hpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include /** * @def THROW_TRANSFORMATION_EXCEPTION_LPT @@ -19,7 +19,7 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API Exception : std::exception { +class LP_TRANSFORMATIONS_API Exception : public std::exception { std::shared_ptr buffer; mutable std::string buffer_str; public: @@ -42,7 +42,7 @@ class TRANSFORMATIONS_API Exception : std::exception { #define THROW_TRANSFORMATION_EXCEPTION throw ::ngraph::pass::low_precision::Exception() << __FILE__ << ":" << __LINE__ << " " -class TRANSFORMATIONS_API InferenceEngineLptException : public Exception { +class LP_TRANSFORMATIONS_API InferenceEngineLptException : public Exception { public: InferenceEngineLptException(const std::string& filename, const 
size_t line, const Node& node) { *this diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/common/operation_per_tensor_quantization_restriction.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/common/operation_per_tensor_quantization_restriction.hpp new file mode 100644 index 00000000000000..4c5321b26bef99 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/common/operation_per_tensor_quantization_restriction.hpp @@ -0,0 +1,56 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include + +#include +#include + +namespace ngraph { +namespace pass { +namespace low_precision { + +class OperationPerTensorQuantizationRestriction { +public: + using RestrictedPorts = std::vector; + + ngraph::Node::type_info_t operationType; + bool specifyVersion; + std::vector restrictedPorts; + + OperationPerTensorQuantizationRestriction() = default; + OperationPerTensorQuantizationRestriction( + const ngraph::Node::type_info_t operationType, + const bool specifyVersion, + const RestrictedPorts& restrictedPorts) : + operationType(operationType), + specifyVersion(specifyVersion), + restrictedPorts(restrictedPorts) {} + + template + static OperationPerTensorQuantizationRestriction create( + const RestrictedPorts& restrictedPorts = {}, + const bool specifyVersion = false) { + return OperationPerTensorQuantizationRestriction(T::get_type_info_static(), specifyVersion, restrictedPorts); + } + + template + static RestrictedPorts getPrecisionsByOperationType(std::vector& restrictions) { + for (const auto& restriction : restrictions) { + if (restriction.operationType == T::get_type_info_static()) { + return restriction.restrictedPorts; + } + } + return {}; + } +}; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/common/operation_precision_restriction.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/common/operation_precision_restriction.hpp new file mode 100644 index 00000000000000..d22252ee7afd88 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/common/operation_precision_restriction.hpp @@ -0,0 +1,59 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include +#include + +#include +#include + +namespace ngraph { +namespace pass { +namespace low_precision { + +class OperationPrecisionRestriction { +public: + using PrecisionsByPort = std::vector>>; + + ngraph::Node::type_info_t operationType; + bool specifyVersion; + std::vector>> precisionsByPort; + + OperationPrecisionRestriction() = default; + OperationPrecisionRestriction( + const ngraph::Node::type_info_t operationType, + const bool specifyVersion, + const PrecisionsByPort& precisionsByPort) : + operationType(operationType), + specifyVersion(specifyVersion), + precisionsByPort(precisionsByPort) {} + + template + static OperationPrecisionRestriction create( + const PrecisionsByPort& precisionsByPort, + const bool specifyVersion = false) { + return OperationPrecisionRestriction(T::get_type_info_static(), specifyVersion, precisionsByPort); + } + + template + static PrecisionsByPort getPrecisionsByOperationType(std::vector& restrictions) { + for (const auto& restriction : restrictions) { + if 
(restriction.operationType == T::get_type_info_static()) { + return restriction.precisionsByPort; + } + } + return {}; + } +}; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/common/subgraph.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/common/subgraph.hpp deleted file mode 100644 index 83e8cfc9cc955c..00000000000000 --- a/inference-engine/src/low_precision_transformations/include/low_precision/common/subgraph.hpp +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include -#include - -#include -#include -#include -#include "../ilayer_transformations_manager.hpp" - -namespace ngraph { -namespace pass { -namespace low_precision { - -class Subgraph { -public: - Subgraph(ngraph::pass::ILayerTransformationsManager* layerTransformationsManager); - - bool fillSubgraphForConcat(const std::shared_ptr& concat, std::unordered_set& handledLayers); - bool empty() const; - - std::vector> quantizationLayers; - std::vector> concatLayers; - std::unordered_map> layers; - -private: - bool atLeastOneIsIntermediate(const std::shared_ptr& node) const; - bool fillSubgraphForQuantization(const std::shared_ptr& fakeQuantize, std::unordered_set& handledLayers); - bool fillSubgraphForIntermediate(const std::shared_ptr& intermediate, std::unordered_set& handledLayers); - bool fill(const std::shared_ptr& concat, std::unordered_set& handledLayers); - const ngraph::pass::ILayerTransformationsManager* layerTransformationsManager; -}; - -} // namespace low_precision -} // namespace pass -} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/concat.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/concat.hpp index e381fd5d0a0401..db16f572224293 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/concat.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/concat.hpp @@ -13,32 +13,21 @@ #include #include "layer_transformation.hpp" -#include "common/subgraph.hpp" #include "common/fake_quantize_dequantization.hpp" namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API ConcatTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API ConcatTransformation : public LayerTransformation { public: - ConcatTransformation(const Params& params) : LayerTransformation(params) {} - ~ConcatTransformation() override {}; - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + ConcatTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; protected: - void addDequantizationLayers( - TransformationContext& context, - ngraph::pass::low_precision::Subgraph& subgraph, - std::function layer, - std::shared_ptr child, - const std::string originalLayerName, - std::vector& dequantizationsToConcatenate)> getLayerDequantizationCallback) const; - static bool isHandled( const TransformationContext& 
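The OperationPerTensorQuantizationRestriction and OperationPrecisionRestriction helpers introduced a few hunks above are built per operation type through their static create<T>() methods. A hedged usage sketch follows; the include paths are inferred from the new file locations, and the port indices and element types are illustrative assumptions (PrecisionsByPort pairs a port index with its allowed precisions):

#include <low_precision/common/operation_precision_restriction.hpp>
#include <low_precision/common/operation_per_tensor_quantization_restriction.hpp>
#include <ngraph/opsets/opset1.hpp>

using namespace ngraph::pass::low_precision;

// Restrict Convolution to u8 on the activation port (0) and i8 on the weights port (1).
const auto precision_restriction = OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
    {0, {ngraph::element::u8}},
    {1, {ngraph::element::i8}},
});

// Require per-tensor quantization on the Convolution activation port.
const auto per_tensor_restriction =
    OperationPerTensorQuantizationRestriction::create<ngraph::opset1::Convolution>({0});

Plugins can collect such restrictions into vectors and query them later, which is what the getPrecisionsByOperationType lookups declared above appear to support.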
context, const std::vector>& quantizationOperations); @@ -51,14 +40,6 @@ class TRANSFORMATIONS_API ConcatTransformation : public LayerTransformation { NodeVector& multiplyNodes) const; std::shared_ptr concatenateDeqNodes(NodeVector& nodes) const; - -private: - size_t getMinQuantizationLevels( - const DataPrecision& dataPrecision, - const float maxOutputInterval, - const std::vector& quantizationLayersDetails, - const float outputLowValue, - const float outputHighValue) const; }; } // namespace low_precision diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/concat_multi_channels.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/concat_multi_channels.hpp deleted file mode 100644 index 48c0a0ef9eaa5f..00000000000000 --- a/inference-engine/src/low_precision_transformations/include/low_precision/concat_multi_channels.hpp +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include - -#include - -#include "concat.hpp" -#include "common/subgraph.hpp" -#include "common/fake_quantize_dequantization.hpp" - -namespace ngraph { -namespace pass { -namespace low_precision { - -class TRANSFORMATIONS_API ConcatMultiChannelsTransformation : public ConcatTransformation { -public: - ConcatMultiChannelsTransformation(const Params& params) : ConcatTransformation(params) {} - ~ConcatMultiChannelsTransformation() override {}; - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; - bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; - -private: - // Go through the parent elements of the layer and fill dequantization collection - // with Dq operations that should be inserted before the layer. 
- void fillDequantization( - const std::shared_ptr layer, - const std::unordered_map& dequantizationByFakeQuantize, - std::vector& dequantization) const; - - FakeQuantizeDequantization getConcatenatedDequantization( - const std::shared_ptr concat, - const std::vector& dequantization) const; - - static FakeQuantizeDequantization getFoldedDequantization( - const std::shared_ptr operation, - const FakeQuantizeDequantization& dequantization, - const size_t sourceOutputIdx); - - bool isMultiChannel(const std::vector>& concatLayers) const noexcept; -}; - -} // namespace low_precision -} // namespace pass -} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/convert.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/convert.hpp index ca860903420873..cf7299c9def383 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/convert.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/convert.hpp @@ -11,12 +11,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API ConvertTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API ConvertTransformation : public LayerTransformation { public: - ConvertTransformation(const Params& params) : LayerTransformation(params) {} - ~ConvertTransformation() override {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + ConvertTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/convert_subtract_constant.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/convert_subtract_constant.hpp index ea2219df6e5863..f9584eb6842e60 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/convert_subtract_constant.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/convert_subtract_constant.hpp @@ -7,14 +7,14 @@ #include #include -#include +#include #include namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API ConvertSubtractConstant; +class LP_TRANSFORMATIONS_API ConvertSubtractConstant; } // namespace low_precision } // namespace pass diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/convolution.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/convolution.hpp index e3041a0b08f2c1..5542d04d70adb3 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/convolution.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/convolution.hpp @@ -11,12 +11,13 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API ConvolutionTransformation : public WeightableLayerTransformation { +class LP_TRANSFORMATIONS_API ConvolutionTransformation : public WeightableLayerTransformation { public: - ConvolutionTransformation(const Params& params); - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const 
override; - bool isQuantized(std::shared_ptr layer) const noexcept override; + NGRAPH_RTTI_DECLARATION; + ConvolutionTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; + bool isQuantized(const std::shared_ptr& layer) const noexcept override; + static bool isQuantizedStatic(const std::shared_ptr& layer) noexcept; }; } // namespace low_precision diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/convolution_backprop_data.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/convolution_backprop_data.hpp index d6bbe504dc6eea..35b5d806be1a7b 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/convolution_backprop_data.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/convolution_backprop_data.hpp @@ -11,13 +11,13 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API ConvolutionBackpropDataTransformation : public WeightableLayerTransformation { +class LP_TRANSFORMATIONS_API ConvolutionBackpropDataTransformation : public WeightableLayerTransformation { public: - ConvolutionBackpropDataTransformation(const Params& params); - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + ConvolutionBackpropDataTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr op) const override; - bool isQuantized(std::shared_ptr layer) const noexcept override; + bool isQuantized(const std::shared_ptr& layer) const noexcept override; + static bool isQuantizedStatic(const std::shared_ptr& layer) noexcept; }; } // namespace low_precision diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/create_attribute.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/create_attribute.hpp new file mode 100644 index 00000000000000..819cd11b430306 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/create_attribute.hpp @@ -0,0 +1,61 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include +#include +#include +#include "low_precision/lpt_visibility.hpp" +#include "low_precision/base_matcher_pass.hpp" +#include "low_precision/lpt_itt.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +template +class CreateAttribute; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph + +enum class AttributeSource { + Node, + OutputPort +}; + +template +class ngraph::pass::low_precision::CreateAttribute : public ngraph::pass::low_precision::BaseMatcherPass { +public: + CreateAttribute(const AttributeSource source = AttributeSource::Node) { + assert((source == AttributeSource::Node) || (source == AttributeSource::OutputPort)); + auto operation = std::is_same::value ? 
+ pattern::any_input() : + pattern::wrap_type(); + + ngraph::graph_rewrite_callback callback = [&](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "CreateAttribute"); + const auto attribute = ngraph::VariantWrapper::create(op, params); + if (attribute == nullptr) { + return false; + } + } + return true; + }; + + auto matcher = std::make_shared(operation, "CreateAttribute"); + this->register_matcher(matcher, callback); + } +}; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/create_precisions_dependent_attribute.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/create_precisions_dependent_attribute.hpp new file mode 100644 index 00000000000000..4104d646e23b0f --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/create_precisions_dependent_attribute.hpp @@ -0,0 +1,70 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include "rt_info/precision_preserved_attribute.hpp" +#include "network_helper.hpp" +#include "lpt_itt.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +template +class CreatePrecisionsDependentAttribute; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph + +template +class ngraph::pass::low_precision::CreatePrecisionsDependentAttribute : public ngraph::pass::MatcherPass { +public: + CreatePrecisionsDependentAttribute() { + auto operation = pattern::wrap_type(); + + ngraph::graph_rewrite_callback callback = [&](pattern::Matcher& m) { + auto node = m.get_match_root(); + if (transformation_callback(node)) { + return false; + } + + { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "CreatePrecisionsDependentAttribute"); + auto &rt = node->get_rt_info(); + + const auto precisionPreservedAttribute = std::make_shared>( + std::make_shared(false)); + rt[ngraph::VariantWrapper::type_info.name] = precisionPreservedAttribute; + const auto &targetSharedValue = precisionPreservedAttribute->get()->sharedValue; + + const auto attribute = std::make_shared>>( + std::make_shared()); + rt[ngraph::VariantWrapper>::type_info.name] = attribute; + + ngraph::pass::low_precision::NetworkHelper::reassign( + targetSharedValue, + { + std::dynamic_pointer_cast(attribute->get()), + std::dynamic_pointer_cast(precisionPreservedAttribute->get()) + }); + } + return true; + }; + + auto matcher = std::make_shared(operation, "CreatePrecisionsDependentAttribute"); + this->register_matcher(matcher, callback); + } +}; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/depth_to_space.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/depth_to_space.hpp index 0fc9d6446897d1..b02ead7321b622 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/depth_to_space.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/depth_to_space.hpp @@ -10,12 +10,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API DepthToSpaceTransformation : public TransparentBaseTransformation { +class LP_TRANSFORMATIONS_API DepthToSpaceTransformation : public TransparentBaseTransformation { public: - DepthToSpaceTransformation(const Params& params) : 
TransparentBaseTransformation(params) {} - ~DepthToSpaceTransformation() override {} - bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) const override; - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; + NGRAPH_RTTI_DECLARATION; + DepthToSpaceTransformation(const Params& params = Params()); + bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/eltwise_base_transformation.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/eltwise_base_transformation.hpp index 67cc0f9904136d..c648d6efadc4b0 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/eltwise_base_transformation.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/eltwise_base_transformation.hpp @@ -12,7 +12,7 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API EltwiseBaseTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API EltwiseBaseTransformation : public LayerTransformation { public: EltwiseBaseTransformation(const Params& params) : LayerTransformation(params) {} bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/fake_quantize.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/fake_quantize.hpp index ac75f406a2be98..15975782ef07f5 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/fake_quantize.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/fake_quantize.hpp @@ -13,17 +13,20 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API FakeQuantizeTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API FakeQuantizeTransformation : public LayerTransformation { public: - FakeQuantizeTransformation(const Params& params) : LayerTransformation(params) {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + FakeQuantizeTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; static bool checkElementwise(const std::shared_ptr& eltwise); private: - std::shared_ptr fuseElementwise(TransformationContext& context, const std::shared_ptr& fakeQuantize) const; + std::shared_ptr fuseElementwise( + TransformationContext& context, + MatcherPass* matcherPass, + const std::shared_ptr& fakeQuantize) const; }; } // namespace low_precision diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/fake_quantize_decomposition.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/fake_quantize_decomposition.hpp index 0c6da56592e334..45948ca32b72ad 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/fake_quantize_decomposition.hpp +++ 
b/inference-engine/src/low_precision_transformations/include/low_precision/fake_quantize_decomposition.hpp @@ -13,11 +13,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API FakeQuantizeDecompositionTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API FakeQuantizeDecompositionTransformation : public LayerTransformation { public: - FakeQuantizeDecompositionTransformation(const Params& params) : LayerTransformation(params) {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + FakeQuantizeDecompositionTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/fold_convert.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/fold_convert.hpp index d41706f920579b..4390b7290e2f60 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/fold_convert.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/fold_convert.hpp @@ -12,12 +12,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API FoldConvertTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API FoldConvertTransformation : public LayerTransformation { public: - FoldConvertTransformation(const Params& params) : LayerTransformation(params) {} - ~FoldConvertTransformation() override {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + FoldConvertTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/fold_fake_quantize.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/fold_fake_quantize.hpp new file mode 100644 index 00000000000000..7f2862fc942288 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/fold_fake_quantize.hpp @@ -0,0 +1,25 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include "low_precision/layer_transformation.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +class LP_TRANSFORMATIONS_API FoldFakeQuantizeTransformation : public LayerTransformation { +public: + NGRAPH_RTTI_DECLARATION; + FoldFakeQuantizeTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; + bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; + bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; +}; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph diff --git 
a/inference-engine/src/low_precision_transformations/include/low_precision/fuse_convert.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/fuse_convert.hpp index e8f2e864e46e29..4ccc59808ad129 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/fuse_convert.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/fuse_convert.hpp @@ -12,12 +12,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API FuseConvertTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API FuseConvertTransformation : public LayerTransformation { public: - FuseConvertTransformation(const Params& params) : LayerTransformation(params) {} - ~FuseConvertTransformation() override {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + FuseConvertTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/fuse_fake_quantize.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/fuse_fake_quantize.hpp index 8d46c68f3d77d1..b752df52a494cd 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/fuse_fake_quantize.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/fuse_fake_quantize.hpp @@ -12,12 +12,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API FuseFakeQuantizeTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API FuseFakeQuantizeTransformation : public LayerTransformation { public: - FuseFakeQuantizeTransformation(const Params& params) : LayerTransformation(params) {} - ~FuseFakeQuantizeTransformation() override {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + FuseFakeQuantizeTransformation(const Params& params); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; private: diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/fuse_multiply_to_fake_quantize.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/fuse_multiply_to_fake_quantize.hpp index dea0fa340551b3..d43aa87441eb29 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/fuse_multiply_to_fake_quantize.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/fuse_multiply_to_fake_quantize.hpp @@ -12,12 +12,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API FuseMultiplyToFakeQuantizeTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API FuseMultiplyToFakeQuantizeTransformation : public LayerTransformation { public: - FuseMultiplyToFakeQuantizeTransformation(const Params& params) : LayerTransformation(params) {} - 
~FuseMultiplyToFakeQuantizeTransformation() override {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + FuseMultiplyToFakeQuantizeTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/fuse_subtract_to_fake_quantize.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/fuse_subtract_to_fake_quantize.hpp index 2c67aebfcf186a..80d6f22f785eff 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/fuse_subtract_to_fake_quantize.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/fuse_subtract_to_fake_quantize.hpp @@ -12,12 +12,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API FuseSubtractToFakeQuantizeTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API FuseSubtractToFakeQuantizeTransformation : public LayerTransformation { public: - FuseSubtractToFakeQuantizeTransformation(const Params& params) : LayerTransformation(params) {} - ~FuseSubtractToFakeQuantizeTransformation() override {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + FuseSubtractToFakeQuantizeTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/group_convolution.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/group_convolution.hpp index 0372f0173d9d87..d53c37b8df93b8 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/group_convolution.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/group_convolution.hpp @@ -11,12 +11,13 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API GroupConvolutionTransformation : public ConvolutionTransformation { +class LP_TRANSFORMATIONS_API GroupConvolutionTransformation : public ConvolutionTransformation { public: - GroupConvolutionTransformation(const Params& params); - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; - bool isQuantized(std::shared_ptr layer) const noexcept override; + NGRAPH_RTTI_DECLARATION; + GroupConvolutionTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; + bool isQuantized(const std::shared_ptr& layer) const noexcept override; + static bool isQuantizedStatic(const std::shared_ptr& layer) noexcept; }; } // namespace low_precision diff --git 
a/inference-engine/src/low_precision_transformations/include/low_precision/ilayer_transformations_manager.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/ilayer_transformations_manager.hpp deleted file mode 100644 index 389584b7448203..00000000000000 --- a/inference-engine/src/low_precision_transformations/include/low_precision/ilayer_transformations_manager.hpp +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include "transformations_visibility.hpp" - -namespace ngraph { -namespace pass { - -/** - * @brief low precision transformation component interface. - */ -class TRANSFORMATIONS_API ILayerTransformationsManager { -public: - virtual bool isQuantized(const std::shared_ptr& layer) const noexcept = 0; - virtual bool isPrecisionPreserved(const std::shared_ptr& layer) const noexcept = 0; -}; - -} // namespace pass -} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/interpolate.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/interpolate.hpp index 184d1c159fe615..9d454e59542dd8 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/interpolate.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/interpolate.hpp @@ -10,12 +10,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API InterpolateTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API InterpolateTransformation : public LayerTransformation { public: - InterpolateTransformation(const Params& params) : LayerTransformation(params) {} - ~InterpolateTransformation() override {} - bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) const override; - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; + NGRAPH_RTTI_DECLARATION; + InterpolateTransformation(const Params& params = Params()); + bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/iparams_manager.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/iparams_manager.hpp deleted file mode 100644 index 2d45179a600b9a..00000000000000 --- a/inference-engine/src/low_precision_transformations/include/low_precision/iparams_manager.hpp +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include - -namespace ngraph { -namespace pass { - -/** - * @brief low precision transformation component interface. 
- */ -class TRANSFORMATIONS_API IParamsManager { -public: - // TODO FIXME: it is not correct to have a string as a key here, try to use NodeTypeInfo - virtual std::vector getPrecisionsOnActivations(const Node& op) const noexcept = 0; -}; - -} // namespace pass -} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp index 06a37ab8b22015..40807928305e85 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp @@ -13,8 +13,6 @@ #include #include -#include "iparams_manager.hpp" -#include "ilayer_transformations_manager.hpp" #include "transformation_context.hpp" #include "quantization_details.hpp" #include "low_precision/common/ie_lpt_exception.hpp" @@ -41,7 +39,7 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API DataPrecision { +class LP_TRANSFORMATIONS_API DataPrecision { public: DataPrecision() : precision(element::undefined), min(0.f), max(0.f), hasZeroPoint(false) {} @@ -108,6 +106,17 @@ class TRANSFORMATIONS_API DataPrecision { } } + // Return maximum value for quantization level. Quantization level is maximum value for precision. + static float getMaxValue(const size_t maxLevelsForPrecision) { + if (maxLevelsForPrecision == 255ul) { + return 254.f; + } else if (maxLevelsForPrecision == 256ul) { + return 255.f; + } else { + THROW_TRANSFORMATION_EXCEPTION << "unexpected quantization level " << maxLevelsForPrecision; + } + } + static bool hasNegativeValues(const std::vector& values) { for (const float value : values) { if (value < 0.0) { @@ -148,92 +157,28 @@ inline std::ostream &operator << (std::ostream &os, const DataPrecision& value) } // Base class for all LP transformations, holds some common data structures -class TRANSFORMATIONS_API LayerTransformation { +class LP_TRANSFORMATIONS_API LayerTransformation : public ngraph::pass::MatcherPass { public: - enum QuantizedTensorAlignment { - None, - UpdateLevel - }; - class Params { public: Params( - const bool updatePrecisions = true, - const QuantizedTensorAlignment quantizedTensorAlignmentOnActivations = QuantizedTensorAlignment::UpdateLevel, - const QuantizedTensorAlignment quantizedTensorAlignmentOnWeights = QuantizedTensorAlignment::None, - bool supportAsymmetricQuantization = false, - std::vector precisionsOnActivations = { element::u8, element::i8 }, - std::vector precisionsOnWeights = { element::i8 }, - element::Type deqPrecision = element::f32, - bool support3DTensorOnActivations = true, - bool deconvolutionSpecificChannelsRatio = false) : - updatePrecisions(updatePrecisions), - quantizedTensorAlignmentOnActivations(quantizedTensorAlignmentOnActivations), - quantizedTensorAlignmentOnWeights(quantizedTensorAlignmentOnWeights), - supportAsymmetricQuantization(supportAsymmetricQuantization), - precisionsOnActivations(precisionsOnActivations), - precisionsOnWeights(precisionsOnWeights), - deqPrecision(deqPrecision), - support3DTensorOnActivations(support3DTensorOnActivations), - deconvolutionSpecificChannelsRatio(deconvolutionSpecificChannelsRatio) { - if (precisionsOnActivations.size() == 0ul) { - THROW_TRANSFORMATION_EXCEPTION << "precisions on activations are not specisifed"; - } - - if (precisionsOnWeights.size() == 0ul) { - THROW_TRANSFORMATION_EXCEPTION << 
"precisions on weights are not specisifed"; - } - } + const bool updatePrecisions = true, + element::Type deqPrecision = element::f32) : + updatePrecisions(updatePrecisions), + deqPrecision(deqPrecision) {} Params& setUpdatePrecisions(const bool updatePrecisions) { this->updatePrecisions = updatePrecisions; return *this; } - Params& setQuantizedTensorAlignmentOnActivations(const QuantizedTensorAlignment quantizedTensorAlignmentOnActivations) { - this->quantizedTensorAlignmentOnActivations = quantizedTensorAlignmentOnActivations; - return *this; - } - - Params& setQuantizedTensorAlignmentOnWeights(const QuantizedTensorAlignment quantizedTensorAlignmentOnWeights) { - this->quantizedTensorAlignmentOnWeights = quantizedTensorAlignmentOnWeights; - return *this; - } - - Params& setSupportAsymmetricQuantization(const bool supportAsymmetricQuantization) { - this->supportAsymmetricQuantization = supportAsymmetricQuantization; - return *this; - } - - Params& setPrecisionsOnActivations(const std::vector& precisionsOnActivations) { - this->precisionsOnActivations = precisionsOnActivations; - return *this; - } - - Params& setPrecisionsOnWeights(const std::vector& precisionsOnWeights) { - this->precisionsOnWeights = precisionsOnWeights; - return *this; - } - - Params& setSupport3DTensorOnActivations(const bool support3DTensorOnActivations) { - this->support3DTensorOnActivations = support3DTensorOnActivations; - return *this; - } - - Params& setDeconvolutionSpecificChannelsRatio(const bool deconvolutionSpecificChannelsRatio) { - this->deconvolutionSpecificChannelsRatio = deconvolutionSpecificChannelsRatio; + Params& setDeqPrecision(const element::Type& deqPrecision) { + this->deqPrecision = deqPrecision; return *this; } bool updatePrecisions; - QuantizedTensorAlignment quantizedTensorAlignmentOnActivations; - QuantizedTensorAlignment quantizedTensorAlignmentOnWeights; - bool supportAsymmetricQuantization; - std::vector precisionsOnActivations; - std::vector precisionsOnWeights; element::Type deqPrecision; - bool support3DTensorOnActivations; - bool deconvolutionSpecificChannelsRatio; }; class PrecisionDetails { @@ -243,55 +188,49 @@ class TRANSFORMATIONS_API LayerTransformation { hasNegativeOutput(hasNegativeOutput), hasZeroPoint(hasZeroPoint) {} - const element::Type precision; - const bool hasNegativeOutput; - const bool hasZeroPoint; + element::Type precision; + bool hasNegativeOutput; + bool hasZeroPoint; }; LayerTransformation(const Params& params); virtual ~LayerTransformation() = default; - virtual void registerMatcherIn(ngraph::pass::GraphRewrite& pass, TransformationContext& context) const = 0; - virtual bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const = 0; + virtual bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) = 0; - void setParamsManager(IParamsManager* paramsManager) noexcept; - void setLayerTransformationsManager(ILayerTransformationsManager* layerTransformationsManager) noexcept; + void setContext(TransformationContext* context) noexcept; void setUpdatePrecisions(const bool updatePrecisions); - void setQuantizedTensorAlignmentOnActivations(const QuantizedTensorAlignment quantizedTensorAlignmentOnActivations); - void setQuantizedTensorAlignmentOnWeights(const QuantizedTensorAlignment quantizedTensorAlignmentOnWeights); - - void setQuantizationIntervalAsymmetryThreshold(const float value); - void setZeroThreshold(const float value); - void setMinQuantizationLevels(const size_t levels); - - const std::vector& 
getPrecisionsOnActivations() const; - const std::vector& getPrecisionsOnWeights() const; virtual bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const; - - bool canSubtractBeHandled(const std::shared_ptr& op, const size_t parentIndex = 0ul) const; + static bool canBeTransformedStatic(const std::shared_ptr& layer); bool canSubtractBeHandled(const std::shared_ptr& op, const FakeQuantizeDequantization& dequantization) const; - PrecisionDetails getPrecisionDetails(const QuantizationDetails& quantizationDetails) const; + // Get precision based on FakeQuantize operation. + // Undefined value is expected. In this case the accuracy has to be defined by the calling code. + // TODO: LPT: INT8 specific here + static PrecisionDetails getPrecisionDetails( + const size_t quantizationLevels, + const std::vector& outputLowValues, + const std::vector& outputHighValues); + static PrecisionDetails getPrecisionDetails(const QuantizationDetails& quantizationDetails); + + static bool isAsymmetricQuantization(const std::shared_ptr& node); // return true if operation can be quantized and false otherwise // for example: if convolution operation weights are not quantized, then isQuantize returns false and true otherwise // note: dequantization operations on activations are absent during method execution - virtual bool isQuantized(std::shared_ptr layer) const noexcept; + virtual bool isQuantized(const std::shared_ptr& layer) const noexcept; // return true if operation can be preserved for precision // note: dequantization operations on activations are absent during method execution virtual bool isPrecisionPreserved(std::shared_ptr layer) const noexcept = 0; - DataPrecision getDataPrecision( - std::shared_ptr layer, + // weights specific + static DataPrecision getDataPrecision( + const std::shared_ptr& layer, const QuantizationDetails& quantizationDetails, - const bool onWeights) const; - - void fillAvailablePrecisions(std::shared_ptr layer, std::vector& availablePrecisions) const; - - std::vector> getChildrenRecursivelyExceptPrecisionPreserved(const std::shared_ptr& op) const noexcept; + const std::vector& precisions); protected: #ifdef LPT_PRINT_DEQUANTIZATION_INFO @@ -303,24 +242,10 @@ class TRANSFORMATIONS_API LayerTransformation { #endif bool updatePrecisions; - QuantizedTensorAlignment quantizedTensorAlignmentOnActivations; - QuantizedTensorAlignment quantizedTensorAlignmentOnWeights; - bool supportAsymmetricQuantization; - std::vector precisionsOnActivations; - std::vector precisionsOnWeights; element::Type deqPrecision; - bool support3DTensorOnActivations; - bool deconvolutionSpecificChannelsRatio; - - // absolute value, used to determine quantization interval asymmetry - float quantizationIntervalAsymmetryThreshold; - // absolute value, used to determine zero - float zeroThreshold; - size_t minQuantizationLevels; static const char originalLayerPostfix[]; - IParamsManager* paramsManager; - ILayerTransformationsManager* layerTransformationsManager; + TransformationContext* context; protected: std::shared_ptr moveDequantizationAfter( @@ -340,7 +265,7 @@ class TRANSFORMATIONS_API LayerTransformation { std::shared_ptr lastNode, std::string originalName) const; - void addPattern(ngraph::pass::GraphRewrite& pass, TransformationContext& context, std::shared_ptr patternRoot) const; + void addPattern(ngraph::pass::GraphRewrite& pass, TransformationContext& context, std::shared_ptr patternRoot); //TODO: replace with canBeTransformed when quantization by special dimension is supported 
for all transformations bool canBeTransformedSpatialDimension(const TransformationContext& context, std::shared_ptr layer) const; @@ -358,38 +283,6 @@ class TRANSFORMATIONS_API LayerTransformation { } }; -inline std::ostream &operator << (std::ostream &os, const LayerTransformation::QuantizedTensorAlignment& value) { - switch (value) { - case LayerTransformation::QuantizedTensorAlignment::None: { - os << "None"; - break; - } - case LayerTransformation::QuantizedTensorAlignment::UpdateLevel: { - os << "UpdateLevel"; - break; - } - default: { - os << static_cast(value); - break; - } - } - return os; -} - -inline std::ostream &operator << (std::ostream &os, const std::vector& values) { - os << "{"; - for (size_t i = 0; i < values.size(); ++i) { - const element::Type& value = values[i]; - if (i > 0) { - os << value; - } else { - os << ", " << value; - } - } - os << "}"; - return os; -} - typedef std::shared_ptr LayerTransformationPtr; } // namespace low_precision diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/low_precision.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/low_precision.hpp new file mode 100644 index 00000000000000..454ebebfda338c --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/low_precision.hpp @@ -0,0 +1,74 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +// one place to include all Low Precision Transformations from ngraph::pass::low_precision +#include +#include +#include +#include + +#include +#include +#include +#include + + +#include +#include +#include +#include "low_precision/layer_transformation.hpp" +#include "low_precision/markup_precisions.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +class LP_TRANSFORMATIONS_API TypeRelaxedReplacer; +class LP_TRANSFORMATIONS_API MarkupOptimizations; +class LP_TRANSFORMATIONS_API LowPrecision; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph + +class LP_TRANSFORMATIONS_API ngraph::pass::low_precision::MarkupOptimizations : public ngraph::pass::FunctionPass { +public: + NGRAPH_RTTI_DECLARATION; + MarkupOptimizations( + const std::vector& precisionRestrictions, + const std::vector& quantizationRestrictions); + bool run_on_function(std::shared_ptr f) override; +private: + const std::vector& precisionRestrictions; + const std::vector& quantizationRestrictions; +}; + +class LP_TRANSFORMATIONS_API ngraph::pass::low_precision::TypeRelaxedReplacer : public ngraph::pass::GraphRewrite { +public: + NGRAPH_RTTI_DECLARATION; + TypeRelaxedReplacer(); +}; + +class LP_TRANSFORMATIONS_API ngraph::pass::low_precision::LowPrecision : public ngraph::pass::FunctionPass { +public: + NGRAPH_RTTI_DECLARATION; + LowPrecision( + const std::vector& precisionRestrictions = {}, + const std::vector& quantizationRestrictions = {}, + const LayerTransformation::Params = LayerTransformation::Params()); + bool run_on_function(std::shared_ptr f) override; + + static bool isFunctionQuantized(const std::shared_ptr& function); + +protected: + std::vector precisionRestrictions; + std::vector quantizationRestrictions; + // remove + LayerTransformation::Params params; +}; diff --git a/inference-engine/src/low_precision_transformations/src/lpt_itt.h b/inference-engine/src/low_precision_transformations/include/low_precision/lpt_itt.hpp similarity index 95% rename from 
inference-engine/src/low_precision_transformations/src/lpt_itt.h rename to inference-engine/src/low_precision_transformations/include/low_precision/lpt_itt.hpp index 5b3f1b524bcb9d..081c5b8d39e79e 100644 --- a/inference-engine/src/low_precision_transformations/src/lpt_itt.h +++ b/inference-engine/src/low_precision_transformations/include/low_precision/lpt_itt.hpp @@ -4,11 +4,12 @@ /** * @brief Defines openvino domains for tracing - * @file lpt_itt.h + * @file lpt_itt.hpp */ #pragma once + #include namespace ngraph { diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/lpt_visibility.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/lpt_visibility.hpp new file mode 100644 index 00000000000000..3867192208f652 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/lpt_visibility.hpp @@ -0,0 +1,18 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "ngraph/visibility.hpp" + +/** + * @file lpt_visibility.hpp + * @brief Defines visibility settings for Inference Engine LP Transformations library + */ + +#ifdef inference_engine_lp_transformations_EXPORTS +#define LP_TRANSFORMATIONS_API NGRAPH_HELPER_DLL_EXPORT +#else +#define LP_TRANSFORMATIONS_API NGRAPH_HELPER_DLL_IMPORT +#endif diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/main.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/main.hpp deleted file mode 100644 index 79ce4f06ace999..00000000000000 --- a/inference-engine/src/low_precision_transformations/include/low_precision/main.hpp +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include - -#include - -#include - -#include -#include -#include - -using namespace std; - - -namespace ngraph { -namespace pass { - -class TRANSFORMATIONS_API LowPrecisionTransformations: public ngraph::pass::GraphRewrite, IParamsManager, ILayerTransformationsManager { -public: - bool run_on_function(std::shared_ptr f) override; - - // IParamsManager interface implementation - std::vector getPrecisionsOnActivations(const NodeTypeInfo& layerName) const noexcept override; - - // ILayerTransformationsManager interface implementation - bool isQuantized(std::shared_ptr layer) const noexcept override; - bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; -}; - -}// namespace pass -}// namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/markup_avg_pool_precision_preserved.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/markup_avg_pool_precision_preserved.hpp new file mode 100644 index 00000000000000..e3a517bff307a2 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/markup_avg_pool_precision_preserved.hpp @@ -0,0 +1,25 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +namespace ngraph { +namespace pass { +namespace low_precision { + +class LP_TRANSFORMATIONS_API MarkupAvgPoolPrecisionPreserved; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph + +class ngraph::pass::low_precision::MarkupAvgPoolPrecisionPreserved : public ngraph::pass::FunctionPass { +public: + NGRAPH_RTTI_DECLARATION; + bool run_on_function(std::shared_ptr f) override; +}; diff 
--git a/inference-engine/src/low_precision_transformations/include/low_precision/markup_can_be_quantized.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/markup_can_be_quantized.hpp new file mode 100644 index 00000000000000..82f66857337c3a --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/markup_can_be_quantized.hpp @@ -0,0 +1,25 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include "low_precision/lpt_visibility.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +class LP_TRANSFORMATIONS_API MarkupCanBeQuantized; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph + +class ngraph::pass::low_precision::MarkupCanBeQuantized : public ngraph::pass::FunctionPass { +public: + NGRAPH_RTTI_DECLARATION; + bool run_on_function(std::shared_ptr f) override; +}; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/markup_per_tensor_quantization.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/markup_per_tensor_quantization.hpp new file mode 100644 index 00000000000000..5aa9f76b1fd23f --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/markup_per_tensor_quantization.hpp @@ -0,0 +1,44 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include +#include "common/operation_per_tensor_quantization_restriction.hpp" +#include "low_precision/lpt_visibility.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +class LP_TRANSFORMATIONS_API MarkupPerTensorQuantization; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph + +class ngraph::pass::low_precision::MarkupPerTensorQuantization : public ngraph::pass::FunctionPass { +public: + class PerTensorQuantization { + public: + explicit PerTensorQuantization(const bool versionIsRequired) : versionIsRequired(versionIsRequired) {} + void add(const uint64_t version, const std::vector& ports) { + portsByVersion.emplace(version, ports); + } + + bool versionIsRequired; + std::unordered_map> portsByVersion; + }; + + NGRAPH_RTTI_DECLARATION; + explicit MarkupPerTensorQuantization(const std::vector& restrictions = {}); + bool run_on_function(std::shared_ptr f) override; + +private: + std::unordered_map restrictionsByOperation; +}; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/markup_precisions.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/markup_precisions.hpp new file mode 100644 index 00000000000000..87c7cc85a40824 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/markup_precisions.hpp @@ -0,0 +1,47 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include +#include "low_precision/lpt_visibility.hpp" +#include "low_precision/common/operation_precision_restriction.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +class LP_TRANSFORMATIONS_API MarkupPrecisions; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph + +// Transformation is used to add customization options runtime +class ngraph::pass::low_precision::MarkupPrecisions : public ngraph::pass::FunctionPass { +public: + class Restriction { 
+ public: + explicit Restriction(const bool versionIsRequired) : versionIsRequired(versionIsRequired) {} + void add(const uint64_t version, const std::vector>>& precisions) { + precisionsByVersion.emplace(version, precisions); + } + + bool versionIsRequired; + std::unordered_map>>> precisionsByVersion; + }; + + NGRAPH_RTTI_DECLARATION; + explicit MarkupPrecisions(const std::vector& restrictions = {}); + bool run_on_function(std::shared_ptr f) override; + +private: + static bool isPrecisionPreserved(const std::shared_ptr& node); + static bool isSupported(const std::shared_ptr& node); + std::unordered_map restrictionsByOperation; +}; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/mat_mul.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/mat_mul.hpp index 332d28b934b44e..067f82ea59b28b 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/mat_mul.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/mat_mul.hpp @@ -11,14 +11,14 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API MatMulTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API MatMulTransformation : public LayerTransformation { public: - MatMulTransformation(const Params& params) : LayerTransformation(params) {} - ~MatMulTransformation() override {} - bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) const override; - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; + NGRAPH_RTTI_DECLARATION; + MatMulTransformation(const Params& params = Params()); + bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; + static bool is3DTensorOnActivations(const std::shared_ptr& node); }; } // namespace low_precision diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/max_pool.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/max_pool.hpp index 2cf1d54eda7f44..ca2b8a08272817 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/max_pool.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/max_pool.hpp @@ -12,12 +12,12 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API MaxPoolTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API MaxPoolTransformation : public LayerTransformation { public: - MaxPoolTransformation(const Params& params); - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; + NGRAPH_RTTI_DECLARATION; + MaxPoolTransformation(const Params& params = Params()); bool canBeTransformed(const TransformationContext& context, std::shared_ptr op) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/multiply.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/multiply.hpp index 30f1cff5444d37..da226fe263b757 100644 --- 
a/inference-engine/src/low_precision_transformations/include/low_precision/multiply.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/multiply.hpp @@ -11,12 +11,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API MultiplyTransformation : public EltwiseBaseTransformation { +class LP_TRANSFORMATIONS_API MultiplyTransformation : public EltwiseBaseTransformation { public: - MultiplyTransformation(const Params& params) : EltwiseBaseTransformation(params) {} - ~MultiplyTransformation() override {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + MultiplyTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; }; } // namespace low_precision diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/multiply_to_group_convolution.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/multiply_to_group_convolution.hpp index d4a575f4d9a9de..5e6bd900d8ea9e 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/multiply_to_group_convolution.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/multiply_to_group_convolution.hpp @@ -7,24 +7,30 @@ #include #include #include "low_precision/layer_transformation.hpp" +#include "common/operation_precision_restriction.hpp" namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API MultiplyToGroupConvolutionTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API MultiplyToGroupConvolutionTransformation : public LayerTransformation { public: - MultiplyToGroupConvolutionTransformation(const Params& params) : LayerTransformation(params), groupSize(1ul) {} + NGRAPH_RTTI_DECLARATION; + MultiplyToGroupConvolutionTransformation( + const Params& params = Params(), + const OperationPrecisionRestriction::PrecisionsByPort& restrictions = {}); ~MultiplyToGroupConvolutionTransformation() override {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; - bool isQuantized(std::shared_ptr layer) const noexcept override; + bool isQuantized(const std::shared_ptr& layer) const noexcept override; + static bool canBeTransformedToGroupConvolution(const std::shared_ptr& layer) noexcept; + static bool isDynamicOrScalar(const std::shared_ptr& node); void setGroupSize(const size_t groupSize); size_t getGroupSize() const; private: + OperationPrecisionRestriction::PrecisionsByPort restrictions; size_t groupSize; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/mvn.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/mvn.hpp index 37244a3aa74c0b..42ddd6f0b620a1 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/mvn.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/mvn.hpp @@ -10,11 +10,11 @@ 
namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API MVNTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API MVNTransformation : public LayerTransformation { public: - MVNTransformation(const Params& params) : LayerTransformation(params) {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + MVNTransformation(const Params& params = Params()); + bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp index 891b341b87f522..77218320dba376 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp @@ -16,6 +16,10 @@ #include "ngraph_ops/type_relaxed.hpp" #include +#include "rt_info/shared_value_attribute.hpp" +#include "rt_info/precisions_attribute.hpp" +#include "rt_info/per_tensor_quantization_attribute.hpp" +#include "rt_info/intervals_alignment_attribute.hpp" #include "transformation_context.hpp" #include "quantization_details.hpp" #include "transformations/utils/utils.hpp" @@ -30,7 +34,7 @@ namespace low_precision { /** * @brief NetworkHelper class encapsulates manipulations with nGraph function. 
*/ -class TRANSFORMATIONS_API NetworkHelper { +class LP_TRANSFORMATIONS_API NetworkHelper { public: // Return true if `type` can be castable to at least one of `type` static bool is_castable_to_one_of(NodeTypeInfo type, const std::unordered_set& types); @@ -76,6 +80,10 @@ class TRANSFORMATIONS_API NetworkHelper { static std::shared_ptr swapMultiplyAndAdd(std::shared_ptr addAfterMultiply, const int multiplyBranch); + static void copyInfo(const std::vector>& sources, const std::vector>& targets); + + static void copyInfo(const std::vector>& sources, const std::shared_ptr& target); + static void copyInfo(const std::shared_ptr& source, const std::shared_ptr& target); static void cleanRunTimeInfo(const std::shared_ptr& layer); @@ -116,7 +124,8 @@ class TRANSFORMATIONS_API NetworkHelper { std::shared_ptr fq, element::Type precision, float min, - float max); + float max, + const bool replace = true); static FakeQuantizeDequantization makeDequantization( const float dequantizationMul, @@ -124,7 +133,8 @@ class TRANSFORMATIONS_API NetworkHelper { const ngraph::element::Type originalPrecision, const ngraph::PartialShape dataNodeOutputShape, element::Type precision, - const element::Type deqPrecision = element::f32); + const element::Type deqPrecision = element::f32, + std::shared_ptr input = nullptr); static FakeQuantizeDequantization createDequantizationFromFakeQuantize( std::shared_ptr fq, @@ -143,7 +153,7 @@ class TRANSFORMATIONS_API NetworkHelper { static FakeQuantizeDequantization getDequantization(const std::shared_ptr& node, const size_t parentIndex = 0ul, const bool inPlace = false); - static FakeQuantizeDequantization getDequantizationBelow(const std::shared_ptr& node); + static FakeQuantizeDequantization getDequantizationBelow(const std::shared_ptr& node, const bool convertIsMandatory = false); static FakeQuantizeDequantization normalizeDequantization(FakeQuantizeDequantization dequantization); @@ -200,6 +210,115 @@ class TRANSFORMATIONS_API NetworkHelper { static bool isDQByDynamicDimension(const std::shared_ptr& layer, size_t inputIdx = 0); + static bool isPrecisionPreserved(const std::shared_ptr& node); + + static void replaceAttributeInNodes( + std::shared_ptr f, + const std::string& name, + const std::shared_ptr newAttribute, + const std::shared_ptr oldAttribute, + const std::shared_ptr& initialNode) { + std::set> visited; + std::deque> nodes; + nodes.emplace_back(initialNode); + + while (!nodes.empty()) { + auto node = nodes.front(); + nodes.pop_front(); + + if (visited.count(node) || is_type(node)) { + continue; + } + + visited.insert(node); + + bool handleConnectedNodes = false; + if (NetworkHelper::isPrecisionPreserved(node) || is_type(node)) { + auto& rt = node->get_rt_info(); + + if (node == initialNode) { + rt[name] = newAttribute; + handleConnectedNodes = true; + } else { + auto it = rt.find(name); + if (it != rt.end()) { + const auto currentAttribute = it->second; + if (oldAttribute.get() == currentAttribute.get()) { + rt[name] = newAttribute; + } + handleConnectedNodes = true; + } + } + } + + if (!handleConnectedNodes) { + continue; + } + + if (!is_type(node)) { + for (size_t index = 0ul; index < node->get_input_size(); ++index) { + auto getInput = [](const std::shared_ptr& node, const size_t index) { + const auto dequantization = NetworkHelper::getDequantization(node, index); + if (!dequantization.empty() && + (is_type(dequantization.data.get_node())) && + is_type(dequantization.data.get_node()->get_input_node_ptr(0))) { + const auto input = 
dequantization.data.get_node()->input(0); + return input; + } + return node->input(index); + }; + + const auto& input = getInput(node, index); + const auto& input_node = input.get_source_output().get_node_shared_ptr(); + + //const auto& input_node = input.get_source_output().get_node_shared_ptr(); + if (visited.count(input_node) || is_type(input_node)) { + continue; + } + + nodes.push_front(input_node); + } + } + + for (auto& output : node->outputs()) { + for (auto& input_value : output.get_target_inputs()) { + const auto& output_node = input_value.get_node()->shared_from_this(); + if (visited.count(output_node) || is_type(output_node)) { + continue; + } + + nodes.push_front(output_node); + } + } + } + } + + template + static void reassign( + const std::shared_ptr& sharedValue, + const std::vector>& attributes) { + for (const auto attributeWeakPtr : attributes) { + auto attribute = attributeWeakPtr.lock(); + if (attribute == nullptr) { + continue; + } + attribute->sharedValue = sharedValue; + sharedValue->attributes.push_back(attribute); + } + } + + static size_t calculateLevels( + const float dataPrecisionMin, + const float dataPrecisionMax, + const float combinedIntervalLow, + const float combinedIntervalHigh, + const float minIntervalLow, + const float minIntervalHigh, + float& dequantizationMul, + float& dequantizationSub, + float& updatedOutputLowValue, + float& updatedOutputHighValue); + private: static std::shared_ptr foldFakeQuantize( const std::shared_ptr& fq, @@ -292,6 +411,54 @@ std::shared_ptr fold_reshape(Args&&... args) { return node; } +template +std::shared_ptr> getAttribute(const std::shared_ptr& inputNode) { + auto& rt = inputNode->get_rt_info(); + auto it = rt.find(ngraph::VariantWrapper::type_info.name); + if (it == rt.end()) { + return nullptr; + } + + auto attribute = std::dynamic_pointer_cast>(it->second); + assert(attribute != nullptr); + return attribute; +} + +template +std::shared_ptr> getAttribute(const Input& input) { + auto& rt = input.get_rt_info(); + auto it = rt.find(ngraph::VariantWrapper::type_info.name); + if (it == rt.end()) { + return nullptr; + } + + auto attribute = std::dynamic_pointer_cast>(it->second); + assert(attribute != nullptr); + return attribute; +} + +template +std::shared_ptr> getAttributeFromOutput(const Output& output) { + auto& rt = output.get_rt_info(); + auto it = rt.find(ngraph::VariantWrapper::type_info.name); + if (it == rt.end()) { + return nullptr; + } + + auto attribute = std::dynamic_pointer_cast>(it->second); + assert(attribute != nullptr); + return attribute; +} + +bool isDisabled(const std::shared_ptr& node); + +template +std::shared_ptr make_shared_attribute(Args&& ... 
args) { + std::shared_ptr attribute = std::make_shared(std::forward(args)...); + attribute->sharedValue->attributes.push_back(attribute); + return attribute; +} + } // namespace low_precision } // namespace pass } // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/normalize_l2.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/normalize_l2.hpp index 9591a631e86a6b..88a113cb38a49d 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/normalize_l2.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/normalize_l2.hpp @@ -10,11 +10,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API NormalizeL2Transformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API NormalizeL2Transformation : public LayerTransformation { public: - NormalizeL2Transformation(const Params& params) : LayerTransformation(params) {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + NormalizeL2Transformation(const Params& params = Params()); + bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/prelu.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/prelu.hpp index ef767127315a60..e58d4b25615752 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/prelu.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/prelu.hpp @@ -12,12 +12,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API PReluTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API PReluTransformation : public LayerTransformation { public: - PReluTransformation(const Params& params) : LayerTransformation(params) {} - ~PReluTransformation() override {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + PReluTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr op) const override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/propagate_precisions.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/propagate_precisions.hpp new file mode 100644 index 00000000000000..5995b6473722dd --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/propagate_precisions.hpp @@ -0,0 +1,29 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include +#include +#include +#include + +namespace ngraph { +namespace pass { +namespace low_precision { + +class LP_TRANSFORMATIONS_API PropagatePrecisions; + +} // namespace 
low_precision +} // namespace pass +} // namespace ngraph + +class ngraph::pass::low_precision::PropagatePrecisions : public ngraph::pass::FunctionPass { +public: + NGRAPH_RTTI_DECLARATION; + bool run_on_function(std::shared_ptr f) override; +}; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/propagate_shared_value.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/propagate_shared_value.hpp new file mode 100644 index 00000000000000..9866d63197ff1d --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/propagate_shared_value.hpp @@ -0,0 +1,164 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include +#include + +#include +#include +#include "low_precision/network_helper.hpp" +#include "lpt_itt.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +template +class LP_TRANSFORMATIONS_API PropagateSharedValue; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph + +template +class ngraph::pass::low_precision::PropagateSharedValue : public ngraph::pass::FunctionPass { +public: + bool run_on_function(std::shared_ptr f) override { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "PropagateSharedValue"); + + std::vector> nodes(f->get_ordered_ops()); + for (auto it = nodes.begin(); it != nodes.end(); it++) { + const std::shared_ptr node = *it; + if (is_type(node)) { + assert(node->get_output_size() == 1ul); + auto& outputRtInfo = node->output(0).get_rt_info(); + + auto attribute = make_shared_attribute(std::set{element::u8, element::i8}); + + auto attributeWrapper = std::make_shared>>(attribute); + outputRtInfo[ngraph::VariantWrapper>::type_info.name] = attributeWrapper; + continue; + } + + if (!NetworkHelper::isPrecisionPreserved(node)) { + for (auto& input : node->inputs()) { + auto parentNode = input.get_source_output().get_node_shared_ptr(); + + auto getAttributes = [](const Input& nodeInput) { + const std::string name = ngraph::VariantWrapper>::type_info.name; + + auto node = nodeInput.get_source_output().get_node_shared_ptr(); + std::vector>>> attributes; + if (is_type(node)) { + // output + auto& rt = nodeInput.get_source_output().get_rt_info(); + auto it = rt.find(name); + if (it != rt.end()) { + const auto& attribute = std::dynamic_pointer_cast>>(it->second); + attributes.push_back(attribute); + } + } + + return attributes; + }; + + auto& nodeRt = input.get_rt_info(); + + const std::string name = ngraph::VariantWrapper>::type_info.name; + const auto it = nodeRt.find(name); + if (it == nodeRt.end()) { + continue; + } + + const auto& attribute = std::dynamic_pointer_cast>>(it->second); + std::vector>>> attributes{ attribute }; + + auto parentAttributes = getAttributes(input); + if (parentAttributes.empty()) { + continue; + } + + for (auto& parentAttribute : parentAttributes) { + parentAttribute->merge(attributes); + } + + nodeRt[name] = parentAttributes[0]; + } + continue; + } + + handle(f, node); + } + return true; + } + +private: + std::vector>>> getParentInputRestrictions( + const std::shared_ptr node) { + std::vector>>> parentAttributes; + for (size_t index = 0ul; index < node->get_input_size(); index++) { + const Input& input = node->input(index); + auto inputNode = input.get_source_output().get_node()->shared_from_this(); + + const auto dequantization = NetworkHelper::getDequantization(node, index); + if (!dequantization.empty() && + 
(is_type(dequantization.data.get_node())) && + is_type(dequantization.data.get_node()->get_input_node_ptr(0))) { + inputNode = dequantization.data.get_node()->get_input_node_shared_ptr(0); + } + + if (NetworkHelper::isPrecisionPreserved(inputNode)) { + auto& inputRtInfo = inputNode->get_rt_info(); + auto inputAttributeIt = inputRtInfo.find(ngraph::VariantWrapper>::type_info.name); + if (inputAttributeIt != inputRtInfo.end()) { + const auto attribute = std::dynamic_pointer_cast>>(inputAttributeIt->second); + parentAttributes.push_back(attribute); + } + } else if (is_type(inputNode)) { + const auto& outputPortRtInfo = inputNode->outputs()[0].get_rt_info(); + auto attributeIt = outputPortRtInfo.find(ngraph::VariantWrapper>::type_info.name); + if (attributeIt != outputPortRtInfo.end()) { + const auto attribute = std::dynamic_pointer_cast>>(attributeIt->second); + parentAttributes.push_back(attribute); + } + } + } + return parentAttributes; + } + + void handle(std::shared_ptr f, const std::shared_ptr& node) { + const bool precisionPreserved = NetworkHelper::isPrecisionPreserved(node); + if (precisionPreserved) { + const auto parentRestrictions = getParentInputRestrictions(node); + if (parentRestrictions.empty()) { + return; + } + + // one operation - one output precision + // merge parent inputs to one current output + auto resultAttribute = parentRestrictions[0]; + + std::vector>>> toMerge = parentRestrictions; + toMerge.erase(toMerge.begin()); + resultAttribute->merge(toMerge); + + for (size_t index = 1ul; index < parentRestrictions.size(); index++) { + const auto oldAttribute = parentRestrictions[index]->get(); + NetworkHelper::reassign( + resultAttribute->get()->sharedValue, + parentRestrictions[index]->get()->sharedValue->attributes); + } + + auto& rt = node->get_rt_info(); + rt[ngraph::VariantWrapper>::type_info.name] = resultAttribute; + } + } +}; + diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/propagate_through_precision_preserved.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/propagate_through_precision_preserved.hpp new file mode 100644 index 00000000000000..18a8f1e0ab839b --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/propagate_through_precision_preserved.hpp @@ -0,0 +1,118 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include +#include +#include +#include + +#include "low_precision/lpt_visibility.hpp" +#include "low_precision/network_helper.hpp" +#include "low_precision/lpt_itt.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +template +class PropagateThroughPrecisionPreserved; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph + +template +class ngraph::pass::low_precision::PropagateThroughPrecisionPreserved : public ngraph::pass::MatcherPass { +public: + PropagateThroughPrecisionPreserved() { + ngraph::graph_rewrite_callback callback = [&](pattern::Matcher& m) { + auto node = m.get_match_root(); + if (transformation_callback(node)) { + return false; + } + + { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "PropagateThroughPrecisionPreserved"); + + if (!ngraph::pass::low_precision::NetworkHelper::isPrecisionPreserved(node)) { + return false; + } + + const auto parentRestrictions = getParentInputRestrictions(node); + if (parentRestrictions.empty()) { + return false; + } + + auto resultAttribute = parentRestrictions[0]; + + 
std::vector>>> toMerge = parentRestrictions; + // TODO: LPT: handle pointer on itself in VariantWrapper::merge and remove erase, task #59498 + toMerge.erase(toMerge.begin()); + resultAttribute->merge(toMerge); + + for (size_t index = 1ul; index < parentRestrictions.size(); index++) { + const auto attributes = parentRestrictions[index]->get()->sharedValue->attributes; + for (const auto attributeWeakPtr : attributes) { + auto attribute = attributeWeakPtr.lock(); + if (attribute == nullptr) { + continue; + } + attribute->sharedValue = resultAttribute->get()->sharedValue; + resultAttribute->get()->sharedValue->attributes.push_back(attribute); + } + } + + auto &rt = node->get_rt_info(); + rt[ngraph::VariantWrapper>::type_info.name] = resultAttribute; + } + return true; + }; + + auto matcher = std::make_shared(pattern::any_input(), "PropagateThroughPrecisionPreserved"); + this->register_matcher(matcher, callback); + } + +private: + std::shared_ptr>> getSourceOutputAttribute(const Input& input) { + auto input2 = input; + auto output = input2.get_source_output(); + std::shared_ptr>> attribute = getAttributeFromOutput>(output); + if (attribute == nullptr) { + attribute = getAttribute>(output.get_node_shared_ptr()); + } + return attribute; + } + + // TODO: possible duplicate: PropagateToInput::getSourceOutputAttribute + std::vector>>> getParentInputRestrictions( + const std::shared_ptr node) { + std::vector>>> parentAttributes; + auto getInput = [](const std::shared_ptr& node, const size_t index) -> Input { + const auto dequantization = NetworkHelper::getDequantization(node, index); + if (!dequantization.empty() && + is_type(dequantization.data.get_node()) && + (dequantization.data.get_node()->get_input_size() == 1ul) && + is_type(dequantization.data.get_node()->get_input_node_ptr(0))) { + return dequantization.data.get_node()->input(0); + } + + return node->input(index); + }; + + for (size_t index = 0ul; index < node->get_input_size(); index++) { + const Input& input = getInput(node, index); + const auto attribute = getSourceOutputAttribute(input); + if (attribute != nullptr) { + parentAttributes.push_back(attribute); + } + } + + return parentAttributes; + } +}; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/propagate_to_input.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/propagate_to_input.hpp new file mode 100644 index 00000000000000..1f30ab7b4a07d5 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/propagate_to_input.hpp @@ -0,0 +1,105 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include +#include +#include + +#include +#include +#include "network_helper.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +template +class PropagateToInput; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph + +template +class ngraph::pass::low_precision::PropagateToInput : public ngraph::pass::MatcherPass { +public: + PropagateToInput() { + ngraph::graph_rewrite_callback callback = [&](pattern::Matcher& m) { + auto node = m.get_match_root(); + if (transformation_callback(node)) { + return false; + } + + { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "PropagateToInput"); + + for (auto input : node->inputs()) { + auto parentAttribute = getSourceOutputAttribute(input); + if (parentAttribute == nullptr) { + continue; + } + + auto attribute = getAttribute>(input); + 
if (attribute != nullptr) { + if ((attribute->get()->sharedValue != nullptr) && (attribute->get()->sharedValue->precisions.empty())) { + return false; + } + + std::vector>>> attributes = { attribute }; + parentAttribute->merge(attributes); + } + + auto& rt = input.get_rt_info(); + rt[ngraph::VariantWrapper>::type_info.name] = parentAttribute; + } + } + return true; + }; + + auto matcher = std::make_shared(pattern::any_input(), "PropagateThroughPrecisionPreserved"); + this->register_matcher(matcher, callback); + } + +private: + // TODO: possible duplicate: PropagateThroughPrecisionPreserved::getParentInputRestrictions + std::shared_ptr>> getSourceOutputAttribute(const Input& input) { + auto getInput = [](const Input& input) { + const auto dequantization = NetworkHelper::getDequantization(input.get_node()->shared_from_this(), input.get_index()); + if (!dequantization.empty() && + is_type(dequantization.data.get_node()) && + (dequantization.data.get_node()->get_input_size() == 1ul) && + is_type(dequantization.data.get_node()->get_input_node_ptr(0))) { + return dequantization.data.get_node()->input(0); + } + + return input; + }; + + auto input2 = getInput(input); + auto output = input2.get_source_output(); + std::shared_ptr>> attribute = getAttributeFromOutput>(output); + if (attribute == nullptr) { + attribute = getAttribute>(output.get_node_shared_ptr()); + } + return attribute; + } + + std::vector>>> getParentInputRestrictions( + const std::shared_ptr node) { + std::vector>>> parentAttributes; + for (size_t index = 0ul; index < node->get_input_size(); index++) { + const Input& input = node->input(index); + const auto attribute = getSourceOutputAttribute(input); + if (attribute != nullptr) { + parentAttributes.push_back(attribute); + } + } + return parentAttributes; + } +}; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/pull_reshape_through_dequantization.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/pull_reshape_through_dequantization.hpp index 639e1a00e65c74..e8bc2add659a39 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/pull_reshape_through_dequantization.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/pull_reshape_through_dequantization.hpp @@ -6,14 +6,14 @@ #include #include -#include +#include #include namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API PullReshapeThroughDequantization; +class LP_TRANSFORMATIONS_API PullReshapeThroughDequantization; } // namespace low_precision } // namespace pass diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/pull_transpose_through_dequantization.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/pull_transpose_through_dequantization.hpp index 3f1648841220b3..f9d957389e6e5a 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/pull_transpose_through_dequantization.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/pull_transpose_through_dequantization.hpp @@ -6,14 +6,14 @@ #include #include -#include +#include #include namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API PullTransposeThroughDequantization; +class LP_TRANSFORMATIONS_API PullTransposeThroughDequantization; } // namespace low_precision } // namespace pass diff --git 
a/inference-engine/src/low_precision_transformations/include/low_precision/quantization_details.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/quantization_details.hpp index 1e4b05fce2812b..a1c2f1ca4976b1 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/quantization_details.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/quantization_details.hpp @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include @@ -18,7 +18,7 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API QuantizationDetails { +class LP_TRANSFORMATIONS_API QuantizationDetails { public: QuantizationDetails(); QuantizationDetails(const QuantizationDetails& quantizationDetails); @@ -27,33 +27,25 @@ class TRANSFORMATIONS_API QuantizationDetails { const std::vector& inputLowValues, const std::vector& inputHighValues, const std::vector& outputLowValues, - const std::vector& outputHighValues, - const size_t inputIntervalsCount, - const size_t outputIntervalsCount, - const size_t outputChannelsCount); + const std::vector& outputHighValues); static bool outputLayoutIsSupported(std::shared_ptr quantize); static void getInputIntervals( std::shared_ptr quantize, std::vector& inputLowValues, - std::vector& inputHighValues, - size_t& inputIntervalsCount); + std::vector& inputHighValues); static void getOutputIntervals( std::shared_ptr quantize, std::vector& outputLowValues, - std::vector& outputHighValues, - size_t& outputIntervalsCount); + std::vector& outputHighValues); static QuantizationDetails getDetails(std::shared_ptr); bool hasNegativeOutput() const; float maxOutput(const size_t channel) const; float maxInput(const size_t channel) const; - float maxOutputHigh() const; - float minOutputLow() const; - float getInputLowValue(const size_t channel) const; float getInputHighValue(const size_t channel) const; float getOutputLowValue(const size_t channel) const; @@ -66,19 +58,15 @@ class TRANSFORMATIONS_API QuantizationDetails { const std::vector inputHighValues; const std::vector outputLowValues; const std::vector outputHighValues; - const size_t inputIntervalsCount; - const size_t outputIntervalsCount; - const size_t outputChannelsCount; private: - static void validate(std::shared_ptr constantLayer); static std::vector getBlobValue(std::shared_ptr constantLayer); }; inline std::ostream &operator << (std::ostream &os, const QuantizationDetails& value) { os << "levels: " << value.levels << - ", input 1/" << value.inputIntervalsCount << ": [" << value.getInputLowValue(0) << " : " << value.getInputHighValue(0) << "], " << - ", output 1/" << value.outputIntervalsCount << ": [" << value.getOutputLowValue(0) << " : " << value.getOutputHighValue(0) << "]"; + ", input 1/" << value.inputLowValues.size() << ": [" << value.getInputLowValue(0) << " : " << value.getInputHighValue(0) << "], " << + ", output 1/" << value.outputLowValues.size() << ": [" << value.getOutputLowValue(0) << " : " << value.getOutputHighValue(0) << "]"; return os; } diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/reduce_base_transformation.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/reduce_base_transformation.hpp index 679a8d0f61d6db..0b9782e4eb207a 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/reduce_base_transformation.hpp +++ 
b/inference-engine/src/low_precision_transformations/include/low_precision/reduce_base_transformation.hpp @@ -19,10 +19,10 @@ namespace low_precision { * */ -class TRANSFORMATIONS_API ReduceBaseTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API ReduceBaseTransformation : public LayerTransformation { public: - ReduceBaseTransformation(const Params& params); - bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) const override; + ReduceBaseTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr reduce) const override; protected: diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/reduce_max.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/reduce_max.hpp index 453f48dfeca48b..b9c2b98253ef82 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/reduce_max.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/reduce_max.hpp @@ -14,11 +14,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API ReduceMaxTransformation : public ReduceBaseTransformation { +class LP_TRANSFORMATIONS_API ReduceMaxTransformation : public ReduceBaseTransformation { public: - ReduceMaxTransformation(const Params& params); + NGRAPH_RTTI_DECLARATION; + ReduceMaxTransformation(const Params& params = Params()); bool isPrecisionPreserved(std::shared_ptr reduce) const noexcept override; - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr reduce) const override; protected: diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/reduce_mean.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/reduce_mean.hpp index 8f62c34cc0cec0..31f542a37548b2 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/reduce_mean.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/reduce_mean.hpp @@ -14,11 +14,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API ReduceMeanTransformation : public ReduceBaseTransformation { +class LP_TRANSFORMATIONS_API ReduceMeanTransformation : public ReduceBaseTransformation { public: - ReduceMeanTransformation(const Params& params); + NGRAPH_RTTI_DECLARATION; + ReduceMeanTransformation(const Params& params = Params()); bool isPrecisionPreserved(std::shared_ptr reduce) const noexcept override; - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr reduce) const override; protected: diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/reduce_min.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/reduce_min.hpp index 2545af1e9febd7..e4ccdeab97e74a 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/reduce_min.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/reduce_min.hpp @@ -14,11 +14,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API ReduceMinTransformation : public ReduceBaseTransformation { +class 
LP_TRANSFORMATIONS_API ReduceMinTransformation : public ReduceBaseTransformation { public: - ReduceMinTransformation(const Params& params); + NGRAPH_RTTI_DECLARATION; + ReduceMinTransformation(const Params& params = Params()); bool isPrecisionPreserved(std::shared_ptr reduce) const noexcept override; - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr reduce) const override; protected: diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/reduce_sum.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/reduce_sum.hpp index ae7f07efe6bc65..5053545fbff5bb 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/reduce_sum.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/reduce_sum.hpp @@ -14,11 +14,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API ReduceSumTransformation : public ReduceBaseTransformation { +class LP_TRANSFORMATIONS_API ReduceSumTransformation : public ReduceBaseTransformation { public: + NGRAPH_RTTI_DECLARATION; ReduceSumTransformation(const Params& params); bool isPrecisionPreserved(std::shared_ptr reduce) const noexcept override; - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr reduce) const override; protected: diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/relu.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/relu.hpp index 734a42273c50c3..1f7489a73d8337 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/relu.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/relu.hpp @@ -12,12 +12,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API ReluTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API ReluTransformation : public LayerTransformation { public: - ReluTransformation(const Params& params) : LayerTransformation(params) {} - ~ReluTransformation() override {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + ReluTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr op) const override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/reshape.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/reshape.hpp index 290e028dc5f3e9..cb1b3a28456f03 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/reshape.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/reshape.hpp @@ -11,12 +11,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API ReshapeTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API ReshapeTransformation : public LayerTransformation { public: - ReshapeTransformation(const Params& params) : 
LayerTransformation(params) {} - ~ReshapeTransformation() override {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + ReshapeTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr op) const override; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/attribute_parameters.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/attribute_parameters.hpp new file mode 100644 index 00000000000000..6789bc73ae564f --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/attribute_parameters.hpp @@ -0,0 +1,14 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include "low_precision/lpt_visibility.hpp" + +class LP_TRANSFORMATIONS_API AttributeParameters { +public: + AttributeParameters(const ngraph::element::Type deqPrecision = ngraph::element::f32) : deqPrecision(deqPrecision) {} + ngraph::element::Type deqPrecision; +}; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/avg_pool_precision_preserved_attribute.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/avg_pool_precision_preserved_attribute.hpp new file mode 100644 index 00000000000000..b8aabf3718db4b --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/avg_pool_precision_preserved_attribute.hpp @@ -0,0 +1,39 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include +#include "low_precision/lpt_visibility.hpp" +#include "low_precision/rt_info/precision_preserved_attribute.hpp" + +namespace ngraph { +class LP_TRANSFORMATIONS_API AvgPoolPrecisionPreservedAttribute : public PrecisionPreservedAttribute { +}; + +using AvgPoolPrecisionPreservedAttributePtr = std::shared_ptr; + +extern template class LP_TRANSFORMATIONS_API VariantImpl; + +template<> +class LP_TRANSFORMATIONS_API VariantWrapper : public VariantImpl { +public: + static constexpr VariantTypeInfo type_info{ "LowPrecision::AvgPoolPrecisionPreserved", 0 }; + + const VariantTypeInfo& get_type_info() const override { + return type_info; + } + + VariantWrapper(const value_type& value) : VariantImpl(value) {} + + AvgPoolPrecisionPreservedAttributePtr get() { return this->m_value; } + + void merge(std::vector>>>& attributes); + std::string to_string() override; +}; +} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/intervals_alignment_attribute.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/intervals_alignment_attribute.hpp new file mode 100644 index 00000000000000..3c723a444055c4 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/intervals_alignment_attribute.hpp @@ -0,0 +1,88 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include +#include + +#include 
"low_precision/rt_info/shared_value_attribute.hpp" +#include "low_precision/rt_info/attribute_parameters.hpp" +#include "low_precision/lpt_visibility.hpp" + +namespace ngraph { +class IntervalsAlignmentAttribute; + +class LP_TRANSFORMATIONS_API IntervalsAlignmentSharedValue : public SharedValue { +public: + class Interval { + public: + Interval() = default; + Interval(const float low, const float high) : low(low), high(high) {} + float low; + float high; + }; + + IntervalsAlignmentSharedValue() = default; + IntervalsAlignmentSharedValue( + const Interval& combinedInterval, + const Interval& minInterval, + const size_t minLevels) : + combinedInterval(combinedInterval), + minInterval(minInterval), + minLevels(minLevels) {} + + Interval combinedInterval; + Interval minInterval; + size_t minLevels; + // preferable precisions which are preferred by affected quantization operations to avoid zero points + std::set preferablePrecisions; + +#ifdef LPT_DEBUG + std::string minLevelsOperation; +#endif +}; + +class LP_TRANSFORMATIONS_API IntervalsAlignmentAttribute : public SharedValueAttribute { +public: + IntervalsAlignmentAttribute() = default; + IntervalsAlignmentAttribute(IntervalsAlignmentSharedValue::Interval combinedInterval, size_t levels); + IntervalsAlignmentAttribute( + const IntervalsAlignmentSharedValue::Interval combinedInterval, + const size_t levels, + const IntervalsAlignmentSharedValue::Interval minInterval, + const size_t minLevels); + + // specify subgraph original levels + size_t levels; +}; + +using IntervalsAlignmentAttributePtr = std::shared_ptr; + +extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl; + +template<> +class LP_TRANSFORMATIONS_API VariantWrapper> : + public VariantImpl> { +public: + static constexpr VariantTypeInfo type_info{ "LowPrecision::IntervalsAlignment", 0 }; + + const VariantTypeInfo& get_type_info() const override { + return type_info; + } + + VariantWrapper(const value_type& value) : VariantImpl(value) {} + + std::shared_ptr get() const { return this->m_value; } + + static std::shared_ptr>> create( + const std::shared_ptr& node, + const AttributeParameters& params); + void merge(std::vector>>>& attributes); + std::string to_string() override; +}; +} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/per_tensor_quantization_attribute.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/per_tensor_quantization_attribute.hpp new file mode 100644 index 00000000000000..1001df8bffeaf7 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/per_tensor_quantization_attribute.hpp @@ -0,0 +1,33 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include +#include +#include "low_precision/rt_info/shared_value_attribute.hpp" +#include "low_precision/layer_transformation.hpp" +#include "attribute_parameters.hpp" + +namespace ngraph { +class LP_TRANSFORMATIONS_API PerTensorQuantizationAttribute { +}; + +extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl; + +template<> +class LP_TRANSFORMATIONS_API VariantWrapper : public VariantImpl { +public: + static constexpr VariantTypeInfo type_info { "LowPrecision::PerTensorQuantization", 0 }; + + VariantWrapper(const value_type& value) : VariantImpl(value) {} + + const VariantTypeInfo& get_type_info() const override { + return type_info; + } +}; +} // namespace ngraph diff --git 
a/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/precision_preserved_attribute.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/precision_preserved_attribute.hpp new file mode 100644 index 00000000000000..bf109407d008e9 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/precision_preserved_attribute.hpp @@ -0,0 +1,51 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include +#include +#include "low_precision/lpt_visibility.hpp" +#include "low_precision/rt_info/shared_value_attribute.hpp" + +namespace ngraph { + +class LP_TRANSFORMATIONS_API PrecisionPreservedAttribute; + +class LP_TRANSFORMATIONS_API PrecisionPreservedSharedValue : public SharedValue { +public: + PrecisionPreservedSharedValue() = default; + PrecisionPreservedSharedValue(const bool value) : value(value) {} + bool value; +}; + +class LP_TRANSFORMATIONS_API PrecisionPreservedAttribute : public SharedValueAttribute { +public: + PrecisionPreservedAttribute() = default; + PrecisionPreservedAttribute(const bool value); +}; + +using PrecisionPreservedAttributePtr = std::shared_ptr; + +extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl; + +template<> +class LP_TRANSFORMATIONS_API VariantWrapper : public VariantImpl { +public: + static constexpr VariantTypeInfo type_info{ "LowPrecision::PrecisionPreserved", 0 }; + + const VariantTypeInfo& get_type_info() const override { + return type_info; + } + + VariantWrapper(const value_type& value) : VariantImpl(value) {} + + PrecisionPreservedAttributePtr get() { return this->m_value; } + + std::string to_string() override; +}; +} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/precisions_attribute.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/precisions_attribute.hpp new file mode 100644 index 00000000000000..5fc08c17926a98 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/precisions_attribute.hpp @@ -0,0 +1,64 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include +#include +#include + +#include "low_precision/layer_transformation.hpp" +#include "low_precision/lpt_visibility.hpp" +#include "low_precision/rt_info/attribute_parameters.hpp" +#include "low_precision/rt_info/shared_value_attribute.hpp" + +namespace ngraph { + +class PrecisionsAttribute; + +class LP_TRANSFORMATIONS_API PrecisionsSharedValue : public SharedValue { +public: + std::vector precisions; +}; + +using PrecisionsAttributePtr = std::shared_ptr; + +class LP_TRANSFORMATIONS_API PrecisionsAttribute : public SharedValueAttribute { +public: + static const std::vector defaultPrecisions; + PrecisionsAttribute(const std::vector& precisions = defaultPrecisions); +}; + +extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl>; + +template<> +class LP_TRANSFORMATIONS_API VariantWrapper> : public VariantImpl> { +public: + static constexpr VariantTypeInfo type_info{ "LowPrecision::Precisions", 0 }; + + const VariantTypeInfo& get_type_info() const override { + return type_info; + } + + VariantWrapper(const value_type& value) : VariantImpl(value) {} + + std::shared_ptr init(const std::shared_ptr& node) override; + + std::shared_ptr get() { return this->m_value; 
} + + // create attribute instance for node + static std::shared_ptr>> create( + const std::shared_ptr& node, + const AttributeParameters& params); + // merge attribute instances which can be got from different sources: node, input port or output port + void merge(std::vector>>>& attributes); + // vizualize shared attributes details in VizualizeTree pass + std::string to_string() override; +}; +} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/quantization_alignment_attribute.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/quantization_alignment_attribute.hpp new file mode 100644 index 00000000000000..198301a9c4aef2 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/quantization_alignment_attribute.hpp @@ -0,0 +1,60 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include "shared_value_attribute.hpp" +#include "attribute_parameters.hpp" + +namespace ngraph { +class QuantizationAlignmentAttribute; + +class LP_TRANSFORMATIONS_API QuantizationAlignmentSharedValue : public SharedValue { +public: + QuantizationAlignmentSharedValue(const bool value = false) : value(value) {} + bool value; +}; + +class LP_TRANSFORMATIONS_API QuantizationAlignmentAttribute : public SharedValueAttribute{ +public: + QuantizationAlignmentAttribute(const bool value = false); +}; + +using QuantizationAlignmentAttributePtr = std::shared_ptr; + +extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl; + +template<> +class LP_TRANSFORMATIONS_API VariantWrapper> : + public VariantImpl> { +public: + static constexpr VariantTypeInfo type_info{ "LowPrecision::QuantizationAlignment", 0 }; + + const VariantTypeInfo& get_type_info() const override { + return type_info; + } + + VariantWrapper(const value_type& value) : VariantImpl(value) {} + + std::shared_ptr init(const std::shared_ptr& node) override; + + std::shared_ptr get() { return this->m_value; } + + static std::shared_ptr>> create( + const std::shared_ptr& node, + const AttributeParameters& params); + void merge(std::vector>>>& attributes); + std::string to_string() override; +}; +} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/shared_value_attribute.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/shared_value_attribute.hpp new file mode 100644 index 00000000000000..706ff46d590fa6 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/shared_value_attribute.hpp @@ -0,0 +1,59 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include +#include + +#include +#include + +template +class LP_TRANSFORMATIONS_API SharedValue; + +template +class LP_TRANSFORMATIONS_API SharedValueAttribute { +public: + SharedValueAttribute() : sharedValue(std::make_shared()) {} + virtual ~SharedValueAttribute() = default; + std::shared_ptr sharedValue; + std::string get_string() { + std::stringstream ss; + + const size_t rawPointer = (size_t)this; + ss << rawPointer << ": "; + + const size_t sharedValueRawPointer = (size_t)sharedValue.get(); + ss << "sharedValue: " << sharedValueRawPointer; + + bool firstAttribute = true; + ss << ", attributes: {"; + for 
(auto& attributeWeakPtr : sharedValue->attributes) { + auto attribute = attributeWeakPtr.lock(); + if (attribute == nullptr) { + continue; + } + + if (!firstAttribute) { + ss << ", "; + } + ss << (size_t)attribute.get(); + firstAttribute = false; + } + ss << "}, "; + return ss.str(); + } +}; + +template +class LP_TRANSFORMATIONS_API SharedValue { +public: + virtual ~SharedValue() = default; + std::vector> attributes; +}; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/shuffle_channels.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/shuffle_channels.hpp index 42124d4b7b101b..ab28d754598e67 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/shuffle_channels.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/shuffle_channels.hpp @@ -11,11 +11,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API ShuffleChannelsTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API ShuffleChannelsTransformation : public LayerTransformation { public: - ShuffleChannelsTransformation(const Params& params); - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) const override; + NGRAPH_RTTI_DECLARATION; + ShuffleChannelsTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr op) const override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/split.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/split.hpp index 5a9fbc48ce7916..d4f2c72b8beb7b 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/split.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/split.hpp @@ -13,11 +13,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API SplitTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API SplitTransformation : public LayerTransformation { public: - SplitTransformation(const Params& params); - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) const override; + NGRAPH_RTTI_DECLARATION; + SplitTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; void updateOutputs( diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/squeeze.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/squeeze.hpp index df4d3576a2b68d..fab050564c8bc0 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/squeeze.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/squeeze.hpp @@ -11,11 +11,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API SqueezeTransformation : public 
LayerTransformation { +class LP_TRANSFORMATIONS_API SqueezeTransformation : public LayerTransformation { public: - SqueezeTransformation(const Params& params); - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + SqueezeTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/strided_slice.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/strided_slice.hpp index 2228020d45988c..5a0520f54ae9b1 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/strided_slice.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/strided_slice.hpp @@ -12,11 +12,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API StridedSliceTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API StridedSliceTransformation : public LayerTransformation { public: - StridedSliceTransformation(const Params& params); - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) const override; + NGRAPH_RTTI_DECLARATION; + StridedSliceTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr op) const override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/subtract.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/subtract.hpp index e0beb34946ae88..56c66d9945040b 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/subtract.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/subtract.hpp @@ -11,12 +11,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API SubtractTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API SubtractTransformation : public LayerTransformation { public: - SubtractTransformation(const Params& params) : LayerTransformation(params) {} - ~SubtractTransformation() override {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + SubtractTransformation(const Params& params); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; }; } // namespace low_precision diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/subtract_multiply_to_multiply_add.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/subtract_multiply_to_multiply_add.hpp index 62bcd527663a6e..cee4f4f5d627e1 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/subtract_multiply_to_multiply_add.hpp 
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/subtract_multiply_to_multiply_add.hpp @@ -12,12 +12,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API SubtractMultiplyToMultiplyAddTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API SubtractMultiplyToMultiplyAddTransformation : public LayerTransformation { public: - SubtractMultiplyToMultiplyAddTransformation(const Params& params) : LayerTransformation(params) {} - ~SubtractMultiplyToMultiplyAddTransformation() override {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + SubtractMultiplyToMultiplyAddTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/transformation_context.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/transformation_context.hpp index 0419cac1256cc8..1aad5e55bd648e 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/transformation_context.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/transformation_context.hpp @@ -13,8 +13,9 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API TransformationContext { +class LP_TRANSFORMATIONS_API TransformationContext { public: + TransformationContext(); explicit TransformationContext(std::shared_ptr function); std::shared_ptr function; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/transformer.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/transformer.hpp deleted file mode 100644 index 9e096c85ce458a..00000000000000 --- a/inference-engine/src/low_precision_transformations/include/low_precision/transformer.hpp +++ /dev/null @@ -1,316 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include -#include -#include - -#include -#include - -#include "layer_transformation.hpp" -#include "iparams_manager.hpp" -#include "ilayer_transformations_manager.hpp" - -namespace ngraph { -namespace pass { -namespace low_precision { - -struct StandaloneCleanup { - std::string typeName; - std::string typeId; - LayerTransformationPtr transformation; -}; - -class TRANSFORMATIONS_API LowPrecisionTransformations { -public: - LowPrecisionTransformations() {} - LowPrecisionTransformations( - const std::map& branchSpecificTransformations, - const std::map& decompositionTransformations, - const std::map& transformations, - const std::map>>& cleanupTransformations, - const std::vector& standaloneCleanupTransformations); - - void setUpdatePrecisions(const bool updatePrecisions); - void setQuantizedTensorAlignmentOnActivations(const LayerTransformation::QuantizedTensorAlignment quantizedTensorAlignmentOnActivations); - void setQuantizedTensorAlignmentOnWeights(const LayerTransformation::QuantizedTensorAlignment quantizedTensorAlignmentOnWeights); - - /** - * Remove branch specific transformation. 
Transformation type and operation type are required. - * Operation type is used to find transformation by operation during precision definition. - */ - template - LowPrecisionTransformations& removeBranchSpecific() { - const std::string operationType = getType(); - const std::string transformationType = typeid(Transformation).name(); - - for (auto it = branchSpecificTransformations.begin(); it != branchSpecificTransformations.end(); ++it) { - const auto& tranformationPtr = *it->second; - if ((it->first == operationType) && (typeid(tranformationPtr).name() == transformationType)) { - branchSpecificTransformations.erase(it); - break; - } - } - return *this; - } - - /** - * Remove transformation. Transformation type and operation type are required. - * Operation type is used to find transformation by operation during precision definition. - */ - template - LowPrecisionTransformations& remove() { - const std::string operationType = getType(); - const std::string transformationType = typeid(Transformation).name(); - - for (auto it = transformations.begin(); it != transformations.end(); ++it) { - const auto& tranformationPtr = *it->second; - if ((it->first == operationType) && (typeid(tranformationPtr).name() == transformationType)) { - transformations.erase(it); - break; - } - } - return *this; - } - - /** - * Remove cleanup transformation. Transformation type and operation type are required. - * Operation type is used to find transformation by operation during precision definition. - */ - template - LowPrecisionTransformations& removeCleanup() { - const std::string operationType = getType(); - const std::string transformationType = typeid(Transformation).name(); - - const auto it = cleanupTransformations.find(operationType); - if (it != cleanupTransformations.end()) { - const auto it1 = std::find_if(it->second.begin(), it->second.end(), - [&](const std::pair& transformation) { - return transformation.first == transformationType; - }); - if (it1 != it->second.end()) { - it->second.erase(it1); - if (it->second.empty()) { - cleanupTransformations.erase(it); - } - } - } - return *this; - } - - /** - * Remove standalone cleanup transformation. Transformation type and operation type are required. - * Operation type is used to find transformation by operation during precision definition. - */ - template - LowPrecisionTransformations& removeStandaloneCleanup() { - const std::string operationType = getType(); - const std::string transformationType = typeid(Transformation).name(); - - for (auto it = standaloneCleanupTransformations.begin(); it != standaloneCleanupTransformations.end(); ++it) { - const auto& standaloneCleanup = *it; - if ((operationType == standaloneCleanup.typeName) && (transformationType == standaloneCleanup.typeId)) { - standaloneCleanupTransformations.erase(it); - break; - } - } - return *this; - } - - template - LowPrecisionTransformations& removeAll() { - removeBranchSpecific(); - remove(); - removeCleanup(); - removeStandaloneCleanup(); - - return *this; - } - - /** - * Add branch specific transformation. Transformation type and operation type are required. - * Operation type is used to find transformation by operation during precision definition. 
- */ - template - LowPrecisionTransformations& addBranchSpecific(const LayerTransformation::Params& params) { - const std::string typeName = getType(); - const auto it = branchSpecificTransformations.find(typeName); - if (it != branchSpecificTransformations.end()) { - branchSpecificTransformations.erase(it); - } - - branchSpecificTransformations.emplace(typeName, std::make_shared(params)); - return *this; - } - - /** - * Add decomposition transformation. Transformation type and operation type are required. - * Operation type is used to find transformation by operation during precision definition. - */ - template - LowPrecisionTransformations& addDecomposition(const LayerTransformation::Params& params) { - const std::string typeName = getType(); - const auto it = decompositionTransformations.find(typeName); - if (it != decompositionTransformations.end()) { - decompositionTransformations.erase(it); - } - - decompositionTransformations.emplace(typeName, std::make_shared(params)); - return *this; - } - - /** - * Add transformation. Transformation type and operation type are required. - * Operation type is used to find transformation by operation during precision definition. - */ - template - LowPrecisionTransformations& add(const LayerTransformation::Params& params) { - const std::string typeName = getType(); - const auto it = transformations.find(typeName); - if (it != transformations.end()) { - transformations.erase(it); - } - - transformations.emplace(typeName, std::make_shared(params)); - return *this; - } - - /** - * Add cleanup transformation. Transformation type and operation type are required. - * Operation type is used to find transformation by operation during precision definition. - */ - template - LowPrecisionTransformations& addCleanup(const LayerTransformation::Params& params) { - const std::string typeName = getType(); - const std::string typeId = typeid(Transformation).name(); - const auto it = cleanupTransformations.find(typeName); - if (it == cleanupTransformations.end()) { - cleanupTransformations.emplace(typeName, - std::vector>{ std::make_pair(typeId, std::make_shared(params)) }); - } else { - const auto it1 = std::find_if(it->second.begin(), it->second.end(), - [&](const std::pair& transformation) { - return transformation.first == typeName; - }); - if (it1 != it->second.end()) { - it->second.erase(it1); - } - it->second.emplace_back(std::make_pair(typeId, std::make_shared(params))); - } - return *this; - } - - /** - * Add cleanup transformation. Transformation type and operation type are required. - * Operation type is used to find transformation by operation during precision definition. 
- */ - template - LowPrecisionTransformations& addStandaloneCleanup(const LayerTransformation::Params& params) { - const std::string typeName = getType(); - const std::string typeId = typeid(Transformation).name(); - const auto it = std::find_if(standaloneCleanupTransformations.begin(), standaloneCleanupTransformations.end(), - [&](const StandaloneCleanup& transformation) { - return transformation.typeName == typeName && transformation.typeId == typeId; - }); - if (it == standaloneCleanupTransformations.end()) { - standaloneCleanupTransformations.emplace_back(StandaloneCleanup{ typeName, typeId, std::make_shared(params) }); - } else { - *it = { typeName, typeId, std::make_shared(params) }; - } - - return *this; - } - - template - static std::string getType() { - return Operation::get_type_info_static().name; - } - - static std::string getType(const Node& operation) { - return operation.get_type_name(); - } - - std::vector find(const std::string& transformationName) const; - - template - std::vector find() const { - const std::string transformationKey = getType(); - return find(transformationKey); - } - - void setParamsManager(IParamsManager* paramsManager) noexcept; - void setLayerTransformationsManager(ILayerTransformationsManager* layerTransformationsManager) noexcept; - - // Key is not a layer type, but just a name of transformation - // Layer type (or a pattern) is defined by transformation itself as an ngraph matcher - std::map branchSpecificTransformations; - std::map decompositionTransformations; - std::map transformations; - std::map>> cleanupTransformations; - std::vector standaloneCleanupTransformations; - -private: - static void setParamsManager(IParamsManager* paramsManager, std::map& transformations) noexcept; - static void setParamsManager( - IParamsManager* paramsManager, - std::map>>& transformations) noexcept; - static void setParamsManager(IParamsManager* paramsManager, std::vector& transformations) noexcept; - static void setLayerTransformationsManager( - ILayerTransformationsManager* layerTransformationsManager, - std::map& transformations) noexcept; - static void setLayerTransformationsManager( - ILayerTransformationsManager* layerTransformationsManager, - std::map>>& transformations) noexcept; - static void setLayerTransformationsManager( - ILayerTransformationsManager* layerTransformationsManager, - std::vector& transformations) noexcept; -}; - -/** - * @brief low precision transformation component. 
- */ -class TRANSFORMATIONS_API LowPrecisionTransformer : public IParamsManager, ILayerTransformationsManager { -public: - static LowPrecisionTransformations getAllTransformations(const LayerTransformation::Params& params = LayerTransformation::Params()); - - static bool isFunctionQuantized(const std::shared_ptr& function); - - LowPrecisionTransformer(); - LowPrecisionTransformer(const LowPrecisionTransformations& transformations); - void transform(std::shared_ptr network); - - // IParamsManager interface implementation - std::vector getPrecisionsOnActivations(const Node& op) const noexcept override; - - // ILayerTransformationsManager interface implementation - bool isQuantized(const std::shared_ptr& layer) const noexcept override; - bool isPrecisionPreserved(const std::shared_ptr& layer) const noexcept override; - -private: - LowPrecisionTransformations transformations; - - void registerAllMatchers( - std::map transformations, - GraphRewrite& pass, - TransformationContext& context); - - void registerAllMatchers( - std::map>> transformations, - GraphRewrite& pass, - TransformationContext& context); -}; - -class TRANSFORMATIONS_API TypeRelaxedReplacer : public GraphRewrite { -public: - TypeRelaxedReplacer(); -}; - -} // namespace low_precision -} // namespace pass -} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/transparent_base_transformation.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/transparent_base_transformation.hpp index d915515b598197..05b0dbebc0191f 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/transparent_base_transformation.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/transparent_base_transformation.hpp @@ -12,11 +12,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API TransparentBaseTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API TransparentBaseTransformation : public LayerTransformation { public: TransparentBaseTransformation(const Params& params) : LayerTransformation(params) {} ~TransparentBaseTransformation() override {}; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/transpose.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/transpose.hpp index 3b41f3d48b25a7..d22fcc8ed8cf36 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/transpose.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/transpose.hpp @@ -12,12 +12,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API TransposeTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API TransposeTransformation : public LayerTransformation { public: - TransposeTransformation(const Params& params) : LayerTransformation(params) {} - ~TransposeTransformation() override {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + TransposeTransformation(const Params& 
params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr op) const override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/unsqueeze.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/unsqueeze.hpp index ea166c979120ab..580c09ad80bcce 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/unsqueeze.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/unsqueeze.hpp @@ -11,11 +11,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API UnsqueezeTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API UnsqueezeTransformation : public LayerTransformation { public: - UnsqueezeTransformation(const Params& params); - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + UnsqueezeTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/update_shared_precision_preserved.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/update_shared_precision_preserved.hpp new file mode 100644 index 00000000000000..119ae13c412126 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/update_shared_precision_preserved.hpp @@ -0,0 +1,107 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include +#include + +#include "low_precision/network_helper.hpp" +#include "low_precision/lpt_itt.hpp" +#include "low_precision/lpt_visibility.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +template +class UpdateSharedPrecisionPreserved; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph + +template +class ngraph::pass::low_precision::UpdateSharedPrecisionPreserved : public ngraph::pass::MatcherPass { +public: + UpdateSharedPrecisionPreserved() { + ngraph::graph_rewrite_callback callback = [&](pattern::Matcher& m) { + auto node = m.get_match_root(); + + const bool needToCheckExpectedAttributeType = !std::is_same::value; + if (!needToCheckExpectedAttributeType) { + // expected attribute is ignored, set attributes for node inputs except Result & FakeQuantize operations + if (is_type(node) || + is_type(node) || + transformation_callback(node)) { + return false; + } + } + + if (ngraph::pass::low_precision::NetworkHelper::isPrecisionPreserved(node) || is_type(node)) { + return false; + } + + { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "UpdateSharedPrecisionPreserved"); + + // TODO: check if node can be quantized, if not, then doesn't update + for (auto input : node->inputs()) { + auto precisionsAttributeWrapper = getAttribute(input); + if (precisionsAttributeWrapper != nullptr) { + const auto precisionsAttribute = precisionsAttributeWrapper->get(); + 
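// Commentary added for clarity (not part of the patch): an empty shared
// precisions list on any input means no low-precision type is usable there,
// so the check below returns false from the callback and leaves the parent
// PrecisionPreserved shared value untouched.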
assert(precisionsAttribute != nullptr); + if (precisionsAttribute->sharedValue->precisions.empty()) { + return false; + } + } + } + + for (auto input : node->inputs()) { + if (needToCheckExpectedAttributeType) { + if (getAttribute(input) == nullptr) { + return false; + } + } + auto parentAttribute = getSourceAttribute(input); + if (parentAttribute == nullptr) { + continue; + } + + parentAttribute->get()->sharedValue->value = true; + } + } + + return true; + }; + + auto matcher = std::make_shared(pattern::any_input(), "PropagateThroughPrecisionPreserved"); + this->register_matcher(matcher, callback); + } + +private: + Input getDequantizationInput(const Input& input) { + const auto dequantization = NetworkHelper::getDequantization(input.get_node()->shared_from_this(), input.get_index()); + if (!dequantization.empty() && + (is_type(dequantization.data.get_node())) && + is_type(dequantization.data.get_node()->get_input_node_ptr(0))) { + assert(dequantization.data.get_target_inputs().size() == 1ul); + return *dequantization.data.get_target_inputs().begin(); + } + return input; + } + + std::shared_ptr> getSourceAttribute(const Input& input) { + const auto dequantizationInput = getDequantizationInput(input); + const auto output = dequantizationInput.get_source_output(); + auto attribute = ngraph::pass::low_precision::getAttribute(output.get_node()->shared_from_this()); + if (attribute == nullptr) { + attribute = ngraph::pass::low_precision::getAttribute(output.get_node_shared_ptr()); + } + return attribute; + } +}; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/variadic_split.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/variadic_split.hpp index e7cab0c527c10e..014b3775fe75b8 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/variadic_split.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/variadic_split.hpp @@ -13,10 +13,10 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API VariadicSplitTransformation : public SplitTransformation { +class LP_TRANSFORMATIONS_API VariadicSplitTransformation : public SplitTransformation { public: - VariadicSplitTransformation(const Params& params); - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; + NGRAPH_RTTI_DECLARATION; + VariadicSplitTransformation(const Params& params = Params()); }; } // namespace low_precision } // namespace pass diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp index aeb0a6d9abd576..d2b5823fd3d16d 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp @@ -13,21 +13,30 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API WeightableLayerTransformation : public LayerTransformation{ +class LP_TRANSFORMATIONS_API WeightableLayerTransformation : public LayerTransformation{ public: WeightableLayerTransformation(const Params& params); bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; bool canConvolutionBeTransformed(const TransformationContext& context, std::shared_ptr layer) const; 
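// Commentary on the hunk below, added for clarity (not from the patch): the
// per-instance isQuantized()/getFakeQuantizeOnWeights()/getDataPrecisionOnWeights()
// helpers become static (isQuantizedStatic plus public static getters and the
// new isAsymmetricOnWeights), so callers no longer need a constructed
// transformation object, and decomposeFakeQuantizeForWeightsPath() now returns
// bool, presumably to report whether the weights FakeQuantize was decomposed.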
- bool isQuantized(std::shared_ptr layer, bool reshapeIsRequired) const noexcept; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; + static bool checkPrecisionOnActivation( + const std::shared_ptr& node, + const std::vector& supportedPrecisionsOnActivations) { + return true; + } + + static bool isQuantizedStatic(const std::shared_ptr& layer, const bool reshapeIsRequired) noexcept; + protected: - void decomposeFakeQuantizeForWeightsPath(const std::shared_ptr& weightableLayer, size_t outChannelsShapeIndex = 0ul) const; + bool decomposeFakeQuantizeForWeightsPath(const std::shared_ptr& weightableLayer, size_t outChannelsShapeIndex = 0ul) const; static bool isGroup(const std::shared_ptr& node); static bool isDepthwise(const std::shared_ptr& node); - std::shared_ptr getFakeQuantizeOnWeights(const std::shared_ptr& node) const; - DataPrecision getDataPrecisionOnWeights(const std::shared_ptr& node) const; +public: + static std::shared_ptr getFakeQuantizeOnWeights(const std::shared_ptr& node); + static DataPrecision getDataPrecisionOnWeights(const std::shared_ptr& node); + static bool isAsymmetricOnWeights(const std::shared_ptr& node); }; } // namespace low_precision diff --git a/inference-engine/src/low_precision_transformations/src/add.cpp b/inference-engine/src/low_precision_transformations/src/add.cpp index 915e87d2f60803..4ecd8464370c94 100644 --- a/inference-engine/src/low_precision_transformations/src/add.cpp +++ b/inference-engine/src/low_precision_transformations/src/add.cpp @@ -10,6 +10,7 @@ #include #include +#include #include "ngraph_ops/type_relaxed.hpp" #include "low_precision/common/ie_lpt_exception.hpp" @@ -20,6 +21,8 @@ namespace ngraph { namespace pass { namespace low_precision { +NGRAPH_RTTI_DEFINITION(AddTransformation, "AddTransformation", 0); + std::shared_ptr replaceToSubtract(const std::shared_ptr& op) { // TODO: separate this part to standalone transformation: AddToSubtractTransformation // motivation: @@ -88,11 +91,22 @@ std::shared_ptr fuseWithSubtract(const std::shared_ptr& return newSubtract; } -void AddTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addSingleNodePattern(pass, context); +AddTransformation::AddTransformation(const Params& params) : EltwiseBaseTransformation(params) { + auto matcher = ngraph::pattern::wrap_type(); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "AddTransformation"); + this->register_matcher(m, callback); } -bool AddTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool AddTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { std::shared_ptr op = as_type_ptr(m.get_match_root()); if ((op == nullptr) || (!canBeTransformed(context, op))) { return false; diff --git a/inference-engine/src/low_precision_transformations/src/align_quantization_intervals.cpp b/inference-engine/src/low_precision_transformations/src/align_quantization_intervals.cpp new file mode 100644 index 00000000000000..728161d0207aa1 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/align_quantization_intervals.cpp @@ -0,0 +1,25 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/align_quantization_intervals.hpp" +#include +#include +#include 
"low_precision/create_attribute.hpp" +#include "low_precision/propagate_through_precision_preserved.hpp" +#include "low_precision/rt_info/intervals_alignment_attribute.hpp" + +using namespace ngraph; +using namespace ngraph::pass::low_precision; + +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::AlignQuantizationIntervals, "AlignQuantizationIntervals", 0); + +bool ngraph::pass::low_precision::AlignQuantizationIntervals::run_on_function(std::shared_ptr f) { + ngraph::pass::Manager manager; + manager.set_per_pass_validation(false); + std::shared_ptr intervalsAlignment = manager.register_pass(); + intervalsAlignment->add_matcher>(); + intervalsAlignment->add_matcher>(); + manager.run_passes(f); + return false; +} diff --git a/inference-engine/src/low_precision_transformations/src/align_quantization_parameters.cpp b/inference-engine/src/low_precision_transformations/src/align_quantization_parameters.cpp new file mode 100644 index 00000000000000..72d4ed1184c694 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/align_quantization_parameters.cpp @@ -0,0 +1,27 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/align_quantization_parameters.hpp" +#include +#include "low_precision/create_attribute.hpp" +#include "low_precision/propagate_through_precision_preserved.hpp" +#include "low_precision/rt_info/quantization_alignment_attribute.hpp" +#include "low_precision/rt_info/per_tensor_quantization_attribute.hpp" +#include "low_precision/update_shared_precision_preserved.hpp" + +using namespace ngraph; +using namespace ngraph::pass::low_precision; + +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::AlignQuantizationParameters, "AlignQuantizationParameters", 0); + +bool ngraph::pass::low_precision::AlignQuantizationParameters::run_on_function(std::shared_ptr f) { + ngraph::pass::Manager manager; + manager.set_per_pass_validation(false); + std::shared_ptr propagation = manager.register_pass(); + propagation->add_matcher>(); + propagation->add_matcher>(); + propagation->add_matcher>(); + manager.run_passes(f); + return false; +} diff --git a/inference-engine/src/low_precision_transformations/src/avg_pool.cpp b/inference-engine/src/low_precision_transformations/src/avg_pool.cpp index 3af973904e4be1..1fde22ec550f5e 100644 --- a/inference-engine/src/low_precision_transformations/src/avg_pool.cpp +++ b/inference-engine/src/low_precision_transformations/src/avg_pool.cpp @@ -7,39 +7,39 @@ #include #include #include +#include #include "low_precision/network_helper.hpp" +#include "low_precision/rt_info/precision_preserved_attribute.hpp" namespace ngraph { namespace pass { namespace low_precision { -AvgPoolTransformation::AvgPoolTransformation(const Params& params) : LayerTransformation(params) { -} +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::AvgPoolTransformation, "AvgPoolTransformation", 0); -void AvgPoolTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addPattern( - pass, - context, - make_op_pattern({ make_op_label() })); +AvgPoolTransformation::AvgPoolTransformation(const Params& params) : LayerTransformation(params) { + auto matcher = pattern::wrap_type({ pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "AvgPoolTransformation"); + 
this->register_matcher(m, callback); } -bool AvgPoolTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool AvgPoolTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { if (!canBeTransformed(context, m.get_match_root())) { return false; } const std::shared_ptr pooling = NetworkHelper::separateInStandaloneBranch(m.get_match_root()); - - const std::vector> children = getChildrenRecursivelyExceptPrecisionPreserved(pooling); - - bool updatePrecision; - if ((children.size() == 1ul) && (!this->layerTransformationsManager->isQuantized(children[0]))) { - updatePrecision = false; - } else { - updatePrecision = NetworkHelper::notAllChildrensAreFQ(children); - } - + const bool updatePrecision = isPrecisionPreserved(pooling); moveDequantizationAfter(context, pooling, NetworkHelper::getDequantization(pooling), updatePrecision); return true; } @@ -55,8 +55,7 @@ bool AvgPoolTransformation::canBeTransformed(const TransformationContext& contex } bool AvgPoolTransformation::isPrecisionPreserved(std::shared_ptr layer) const noexcept { - const std::vector> children = getChildrenRecursivelyExceptPrecisionPreserved(layer); - return NetworkHelper::notAllChildrensAreFQ(children); + return NetworkHelper::isPrecisionPreserved(layer); } } // namespace low_precision diff --git a/inference-engine/src/low_precision_transformations/src/base_matcher_pass.cpp b/inference-engine/src/low_precision_transformations/src/base_matcher_pass.cpp new file mode 100644 index 00000000000000..2514559179edb1 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/base_matcher_pass.cpp @@ -0,0 +1,13 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/base_matcher_pass.hpp" +#include +#include "low_precision/rt_info/attribute_parameters.hpp" + +using namespace ngraph; +using namespace ngraph::pass::low_precision; + +ngraph::pass::low_precision::BaseMatcherPass::BaseMatcherPass(const AttributeParameters& params) : params(params) { +} diff --git a/inference-engine/src/low_precision_transformations/src/clamp.cpp b/inference-engine/src/low_precision_transformations/src/clamp.cpp index 56cee1d88a497b..45c4cd5986c1a1 100644 --- a/inference-engine/src/low_precision_transformations/src/clamp.cpp +++ b/inference-engine/src/low_precision_transformations/src/clamp.cpp @@ -6,21 +6,32 @@ #include #include #include + +#include #include "low_precision/network_helper.hpp" namespace ngraph { namespace pass { namespace low_precision { -ClampTransformation::ClampTransformation(const Params& params) : LayerTransformation(params) {} +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ClampTransformation, "ClampTransformation", 0); + +ClampTransformation::ClampTransformation(const Params& params) : LayerTransformation(params) { + auto matcher = pattern::wrap_type({ pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; -void ClampTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const { - addPattern(pass, - context, - make_op_pattern({ make_op_label() })); + auto m = std::make_shared(matcher, "ClampTransformation"); + this->register_matcher(m, callback); } -bool ClampTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher& m) const { +bool 
+bool ClampTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher& m) {
     auto subWithTheSameValues = [](std::shared_ptr sub) {
         if (sub == nullptr) {
             return false;
diff --git a/inference-engine/src/low_precision_transformations/src/common/operation_precision_restriction.cpp b/inference-engine/src/low_precision_transformations/src/common/operation_precision_restriction.cpp
new file mode 100644
index 00000000000000..0ec085d7245129
--- /dev/null
+++ b/inference-engine/src/low_precision_transformations/src/common/operation_precision_restriction.cpp
@@ -0,0 +1,19 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "low_precision/common/operation_precision_restriction.hpp"
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include "low_precision/network_helper.hpp"
+#include "low_precision/rt_info/precisions_attribute.hpp"
+
+using namespace ngraph;
+
diff --git a/inference-engine/src/low_precision_transformations/src/concat.cpp b/inference-engine/src/low_precision_transformations/src/concat.cpp
index 622550794b29ab..0863dcb3f09763 100644
--- a/inference-engine/src/low_precision_transformations/src/concat.cpp
+++ b/inference-engine/src/low_precision_transformations/src/concat.cpp
@@ -11,11 +11,11 @@
 #include
 #include
+#include
 #include
 #include "low_precision/common/fake_quantize_dequantization.hpp"
 #include "low_precision/common/ie_lpt_exception.hpp"
-#include "low_precision/common/subgraph.hpp"
 #include "low_precision/common/dequantization_op.hpp"
 #include "low_precision/network_helper.hpp"
@@ -23,218 +23,155 @@
 namespace ngraph {
 namespace pass {
 namespace low_precision {
-void ConcatTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const {
-    addSingleNodePattern(pass, context);
-}
+NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ConcatTransformation, "ConcatTransformation", 0);
-bool ConcatTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const {
-    std::shared_ptr concat = ngraph::as_type_ptr(m.get_match_root());
-    if (!canBeTransformed(context, concat)) {
-        return false;
-    }
+ConcatTransformation::ConcatTransformation(const Params& params) : LayerTransformation(params) {
+    auto matcher = ngraph::pattern::wrap_type();
-    ngraph::pass::low_precision::Subgraph subgraph(layerTransformationsManager);
-    std::unordered_set handledLayers;
-    if (!subgraph.fillSubgraphForConcat(concat, handledLayers)) {
-        return false;
-    }
+    ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
+        auto op = m.get_match_root();
+        if (transformation_callback(op)) {
+            return false;
+        }
-    if (subgraph.quantizationLayers.empty() || isHandled(context, subgraph.quantizationLayers)) {
-        return false;
-    }
+        return transform(*context, m);
+    };
-    // Concat operations precision is defined:
-    // 1. consumers after Concat
-    // 2. 
FakeQuantize precisions without zero point - ngraph::Node& quantizationLayer = *subgraph.quantizationLayers[0]; - std::shared_ptr fq = ngraph::as_type_ptr(quantizationLayer.shared_from_this()); - if (!NetworkHelper::isQuantizeSupported(fq)) { - return false; - } - DataPrecision dataPrecision = getDataPrecision(fq, QuantizationDetails::getDetails(fq), false); - if (dataPrecision.precision == ngraph::element::undefined) { + auto m = std::make_shared(matcher, "ConcatTransformation"); + this->register_matcher(m, callback); +} + +bool ConcatTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { + std::shared_ptr concat = ngraph::as_type_ptr(m.get_match_root()); + if (!canBeTransformed(context, concat)) { return false; } - std::vector concatChildrenPrecisions = precisionsOnActivations; - - for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) { - fq = ngraph::as_type_ptr(subgraph.quantizationLayers[i]); - if (fq == nullptr) { + std::vector layerDequantizations; + layerDequantizations.reserve(concat->get_input_size()); + for (size_t parentIndex = 0ul; parentIndex < concat->get_input_size(); parentIndex++) { + FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(concat, parentIndex); + if (dequantization.empty()) { return false; } + layerDequantizations.push_back(dequantization); + } - if (!NetworkHelper::isQuantizeSupported(fq)) { - return false; + bool allDequantizationShiftAreZero = true; + bool allDequantizationMultiplyAreZero = true; + for (const auto& dequantization : layerDequantizations) { + if (dequantization.subtract != nullptr) { + allDequantizationShiftAreZero = false; } - const QuantizationDetails& quantizationDetails = QuantizationDetails::getDetails(fq); - - // per tensor scale is supported only - if (quantizationDetails.inputHighValues.size() != 1ul) { - return false; + if (dequantization.multiply != nullptr) { + allDequantizationMultiplyAreZero = false; } - // define concatenation operation consumers precisions - std::vector fqChildrenPrecisions = precisionsOnActivations; - fillAvailablePrecisions(subgraph.quantizationLayers[i], fqChildrenPrecisions); - concatChildrenPrecisions = NetworkHelper::precisionIntersection(concatChildrenPrecisions, fqChildrenPrecisions); - if (concatChildrenPrecisions.empty()) { - return false; + if (!allDequantizationShiftAreZero && !allDequantizationMultiplyAreZero) { + break; } + } - // define FakeQuantize precisions without zero point - const DataPrecision dataPrecision2 = getDataPrecision(subgraph.quantizationLayers[i]->shared_from_this(), quantizationDetails, false); - if (dataPrecision2.precision == ngraph::element::undefined) { - return false; - } + auto broadcastElementWiseConst = []( + // FakeQuantize constant shape must be broadcastable to the shape on data. + std::shared_ptr operation, + const ngraph::Shape targetShape) -> std::shared_ptr { + auto targetShapeConst = std::make_shared( + element::i64, ngraph::Shape{ targetShape.size() }, + targetShape); - if (dataPrecision.precision != dataPrecision2.precision) { - dataPrecision = dataPrecision.precision.is_signed() ? 
dataPrecision : dataPrecision2; - } - } + auto broadcast = ngraph::pass::low_precision::fold( + operation, + targetShapeConst, + ngraph::op::AutoBroadcastType::NUMPY); - if (std::find(concatChildrenPrecisions.begin(), concatChildrenPrecisions.end(), dataPrecision.precision) == concatChildrenPrecisions.end()) { - dataPrecision = DataPrecision(concatChildrenPrecisions[0]); - } + return broadcast; + }; - std::vector quantizationLayersDetails; - for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) { - std::shared_ptr fakeQuantize = as_type_ptr(subgraph.quantizationLayers[i]); - auto newFakeQuantize = NetworkHelper::fuseConvert(fakeQuantize); - if (newFakeQuantize == nullptr) { - subgraph.quantizationLayers[i] = fakeQuantize; - quantizationLayersDetails.push_back(QuantizationDetails::getDetails(fakeQuantize)); - continue; + bool someDqInLowPrecision = std::any_of( + layerDequantizations.begin(), + layerDequantizations.end(), + [](const FakeQuantizeDequantization& value) { return value.isLowPrecision(); }); + + bool someDqInFpPrecision = std::any_of( + layerDequantizations.begin(), + layerDequantizations.end(), + [](const FakeQuantizeDequantization& value) { return !value.isLowPrecision(); }); + + bool DqWithDifferentPrecision = someDqInLowPrecision && someDqInFpPrecision; + + OutputVector dataNodes; + NodeVector convertNodes; + NodeVector subtractNodes; + NodeVector multiplyNodes; + for (size_t i = 0; i < layerDequantizations.size(); ++i) { + const auto& dequantization = layerDequantizations[i]; + + if (DqWithDifferentPrecision && dequantization.isLowPrecision()) { + dataNodes.push_back(dequantization.convert); + } else { + dataNodes.push_back(dequantization.data); } - fakeQuantize = newFakeQuantize; - newFakeQuantize = NetworkHelper::composeFakeQuantize(fakeQuantize); - if (newFakeQuantize == nullptr) { - subgraph.quantizationLayers[i] = fakeQuantize; - quantizationLayersDetails.push_back(QuantizationDetails::getDetails(fakeQuantize)); - continue; + if (dequantization.convert != nullptr) { + convertNodes.push_back(dequantization.convert); } - fakeQuantize = newFakeQuantize; - subgraph.quantizationLayers[i] = fakeQuantize; - quantizationLayersDetails.push_back(QuantizationDetails::getDetails(fakeQuantize)); - } - - FakeQuantizeDequantization dequantization; + Shape targetShape(concat->get_input_partial_shape(i).rank().get_length(), 1ul); + targetShape[1] = concat->get_input_partial_shape(i)[1].get_length(); - if ((quantizationLayersDetails[0].inputHighValues.size() == 1)) { - float outputLowValue = quantizationLayersDetails[0].outputLowValues[0]; - float outputHighValue = quantizationLayersDetails[0].outputHighValues[0]; - - for (size_t index = 0lu; index < subgraph.quantizationLayers.size(); index++) { - const QuantizationDetails& quantizationDetails = quantizationLayersDetails[index]; - if (outputLowValue > quantizationDetails.outputLowValues[0]) { - outputLowValue = quantizationDetails.outputLowValues[0]; - } - if (outputHighValue < quantizationDetails.outputHighValues[0]) { - outputHighValue = quantizationDetails.outputHighValues[0]; - } + if (!allDequantizationShiftAreZero) { + subtractNodes.push_back(dequantization.subtract == nullptr ? + std::make_shared(deqPrecision, targetShape, std::vector({ 0.f })) : + broadcastElementWiseConst(dequantization.subtractConstant, targetShape)); } - if ((outputLowValue == 0.f) && (outputHighValue == 0.f)) { - return false; + if (!allDequantizationMultiplyAreZero) { + multiplyNodes.push_back(dequantization.multiply == nullptr ? 
+ std::make_shared(deqPrecision, targetShape, std::vector({ 1.0f })) : + broadcastElementWiseConst(dequantization.multiplyConstant, targetShape)); } + } - const float maxOutputInterval = outputHighValue - outputLowValue; - if (quantizedTensorAlignmentOnActivations == QuantizedTensorAlignment::UpdateLevel) { - const size_t minLevels = getMinQuantizationLevels( - dataPrecision, - maxOutputInterval, - quantizationLayersDetails, - outputLowValue, - outputHighValue); - if (minLevels < this->minQuantizationLevels) { - return false; - } - } + const auto newConcat = concat->clone_with_new_inputs(dataNodes); - // FQ -> SUB_quantization -> MUL_quantization -[INT8]-> SUB_dequantization -> MUL_dequantization -> - const float quantizationMul = (dataPrecision.max - dataPrecision.min) / maxOutputInterval; - const float dequantizationMul = maxOutputInterval / (dataPrecision.max - dataPrecision.min); - - // FQ outputLowValue = dataPrecision.min * dequantizationMul - quantizationSub - const float quantizationSub = outputLowValue - dataPrecision.min * dequantizationMul; - const float dequantizationSub = std::round(-quantizationSub * quantizationMul); - - // 1. get data for dequantization. Dequantization data will be used several times later. - dequantization = ngraph::pass::low_precision::NetworkHelper::makeDequantization( - dequantizationMul, - dequantizationSub, - subgraph.quantizationLayers[0]->get_output_element_type(0), - subgraph.quantizationLayers[0]->get_output_partial_shape(0), - updatePrecisions ? dataPrecision.precision : subgraph.quantizationLayers[0]->get_output_element_type(0), - deqPrecision); - - for (size_t index = 0; index < subgraph.quantizationLayers.size(); index++) { - std::shared_ptr fakeQuantizeLayer = as_type_ptr( - subgraph.quantizationLayers[index]->shared_from_this()); - - const QuantizationDetails& quantizationDetails = quantizationLayersDetails[index]; - - switch (quantizedTensorAlignmentOnActivations) { - case QuantizedTensorAlignment::None: { - THROW_TRANSFORMATION_EXCEPTION << "not implemented: " << quantizedTensorAlignmentOnActivations; - } - case QuantizedTensorAlignment::UpdateLevel: { - const float updatedOutputLowValue = (quantizationDetails.outputLowValues[0] - quantizationSub) * quantizationMul; - const float updatedOutputHighValue = (quantizationDetails.outputHighValues[0] - quantizationSub) * quantizationMul; - - // 2. update FakeQuantize - one time action - std::shared_ptr newFakeQuantizeLayer = ngraph::pass::low_precision::NetworkHelper::updateFakeQuantize( - fakeQuantizeLayer, - updatePrecisions ? 
dataPrecision.precision : fakeQuantizeLayer->get_output_element_type(0), - roundf(updatedOutputLowValue), - roundf(updatedOutputHighValue)); - - const size_t levels = static_cast(fabs(roundf(updatedOutputHighValue) - roundf(updatedOutputLowValue)) + 1.0); - newFakeQuantizeLayer->set_levels(levels); - - subgraph.quantizationLayers[index] = newFakeQuantizeLayer; - subgraph.layers[fakeQuantizeLayer->get_friendly_name()] = newFakeQuantizeLayer; - break; - } - default: { - THROW_TRANSFORMATION_EXCEPTION << "unexpected value " << quantizedTensorAlignmentOnActivations; - } - } - } - } else { - return false; + std::shared_ptr lastDequantization = newConcat; + if (!convertNodes.empty()) { + const auto convert = convertNodes[0]->clone_with_new_inputs({ newConcat }); + + NetworkHelper::copyInfo({ concat, convert }, convert); + lastDequantization = convert; } - auto dequantizationValuesCallback = [&]( - std::shared_ptr layer, - std::shared_ptr child, - const std::string originalLayerName, - std::vector& dequantizationsToConcatenate) { - dequantizationsToConcatenate.push_back(dequantization); - }; + // concatenation axis is 1 + if (!subtractNodes.empty()) { + const auto subtract = std::make_shared( + lastDequantization, + NetworkHelper::toScalarIfPossible(subtractNodes.size() == 1ul ? + subtractNodes[0] : + ngraph::pass::low_precision::fold(subtractNodes, 1))); - addDequantizationLayers(context, subgraph, dequantizationValuesCallback); - - if (updatePrecisions) { - for (const auto it : subgraph.layers) { - const std::shared_ptr& node = it.second; - if (std::dynamic_pointer_cast(node) != nullptr) { - ngraph::pass::low_precision::NetworkHelper::setOutDataPrecisionForTypeRelaxed(node->shared_from_this(), dataPrecision.precision); - } else { - // set precision to explicitly to have updated precision during transformation - for (size_t i = 0; i < node->get_output_size(); ++i) { - node->set_output_type(i, dataPrecision.precision, node->get_output_partial_shape(i)); - } - } - } + NetworkHelper::copyInfo({ concat, subtract }, subtract); + lastDequantization = subtract; } - for (const std::shared_ptr& quantizationLayer : subgraph.quantizationLayers) { - context.quantizedFakeQuantizeNames.insert(quantizationLayer->get_friendly_name()); + if (!multiplyNodes.empty()) { + const auto multiply = std::make_shared>( + DequantizationMultiply( + lastDequantization, + NetworkHelper::toScalarIfPossible(multiplyNodes.size() == 1ul ? 
+ multiplyNodes[0] : + ngraph::pass::low_precision::fold(multiplyNodes, 1))), + layerDequantizations[0].multiply->get_output_element_type(0)); + + NetworkHelper::copyInfo({ concat, multiply }, multiply); + lastDequantization = multiply; } + + replace_node(concat, lastDequantization); + NetworkHelper::copyInfo(concat, newConcat); + updateOutput(context, lastDequantization, newConcat); return true; } @@ -251,6 +188,8 @@ bool ConcatTransformation::canBeTransformed(const TransformationContext& context const auto axis = concat->get_axis(); const auto outPShape = concat->get_output_partial_shape(0); const size_t normalizedAxis = ngraph::normalize_axis(concat->get_friendly_name(), axis, outPShape.rank()); + + // TODO: LPT: to support current flow: #58269 if (normalizedAxis != 1ul) { return false; } @@ -259,6 +198,27 @@ bool ConcatTransformation::canBeTransformed(const TransformationContext& context return false; } + const bool perTensorQuantizationIsRequired = normalizedAxis != 1ul; + + element::Type precision; + for (size_t i = 0ul; i < concat->get_input_size(); i++) { + const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(concat, i); + if (dequantization.empty() || (updatePrecisions && !dequantization.isLowPrecision())) { + return false; + } + + if (precision == element::undefined) { + precision = dequantization.data.get_element_type(); + } else if (precision != dequantization.data.get_element_type()) { + return false; + } + + if (perTensorQuantizationIsRequired && + (((dequantization.subtractConstant != nullptr) && !NetworkHelper::isScalarLike(dequantization.subtractConstant)) || + ((dequantization.multiplyConstant != nullptr) && !NetworkHelper::isScalarLike(dequantization.multiplyConstant)))) { + return false; + } + } return true; } @@ -338,115 +298,6 @@ std::shared_ptr ConcatTransformation::concatenateDeqNodes(NodeVector& node return nodes.size() == 1ul ? 
nodes[0] : fold(nodes, 1); } -void ConcatTransformation::addDequantizationLayers( - TransformationContext& context, - ngraph::pass::low_precision::Subgraph& subgraph, - std::function layer, - std::shared_ptr child, - const std::string originalLayerName, - std::vector& dequantizationsToConcatenate)> getLayerDequantizationCallback) const { - std::unordered_map outputs; - for (size_t i = 0; i < context.function->get_output_size(); ++i) { - ngraph::Node* node = context.function->get_output_op(i).get(); - if (node->get_input_size() != 1ul) { - THROW_IE_LPT_EXCEPTION(*node) << "unexpected inputs count for result node"; - } - - outputs.emplace(node->get_input_node_shared_ptr(0)->get_friendly_name(), node); - } - - std::unordered_map> notHandledSubgraphLayers = subgraph.layers; - while (notHandledSubgraphLayers.size() != 0ul) { - const auto layerIt = notHandledSubgraphLayers.begin(); - std::shared_ptr layer = layerIt->second; - notHandledSubgraphLayers.erase(layerIt); - - std::vector layerDequantizations; - - for (size_t i = 0; i < layer->get_output_size(); ++i) { - const auto childInputs = layer->get_output_target_inputs(i); - for (const auto childInput : childInputs) { - ngraph::Node& child = *childInput.get_node(); - - if (subgraph.layers.find(child.get_friendly_name()) == subgraph.layers.end()) { - std::shared_ptr source = layer; - const std::shared_ptr destination = child.shared_from_this(); - - if (layerDequantizations.size() == 0ul) { - // fill layerDequantizations collection - getLayerDequantizationCallback(source, destination, source->get_friendly_name(), layerDequantizations); - } - - { - NodeVector convertNodes; - NodeVector subtractNodes; - NodeVector multiplyNodes; - - // forming nodes for concatenation - fillDequantizationNodes(layerDequantizations, layer, convertNodes, subtractNodes, multiplyNodes); - - // TODO: the second place (first is FQ decomposition) where dequantization operations are inserted - if (!convertNodes.empty()) { - const size_t sourceOutputIdx = NetworkHelper::getChildInputIndex(source, destination); - std::shared_ptr convert = - convertNodes[0]->clone_with_new_inputs({ destination->get_input_source_output(sourceOutputIdx) }); - - insert_new_node_between(source, destination, convert); - ngraph::copy_runtime_info({ layer, convert }, convert); - source = convert; - } - - // concatenation axis is 1 - if (!subtractNodes.empty()) { - const size_t sourceOutputIdx = NetworkHelper::getChildInputIndex(source, destination); - std::shared_ptr subtract = std::make_shared( - destination->get_input_source_output(sourceOutputIdx), - NetworkHelper::toScalarIfPossible(concatenateDeqNodes(subtractNodes))); - - insert_new_node_between(source, destination, subtract); - ngraph::copy_runtime_info({ layer, subtract }, subtract); - source = subtract; - } - - if (!multiplyNodes.empty()) { - const size_t sourceOutputIdx = NetworkHelper::getChildInputIndex(source, destination); - std::shared_ptr multiply = std::make_shared>( - DequantizationMultiply( - destination->get_input_source_output(sourceOutputIdx), - NetworkHelper::toScalarIfPossible(concatenateDeqNodes(multiplyNodes))), - layerDequantizations[0].multiply->get_output_element_type(0)); - - insert_new_node_between(source, destination, multiply); - ngraph::copy_runtime_info({ layer, multiply }, multiply); - source = multiply; - } - } - - // first input is used - const ngraph::element::Type precision = layerDequantizations[0].data.get_element_type(); - layer->set_output_type(0, precision, layer->get_output_partial_shape(0)); - - const 
auto it = outputs.find(layer->get_friendly_name()); - if (it != outputs.end() && is_type(child.shared_from_this())) { - const std::string originalName = layer->get_friendly_name(); - const std::string newName = layer->get_friendly_name() + LayerTransformation::originalLayerPostfix; - layer->set_friendly_name(newName); - - // Split & VariadicSplit have other naming rules - if (is_type(layer) || is_type(layer)) { - source->set_friendly_name(originalName + "." + std::to_string(i)); - } else { - source->set_friendly_name(originalName); - } - subgraph.layers[layer->get_friendly_name()] = layer; - } - } - } - } - } -} - bool ConcatTransformation::isHandled(const TransformationContext& context, const std::vector>& quantizationOperations) { for (const std::shared_ptr& quantizationLayer : quantizationOperations) { if (context.quantizedFakeQuantizeNames.find(quantizationLayer->get_friendly_name()) != context.quantizedFakeQuantizeNames.end()) { @@ -457,32 +308,6 @@ bool ConcatTransformation::isHandled(const TransformationContext& context, const return false; } -size_t ConcatTransformation::getMinQuantizationLevels( - const DataPrecision& dataPrecision, - const float maxOutputInterval, - const std::vector& quantizationLayersDetails, - const float outputLowValue, - const float outputHighValue) const { - size_t minLevels = std::numeric_limits::max(); - for (const QuantizationDetails quantizationDetails : quantizationLayersDetails) { - // if there is negative part then calculation is based on `outputLowValue` if not then on `outputHighValue` only - const float updatedOutputLowValue = outputLowValue != 0.f ? - (quantizationDetails.outputLowValues[0] / outputLowValue) * dataPrecision.min : - (quantizationDetails.outputLowValues[0] / outputHighValue) * dataPrecision.max; - - // if there is positive part then calculation is based on `outputHighValue` if not then on `outputLowValue` only - const float updatedOutputHighValue = outputHighValue != 0.f ? 
- (quantizationDetails.outputHighValues[0] / outputHighValue) * dataPrecision.max : - (quantizationDetails.outputHighValues[0] / outputLowValue) * dataPrecision.min; - - const size_t levels = static_cast(fabs(roundf(updatedOutputHighValue) - roundf(updatedOutputLowValue)) + 1.0); - if (minLevels > levels) { - minLevels = levels; - } - } - return minLevels; -} - } // namespace low_precision } // namespace pass } // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp b/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp deleted file mode 100644 index cd1f01a54f68cc..00000000000000 --- a/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp +++ /dev/null @@ -1,334 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "low_precision/concat_multi_channels.hpp" - -#include -#include -#include -#include -#include - -#include -#include - -#include "low_precision/common/fake_quantize_dequantization.hpp" -#include "low_precision/common/dequantization_op.hpp" -#include "low_precision/common/ie_lpt_exception.hpp" -#include "low_precision/common/subgraph.hpp" -#include "low_precision/network_helper.hpp" - -namespace ngraph { -namespace pass { -namespace low_precision { - -bool ConcatMultiChannelsTransformation::isMultiChannel(const std::vector>& concatLayers) const noexcept { - for (const std::shared_ptr& concat : concatLayers) { - const std::vector> children = getChildrenRecursivelyExceptPrecisionPreserved(concat); - for (const std::shared_ptr& child : children) { - if ((is_type(child.get()) || - is_type(child.get())) && - this->layerTransformationsManager->isQuantized(child)) { - return false; - } - } - } - return true; -} - -void ConcatMultiChannelsTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const { - addSingleNodePattern(pass, context); -} - -bool ConcatMultiChannelsTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { - std::shared_ptr concat = ngraph::as_type_ptr(m.get_match_root()); - if (!canBeTransformed(context, concat)) { - return false; - } - - ngraph::pass::low_precision::Subgraph subgraph(layerTransformationsManager); - std::unordered_set handledLayers; - if (!subgraph.fillSubgraphForConcat(concat, handledLayers)) { - return false; - } - - if (subgraph.quantizationLayers.empty() || isHandled(context, subgraph.quantizationLayers)) { - return false; - } - - if (!isMultiChannel(subgraph.concatLayers)) { - ConcatTransformation::transform(context, m); - return false; - } - - DataPrecision dataPrecision; - { - std::vector concatChildrenPrecisions = precisionsOnActivations; - for (auto quantizationLayer : subgraph.quantizationLayers) { - std::shared_ptr fq = ngraph::as_type_ptr(quantizationLayer->shared_from_this()); - if (!NetworkHelper::isQuantizeSupported(fq)) { - return false; - } - - // define concatenation operation consumers precisions - std::vector fqChildrenPrecisions = precisionsOnActivations; - fillAvailablePrecisions(quantizationLayer, fqChildrenPrecisions); - concatChildrenPrecisions = NetworkHelper::precisionIntersection(concatChildrenPrecisions, fqChildrenPrecisions); - if (concatChildrenPrecisions.empty()) { - return false; - } - - // define FakeQuantize precisions without zero point - const DataPrecision tmp = getDataPrecision(fq, QuantizationDetails::getDetails(fq), false); - if (dataPrecision.precision == 
ngraph::element::undefined) { - dataPrecision = tmp; - continue; - } - - if ((tmp.precision != dataPrecision.precision) && (tmp.precision == ngraph::element::u8)) { - dataPrecision = tmp; - } - } - - if (std::find(concatChildrenPrecisions.begin(), concatChildrenPrecisions.end(), dataPrecision.precision) == concatChildrenPrecisions.end()) { - dataPrecision = DataPrecision(concatChildrenPrecisions[0]); - } - } - - for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) { - const std::shared_ptr fq = ngraph::as_type_ptr(subgraph.quantizationLayers[i]); - if (fq == nullptr) { - return false; - } - - if (!NetworkHelper::isQuantizeSupported(fq)) { - return false; - } - } - - std::unordered_map dequantizations; - - for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) { - const std::shared_ptr& fakeQuantizeLayer = subgraph.quantizationLayers[i]; - - std::shared_ptr fq = ngraph::as_type_ptr(fakeQuantizeLayer->shared_from_this()); - assert(fq); - - auto newFakeQuantize = NetworkHelper::fuseConvert(fq); - if (newFakeQuantize != nullptr) { - fq = newFakeQuantize; - } - - newFakeQuantize = NetworkHelper::composeFakeQuantize(fq); - if (newFakeQuantize != nullptr) { - fq = newFakeQuantize; - } - - const DataPrecision currentDataPrecision = getDataPrecision(fq, QuantizationDetails::getDetails(fq), false); - const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(fq); - - // 1. get data for dequantization. Dequantization data will be used several times later. - const FakeQuantizeDequantization fakeQuantizeDequantization = ngraph::pass::low_precision::NetworkHelper::createDequantizationFromFakeQuantize( - fq, - dataPrecision.precision, - dataPrecision.min, - dataPrecision.max, - dataPrecision.precision == currentDataPrecision.precision ? currentDataPrecision.hasZeroPoint : true, - updatePrecisions, - deqPrecision); - dequantizations[fakeQuantizeLayer->get_friendly_name()] = fakeQuantizeDequantization; - - // 2. update FakeQuantize - one time action - const std::shared_ptr newFakeQuantizeLayer = ngraph::pass::low_precision::NetworkHelper::updateFakeQuantize( - fq, - updatePrecisions ? 
dataPrecision.precision : fakeQuantizeLayer->get_output_element_type(0), - roundf(dataPrecision.min), - roundf(dataPrecision.max)); - - subgraph.quantizationLayers[i] = newFakeQuantizeLayer; - subgraph.layers[fakeQuantizeLayer->get_friendly_name()] = newFakeQuantizeLayer; - } - - auto dequantizationValuesCallback = [&]( - std::shared_ptr layer, - std::shared_ptr child, - const std::string originalLayerName, - std::vector& dequantizationsToConcatenate) { - if (layer->get_friendly_name() != originalLayerName) { - const auto update = []( - const std::string& originalLayerName, - const std::string& newLayerName, - std::unordered_map& dequantizationLayers) { - auto it = dequantizationLayers.find(originalLayerName); - if (it != dequantizationLayers.end()) { - dequantizationLayers.emplace(newLayerName, it->second); - dequantizationLayers.erase(it); - } - }; - update(originalLayerName, layer->get_friendly_name(), dequantizations); - } - - fillDequantization( - layer, - dequantizations, - dequantizationsToConcatenate); - - if (!is_type(layer)) { - // for intermediate layers we should get Dq operations to be inserted between layer and child - assert(dequantizationsToConcatenate.size() == 1ul); - const size_t sourceOutputIdx = NetworkHelper::getParentOutputIndex(layer, child); - if (layer->get_input_partial_shape(0)[1] != layer->get_output_partial_shape(sourceOutputIdx)[1]) { - dequantizationsToConcatenate[0] = getFoldedDequantization(layer, dequantizationsToConcatenate[0], sourceOutputIdx); - } - } - }; - - addDequantizationLayers(context, subgraph, dequantizationValuesCallback); - - if (updatePrecisions) { - for (const auto it : subgraph.layers) { - const std::shared_ptr node = it.second; - if (std::dynamic_pointer_cast(node)) { - ngraph::pass::low_precision::NetworkHelper::setOutDataPrecisionForTypeRelaxed(node->shared_from_this(), dataPrecision.precision); - } else { - // set precision to explicitly to have updated precision during transformation - for (size_t i = 0; i < node->get_output_size(); ++i) { - node->set_output_type(i, dataPrecision.precision, node->get_output_partial_shape(i)); - } - } - } - } - - for (const std::shared_ptr& quantizationLayer : subgraph.quantizationLayers) { - context.quantizedFakeQuantizeNames.insert(quantizationLayer->get_friendly_name()); - } - return true; -} - -bool ConcatMultiChannelsTransformation::isPrecisionPreserved(std::shared_ptr) const noexcept { - return true; -} - -void ConcatMultiChannelsTransformation::fillDequantization( - const std::shared_ptr layer, - const std::unordered_map& dequantizationByFakeQuantize, - std::vector& dequantization) const { - const auto fillDqByFakeQuantize = [&](const std::shared_ptr& fq) { - const auto it = dequantizationByFakeQuantize.find(fq->get_friendly_name()); - if (it == dequantizationByFakeQuantize.end()) { - THROW_IE_LPT_EXCEPTION(*fq) << "dequantization scale values are not found"; - } - - const FakeQuantizeDequantization& fakeQuantizeDequantization = it->second; - dequantization.push_back(fakeQuantizeDequantization); - }; - - if (is_type(layer)) { - fillDqByFakeQuantize(layer); - } else { - for (size_t i = 0; i < layer->get_input_size(); ++i) { - std::shared_ptr parent = layer->get_input_node_shared_ptr(i); - if (as_type_ptr(parent)) { - continue; - } - - const auto fakeQuantize = ngraph::as_type_ptr(parent); - if (fakeQuantize) { - fillDqByFakeQuantize(fakeQuantize); - } else { - const auto concat = ngraph::as_type_ptr(parent); - if (concat) { - std::vector dequantizationToConcatenate; - fillDequantization(concat, 
dequantizationByFakeQuantize, dequantizationToConcatenate); - - // add concatenated dequantization operations to dequantization collection - dequantization.push_back(getConcatenatedDequantization(concat, dequantizationToConcatenate)); - } else { - const size_t sourceOutputIdx = NetworkHelper::getParentOutputIndex(parent, layer); - if (parent->get_input_partial_shape(0)[1] != parent->get_output_partial_shape(sourceOutputIdx)[1]) { - std::vector dequantizationToPropagate; - fillDequantization(parent, dequantizationByFakeQuantize, dequantizationToPropagate); - - // add folded dequantization operations to dequantization colection - dequantization.push_back(getFoldedDequantization(parent, dequantizationToPropagate[0], sourceOutputIdx)); - } else { - fillDequantization(parent, dequantizationByFakeQuantize, dequantization); - } - } - } - } - } -} - -FakeQuantizeDequantization ConcatMultiChannelsTransformation::getConcatenatedDequantization( - const std::shared_ptr concat, - const std::vector& dequantization) const { - NodeVector convertNodes; - NodeVector subtractNodes; - NodeVector multiplyNodes; - - // forming nodes for concatenation - fillDequantizationNodes(dequantization, concat, convertNodes, subtractNodes, multiplyNodes); - - std::shared_ptr parent = concat; - std::shared_ptr convert; - if (!convertNodes.empty()) { - convert = as_type_ptr(dequantization[0].convert->clone_with_new_inputs({ parent })); - parent = convert; - } - - std::shared_ptr subtract; - std::shared_ptr subConst; - if (!subtractNodes.empty()) { - subConst = as_type_ptr(concatenateDeqNodes(subtractNodes)); - subtract = std::make_shared(parent, subConst); - parent = subtract; - } - - std::shared_ptr multiply; - std::shared_ptr mulConst; - if (!multiplyNodes.empty()) { - mulConst = as_type_ptr(concatenateDeqNodes(multiplyNodes)); - multiply = std::make_shared(parent, mulConst); - } - - return FakeQuantizeDequantization(concat, convert, subtract, nullptr, subConst, multiply, mulConst); -} - -FakeQuantizeDequantization ConcatMultiChannelsTransformation::getFoldedDequantization( - const std::shared_ptr operation, - const FakeQuantizeDequantization& dequantization, - const size_t sourceOutputIdx) { - OutputVector inputs = operation->input_values(); - OutputVector outputs(operation->get_output_size()); - Output data = operation->output(sourceOutputIdx); - - std::shared_ptr parent = operation; - std::shared_ptr convert; - if (dequantization.convert) { - convert = as_type_ptr(dequantization.convert->clone_with_new_inputs({ data })); - parent = convert; - } - - std::shared_ptr subtract; - std::shared_ptr subConst; - if (dequantization.subtract) { - subConst = NetworkHelper::foldDequantizationConstant(dequantization.subtractConstant, operation, sourceOutputIdx); - subtract = std::make_shared(parent, subConst); - parent = subtract; - } - - std::shared_ptr multiply; - std::shared_ptr mulConst; - if (dequantization.multiply) { - mulConst = NetworkHelper::foldDequantizationConstant(dequantization.multiplyConstant, operation, sourceOutputIdx); - multiply = std::make_shared(parent, mulConst); - } - - return FakeQuantizeDequantization(data, convert, subtract, nullptr, subConst, multiply, mulConst); -} - -} // namespace low_precision -} // namespace pass -} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/src/convert.cpp b/inference-engine/src/low_precision_transformations/src/convert.cpp index 19bcce50e8c8a6..e96fc4820c77e3 100644 --- a/inference-engine/src/low_precision_transformations/src/convert.cpp +++ 
b/inference-engine/src/low_precision_transformations/src/convert.cpp @@ -11,6 +11,7 @@ #include #include +#include #include "low_precision/common/ie_lpt_exception.hpp" #include "low_precision/network_helper.hpp" @@ -18,11 +19,24 @@ namespace ngraph { namespace pass { namespace low_precision { -void ConvertTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addSingleNodePattern(pass, context); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ConvertTransformation, "ConvertTransformation", 0); + +ConvertTransformation::ConvertTransformation(const Params& params) : LayerTransformation(params) { + auto matcher = pattern::wrap_type(); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "ConvertTransformation"); + this->register_matcher(m, callback); } -bool ConvertTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool ConvertTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { std::shared_ptr convert = as_type_ptr(m.get_match_root()); if (!convert) { return false; diff --git a/inference-engine/src/low_precision_transformations/src/convolution.cpp b/inference-engine/src/low_precision_transformations/src/convolution.cpp index 1dc4c42b476f34..889315678e9704 100644 --- a/inference-engine/src/low_precision_transformations/src/convolution.cpp +++ b/inference-engine/src/low_precision_transformations/src/convolution.cpp @@ -10,6 +10,8 @@ #include #include +#include +#include #include "low_precision/network_helper.hpp" #include "low_precision/common/dequantization_op.hpp" @@ -17,28 +19,39 @@ namespace ngraph { namespace pass { namespace low_precision { -ConvolutionTransformation::ConvolutionTransformation(const Params& params) : WeightableLayerTransformation(params) { -} +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ConvolutionTransformation, "ConvolutionTransformation", 0); -void ConvolutionTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); +ConvolutionTransformation::ConvolutionTransformation(const Params& params) : WeightableLayerTransformation(params) { + auto matcher = ngraph::pattern::wrap_type({ + ngraph::pattern::wrap_type(), + std::make_shared(OutputVector { + pattern::wrap_type(), + pattern::wrap_type() + }) + }); + + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); + auto m = std::make_shared(matcher, "ConvolutionTransformation"); + this->register_matcher(m, callback); } -bool ConvolutionTransformation::isQuantized(std::shared_ptr layer) const noexcept { - return WeightableLayerTransformation::isQuantized(layer, false); +bool ConvolutionTransformation::isQuantized(const std::shared_ptr& layer) const noexcept { + return ConvolutionTransformation::isQuantizedStatic(layer); } +bool ConvolutionTransformation::isQuantizedStatic(const std::shared_ptr& layer) noexcept { + return WeightableLayerTransformation::isQuantizedStatic(layer, false); +} - -bool 
ConvolutionTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) const { +bool ConvolutionTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) { auto convolution = m.get_match_root(); if (!canConvolutionBeTransformed(context, convolution)) { @@ -150,7 +163,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph reducedConstant->cast_vector()[0]); } - const auto copyNode = convolution->copy_with_new_inputs({ dequantization.multiply->input_value(0), convolution->input_value(1) }); + const auto copyNode = convolution->clone_with_new_inputs({ dequantization.multiply->input_value(0), convolution->input_value(1) }); auto conv = as_type_ptr(copyNode); std::shared_ptr relaxedNewConvolution; if (conv) { @@ -164,6 +177,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph std::vector{deqPrecision, deqPrecision}, std::vector{deqPrecision}); } + NetworkHelper::copyInfo(convolution, relaxedNewConvolution); std::shared_ptr newMultiplyAfter = std::make_shared>( std::vector{ deqPrecision, deqPrecision }, @@ -179,12 +193,18 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph convolution->get_input_node_ptr(0)->get_input_source_output(0), convolution->input_value(1)}); replace_node(convolution, newConvolution); + NetworkHelper::copyInfo(convolution, newConvolution); convolution = newConvolution; } } { - decomposeFakeQuantizeForWeightsPath(convolution); + const bool decomposed = decomposeFakeQuantizeForWeightsPath(convolution); + assert((updatePrecisions && decomposed) || (!updatePrecisions)); + if (!updatePrecisions && !decomposed) { + // TODO: LPT: issue #58685 + return false; + } std::shared_ptr reshapeFromWeights = as_type_ptr(convolution->input_value(1).get_node_shared_ptr()); @@ -218,13 +238,16 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph reshapeFromWeights->input_value(1) })); } + auto newConvolution = convolution->clone_with_new_inputs({ + convolution->input_value(0), + reshapeFromWeights != nullptr ? + reshapeFromWeights : + multiplyFromWeights->input_value(0) + }); + NetworkHelper::copyInfo(convolution, newConvolution); + auto newMultiplyAfter = std::make_shared( - convolution->copy_with_new_inputs({ - convolution->input_value(0), - reshapeFromWeights != nullptr ? 
- reshapeFromWeights : - multiplyFromWeights->input_value(0) - }), + newConvolution, foldConvert( fold_reshape( multiplyFromWeights->input_value(1), @@ -270,6 +293,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph convolution->get_input_node_ptr(1)->input_value(0) : childNode->copy_with_new_inputs({convertFromWeights->input_value(0), childNode->input_value(1)})}); replace_node(convolution, newConvolution); + NetworkHelper::copyInfo(convolution, newConvolution); convolution = newConvolution; } diff --git a/inference-engine/src/low_precision_transformations/src/convolution_backprop_data.cpp b/inference-engine/src/low_precision_transformations/src/convolution_backprop_data.cpp index a73ee1de155781..54e010d3a84a7b 100644 --- a/inference-engine/src/low_precision_transformations/src/convolution_backprop_data.cpp +++ b/inference-engine/src/low_precision_transformations/src/convolution_backprop_data.cpp @@ -10,6 +10,8 @@ #include #include +#include +#include #include "low_precision/network_helper.hpp" #include "low_precision/common/dequantization_op.hpp" @@ -18,41 +20,48 @@ namespace pass { namespace low_precision { ConvolutionBackpropDataTransformation::ConvolutionBackpropDataTransformation(const Params& params) : WeightableLayerTransformation(params) { + auto matcher = std::make_shared(OutputVector{ + pattern::wrap_type({ + pattern::wrap_type(), + pattern::wrap_type() + }), + ngraph::pattern::wrap_type({ + pattern::wrap_type(), + pattern::wrap_type() + }), + ngraph::pattern::wrap_type({ + pattern::wrap_type(), + pattern::wrap_type(), + pattern::wrap_type() + }), + ngraph::pattern::wrap_type({ + pattern::wrap_type(), + pattern::wrap_type(), + pattern::wrap_type() + }), + }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "ConvolutionBackpropDataTransformation"); + this->register_matcher(m, callback); } -void ConvolutionBackpropDataTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); - addPattern( - pass, - context, - make_op_pattern( - { make_op_label(), make_op_label(), make_op_label() })); - addPattern( - pass, - context, - make_op_pattern( - { make_op_label(), make_op_label(), make_op_label() })); +bool ConvolutionBackpropDataTransformation::isQuantized(const std::shared_ptr& layer) const noexcept { + return ConvolutionBackpropDataTransformation::isQuantizedStatic(layer); } -bool ConvolutionBackpropDataTransformation::isQuantized(std::shared_ptr layer) const noexcept { - if (deconvolutionSpecificChannelsRatio) { - size_t inputChannels = layer->get_input_shape(0)[1]; - size_t outputChannels = layer->get_output_shape(0)[1]; - if (inputChannels % 4 != 0 || outputChannels % 16 != 0) { - return false; - } - } - return WeightableLayerTransformation::isQuantized(layer, false); +bool ConvolutionBackpropDataTransformation::isQuantizedStatic(const std::shared_ptr& layer) noexcept { + return WeightableLayerTransformation::isQuantizedStatic(layer, false); } -bool ConvolutionBackpropDataTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) const { +bool ConvolutionBackpropDataTransformation::transform(TransformationContext 
&context, ngraph::pattern::Matcher &m) { auto convolutionBackpropData = m.get_match_root(); if (!canBeTransformed(context, convolutionBackpropData)) { @@ -63,7 +72,15 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con NetworkHelper::getDequantization(reshapeFromWeights); if (dequantization.empty()) { const auto fqOnWeights = getFakeQuantizeOnWeights(convolutionBackpropData); - std::shared_ptr resultConstant = NetworkHelper::fold_fake_quantize(fqOnWeights); + auto constantShape = fqOnWeights->input(1).get_partial_shape(); + if (constantShape.is_dynamic() || constantShape.rank().is_dynamic()) { + return false; + } + + std::shared_ptr resultConstant = NetworkHelper::fold_fake_quantize( + fqOnWeights, + false, + (constantShape.rank().get_length() < 2) || constantShape[1] != 1ul ? 1ul : 0ul); if (reshapeFromWeights != nullptr) { resultConstant = fold_reshape( resultConstant, @@ -198,18 +215,11 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con rt["DISABLED_CONSTANT_FOLDING"] = std::make_shared>(""); } + return true; } bool ConvolutionBackpropDataTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr op) const { - if (deconvolutionSpecificChannelsRatio) { - size_t inputChannels = op->get_input_shape(0)[1]; - size_t outputChannels = op->get_output_shape(0)[1]; - if (inputChannels % 4 != 0 || outputChannels % 16 != 0) { - return false; - } - } - return canConvolutionBeTransformed(context, op); } diff --git a/inference-engine/src/low_precision_transformations/src/create_precisions_dependent_attribute.cpp b/inference-engine/src/low_precision_transformations/src/create_precisions_dependent_attribute.cpp new file mode 100644 index 00000000000000..7ddd060b06dc6d --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/create_precisions_dependent_attribute.cpp @@ -0,0 +1,22 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/create_precisions_dependent_attribute.hpp" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include "low_precision/rt_info/precisions_attribute.hpp" +#include "low_precision/rt_info/precision_preserved_attribute.hpp" +#include "low_precision/network_helper.hpp" + +using namespace ngraph; +using namespace ngraph::pass::low_precision; diff --git a/inference-engine/src/low_precision_transformations/src/depth_to_space.cpp b/inference-engine/src/low_precision_transformations/src/depth_to_space.cpp index c004d0ca59f92a..09d3b6fac17e33 100644 --- a/inference-engine/src/low_precision_transformations/src/depth_to_space.cpp +++ b/inference-engine/src/low_precision_transformations/src/depth_to_space.cpp @@ -4,25 +4,32 @@ #include "low_precision/depth_to_space.hpp" -#include #include -#include -#include - +#include #include "low_precision/network_helper.hpp" using namespace ngraph; using namespace ngraph::pass; using namespace ngraph::pass::low_precision; -void DepthToSpaceTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const { - addPattern( - pass, - context, - make_op_pattern({ make_op_label() })); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::DepthToSpaceTransformation, "DepthToSpaceTransformation", 0); + +DepthToSpaceTransformation::DepthToSpaceTransformation(const Params& params) : TransparentBaseTransformation(params) { + auto matcher = pattern::wrap_type({ pattern::wrap_type() }); + + ngraph::graph_rewrite_callback 
callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "DepthToSpaceTransformation"); + this->register_matcher(m, callback); } -bool DepthToSpaceTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) const { +bool DepthToSpaceTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) { std::shared_ptr depthToSpace = m.get_match_root(); if (!canBeTransformed(context, depthToSpace)) { return false; diff --git a/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp b/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp index 90aeb5aabe8bc2..93e6aa813c1cbb 100644 --- a/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp +++ b/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include "low_precision/network_helper.hpp" @@ -14,11 +15,25 @@ namespace ngraph { namespace pass { namespace low_precision { -void FakeQuantizeTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const { - addSingleNodePattern(pass, context); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::FakeQuantizeTransformation, "FakeQuantizeTransformation", 0); + +FakeQuantizeTransformation::FakeQuantizeTransformation(const Params& params) : LayerTransformation(params) { + auto matcher = pattern::wrap_type(); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "FakeQuantizeTransformation"); + this->register_matcher(m, callback); } -bool FakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool FakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { std::shared_ptr layer = std::dynamic_pointer_cast(m.get_match_root()); if (!QuantizationDetails::outputLayoutIsSupported(layer)) { return false; @@ -28,13 +43,14 @@ bool FakeQuantizeTransformation::transform(TransformationContext& context, ngrap return false; } + bool wasHandled = false; std::shared_ptr fakeQuantize = layer; do { - layer = fakeQuantize; - fakeQuantize = fuseElementwise(context, fakeQuantize); + fakeQuantize = fuseElementwise(context, this, fakeQuantize); + wasHandled = wasHandled || (fakeQuantize != nullptr); } while (fakeQuantize != nullptr); - return true; + return wasHandled; } namespace fq { @@ -110,6 +126,7 @@ bool FakeQuantizeTransformation::checkElementwise(const std::shared_ptr& e std::shared_ptr FakeQuantizeTransformation::fuseElementwise( TransformationContext& context, + MatcherPass* matcherPass, const std::shared_ptr& fakeQuantize) const { const std::shared_ptr eltwise = fakeQuantize->get_input_node_shared_ptr(0); @@ -172,6 +189,7 @@ std::shared_ptr FakeQuantizeTransformation::fuseElementwis const auto data = fq::getData(eltwise); const size_t outputIdx = NetworkHelper::getParentOutputIndex(data, eltwise); + std::shared_ptr newFakeQuantize = as_type_ptr(fakeQuantize->clone_with_new_inputs({ data->output(outputIdx), inputLowConst_f32, @@ -179,6 +197,8 @@ std::shared_ptr FakeQuantizeTransformation::fuseElementwis foldConvert(fakeQuantize->input_value(3), deqPrecision), 
foldConvert(fakeQuantize->input_value(4), deqPrecision) })); + matcherPass->register_new_node(newFakeQuantize); + replace_node(fakeQuantize, newFakeQuantize); ngraph::copy_runtime_info({ fakeQuantize, eltwise }, newFakeQuantize); newFakeQuantize->set_friendly_name(fakeQuantize->get_friendly_name()); diff --git a/inference-engine/src/low_precision_transformations/src/fake_quantize_decomposition.cpp b/inference-engine/src/low_precision_transformations/src/fake_quantize_decomposition.cpp index b9d491238aac98..b522546c55e342 100644 --- a/inference-engine/src/low_precision_transformations/src/fake_quantize_decomposition.cpp +++ b/inference-engine/src/low_precision_transformations/src/fake_quantize_decomposition.cpp @@ -6,20 +6,252 @@ #include #include +#include +#include #include "low_precision/common/ie_lpt_exception.hpp" +#include "low_precision/rt_info/precisions_attribute.hpp" +#include "low_precision/rt_info/intervals_alignment_attribute.hpp" +#include "low_precision/rt_info/quantization_alignment_attribute.hpp" #include "low_precision/network_helper.hpp" namespace ngraph { namespace pass { namespace low_precision { -void FakeQuantizeDecompositionTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const { - addSingleNodePattern(pass, context); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::FakeQuantizeDecompositionTransformation, "FakeQuantizeDecompositionTransformation", 0); + +FakeQuantizeDecompositionTransformation::FakeQuantizeDecompositionTransformation(const Params& params) : LayerTransformation(params) { + auto matcher = pattern::wrap_type(); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "FakeQuantizeDecompositionTransformation"); + this->register_matcher(m, callback); +} + +namespace fq_decomposition { + +// get precision details, depends on: +// 1. FakeQuantize operation parameters (QuantizationDetails::getDetails & LayerTransformation::getPrecisionDetails) +// 2. Precisions on port +DataPrecision getDataPrecisionByOutputPortAndFakeQuantize(std::shared_ptr layer) { + const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(layer); + auto precisionsAttribute = getAttributeFromOutput>(layer->output(0)); + if (precisionsAttribute == nullptr) { + // TODO: explore this case in more details: + // 1. we should not be here + assert(true); + + // 2. 
not possible to get optimal precision by decomposed FakeQuantize + LayerTransformation::PrecisionDetails precisionDetailsAtOutputIntervals = LayerTransformation::getPrecisionDetails(quantizationDetails); + return DataPrecision( + precisionDetailsAtOutputIntervals.precision, + DataPrecision::getMinValue(precisionDetailsAtOutputIntervals.precision, quantizationDetails.levels), + DataPrecision::getMaxValue(precisionDetailsAtOutputIntervals.precision, quantizationDetails.levels), + precisionDetailsAtOutputIntervals.hasZeroPoint); + } + + const auto& precisions = precisionsAttribute->get()->sharedValue->precisions; + + ngraph::element::Type precision; + bool hasZeroPoint; + if (precisions.size() > 1ul) { + LayerTransformation::PrecisionDetails precisionDetailsAtOutputIntervals = LayerTransformation::getPrecisionDetails(quantizationDetails); + const auto foundIt = std::find(precisions.begin(), precisions.end(), precisionDetailsAtOutputIntervals.precision); + + if (foundIt == precisions.end()) { + precision = *precisions.begin(); + hasZeroPoint = true; + } else { + precision = precisionDetailsAtOutputIntervals.precision; + hasZeroPoint = precisionDetailsAtOutputIntervals.hasZeroPoint; + } + + // update shared attribute to affect all operations in subgraph + precisionsAttribute->get()->sharedValue->precisions = { precision }; + } else { + // use only available precision + precision = *precisions.begin(); + LayerTransformation::PrecisionDetails precisionDetailsAtOutputIntervals = LayerTransformation::getPrecisionDetails(quantizationDetails); + hasZeroPoint = precisionDetailsAtOutputIntervals.precision != precision; + } + + return DataPrecision( + precision, + DataPrecision::getMinValue(precision, quantizationDetails.levels), + DataPrecision::getMaxValue(precision, quantizationDetails.levels), + hasZeroPoint); +} + +// get precision details, depends on: +// 1. FakeQuantize operation parameters (QuantizationDetails::getDetails & LayerTransformation::getPrecisionDetails) +// 2. Precisions on port +DataPrecision getDataPrecisionByOutputPort(std::shared_ptr layer) { + const size_t levels = layer->get_levels(); + const std::vector outputLowValues = as_type_ptr(layer->get_input_node_shared_ptr(3))->cast_vector(); + const std::vector outputHighValues = as_type_ptr(layer->get_input_node_shared_ptr(4))->cast_vector(); + + auto precisionsAttribute = getAttributeFromOutput>(layer->output(0)); + if (precisionsAttribute == nullptr) { + // TODO: explore this case in more details: + // 1. we should not be here + assert(true); + + // 2. 
not possible to get optimal precision by decomposed FakeQuantize + LayerTransformation::PrecisionDetails precisionDetailsAtOutputIntervals = LayerTransformation::getPrecisionDetails( + levels, + outputLowValues, + outputHighValues); + + return DataPrecision( + precisionDetailsAtOutputIntervals.precision, + DataPrecision::getMinValue(precisionDetailsAtOutputIntervals.precision, levels), + DataPrecision::getMaxValue(precisionDetailsAtOutputIntervals.precision, levels), + precisionDetailsAtOutputIntervals.hasZeroPoint); + } + + const auto& precisions = precisionsAttribute->get()->sharedValue->precisions; + + ngraph::element::Type precision; + bool hasZeroPoint; + if (precisions.size() > 1ul) { + LayerTransformation::PrecisionDetails precisionDetailsAtOutputIntervals = LayerTransformation::getPrecisionDetails( + levels, + outputLowValues, + outputHighValues); + const auto foundIt = std::find(precisions.begin(), precisions.end(), precisionDetailsAtOutputIntervals.precision); + + if (foundIt == precisions.end()) { + precision = *precisions.begin(); + hasZeroPoint = true; + } else { + precision = precisionDetailsAtOutputIntervals.precision; + hasZeroPoint = precisionDetailsAtOutputIntervals.hasZeroPoint; + } + + // update shared attribute to affect all operations in subgraph + precisionsAttribute->get()->sharedValue->precisions = { precision }; + } else { + // use only available precision + precision = *precisions.begin(); + LayerTransformation::PrecisionDetails precisionDetailsAtOutputIntervals = LayerTransformation::getPrecisionDetails( + levels, + outputLowValues, + outputHighValues); + hasZeroPoint = precisionDetailsAtOutputIntervals.precision != precision; + } + + return DataPrecision( + precision, + DataPrecision::getMinValue(precision, levels), + DataPrecision::getMaxValue(precision, levels), + hasZeroPoint); +} + +// TODO: LPT: refactor: use one way to decompose FakeQuantize +std::shared_ptr decomposeFakeQuantize( + MatcherPass* matcherPass, + std::shared_ptr& layer, + const std::shared_ptr& intervalsAlignment, + const DataPrecision& dataPrecision, + const bool updatePrecisions, + const element::Type deqPrecision) { + std::shared_ptr dequantize; + if (intervalsAlignment != nullptr) { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "decomposeFakeQuantize1"); + const std::vector outputLowValues = as_type_ptr(layer->get_input_node_shared_ptr(3))->cast_vector(); + const std::vector outputHighValues = as_type_ptr(layer->get_input_node_shared_ptr(4))->cast_vector(); + + float dequantizationMul; + float dequantizationSub; + float updatedOutputLowValue; + float updatedOutputHighValue; + const size_t levels = NetworkHelper::calculateLevels( + dataPrecision.min, + dataPrecision.max, + intervalsAlignment->sharedValue->combinedInterval.low, + intervalsAlignment->sharedValue->combinedInterval.high, + outputLowValues[0], + outputHighValues[0], + dequantizationMul, + dequantizationSub, + updatedOutputLowValue, + updatedOutputHighValue); + + if ((updatePrecisions == false) && (dequantizationMul == 1.f) && (dequantizationSub == 0.f)) { + return nullptr; + } + + //TODO: pass min levels as a parameter? + if (levels < 2ul) { + return nullptr; + } + + // 2. update FakeQuantize - one time action + std::shared_ptr newFakeQuantizeLayer = ngraph::pass::low_precision::NetworkHelper::updateFakeQuantize( + layer, + updatePrecisions ? 
dataPrecision.precision : layer->get_output_element_type(0), + roundf(updatedOutputLowValue), + roundf(updatedOutputHighValue), + false); + matcherPass->register_new_node(newFakeQuantizeLayer); + newFakeQuantizeLayer->set_levels(levels); + + auto dequantization = ngraph::pass::low_precision::NetworkHelper::makeDequantization( + dequantizationMul, + dequantizationSub, + layer->get_output_element_type(0), + layer->get_output_partial_shape(0), + updatePrecisions ? dataPrecision.precision : layer->get_output_element_type(0), + deqPrecision, + newFakeQuantizeLayer); + + replace_node(layer, dequantization.multiply); + + std::vector> sourceNodes{ layer }; + std::vector> targetNodes{ newFakeQuantizeLayer, dequantization.multiply }; + if (dequantization.convert != nullptr) { + targetNodes.push_back(dequantization.convert); + } + if (dequantization.subtract != nullptr) { + targetNodes.push_back(dequantization.subtract); + } + NetworkHelper::copyInfo(sourceNodes, targetNodes); + + dequantize = dequantization.multiply; + } else { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "decomposeFakeQuantize2"); + // Split FakeQuantize to two parts: Quantize and Dequantize + auto QDQ = NetworkHelper::decomposeFakeQuantize( + as_type_ptr(layer), + dataPrecision.precision, + dataPrecision.min, + dataPrecision.max, + dataPrecision.hasZeroPoint, + updatePrecisions); + + const auto newFakeQuantize = std::get<0>(QDQ); + if (newFakeQuantize == nullptr) { + return nullptr; + } + matcherPass->register_new_node(newFakeQuantize); + dequantize = std::get<1>(QDQ); + } + + return dequantize; } -bool FakeQuantizeDecompositionTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { - std::shared_ptr layer = std::dynamic_pointer_cast(m.get_match_root()); +} // namespace fq_decomposition + +bool FakeQuantizeDecompositionTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher& m) { + auto layer = as_type_ptr(m.get_match_root()); if (!NetworkHelper::isQuantizeSupported(layer)) { return false; } @@ -30,59 +262,24 @@ bool FakeQuantizeDecompositionTransformation::transform(TransformationContext& c layer = NetworkHelper::fuseConvert(layer); if (NetworkHelper::isConstantPath(layer)) { - // fold fq if constant just before fq and child layers aren't supported in LPT - if (as_type(layer->get_input_node_ptr(0))) { - bool nextOpearionsWillBeNotHandled = true; - for (auto output : layer->outputs()) { - for (auto input : output.get_target_inputs()) { - const auto node = input.get_node(); - - if (as_type(node)) { - for (const auto& child : NetworkHelper::consumers(node->shared_from_this())) { - if ((as_type_ptr(child)) && - (paramsManager->getPrecisionsOnActivations(*child).size() != 0ul)) { - nextOpearionsWillBeNotHandled = false; - break; - } - } - } - - if (paramsManager->getPrecisionsOnActivations(*input.get_node()).size() != 0ul) { - nextOpearionsWillBeNotHandled = false; - break; - } - } - - if (!nextOpearionsWillBeNotHandled) { - break; - } - } + return false; + } - if (nextOpearionsWillBeNotHandled) { - const std::shared_ptr resultConstant = NetworkHelper::fold_fake_quantize(layer); - if (as_type_ptr(resultConstant)) { - replace_node(layer, resultConstant); - return true; - } - } - } + auto attribute = getAttributeFromOutput>(layer->output(0)); + if ((attribute == nullptr) || (attribute->get()->sharedValue->precisions.empty())) { return false; } - const ngraph::element::Type precision = layer->get_output_element_type(0); - if (DataPrecision::isSupported(precision)) 
{ - const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(layer); + const ngraph::element::Type outputPrecision = layer->get_output_element_type(0); + if (DataPrecision::isSupported(outputPrecision)) { const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantizationBelow(layer); if (dequantization.empty()) { return false; } - const DataPrecision expectedDataPrecision = getDataPrecision(dequantization.multiply, quantizationDetails, false); - if (expectedDataPrecision.precision == element::undefined) { - return false; - } - - if (expectedDataPrecision.precision == precision) { + const DataPrecision expectedDataPrecision = fq_decomposition::getDataPrecisionByOutputPortAndFakeQuantize(layer); + // TODO: need test to compose FakeQuantize + if ((expectedDataPrecision.precision == element::undefined) || (expectedDataPrecision.precision == outputPrecision)) { return false; } @@ -92,76 +289,122 @@ bool FakeQuantizeDecompositionTransformation::transform(TransformationContext& c } } - if (as_type(layer->get_input_node_ptr(0))) { - bool nextOpearionsWillBeNotHandled = true; - for (auto output : layer->outputs()) { - for (auto input : output.get_target_inputs()) { - auto activations = paramsManager->getPrecisionsOnActivations(*input.get_node()); - if (paramsManager->getPrecisionsOnActivations(*input.get_node()).size() != 0ul) { - nextOpearionsWillBeNotHandled = false; - break; - } - } + if (!QuantizationDetails::outputLayoutIsSupported(layer)) { + return false; + } - if (!nextOpearionsWillBeNotHandled) { - break; - } + if (!QuantizationDetails::isSupportedLevel(layer->get_levels())) { + return false; + } + + DataPrecision dataPrecision = fq_decomposition::getDataPrecisionByOutputPort(layer); + + std::shared_ptr precisionsAttribute; + { + // TODO: LPT: return attribute (not wrapper) + auto attributeWrapper = getAttributeFromOutput>(layer->output(0)); + if (attributeWrapper == nullptr) { + THROW_IE_LPT_EXCEPTION(*layer) << "PrecisionAttribute is absent"; } + precisionsAttribute = attributeWrapper->get(); + if (precisionsAttribute == nullptr) { + THROW_IE_LPT_EXCEPTION(*layer) << "PrecisionAttribute is absent"; + } + } - if (nextOpearionsWillBeNotHandled) { - const std::shared_ptr resultConstant = NetworkHelper::fold_fake_quantize(layer); - if (as_type_ptr(resultConstant)) { - replace_node(layer, resultConstant); - return true; + std::shared_ptr quantizationAlignment; + for (const auto& input : layer->output(0).get_target_inputs()) { + const auto alignmentValueWrapper = low_precision::getAttribute>(input.get_node()->shared_from_this()); + if (alignmentValueWrapper != nullptr) { + quantizationAlignment = alignmentValueWrapper->get(); + if (quantizationAlignment->sharedValue->value) { + break; } } } - if (!QuantizationDetails::outputLayoutIsSupported(layer)) { - return false; + std::shared_ptr intervalsAlignment; + { + if ((quantizationAlignment != nullptr) && quantizationAlignment->sharedValue->value) { + auto intervalsAlignmentWrapper = low_precision::getAttribute>(layer); + if (intervalsAlignmentWrapper != nullptr) { + intervalsAlignment = intervalsAlignmentWrapper->get(); + } + } } - if (!QuantizationDetails::isSupportedLevel(layer->get_levels())) { + // FakeQuantize operations are combined in supported cascade (per tensor quantization) + if ((intervalsAlignment != nullptr) && (intervalsAlignment->sharedValue->minLevels <= 2ul)) { return false; } - const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(layer); - const DataPrecision 
dataPrecision = getDataPrecision(layer, quantizationDetails, false); + // if IntervalsAlignment attribute is defined then, the attribute defines decomposition parameters, + // if IntervalsAlignment attribute is not defined, then FakeQuantize operation intervals define decomposition parameters if (dataPrecision.precision == element::undefined) { - return false; - } + element::Type precision; + const auto levels = layer->get_levels(); + const std::vector outputLowValues = as_type_ptr(layer->get_input_node_shared_ptr(3))->cast_vector(); + const std::vector outputHighValues = as_type_ptr(layer->get_input_node_shared_ptr(4))->cast_vector(); + if (intervalsAlignment == nullptr) { + // define precision by FakeQuantize intervals + LayerTransformation::PrecisionDetails precisionDetailsAtOutputIntervals = LayerTransformation::getPrecisionDetails( + levels, + outputLowValues, + outputHighValues); + const auto foundIt = std::find( + precisionsAttribute->sharedValue->precisions.begin(), + precisionsAttribute->sharedValue->precisions.end(), + precisionDetailsAtOutputIntervals.precision); - // Split FakeQuantize to two parts: Quantize and Dequantize - auto QDQ = NetworkHelper::decomposeFakeQuantize( - as_type_ptr(layer), - dataPrecision.precision, - dataPrecision.min, - dataPrecision.max, - dataPrecision.hasZeroPoint, - updatePrecisions); + bool hasZeroPoint; + if (foundIt == precisionsAttribute->sharedValue->precisions.end()) { + precision = *precisionsAttribute->sharedValue->precisions.begin(); + hasZeroPoint = true; + } else { + precision = precisionDetailsAtOutputIntervals.precision; + hasZeroPoint = precisionDetailsAtOutputIntervals.hasZeroPoint; + } -#ifdef LPT_PRINT_DEQUANTIZATION_INFO - { - const std::shared_ptr multiply = as_type_ptr(std::get<1>(QDQ)); - const std::shared_ptr multiplyConst = as_type_ptr(multiply->get_input_node_shared_ptr(1)); - const std::vector dequantizationScales = multiplyConst->cast_vector(); - - const std::shared_ptr subtract = as_type_ptr(multiply->get_input_node_shared_ptr(0)); - std::vector dequantizationShifts; - if (subtract != nullptr) { - const std::shared_ptr subtractConst = as_type_ptr(subtract->get_input_node_shared_ptr(1)); - dequantizationShifts = subtractConst->cast_vector(); + dataPrecision = DataPrecision( + precision, + DataPrecision::getMinValue(precision, levels), + DataPrecision::getMaxValue(precision, levels), + hasZeroPoint); } else { - dequantizationShifts = std::vector(dequantizationScales.size()); + // define precision by attribute + if (intervalsAlignment->sharedValue->preferablePrecisions.empty()) { + // TODO: LPT: add user defined preferredPrecision + precision = *precisionsAttribute->sharedValue->precisions.begin(); + } else { + // TODO: LPT: add user defined preferredPrecision + precision = *intervalsAlignment->sharedValue->preferablePrecisions.begin(); + } + + dataPrecision = DataPrecision( + precision, + DataPrecision::getMinValue(precision, levels), + DataPrecision::getMaxValue(precision, levels), + LayerTransformation::getPrecisionDetails(levels, outputLowValues, outputHighValues).precision != precision); } + } - printDequantizationValues(dequantizationScales, dequantizationShifts); + std::shared_ptr dequantize = fq_decomposition::decomposeFakeQuantize( + this, + layer, + intervalsAlignment, + dataPrecision, + updatePrecisions, + deqPrecision); + if (dequantize == nullptr) { + return false; } -#endif - std::shared_ptr dequantize = std::get<1>(QDQ); updateOutput(context, dequantize, layer); + if 
(precisionsAttribute->sharedValue->precisions.size() != 1ul) { + precisionsAttribute->sharedValue->precisions = { dataPrecision.precision }; + } + return true; } diff --git a/inference-engine/src/low_precision_transformations/src/fold_convert.cpp b/inference-engine/src/low_precision_transformations/src/fold_convert.cpp index 091380442b8244..5e673a1ef512f4 100644 --- a/inference-engine/src/low_precision_transformations/src/fold_convert.cpp +++ b/inference-engine/src/low_precision_transformations/src/fold_convert.cpp @@ -5,18 +5,32 @@ #include "low_precision/fold_convert.hpp" #include #include -#include "low_precision/fake_quantize.hpp" +#include + #include "low_precision/network_helper.hpp" namespace ngraph { namespace pass { namespace low_precision { -void FoldConvertTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addSingleNodePattern(pass, context); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::FoldConvertTransformation, "FoldConvertTransformation", 0); + +FoldConvertTransformation::FoldConvertTransformation(const Params& params) : LayerTransformation(params) { + auto subtract = pattern::wrap_type(); + auto matcher = std::make_shared(subtract, "FoldConvertTransformation"); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + this->register_matcher(matcher, callback); } -bool FoldConvertTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool FoldConvertTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { const auto subtract = m.get_match_root(); if (!canBeTransformed(context, subtract)) { return false; diff --git a/inference-engine/src/low_precision_transformations/src/fold_fake_quantize.cpp b/inference-engine/src/low_precision_transformations/src/fold_fake_quantize.cpp new file mode 100644 index 00000000000000..7984d946f865ac --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/fold_fake_quantize.cpp @@ -0,0 +1,72 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/fold_fake_quantize.hpp" + +#include +#include +#include + +#include +#include "low_precision/network_helper.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::FoldFakeQuantizeTransformation, "FoldFakeQuantizeTransformation", 0); + +FoldFakeQuantizeTransformation::FoldFakeQuantizeTransformation(const Params& params) : LayerTransformation(params) { + auto fakeQuantize = pattern::wrap_type(); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(fakeQuantize, "FoldFakeQuantizeTransformation"); + this->register_matcher(m, callback); +} + +bool FoldFakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { + const auto fakeQuantize = as_type_ptr(m.get_match_root()); + if (fakeQuantize == nullptr) { + return false; + } + + if (!canBeTransformed(context, fakeQuantize)) { + return false; + } + + const auto constantShape = fakeQuantize->input(1).get_partial_shape(); + if (constantShape.is_dynamic() || constantShape.rank().is_dynamic()) { + return false; + } + + std::shared_ptr 
resultConstant = NetworkHelper::fold_fake_quantize( + fakeQuantize, + false, + (constantShape.rank().get_length() < 2) || constantShape[1] != 1ul ? 1ul : 0ul); + if (is_type(resultConstant)) { + replace_node(fakeQuantize, resultConstant); + return true; + } + + return false; +} + +bool FoldFakeQuantizeTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr op) const { + return NetworkHelper::isConstantPath(op); +} + +bool FoldFakeQuantizeTransformation::isPrecisionPreserved(std::shared_ptr layer) const noexcept { + return false; +} + +} // namespace low_precision +} // namespace pass +} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/src/fuse_convert.cpp b/inference-engine/src/low_precision_transformations/src/fuse_convert.cpp index 38aa2133940308..48fbea0211946a 100644 --- a/inference-engine/src/low_precision_transformations/src/fuse_convert.cpp +++ b/inference-engine/src/low_precision_transformations/src/fuse_convert.cpp @@ -5,9 +5,11 @@ #include "low_precision/fuse_convert.hpp" #include -#include #include +#include +#include + #include "low_precision/common/ie_lpt_exception.hpp" #include "low_precision/network_helper.hpp" @@ -15,21 +17,25 @@ namespace ngraph { namespace pass { namespace low_precision { -void FuseConvertTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); - - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); - - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::FuseConvertTransformation, "FuseConvertTransformation", 0); + +FuseConvertTransformation::FuseConvertTransformation(const Params& params) : LayerTransformation(params) { + auto multiply = pattern::wrap_type({ pattern::wrap_type(), pattern::wrap_type() }); + auto subtract = pattern::wrap_type({ pattern::wrap_type(), pattern::wrap_type() }); + auto add = pattern::wrap_type({ pattern::wrap_type(), pattern::wrap_type() }); + auto matcher = std::make_shared( + std::make_shared(OutputVector{ multiply, subtract, add }), + "FuseConvertTransformation"); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + this->register_matcher(matcher, callback); } std::shared_ptr removeConvertIfPossibleForSubtract( @@ -50,7 +56,7 @@ std::shared_ptr removeConvertIfPossibleForSubtract( return newSubtract; } -bool FuseConvertTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool FuseConvertTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { const auto op = m.get_match_root(); if (!canBeTransformed(context, op)) { return false; @@ -84,10 +90,13 @@ bool FuseConvertTransformation::transform(TransformationContext& context, ngraph replace_node(op, newOp); } - if (newOp != nullptr) { - ngraph::copy_runtime_info({ convert, op }, newOp); - newOp->set_friendly_name(op->get_friendly_name()); + if (newOp == nullptr) { + return false; } + + ngraph::copy_runtime_info({ convert, op }, newOp); + newOp->set_friendly_name(op->get_friendly_name()); + register_new_node(newOp); } return true; diff --git a/inference-engine/src/low_precision_transformations/src/fuse_fake_quantize.cpp 
b/inference-engine/src/low_precision_transformations/src/fuse_fake_quantize.cpp index 6ef45c0b6cae2c..b15b466b4761c0 100644 --- a/inference-engine/src/low_precision_transformations/src/fuse_fake_quantize.cpp +++ b/inference-engine/src/low_precision_transformations/src/fuse_fake_quantize.cpp @@ -5,6 +5,7 @@ #include "low_precision/fuse_fake_quantize.hpp" #include #include +#include #include "low_precision/common/ie_lpt_exception.hpp" #include "low_precision/network_helper.hpp" @@ -12,11 +13,24 @@ namespace ngraph { namespace pass { namespace low_precision { -void FuseFakeQuantizeTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addSingleNodePattern(pass, context); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::FuseFakeQuantizeTransformation, "FuseFakeQuantizeTransformation", 0); + +FuseFakeQuantizeTransformation::FuseFakeQuantizeTransformation(const Params& params) : LayerTransformation(params) { + auto matcher = pattern::wrap_type(); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "FuseFakeQuantizeTransformation"); + this->register_matcher(m, callback); } -bool FuseFakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool FuseFakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { std::shared_ptr fakeQuantize = as_type_ptr(m.get_match_root()); do { fakeQuantize = handle(context, fakeQuantize); diff --git a/inference-engine/src/low_precision_transformations/src/fuse_multiply_to_fake_quantize.cpp b/inference-engine/src/low_precision_transformations/src/fuse_multiply_to_fake_quantize.cpp index 734d9abec435ec..ccff4188d3a5c1 100644 --- a/inference-engine/src/low_precision_transformations/src/fuse_multiply_to_fake_quantize.cpp +++ b/inference-engine/src/low_precision_transformations/src/fuse_multiply_to_fake_quantize.cpp @@ -5,6 +5,8 @@ #include "low_precision/fuse_multiply_to_fake_quantize.hpp" #include #include +#include +#include "low_precision/rt_info/intervals_alignment_attribute.hpp" #include "low_precision/fake_quantize.hpp" #include "low_precision/network_helper.hpp" @@ -12,11 +14,24 @@ namespace ngraph { namespace pass { namespace low_precision { -void FuseMultiplyToFakeQuantizeTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addSingleNodePattern(pass, context); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::FuseMultiplyToFakeQuantizeTransformation, "FuseMultiplyToFakeQuantizeTransformation", 0); + +FuseMultiplyToFakeQuantizeTransformation::FuseMultiplyToFakeQuantizeTransformation(const Params& params) : LayerTransformation(params) { + auto matcher = pattern::wrap_type(); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "FuseMultiplyToFakeQuantizeTransformation"); + this->register_matcher(m, callback); } -bool FuseMultiplyToFakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool FuseMultiplyToFakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { const auto multiply = m.get_match_root(); if 
(!canBeTransformed(context, multiply)) { return false; @@ -65,6 +80,11 @@ bool FuseMultiplyToFakeQuantizeTransformation::transform(TransformationContext& replace_node(multiply, newFakeQuantize); NetworkHelper::copyInfo(fakeQuantize, newFakeQuantize); + const auto intervalAlignment = getAttribute(fakeQuantize); + if ((intervalAlignment != nullptr) && (intervalAlignment->get()->levels != 0ul)) { + newFakeQuantize->set_levels(intervalAlignment->get()->levels); + } + updateOutput(context, newFakeQuantize, multiply); return true; } diff --git a/inference-engine/src/low_precision_transformations/src/fuse_subtract_to_fake_quantize.cpp b/inference-engine/src/low_precision_transformations/src/fuse_subtract_to_fake_quantize.cpp index 8d8d9968802e44..b8ec9b192fd272 100644 --- a/inference-engine/src/low_precision_transformations/src/fuse_subtract_to_fake_quantize.cpp +++ b/inference-engine/src/low_precision_transformations/src/fuse_subtract_to_fake_quantize.cpp @@ -5,6 +5,7 @@ #include "low_precision/fuse_subtract_to_fake_quantize.hpp" #include #include +#include #include "low_precision/fake_quantize.hpp" #include "low_precision/network_helper.hpp" @@ -12,11 +13,24 @@ namespace ngraph { namespace pass { namespace low_precision { -void FuseSubtractToFakeQuantizeTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addSingleNodePattern(pass, context); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::FuseSubtractToFakeQuantizeTransformation, "FuseSubtractToFakeQuantizeTransformation", 0); + +FuseSubtractToFakeQuantizeTransformation::FuseSubtractToFakeQuantizeTransformation(const Params& params) : LayerTransformation(params) { + auto matcher = pattern::wrap_type(); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "FuseSubtractToFakeQuantizeTransformation"); + this->register_matcher(m, callback); } -bool FuseSubtractToFakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool FuseSubtractToFakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { const auto subtract = m.get_match_root(); if (!canBeTransformed(context, subtract)) { return false; diff --git a/inference-engine/src/low_precision_transformations/src/group_convolution.cpp b/inference-engine/src/low_precision_transformations/src/group_convolution.cpp index 8dd7b0b1ce727e..42d9600d13c7a0 100644 --- a/inference-engine/src/low_precision_transformations/src/group_convolution.cpp +++ b/inference-engine/src/low_precision_transformations/src/group_convolution.cpp @@ -8,24 +8,35 @@ #include #include +#include #include "low_precision/network_helper.hpp" namespace ngraph { namespace pass { namespace low_precision { -GroupConvolutionTransformation::GroupConvolutionTransformation(const Params& params) : ConvolutionTransformation(params) { -} +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::GroupConvolutionTransformation, "GroupConvolutionTransformation", 0); -void GroupConvolutionTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addSingleNodePattern(pass, context); +GroupConvolutionTransformation::GroupConvolutionTransformation(const Params& params) : ConvolutionTransformation(params) { + auto matcher = pattern::wrap_type(); + + ngraph::graph_rewrite_callback callback = 
[this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "GroupConvolutionTransformation"); + this->register_matcher(m, callback); } -bool GroupConvolutionTransformation::isQuantized(std::shared_ptr layer) const noexcept { - return WeightableLayerTransformation::isQuantized(layer, true); +bool GroupConvolutionTransformation::isQuantized(const std::shared_ptr& layer) const noexcept { + return GroupConvolutionTransformation::isQuantizedStatic(layer); } -bool GroupConvolutionTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) const { +bool GroupConvolutionTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) { auto convolution = m.get_match_root(); if (!GroupConvolutionTransformation::canBeTransformed(context, convolution)) { @@ -36,6 +47,10 @@ bool GroupConvolutionTransformation::transform(TransformationContext &context, n return true; } +bool GroupConvolutionTransformation::isQuantizedStatic(const std::shared_ptr& layer) noexcept { + return WeightableLayerTransformation::isQuantizedStatic(layer, true); +} + } // namespace low_precision } // namespace pass } // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/src/interpolate.cpp b/inference-engine/src/low_precision_transformations/src/interpolate.cpp index 66aba3fc7c429f..b8538bfd14b5d1 100644 --- a/inference-engine/src/low_precision_transformations/src/interpolate.cpp +++ b/inference-engine/src/low_precision_transformations/src/interpolate.cpp @@ -9,30 +9,50 @@ #include #include +#include +#include #include "low_precision/network_helper.hpp" using namespace ngraph; using namespace ngraph::pass; using namespace ngraph::pass::low_precision; -void InterpolateTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const { - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label(), - make_op_label(), make_op_label() })); - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label(), - make_op_label() })); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::InterpolateTransformation, "InterpolateTransformation", 0); + +InterpolateTransformation::InterpolateTransformation(const Params& params) : LayerTransformation(params) { + auto mul = pattern::wrap_type(); + + auto interpolate1 = pattern::wrap_type({ + mul, + pattern::wrap_type() }); + + auto interpolate4 = pattern::wrap_type({ + mul, + pattern::wrap_type(), + pattern::wrap_type() }); + + auto interpolate4_2 = pattern::wrap_type({ + mul, + pattern::wrap_type(), + pattern::wrap_type(), + pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto matcher = std::make_shared( + std::make_shared(OutputVector{ interpolate1, interpolate4, interpolate4_2 }), + "InterpolateTransformation"); + + this->register_matcher(matcher, callback); } -bool InterpolateTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) const { +bool InterpolateTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) { std::shared_ptr interpolate = m.get_match_root(); if 
(!canBeTransformed(context, m.get_match_root())) { return false; diff --git a/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp b/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp index d1dc736e536ed4..14d21fa29b67c3 100644 --- a/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp +++ b/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp @@ -25,50 +25,16 @@ const char LayerTransformation::originalLayerPostfix[] = "_original"; LayerTransformation::LayerTransformation(const Params& params) : updatePrecisions(params.updatePrecisions), - quantizedTensorAlignmentOnActivations(params.quantizedTensorAlignmentOnActivations), - quantizedTensorAlignmentOnWeights(params.quantizedTensorAlignmentOnWeights), - supportAsymmetricQuantization(params.supportAsymmetricQuantization), - precisionsOnActivations(params.precisionsOnActivations), - precisionsOnWeights(params.precisionsOnWeights), - deqPrecision(params.deqPrecision), - support3DTensorOnActivations(params.support3DTensorOnActivations), - deconvolutionSpecificChannelsRatio(params.deconvolutionSpecificChannelsRatio), - quantizationIntervalAsymmetryThreshold(0.002f), - zeroThreshold(1.e-6f), - minQuantizationLevels(2ul), - paramsManager(nullptr), - layerTransformationsManager(nullptr) {} - -void LayerTransformation::setParamsManager(IParamsManager* paramsManager) noexcept { - this->paramsManager = paramsManager; -} + deqPrecision(params.deqPrecision) {} -void LayerTransformation::setLayerTransformationsManager(ILayerTransformationsManager* layerTransformationsManager) noexcept { - this->layerTransformationsManager = layerTransformationsManager; +void LayerTransformation::setContext(TransformationContext* context) noexcept { + this->context = context; } void LayerTransformation::setUpdatePrecisions(const bool updatePrecisions) { this->updatePrecisions = updatePrecisions; } -void LayerTransformation::setQuantizedTensorAlignmentOnActivations( - const QuantizedTensorAlignment quantizedTensorAlignmentOnActivations) { - this->quantizedTensorAlignmentOnActivations = quantizedTensorAlignmentOnActivations; -} - -void LayerTransformation::setQuantizedTensorAlignmentOnWeights( - const QuantizedTensorAlignment quantizedTensorAlignmentOnWeights) { - this->quantizedTensorAlignmentOnWeights = quantizedTensorAlignmentOnWeights; -} - -const std::vector& LayerTransformation::getPrecisionsOnActivations() const { - return precisionsOnActivations; -} - -const std::vector& LayerTransformation::getPrecisionsOnWeights() const { - return precisionsOnWeights; -} - bool LayerTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const { if (!isQuantized(layer)) { return false; @@ -78,6 +44,10 @@ bool LayerTransformation::canBeTransformed(const TransformationContext& context, return false; } + return canBeTransformedStatic(layer); +} + +bool LayerTransformation::canBeTransformedStatic(const std::shared_ptr& layer) { for (const auto& output : layer->outputs()) { const auto rank = output.get_partial_shape().rank(); if (rank.is_dynamic()) { @@ -120,13 +90,13 @@ bool LayerTransformation::canBeTransformed(const TransformationContext& context, if ((dequantization.subtract != nullptr) && (!perChannelQuantization( dequantization.subtract->get_output_partial_shape(0), - dequantization.subtract->get_input_shape(1)))) { + dequantization.subtractConstant->get_shape()))) { return false; } if ((dequantization.multiply != nullptr) && 
(!perChannelQuantization( dequantization.multiply->get_output_partial_shape(0), - dequantization.multiply->get_input_shape(1)))) { + dequantization.multiplyConstant->get_shape()))) { return false; } } @@ -158,19 +128,11 @@ bool LayerTransformation::canBeTransformedSpatialDimension(const TransformationC return true; } -bool LayerTransformation::canSubtractBeHandled(const std::shared_ptr& op, const size_t parentIndex) const { - return canSubtractBeHandled(op, NetworkHelper::getDequantization(op, parentIndex)); -} - bool LayerTransformation::canSubtractBeHandled(const std::shared_ptr& op, const FakeQuantizeDequantization& dequantization) const { if (dequantization.empty() || (dequantization.subtract == nullptr)) { return true; } - if (!supportAsymmetricQuantization) { - return false; - } - if (!updatePrecisions) { return true; } @@ -229,36 +191,31 @@ void LayerTransformation::printDequantizationValues( } #endif -void LayerTransformation::setQuantizationIntervalAsymmetryThreshold(const float value) { - this->quantizationIntervalAsymmetryThreshold = value; -} +LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails( + const size_t quantizationLevels, + const std::vector& outputLowValues, + const std::vector& outputHighValues) { + // TODO: workaround: hardcoded values + const float zeroThreshold = 1.e-6f; + const float quantizationIntervalAsymmetryThreshold = 0.002f; -void LayerTransformation::setZeroThreshold(const float value) { - this->zeroThreshold = value; -} - -void LayerTransformation::setMinQuantizationLevels(const size_t levels) { - this->minQuantizationLevels = levels; -} - -LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(const QuantizationDetails& quantizationDetails) const { const float asymmetricIntervalSideRatio256 = -128.f / 127.f; bool hasNegative = false; bool signedPrecision = true; bool unsignedPrecision = true; bool hasZeroPoint = false; - for (size_t i = 0; i < quantizationDetails.outputLowValues.size(); ++i) { - const bool signedInterval = std::signbit(quantizationDetails.outputLowValues[i]) != std::signbit(quantizationDetails.outputHighValues[i]); - const bool outputLowValueIsNotZero = std::fabs(quantizationDetails.outputLowValues[i]) >= zeroThreshold; + for (size_t i = 0; i < outputLowValues.size(); ++i) { + const bool signedInterval = std::signbit(outputLowValues[i]) != std::signbit(outputHighValues[i]); + const bool outputLowValueIsNotZero = std::fabs(outputLowValues[i]) >= zeroThreshold; if (signedInterval && outputLowValueIsNotZero) { // signed unsignedPrecision = false; hasNegative = true; - if (quantizationDetails.outputHighValues[i] != 0.f) { - const float expectedRatio = quantizationDetails.levels == 256 ? asymmetricIntervalSideRatio256 : -1.f; - const float actualRatio = quantizationDetails.outputLowValues[i] / quantizationDetails.outputHighValues[i]; + if (outputHighValues[i] != 0.f) { + const float expectedRatio = quantizationLevels == 256 ? 
asymmetricIntervalSideRatio256 : -1.f; + const float actualRatio = outputLowValues[i] / outputHighValues[i]; const float actual = std::fabs((actualRatio - expectedRatio) / std::min(actualRatio, expectedRatio)); if (actual > quantizationIntervalAsymmetryThreshold) { hasZeroPoint = true; @@ -291,6 +248,17 @@ LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(c } } + // TODO: use this implementation after merge <= not aligned with master +// if (signedPrecision && (!unsignedPrecision)) { +// return LayerTransformation::PrecisionDetails(element::i8, hasNegative, hasZeroPoint); +// } +// +// if ((!signedPrecision) && unsignedPrecision) { +// return LayerTransformation::PrecisionDetails(element::u8, hasNegative, hasZeroPoint); +// } +// +// THROW_TRANSFORMATION_EXCEPTION << "unexpected interval"; + if (!hasZeroPoint) { if (signedPrecision && (!unsignedPrecision)) { return LayerTransformation::PrecisionDetails(element::i8, hasNegative, hasZeroPoint); @@ -304,135 +272,51 @@ LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(c return LayerTransformation::PrecisionDetails(element::undefined, hasNegative, hasZeroPoint); } -bool LayerTransformation::isQuantized(std::shared_ptr layer) const noexcept { +LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(const QuantizationDetails& quantizationDetails) { + return getPrecisionDetails(quantizationDetails.levels, quantizationDetails.outputLowValues, quantizationDetails.outputHighValues); +} + +bool LayerTransformation::isAsymmetricQuantization(const std::shared_ptr& layer) { + const auto nonConstNode = const_cast(layer.get())->shared_from_this(); + const auto dequantization = NetworkHelper::getDequantization(nonConstNode); + return dequantization.subtract != nullptr; +} + +bool LayerTransformation::isQuantized(const std::shared_ptr& layer) const noexcept { return true; } DataPrecision LayerTransformation::getDataPrecision( - std::shared_ptr layer, + const std::shared_ptr& layer, const QuantizationDetails& quantizationDetails, - const bool onWeights) const { + const std::vector& precisions) { #ifdef LPT_PRINT_DEQUANTIZATION_INFO printDequantizationInfo(layer); #endif - std::vector precisions = onWeights ? precisionsOnWeights : precisionsOnActivations; PrecisionDetails precisionDetailsAtOutputIntervals = getPrecisionDetails(quantizationDetails); - { - if (precisionDetailsAtOutputIntervals.precision != element::undefined) { - if (!onWeights) { - fillAvailablePrecisions(layer, precisions); - } - - // if supportedPrecisions is empty then use the first available, not supported layer will be in original precision - if (!precisions.empty()) { - const auto foundIt = std::find(precisions.begin(), precisions.end(), precisionDetailsAtOutputIntervals.precision); - const element::Type resultPrecision = foundIt != precisions.end() ? - precisionDetailsAtOutputIntervals.precision : - *precisions.begin(); - const DataPrecision dataPrecision( - resultPrecision, - DataPrecision::getMinValue(resultPrecision, quantizationDetails.levels), - DataPrecision::getMaxValue(resultPrecision, quantizationDetails.levels), - foundIt != precisions.end() ? 
precisionDetailsAtOutputIntervals.hasZeroPoint : true); - -#ifdef LPT_PRINT_DEQUANTIZATION_INFO - printDequantizationInfo(dataPrecision); -#endif - return dataPrecision; - } + if (precisionDetailsAtOutputIntervals.precision != element::undefined) { + // if supportedPrecisions is empty then use the first available, not supported layer will be in original precision + if (!precisions.empty()) { + const auto foundIt = std::find(precisions.begin(), precisions.end(), precisionDetailsAtOutputIntervals.precision); + const element::Type resultPrecision = foundIt != precisions.end() ? + precisionDetailsAtOutputIntervals.precision : + *precisions.begin(); + + const DataPrecision dataPrecision( + resultPrecision, + DataPrecision::getMinValue(resultPrecision, quantizationDetails.levels), + DataPrecision::getMaxValue(resultPrecision, quantizationDetails.levels), + foundIt != precisions.end() ? precisionDetailsAtOutputIntervals.hasZeroPoint : true); + + return dataPrecision; } } - - const DataPrecision dataPrecision = precisions.empty() ? - DataPrecision(element::undefined, 0.f, 0.f, false) : - DataPrecision( - *precisions.begin(), - DataPrecision::getMinValue(*precisions.begin(), quantizationDetails.levels), - DataPrecision::getMaxValue(*precisions.begin(), quantizationDetails.levels), - true); -#ifdef LPT_PRINT_DEQUANTIZATION_INFO - printDequantizationInfo(dataPrecision); -#endif - return dataPrecision; -} - -void LayerTransformation::fillAvailablePrecisions(std::shared_ptr layer, std::vector& availablePrecisions) const { - if (availablePrecisions.empty()) { - return; - } - - const std::vector> children = NetworkHelper::consumers(layer); - for (auto child : children) { - if (child->get_type_info().is_castable(opset1::FakeQuantize::get_type_info_static())) { - // FakeQuantize layer updates precision - continue; - } - - if (!layerTransformationsManager->isQuantized(child)) { - // low precision chain is interrupted here: next operation supported precisions are ignored - continue; - } - - const std::vector childPrecisionsOnActivations = paramsManager->getPrecisionsOnActivations(*child); - if (childPrecisionsOnActivations.size() == 0ul) { - continue; - } - - for (size_t index = 0ul; index < availablePrecisions.size();) { - const element::Type availablePrecision = availablePrecisions[index]; - if (!std::any_of( - childPrecisionsOnActivations.begin(), - childPrecisionsOnActivations.end(), - [&](const element::Type precision) { return availablePrecision == precision; })) { - availablePrecisions.erase(availablePrecisions.begin() + index); - } else { - ++index; - } - } - - if (!layerTransformationsManager->isPrecisionPreserved(child)) { - continue; - } - - fillAvailablePrecisions(child, availablePrecisions); - if (availablePrecisions.empty()) { - return; - } - } -} - -std::vector> LayerTransformation::getChildrenRecursivelyExceptPrecisionPreserved( - const std::shared_ptr& op) const noexcept { - std::queue> notHandledChildren; - - for (const auto& output : op->outputs()) { - for (const auto& input : output.get_target_inputs()) { - std::shared_ptr child = input.get_node()->shared_from_this(); - notHandledChildren.emplace(child); - } - } - - std::vector> resultChildren; - - while (!notHandledChildren.empty()) { - const std::shared_ptr operation = notHandledChildren.front(); - notHandledChildren.pop(); - - if (!this->layerTransformationsManager->isPrecisionPreserved(operation)) { - resultChildren.push_back(operation); - continue; - } - - for (const auto& output : operation->outputs()) { - for (const auto& input : 
output.get_target_inputs()) { - std::shared_ptr child = input.get_node()->shared_from_this(); - notHandledChildren.emplace(child); - } - } - } - - return resultChildren; + return DataPrecision( + precisionDetailsAtOutputIntervals.precision, + 0.f, + 0.f, + precisionDetailsAtOutputIntervals.hasZeroPoint); } std::shared_ptr LayerTransformation::moveDequantizationAfter( @@ -450,15 +334,15 @@ void LayerTransformation::updateOutput( TransformationContext &context, std::shared_ptr lastNode, std::shared_ptr originalNode) const { - const size_t outputSize = context.function->get_output_size(); - for (size_t i = 0; i < outputSize; ++i) { - std::shared_ptr result = context.function->get_output_op(i); - std::shared_ptr outputNode = result->get_input_node_shared_ptr(0); - if (outputNode.get() == lastNode.get()) { - const std::string originalName = originalNode->get_friendly_name(); - originalNode->set_friendly_name(originalName + LayerTransformation::originalLayerPostfix); - lastNode->set_friendly_name(originalName); - break; + // TODO: not tested!!! + for (auto output : lastNode->outputs()) { + for (auto input : output.get_target_inputs()) { + if (is_type(input.get_node())) { + const std::string originalName = originalNode->get_friendly_name(); + originalNode->set_friendly_name(originalName + LayerTransformation::originalLayerPostfix); + lastNode->set_friendly_name(originalName); + break; + } } } } @@ -478,7 +362,7 @@ void LayerTransformation::updateOutput( } } -void LayerTransformation::addPattern(ngraph::pass::GraphRewrite& pass, TransformationContext& context, std::shared_ptr patternRoot) const { +void LayerTransformation::addPattern(ngraph::pass::GraphRewrite& pass, TransformationContext& context, std::shared_ptr patternRoot) { ngraph::graph_rewrite_callback internal_callback = [this, &context](ngraph::pattern::Matcher &m) { const bool result = transform(context, m); (void)result; diff --git a/inference-engine/src/low_precision_transformations/src/low_precision.cpp b/inference-engine/src/low_precision_transformations/src/low_precision.cpp new file mode 100644 index 00000000000000..a138b484d7f0d2 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/low_precision.cpp @@ -0,0 +1,283 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/low_precision.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "low_precision/align_quantization_intervals.hpp" +#include "low_precision/fake_quantize_decomposition.hpp" +#include "low_precision/markup_precisions.hpp" +#include "low_precision/markup_can_be_quantized.hpp" +#include "low_precision/markup_avg_pool_precision_preserved.hpp" +#include "low_precision/propagate_precisions.hpp" +#include "low_precision/align_quantization_parameters.hpp" + +#include "transformations/common_optimizations/lin_op_sequence_fusion.hpp" +#include "low_precision/fold_convert.hpp" +#include "low_precision/pull_reshape_through_dequantization.hpp" +#include "low_precision/pull_transpose_through_dequantization.hpp" + +// branch specific transformations +#include "low_precision/concat.hpp" + +#include "low_precision/fake_quantize_decomposition.hpp" + +// general transformations +#include "low_precision/add.hpp" +#include "low_precision/avg_pool.hpp" +#include "low_precision/clamp.hpp" +#include "low_precision/convolution.hpp" +#include "low_precision/convolution_backprop_data.hpp" +#include "low_precision/depth_to_space.hpp" 
+#include "low_precision/fake_quantize.hpp" +#include "low_precision/group_convolution.hpp" +#include "low_precision/interpolate.hpp" +#include "low_precision/mat_mul.hpp" +#include "low_precision/max_pool.hpp" +#include "low_precision/multiply.hpp" +#include "low_precision/mvn.hpp" +#include "low_precision/normalize_l2.hpp" +#include "low_precision/prelu.hpp" +#include "low_precision/reduce_max.hpp" +#include "low_precision/reduce_mean.hpp" +#include "low_precision/reduce_min.hpp" +#include "low_precision/reduce_sum.hpp" +#include "low_precision/reshape.hpp" +#include "low_precision/relu.hpp" +#include "low_precision/squeeze.hpp" +#include "low_precision/subtract.hpp" +#include "low_precision/split.hpp" +#include "low_precision/shuffle_channels.hpp" +#include "low_precision/strided_slice.hpp" +#include "low_precision/transpose.hpp" +#include "low_precision/unsqueeze.hpp" +#include "low_precision/variadic_split.hpp" + +// cleanup transformations +#include "low_precision/convert.hpp" +#include "low_precision/fold_fake_quantize.hpp" +#include "low_precision/fuse_convert.hpp" +#include "low_precision/fuse_fake_quantize.hpp" +#include "low_precision/fuse_subtract_to_fake_quantize.hpp" +#include "low_precision/fuse_multiply_to_fake_quantize.hpp" +#include "low_precision/multiply_to_group_convolution.hpp" +#include "low_precision/subtract_multiply_to_multiply_add.hpp" + +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::LowPrecision, "LowPrecision", 0); + +ngraph::pass::low_precision::LowPrecision::LowPrecision( + const std::vector& precisionRestrictions, + const std::vector& quantizationRestrictions, + const LayerTransformation::Params params) : + precisionRestrictions(precisionRestrictions), + quantizationRestrictions(quantizationRestrictions), + params(params) { +} + +using namespace ngraph::pass::low_precision; + +template +void make_matcher_type_relaxed(ngraph::pass::GraphRewrite* transformation) { + using namespace ngraph; + + auto is_op_type = [](std::shared_ptr n) { + return !!as_type_ptr(n); + }; + + auto p_node = std::make_shared(element::f32, Shape{}, is_op_type); + + ngraph::graph_rewrite_callback callback = [](ngraph::pattern::Matcher& m) { + auto l_node = std::dynamic_pointer_cast(m.get_match_root()); + if (std::dynamic_pointer_cast(l_node)) { + return false; + } + if (!l_node) { + THROW_IE_LPT_EXCEPTION(*l_node) << "unexpected operation type"; + } + + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "LowPrecisionTypeRelaxedMatcher"); + + std::vector inputPrecisions; + for (auto& inputs : l_node->inputs()) { + inputPrecisions.push_back(inputs.get_element_type()); + } + + std::vector outputPrecisions; + for (auto& output : l_node->outputs()) { + outputPrecisions.push_back(output.get_element_type()); + } + + auto replacement = std::make_shared>(*l_node, inputPrecisions, outputPrecisions); + + copy_runtime_info(l_node, replacement); + replace_node(l_node, replacement); + return true; + }; + + auto m = std::make_shared(p_node, "TypeRelaxedReplacer"); + NGRAPH_SUPPRESS_DEPRECATED_START + transformation->add_matcher(m, callback, ngraph::pass::PassProperty::CHANGE_DYNAMIC_STATE); + NGRAPH_SUPPRESS_DEPRECATED_END +} + +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::TypeRelaxedReplacer, "TypeRelaxedReplacer", 0); + +ngraph::pass::low_precision::TypeRelaxedReplacer::TypeRelaxedReplacer() { + make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); + 
make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); +} + +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::MarkupOptimizations, "MarkupOptimizations", 0); + +MarkupOptimizations::MarkupOptimizations( + const std::vector& precisionRestrictions, + const std::vector& quantizationRestrictions) : + precisionRestrictions(precisionRestrictions), + quantizationRestrictions(quantizationRestrictions) {} + +bool ngraph::pass::low_precision::MarkupOptimizations::run_on_function(std::shared_ptr f) { + ngraph::pass::Manager markup(get_pass_config()); + markup.set_per_pass_validation(false); + markup.register_pass(); + if (!precisionRestrictions.empty()) { + markup.register_pass(precisionRestrictions); + } + if (!quantizationRestrictions.empty()) { + markup.register_pass(quantizationRestrictions); + } + if (ngraph::op::util::has_op_with_type(f)) { + markup.register_pass(); + } + markup.register_pass(); + if (ngraph::op::util::has_op_with_type(f)) { + markup.register_pass(); + markup.register_pass(); + } + markup.run_passes(f); + return false; +} + +bool ngraph::pass::low_precision::LowPrecision::run_on_function(std::shared_ptr f) { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "LowPrecision"); + + auto passConfig = get_pass_config(); + ngraph::pass::Manager manager(passConfig); + + auto prerequisites = manager.register_pass(); + const std::vector supportedTypes = {ngraph::element::i8, ngraph::element::u8}; + prerequisites->add_matcher(supportedTypes); + prerequisites->add_matcher(supportedTypes); + prerequisites->add_matcher(); + + manager.register_pass(); + + manager.register_pass(precisionRestrictions, quantizationRestrictions); + + std::shared_ptr common = manager.register_pass(); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + + std::shared_ptr cleanup = manager.register_pass(); + cleanup->add_matcher(params); + cleanup->add_matcher(params); + cleanup->add_matcher(params); + cleanup->add_matcher(params); + // WA: precision restrictions for groupConv must be propagated to MultiplyToGroupConvolution transformation + cleanup->add_matcher( + params, + OperationPrecisionRestriction::getPrecisionsByOperationType(precisionRestrictions)); + manager.register_pass(params); + manager.register_pass(params); + manager.register_pass(); + + 
manager.run_passes(f); + return false; +} + +bool ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(const std::shared_ptr& function) { + std::set> handledNodes; + std::deque> nodes; + for (auto result : function->get_results()) { + nodes.push_front(result); + } + + while (!nodes.empty()) { + auto node = nodes.front(); + nodes.pop_front(); + + for (size_t i = 0; i < node->inputs().size(); ++i) { + auto parent = node->get_input_node_shared_ptr(i); + if (handledNodes.find(parent) != handledNodes.end()) { + continue; + } + + const std::shared_ptr fakeQuantize = as_type_ptr(parent); + if ((fakeQuantize != nullptr) && + QuantizationDetails::outputLayoutIsSupported(fakeQuantize) && + QuantizationDetails::isSupportedLevel(fakeQuantize->get_levels())) { + return true; + } + + nodes.push_front(parent); + handledNodes.insert(parent); + } + } + return false; +} diff --git a/inference-engine/src/low_precision_transformations/src/markup_avg_pool_precision_preserved.cpp b/inference-engine/src/low_precision_transformations/src/markup_avg_pool_precision_preserved.cpp new file mode 100644 index 00000000000000..2dc256920c74b8 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/markup_avg_pool_precision_preserved.cpp @@ -0,0 +1,26 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/markup_avg_pool_precision_preserved.hpp" +#include +#include +#include "low_precision/create_precisions_dependent_attribute.hpp" +#include "low_precision/rt_info/avg_pool_precision_preserved_attribute.hpp" +#include "low_precision/propagate_through_precision_preserved.hpp" +#include "low_precision/update_shared_precision_preserved.hpp" + +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::MarkupAvgPoolPrecisionPreserved, "MarkupAvgPoolPrecisionPreserved", 0); + +bool ngraph::pass::low_precision::MarkupAvgPoolPrecisionPreserved::run_on_function(std::shared_ptr f) { + ngraph::pass::Manager manager; + manager.set_per_pass_validation(false); + std::shared_ptr markupAvgPoolPrecision = manager.register_pass(); + markupAvgPoolPrecision->add_matcher>(); + markupAvgPoolPrecision->add_matcher>(); + markupAvgPoolPrecision->add_matcher>(); + manager.run_passes(f); + return false; +} diff --git a/inference-engine/src/low_precision_transformations/src/markup_can_be_quantized.cpp b/inference-engine/src/low_precision_transformations/src/markup_can_be_quantized.cpp new file mode 100644 index 00000000000000..3117efc2debd14 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/markup_can_be_quantized.cpp @@ -0,0 +1,59 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/markup_can_be_quantized.hpp" + +#include + +#include +#include "low_precision/convolution.hpp" +#include "low_precision/convolution_backprop_data.hpp" +#include "low_precision/group_convolution.hpp" +#include "low_precision/network_helper.hpp" +#include "low_precision/rt_info/precisions_attribute.hpp" + +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::MarkupCanBeQuantized, "MarkupCanBeQuantized", 0); + +bool ngraph::pass::low_precision::MarkupCanBeQuantized::run_on_function(std::shared_ptr f) { + auto setEmptyPrecisions = [](const std::shared_ptr& node) { + for (auto& input : node->inputs()) { + auto& rt = input.get_rt_info(); + + auto attribute = ngraph::pass::low_precision::make_shared_attribute(std::vector()); + auto attributeWrapper = 
std::make_shared>>(attribute); + + rt.emplace( + ngraph::VariantWrapper>::type_info.name, + attributeWrapper); + } + }; + + for (const std::shared_ptr& node : f->get_ordered_ops()) { + if (node->get_input_size() == 0 || transformation_callback(node)) { + continue; + } + + if (const auto convolution = std::dynamic_pointer_cast(node)) { + if (!ConvolutionTransformation::isQuantizedStatic(convolution)) { + setEmptyPrecisions(convolution); + } + continue; + } + if (const auto convolutionBackpropData = std::dynamic_pointer_cast(node)) { + if (!ConvolutionBackpropDataTransformation::isQuantizedStatic(convolutionBackpropData)) { + setEmptyPrecisions(convolutionBackpropData); + } + continue; + } + if (const auto groupConvolution = std::dynamic_pointer_cast(node)) { + if (!GroupConvolutionTransformation::isQuantizedStatic(groupConvolution)) { + setEmptyPrecisions(groupConvolution); + } + continue; + } + } + return true; +} diff --git a/inference-engine/src/low_precision_transformations/src/markup_per_tensor_quantization.cpp b/inference-engine/src/low_precision_transformations/src/markup_per_tensor_quantization.cpp new file mode 100644 index 00000000000000..4cd37c94658a53 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/markup_per_tensor_quantization.cpp @@ -0,0 +1,85 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/markup_per_tensor_quantization.hpp" + +#include +#include +#include +#include +#include "low_precision/rt_info/per_tensor_quantization_attribute.hpp" + +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::MarkupPerTensorQuantization, "MarkupPerTensorQuantization", 0); + +ngraph::pass::low_precision::MarkupPerTensorQuantization::MarkupPerTensorQuantization( + const std::vector& restrictions) { + for (const OperationPerTensorQuantizationRestriction& restriction : restrictions) { + const auto it = restrictionsByOperation.find(restriction.operationType.name); + if (it == restrictionsByOperation.end()) { + PerTensorQuantization r(restriction.specifyVersion); + r.portsByVersion.emplace(restriction.operationType.version, restriction.restrictedPorts); + restrictionsByOperation.emplace(restriction.operationType.name, r); + } else { + it->second.add(restriction.operationType.version, restriction.restrictedPorts); + } + } +} + +bool ngraph::pass::low_precision::MarkupPerTensorQuantization::run_on_function(std::shared_ptr f) { + auto setRestriction = [](const std::shared_ptr& node, const std::vector& restrictedPorts) { + auto createAttribute = [](Input& input){ + auto &rt = input.get_rt_info(); + rt.emplace( + ngraph::VariantWrapper::type_info.name, + std::make_shared<::ngraph::VariantWrapper>(PerTensorQuantizationAttribute())); + }; + + if (restrictedPorts.empty()) { + // markup all ports + for (size_t item = 0ul; item < node->get_input_size(); item++) { + Input input = node->input(item); + createAttribute(input); + } + } else { + // markup specific ports + for (const size_t item : restrictedPorts) { + Input input = node->input(item); + createAttribute(input); + } + } + }; + + for (const std::shared_ptr& node : f->get_ordered_ops()) { + if (node->get_input_size() == 0) { + continue; + } + + const auto typeIt = restrictionsByOperation.find(node->get_type_info().name); + if (typeIt == restrictionsByOperation.end()) { + continue; + } + + const auto& restriction = typeIt->second; + if (restriction.portsByVersion.empty()) { + continue; + } + + if (restriction.versionIsRequired) { + 
const auto it2 = restriction.portsByVersion.find(node->get_type_info().version); + if (it2 == restriction.portsByVersion.end()) { + continue; + } + + const std::vector& restrictedPorts = it2->second; + setRestriction(node, restrictedPorts); + } else { + assert(restriction.portsByVersion.size() == 1ul); + const std::vector& restrictedPorts = restriction.portsByVersion.begin()->second; + setRestriction(node, restrictedPorts); + } + } + return true; +} diff --git a/inference-engine/src/low_precision_transformations/src/markup_precisions.cpp b/inference-engine/src/low_precision_transformations/src/markup_precisions.cpp new file mode 100644 index 00000000000000..17747179345c1f --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/markup_precisions.cpp @@ -0,0 +1,217 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/markup_precisions.hpp" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include "low_precision/network_helper.hpp" +#include "low_precision/rt_info/precisions_attribute.hpp" +#include "low_precision/rt_info/precision_preserved_attribute.hpp" + +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::MarkupPrecisions, "MarkupPrecisions", 0); + +ngraph::pass::low_precision::MarkupPrecisions::MarkupPrecisions(const std::vector& restrictions) { + for (const auto& restriction : restrictions) { + const auto it = restrictionsByOperation.find(restriction.operationType.name); + if (it == restrictionsByOperation.end()) { + Restriction r(restriction.specifyVersion); + r.precisionsByVersion.emplace(restriction.operationType.version, restriction.precisionsByPort); + restrictionsByOperation.emplace(restriction.operationType.name, r); + } else { + it->second.add(restriction.operationType.version, restriction.precisionsByPort); + } + } +} + +namespace { +void setRestriction( + const std::shared_ptr& node, + const std::vector>>& precisionsByPort) { + if (precisionsByPort.empty()) { + // if available precisions for any port is empty then mark all input ports + for (auto& input : node->inputs()) { + auto& rt = input.get_rt_info(); + + auto attribute = ngraph::pass::low_precision::make_shared_attribute(std::vector()); + auto attributeWrapper = std::make_shared>>(attribute); + + rt.emplace( + ngraph::VariantWrapper>::type_info.name, + attributeWrapper); + } + } else { + for (const std::pair>& item : precisionsByPort) { + Input input = node->input(item.first); + + auto precisionsAttribute = ngraph::pass::low_precision::getAttribute>(input); + if ((precisionsAttribute != nullptr) && + (precisionsAttribute->get()->sharedValue != nullptr) && + (precisionsAttribute->get()->sharedValue->precisions.empty())) { + return; + } + + auto attribute = ngraph::pass::low_precision::make_shared_attribute(item.second); + auto attributeWrapper = std::make_shared>>(attribute); + + auto& rt = input.get_rt_info(); + rt[ngraph::VariantWrapper>::type_info.name] = attributeWrapper; + } + } +} +} // namespace + +bool ngraph::pass::low_precision::MarkupPrecisions::run_on_function(std::shared_ptr f) { + for (const std::shared_ptr& node : f->get_ordered_ops()) { + if (node->get_input_size() == 0) { + continue; + } + + if (transformation_callback(node)) { + continue; + } + + // TODO: don't need to set restrictions for not supported operations + // if don't set restrictions for not supported operations then accuracy drop appears, issue #59197 + const bool supported = is_type(node) || 
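        // Operations that are not in the supported list, or that the transformation
        // callback rejects, receive an empty precisions set on input port 0, which
        // effectively excludes them from low-precision handling; per the TODO above,
        // the restriction is still written for unsupported operations (issue #59197).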
isSupported(node); + if (!supported || !LayerTransformation::canBeTransformedStatic(node)) { + setRestriction(node, std::vector>> { {0ul, {}}}); + continue; + } + + const bool precisionPreserved = isPrecisionPreserved(node); + if (precisionPreserved) { + auto& rt = node->get_rt_info(); + rt.emplace( + ngraph::VariantWrapper::type_info.name, + std::make_shared<::ngraph::VariantWrapper>( + make_shared_attribute(precisionPreserved))); + } + + const auto& typeInfo = node->get_type_info(); + auto it = restrictionsByOperation.find(typeInfo.name); + if (it != restrictionsByOperation.end()) { + const Restriction& r = it->second; + if (r.versionIsRequired) { + const auto it2 = r.precisionsByVersion.find(typeInfo.version); + if (it2 == r.precisionsByVersion.end()) { + continue; + } + + const std::vector>>& precisionsByPort = it2->second; + setRestriction(node, precisionsByPort); + } else { + assert(r.precisionsByVersion.size() == 1ul); + + const std::vector>>& precisionsByPort = r.precisionsByVersion.begin()->second; + setRestriction(node, precisionsByPort); + } + } + } + return true; +} + +template +std::string name() { + return Operation::get_type_info_static().name; +} + +bool ngraph::pass::low_precision::MarkupPrecisions::isPrecisionPreserved(const std::shared_ptr& node) { + if (isDisabled(node)) { + return false; + } + + // TODO: think how to handle conditions <= not mandatory for PoC + // TODO: operation set version is not affected <= not mandatory for PoC + static std::unordered_set precisionPreservedOps = { + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + // TODO: there are conditions + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() } + }; + + const bool precisionPreserved = precisionPreservedOps.find(node->get_type_name()) != precisionPreservedOps.end(); + if (precisionPreserved) { + return precisionPreserved; + } + + if (is_type(node)) { + std::shared_ptr interpolate1 = as_type_ptr(node); + if (interpolate1) { + const auto attrs = interpolate1->get_attrs(); + return attrs.mode == "nearest"; + } + + std::shared_ptr interpolate4 = as_type_ptr(node); + if (interpolate4) { + const auto attrs = interpolate4->get_attrs(); + return attrs.mode == op::v4::Interpolate::InterpolateMode::nearest; + } + } + + return false; +} + +bool ngraph::pass::low_precision::MarkupPrecisions::isSupported(const std::shared_ptr& node) { + static std::unordered_set supportedOps = { + { name() }, + { name() }, + { name() }, + { name() }, + // ? + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + // TODO: there are conditions + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + // ? 
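        // Both precisionPreservedOps and supportedOps are keyed by the string the
        // name() helper above returns (the operation's static type name), so membership
        // is checked with node->get_type_name() and does not distinguish opset versions;
        // Interpolate is the exception and is decided separately above by its mode.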
+ { name() }, + { name() }, + { name() }, + { name() } + }; + + return supportedOps.find(node->get_type_name()) != supportedOps.end(); +} diff --git a/inference-engine/src/low_precision_transformations/src/mat_mul.cpp b/inference-engine/src/low_precision_transformations/src/mat_mul.cpp index 1d9745da53f9dc..693d0e6490e2e9 100644 --- a/inference-engine/src/low_precision_transformations/src/mat_mul.cpp +++ b/inference-engine/src/low_precision_transformations/src/mat_mul.cpp @@ -9,6 +9,9 @@ #include #include +#include +#include + #include "low_precision/network_helper.hpp" #include "low_precision/common/dequantization_op.hpp" @@ -16,20 +19,33 @@ using namespace ngraph; using namespace ngraph::pass; using namespace ngraph::pass::low_precision; -bool MatMulTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) const { +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::MatMulTransformation, "MatMulTransformation", 0); + +MatMulTransformation::MatMulTransformation(const Params& params) : LayerTransformation(params) { + auto mul1 = pattern::wrap_type(); + auto mul2 = pattern::wrap_type(); + auto fq2 = pattern::wrap_type(); + auto matcher = pattern::wrap_type({ mul1, std::make_shared(OutputVector{ mul2, fq2 })}); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "MatMulTransformation"); + this->register_matcher(m, callback); +} + +bool MatMulTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) { std::shared_ptr matMul = as_type_ptr(m.get_match_root()); if ((matMul == nullptr) || !canBeTransformed(context, matMul)) { return false; } matMul = as_type_ptr(NetworkHelper::separateInStandaloneBranch(matMul)); - if (!support3DTensorOnActivations) { - const auto inputRank = matMul->get_input_partial_shape(0).rank(); - if (inputRank.is_dynamic() || inputRank.get_length() == 3) { - return false; - } - } - const auto dequantization1 = NetworkHelper::getDequantization(matMul, 0); auto dequantization2 = NetworkHelper::getDequantization(matMul, 1); @@ -38,7 +54,12 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat as_type_ptr(dequantization2.data.get_node_shared_ptr()); if (fakeQuantize != nullptr) { const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(fakeQuantize); - const DataPrecision dataPrecision = getDataPrecision(fakeQuantize, quantizationDetails, true); + + const auto precisionsAttribute = getAttributeFromOutput(fakeQuantize); + const auto precisions = precisionsAttribute == nullptr ? 
+ PrecisionsAttribute::defaultPrecisions : + precisionsAttribute->get()->sharedValue->precisions; + const DataPrecision dataPrecision = getDataPrecision(fakeQuantize, quantizationDetails, precisions); auto tuple = NetworkHelper::decomposeFakeQuantize( fakeQuantize, @@ -147,27 +168,20 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat replace_node(matMul, newMultiply); copy_runtime_info({ newMultiply, matMul }, newMultiply); - updateOutput(context, newMultiply, matMul); + updateOutput(context, newMultiply, newMatMul); return true; } -void MatMulTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const { - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); - - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); -} - bool MatMulTransformation::isPrecisionPreserved(std::shared_ptr layer) const noexcept { return false; } +bool MatMulTransformation::is3DTensorOnActivations(const std::shared_ptr& node) { + const auto inputDataRank = node->get_input_partial_shape(0).rank(); + return inputDataRank.is_dynamic() || inputDataRank.get_length() == 3; +} + bool MatMulTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const { if (!LayerTransformation::canBeTransformedSpatialDimension(context, layer)) { return false; @@ -204,6 +218,8 @@ bool MatMulTransformation::canBeTransformed(const TransformationContext& context if (!NetworkHelper::checkZeroPoint(dequantization1.subtract)) { return false; } + } else { + return false; } const auto dequantization2 = NetworkHelper::getDequantization(layer, 1); @@ -240,7 +256,13 @@ bool MatMulTransformation::canBeTransformed(const TransformationContext& context } const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(fakeQuantize); - const DataPrecision dataPrecision = getDataPrecision(fakeQuantize, quantizationDetails, true); + + const auto precisionsAttribute = getAttribute(matMul->input(1)); + const auto precisions = precisionsAttribute == nullptr ? 
+ PrecisionsAttribute::defaultPrecisions : + precisionsAttribute->get()->sharedValue->precisions; + + const DataPrecision dataPrecision = getDataPrecision(fakeQuantize, quantizationDetails, precisions); if (dataPrecision.hasZeroPoint) { return false; } @@ -259,6 +281,10 @@ bool MatMulTransformation::canBeTransformed(const TransformationContext& context } } + if (!fakeQuantize && dequantization2.empty()) { + return false; + } + if ((!NetworkHelper::isConstantPath(layer->get_input_node_shared_ptr(1))) && (dequantization1.subtract)) { return false; } diff --git a/inference-engine/src/low_precision_transformations/src/max_pool.cpp b/inference-engine/src/low_precision_transformations/src/max_pool.cpp index 4f867cc4bdda49..68a73cac59e522 100644 --- a/inference-engine/src/low_precision_transformations/src/max_pool.cpp +++ b/inference-engine/src/low_precision_transformations/src/max_pool.cpp @@ -8,20 +8,29 @@ #include #include +#include + #include "low_precision/network_helper.hpp" namespace ngraph { namespace pass { namespace low_precision { +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::MaxPoolTransformation, "MaxPoolTransformation", 0); + MaxPoolTransformation::MaxPoolTransformation(const Params& params) : LayerTransformation(params) { -} + auto matcher = pattern::wrap_type({ pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; -void MaxPoolTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addPattern( - pass, - context, - make_op_pattern({ make_op_label() })); + auto m = std::make_shared(matcher, "MaxPoolTransformation"); + this->register_matcher(m, callback); } bool MaxPoolTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr op) const { @@ -42,7 +51,7 @@ bool MaxPoolTransformation::canBeTransformed(const TransformationContext& contex return true; } -bool MaxPoolTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool MaxPoolTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { if (!canBeTransformed(context, m.get_match_root())) { return false; } diff --git a/inference-engine/src/low_precision_transformations/src/multiply.cpp b/inference-engine/src/low_precision_transformations/src/multiply.cpp index bf354bfc5f0613..d95fe2812c3f1e 100644 --- a/inference-engine/src/low_precision_transformations/src/multiply.cpp +++ b/inference-engine/src/low_precision_transformations/src/multiply.cpp @@ -12,6 +12,8 @@ #include #include +#include + #include "low_precision/common/ie_lpt_exception.hpp" #include "low_precision/common/dequantization_op.hpp" #include "low_precision/network_helper.hpp" @@ -20,11 +22,24 @@ namespace ngraph { namespace pass { namespace low_precision { -void MultiplyTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addSingleNodePattern(pass, context); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::MultiplyTransformation, "MultiplyTransformation", 0); + +MultiplyTransformation::MultiplyTransformation(const Params& params) : EltwiseBaseTransformation(params) { + auto matcher = pattern::wrap_type(); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto 
m = std::make_shared(matcher, "MultiplyTransformation"); + this->register_matcher(m, callback); } -bool MultiplyTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool MultiplyTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { auto multiply = m.get_match_root(); if (!LayerTransformation::canBeTransformed(context, multiply)) { return false; diff --git a/inference-engine/src/low_precision_transformations/src/multiply_to_group_convolution.cpp b/inference-engine/src/low_precision_transformations/src/multiply_to_group_convolution.cpp index 9d477ed11c4b05..9b4a6147b61c07 100644 --- a/inference-engine/src/low_precision_transformations/src/multiply_to_group_convolution.cpp +++ b/inference-engine/src/low_precision_transformations/src/multiply_to_group_convolution.cpp @@ -5,17 +5,33 @@ #include "low_precision/multiply_to_group_convolution.hpp" #include #include +#include #include "low_precision/network_helper.hpp" namespace ngraph { namespace pass { namespace low_precision { -void MultiplyToGroupConvolutionTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addSingleNodePattern(pass, context); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::MultiplyToGroupConvolutionTransformation, "MultiplyToGroupConvolutionTransformation", 0); + +MultiplyToGroupConvolutionTransformation::MultiplyToGroupConvolutionTransformation( + const Params& params, + const OperationPrecisionRestriction::PrecisionsByPort& restrictions) : LayerTransformation(params), restrictions(restrictions), groupSize(1ul) { + auto matcher = pattern::wrap_type(); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "MultiplyToGroupConvolutionTransformation"); + this->register_matcher(m, callback); } -bool MultiplyToGroupConvolutionTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool MultiplyToGroupConvolutionTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { const auto multiply = m.get_match_root(); if (!canBeTransformed(context, multiply)) { return false; @@ -31,11 +47,34 @@ bool MultiplyToGroupConvolutionTransformation::transform(TransformationContext& } auto dequantization = NetworkHelper::getDequantization(multiply, inputIndex); + if (dequantization.data.get_node() == nullptr) { + return false; + } if (dequantization.subtractConvert != nullptr) { dequantization = NetworkHelper::foldDequantization(multiply, inputIndex); } - const element::Type weightsPrecision = updatePrecisions ? precisionsOnWeights[0] : dequantization.data.get_element_type(); + element::Type weightsPrecision = element::undefined; + if (updatePrecisions) { + // try to find restrictions on weights for GroupConvolution + if (restrictions.size() > 1ul) { + const auto& availablePreisions = restrictions[1].second; + if (!availablePreisions.empty()) { + weightsPrecision = availablePreisions[0]; + } + } + + // if restrictions are absent precisions attribute is used + if (weightsPrecision == element::undefined) { + const auto precisionsAttribute = getAttribute(multiply->input(inputIndex == 0ul ? 1ul : 0ul)); + const auto precisions = precisionsAttribute == nullptr ? 
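            // Weights precision is selected in this order: an explicit restriction for
            // the GroupConvolution weights port (restrictions[1]) when one is registered,
            // then the precisions attribute on the non-constant multiply input; with
            // updatePrecisions disabled the dequantization data type is kept unchanged.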
+ PrecisionsAttribute::defaultPrecisions : + precisionsAttribute->get()->sharedValue->precisions; + weightsPrecision = precisions[0]; + } + } else { + weightsPrecision = dequantization.data.get_element_type(); + } const size_t inputChannelsCount = input->get_output_partial_shape(0)[1].get_length(); const size_t outputChannelsCount = multiply->get_output_partial_shape(0)[1].get_length(); @@ -140,21 +179,21 @@ bool MultiplyToGroupConvolutionTransformation::canBeTransformed(const Transforma return false; } - const auto dequantization = NetworkHelper::getDequantization(operation, inputIndex); - - if (dequantization.empty()) { - return false; - } - for (size_t i = 2; i < constShape.size(); ++i) { if (constShape[i] != 1) { return false; } } - if (updatePrecisions) { + if (updatePrecisions && restrictions.size() > 0) { + const auto& availablePreisions = restrictions[0].second; + if (availablePreisions.empty()) { + return false; + } + + const auto dequantization = NetworkHelper::getDequantization(operation, inputIndex); const element::Type parentPrecision = dequantization.data.get_element_type(); - if (std::find(precisionsOnActivations.begin(), precisionsOnActivations.end(), parentPrecision) == precisionsOnActivations.end()) { + if (std::find(availablePreisions.begin(), availablePreisions.end(), parentPrecision) == availablePreisions.end()) { return false; } } @@ -162,7 +201,11 @@ bool MultiplyToGroupConvolutionTransformation::canBeTransformed(const Transforma return true; } -bool MultiplyToGroupConvolutionTransformation::isQuantized(std::shared_ptr layer) const noexcept { +bool MultiplyToGroupConvolutionTransformation::isQuantized(const std::shared_ptr& layer) const noexcept { + return MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolution(layer); +} + +bool MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolution(const std::shared_ptr& layer) noexcept { const auto parent0 = layer->get_input_node_shared_ptr(0); const auto parent1 = layer->get_input_node_shared_ptr(1); @@ -179,6 +222,35 @@ bool MultiplyToGroupConvolutionTransformation::isQuantized(std::shared_ptr return (pShape.rank().get_length() == 4ul) || (pShape.rank().get_length() == 5ul); } +bool MultiplyToGroupConvolutionTransformation::isDynamicOrScalar(const std::shared_ptr& node) { + auto getConstantIndex = [](const std::shared_ptr& node) -> int { + if (is_type(node->get_input_node_shared_ptr(1))) { + return 1; + } + if (is_type(node->get_input_node_shared_ptr(0))) { + return 0; + } + return -1; + }; + + const int constantIndex = getConstantIndex(node); + if (constantIndex == -1) { + return false; + } + + const Input constantInput = node->input(constantIndex); + const auto shape = constantInput.get_partial_shape(); + if (shape.is_dynamic() || shape.rank().is_dynamic()) { + return true; + } + + if (std::all_of(shape.begin(), shape.end(), [](const Dimension& dimension) { return dimension == 1ul; })) { + return true; + } + + return false; +} + void MultiplyToGroupConvolutionTransformation::setGroupSize(const size_t groupSize) { this->groupSize = groupSize; } diff --git a/inference-engine/src/low_precision_transformations/src/mvn.cpp b/inference-engine/src/low_precision_transformations/src/mvn.cpp index dc6df6d5b0fa4e..7883235e42de44 100644 --- a/inference-engine/src/low_precision_transformations/src/mvn.cpp +++ b/inference-engine/src/low_precision_transformations/src/mvn.cpp @@ -10,6 +10,9 @@ #include #include +#include +#include + #include "ngraph/type/element_type.hpp" #include 
"ngraph/type/element_type_traits.hpp" #include "low_precision/network_helper.hpp" @@ -21,6 +24,8 @@ using namespace ngraph; using namespace ngraph::pass; using namespace ngraph::pass::low_precision; +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::MVNTransformation, "MVNTransformation", 0); + namespace mvn { template @@ -38,6 +43,24 @@ std::shared_ptr createNewScalesConst(const ngraph::op::Con } // namespace mvn +MVNTransformation::MVNTransformation(const Params& params) : LayerTransformation(params) { + auto matcher = std::make_shared(OutputVector{ + pattern::wrap_type({ pattern::wrap_type() }), + pattern::wrap_type({ pattern::wrap_type(), pattern::wrap_type() }) + }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "MVNTransformation"); + this->register_matcher(m, callback); +} + bool MVNTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr operation) const { if (!LayerTransformation::canBeTransformed(context, operation)) { return false; @@ -86,19 +109,7 @@ bool MVNTransformation::canBeTransformed(const TransformationContext& context, s return perTensor && isScalarScales; } -void MVNTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const { - addPattern( - pass, - context, - make_op_pattern({ make_op_label() })); - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), - make_op_label() })); -} - -bool MVNTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) const { +bool MVNTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) { std::shared_ptr operation = m.get_match_root(); if (!canBeTransformed(context, operation)) { return false; diff --git a/inference-engine/src/low_precision_transformations/src/network_helper.cpp b/inference-engine/src/low_precision_transformations/src/network_helper.cpp index 6b26398878ca4f..3f49e8b327cc04 100644 --- a/inference-engine/src/low_precision_transformations/src/network_helper.cpp +++ b/inference-engine/src/low_precision_transformations/src/network_helper.cpp @@ -20,6 +20,9 @@ #include "low_precision/common/ie_lpt_exception.hpp" #include "low_precision/common/dequantization_op.hpp" #include "low_precision/layer_transformation.hpp" +#include "low_precision/rt_info/precision_preserved_attribute.hpp" +#include "low_precision/rt_info/intervals_alignment_attribute.hpp" +#include "low_precision/rt_info/quantization_alignment_attribute.hpp" namespace ngraph { namespace pass { @@ -286,26 +289,65 @@ std::shared_ptr NetworkHelper::swapMultiplyAndAdd(std::shared_ptr{ multiply->get_output_element_type(0) }, ngraph::op::TemporaryReplaceOutputType(newAdd, element::f32).get(), ngraph::op::TemporaryReplaceOutputType(a, element::f32).get()); - copyInfo(multiply, newMultiply); + copyInfo({ multiply, newMultiply }, newMultiply); replace_node(addAfterMultiply, newMultiply); return newMultiply; } -void NetworkHelper::copyInfo(const std::shared_ptr& source, const std::shared_ptr& target) { - // TODO: merge_runtime_info with correctly defined DEQUANTIZATION - const auto& sourceAttributes = source->get_rt_info(); - auto& targetAttrubutes = target->get_rt_info(); - for (auto attribute : sourceAttributes) { - targetAttrubutes[attribute.first] = attribute.second; - } +void NetworkHelper::copyInfo( + const std::vector>& sources, + const 
std::vector>& targets) { + ngraph::copy_runtime_info(sources, targets); + + for (const auto& target : targets) { + const std::string friendlyName = sources[0]->get_friendly_name(); + if (!friendlyName.empty()) { + target->set_friendly_name(friendlyName); + } + + { + // TODO: has to be implemented in ngraph::copy_runtime_info + + for (auto& source : sources) { + if (target->get_type_info() != source->get_type_info()) { + continue; + } - const std::string friendlyName = source->get_friendly_name(); - if (!friendlyName.empty()) { - target->set_friendly_name(friendlyName); + assert(source->get_input_size() == target->get_input_size()); + for (size_t i = 0; i < target->get_input_size(); ++i) { + auto sourceInput = source->input(i); + const auto& sourceRt = sourceInput.get_rt_info(); + auto targetInput = target->input(i); + auto& targetRt = targetInput.get_rt_info(); + for (const auto& it : sourceRt) { + targetRt[it.first] = it.second; + } + } + + assert(source->get_output_size() == target->get_output_size()); + for (size_t i = 0; i < target->get_output_size(); ++i) { + auto sourceOutput = source->output(i); + const auto& sourceRt = sourceOutput.get_rt_info(); + auto targetOutput = target->output(i); + auto& targetRt = targetOutput.get_rt_info(); + for (const auto& it : sourceRt) { + targetRt[it.first] = it.second; + } + } + } + } } } +void NetworkHelper::copyInfo(const std::vector>& sources, const std::shared_ptr& target) { + copyInfo(sources, std::vector>{ target }); +} + +void NetworkHelper::copyInfo(const std::shared_ptr& source, const std::shared_ptr& target) { + copyInfo(std::vector>{ source }, std::vector>{ target }); +} + void NetworkHelper::cleanRunTimeInfo(const std::shared_ptr& layer) { auto& rt_info = layer->get_rt_info(); auto attributeIter = rt_info.find("DEQUANTIZATION"); @@ -315,7 +357,21 @@ void NetworkHelper::cleanRunTimeInfo(const std::shared_ptr& layer) { } bool NetworkHelper::isScalarLike(std::shared_ptr constant) { - return constant->get_all_data_elements_bitwise_identical(); + // ticket #48857 + // return constant->get_all_data_elements_bitwise_identical(); + + const auto shape = constant->output(0).get_shape(); + if (shape_size(shape) == 1ul) { + return true; + } + + + const auto values = constant->cast_vector(); + if (values.empty()) { + return true; + } + + return !std::any_of(values.begin(), values.end(), [&](float value) { return values[0] != value; }); } bool NetworkHelper::isZero(std::shared_ptr constant) { @@ -524,8 +580,10 @@ std::shared_ptr NetworkHelper::separateInStandaloneBranch(std::sha if (dequantization.isShared()) { Output parent = dequantization.data; if (dequantization.convert != nullptr) { - parent = dequantization.convert->clone_with_new_inputs({ parent }); - parent.get_node_shared_ptr()->set_friendly_name(parent.get_node_shared_ptr()->get_name() + "_new"); + auto convert = dequantization.convert->clone_with_new_inputs({ parent }); + convert->set_friendly_name(""); + copy_runtime_info(parent.get_node_shared_ptr(), convert); + parent = convert->output(0); } if (dequantization.subtract != nullptr) { @@ -537,15 +595,19 @@ std::shared_ptr NetworkHelper::separateInStandaloneBranch(std::sha outputs.push_back(input.get_source_output()); } - parent = dequantization.subtract->clone_with_new_inputs({parent, parentOnWeights->clone_with_new_inputs(outputs) }); - parent.get_node_shared_ptr()->set_friendly_name(parent.get_node_shared_ptr()->get_name() + "_new"); + auto subtract = dequantization.subtract->clone_with_new_inputs({parent, 
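            // Each dequantization operation is cloned for this branch: the clone gets an
            // empty friendly name, runtime info copied from the shared parent, and becomes
            // the new parent output, so the original shared dequantization chain stays intact.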
parentOnWeights->clone_with_new_inputs(outputs) }); + subtract->set_friendly_name(""); + copy_runtime_info(parent.get_node_shared_ptr(), subtract); + parent = subtract->output(0); } if (dequantization.multiply != nullptr) { - parent = dequantization.multiply->clone_with_new_inputs({ + auto multiply = dequantization.multiply->clone_with_new_inputs({ parent, dequantization.multiply->get_input_node_shared_ptr(1)->clone_with_new_inputs({}) }); - parent.get_node_shared_ptr()->set_friendly_name(parent.get_node_shared_ptr()->get_name() + "_new"); + multiply->set_friendly_name(""); + copy_runtime_info(parent.get_node_shared_ptr(), multiply); + parent = multiply->output(0); } std::vector> inputs = node->input_values(); @@ -556,7 +618,7 @@ std::shared_ptr NetworkHelper::separateInStandaloneBranch(std::sha const size_t inputIndex = NetworkHelper::getChildInputIndex(originalParent, node); inputs[inputIndex] = parent; const std::shared_ptr newNode = node->clone_with_new_inputs(inputs); - + copy_runtime_info(node, newNode); replace_node(node, newNode); newNode->set_friendly_name(node->get_friendly_name()); @@ -592,10 +654,49 @@ std::shared_ptr NetworkHelper::fuseConvert(const std::shar fakeQuantize->get_levels()); NetworkHelper::setOutDataPrecisionForTypeRelaxed(newFakeQuantize, node->get_output_element_type(0)); replace_node(node->shared_from_this(), newFakeQuantize); - newFakeQuantize->set_friendly_name(fakeQuantize->get_friendly_name()); + NetworkHelper::copyInfo(fakeQuantize, newFakeQuantize); + return newFakeQuantize; } +bool NetworkHelper::isPrecisionPreserved(const std::shared_ptr& node) { + auto& rt = node->get_rt_info(); + auto it = rt.find(ngraph::VariantWrapper::type_info.name); + if (it == rt.end()) { + return false; + } + auto attribute = std::dynamic_pointer_cast>(it->second); + assert(attribute != nullptr); + return attribute->get()->sharedValue->value; +} + +size_t NetworkHelper::calculateLevels( + const float dataPrecisionMin, + const float dataPrecisionMax, + const float combinedIntervalLow, + const float combinedIntervalHigh, + const float minIntervalLow, + const float minIntervalHigh, + float& dequantizationMul, + float& dequantizationSub, + float& updatedOutputLowValue, + float& updatedOutputHighValue) { + const float maxOutputInterval = combinedIntervalHigh - combinedIntervalLow; + // FQ -> SUB_quantization -> MUL_quantization -[INT8]-> SUB_dequantization -> MUL_dequantization -> + const float quantizationMul = (dataPrecisionMax - dataPrecisionMin) / maxOutputInterval; + dequantizationMul = maxOutputInterval / (dataPrecisionMax - dataPrecisionMin); + + // FQ outputLowValue = dataPrecision.min * dequantizationMul - quantizationSub + const float quantizationSub = combinedIntervalLow - dataPrecisionMin * dequantizationMul; + dequantizationSub = std::round(-quantizationSub * quantizationMul); + + updatedOutputLowValue = (minIntervalLow - quantizationSub) * quantizationMul; + updatedOutputHighValue = (minIntervalHigh - quantizationSub) * quantizationMul; + + const size_t levels = static_cast(fabs(roundf(updatedOutputHighValue) - roundf(updatedOutputLowValue)) + 1.0); + return levels; +} + std::shared_ptr NetworkHelper::foldFakeQuantize( const std::shared_ptr& fq, const bool roundValuesArg, @@ -772,7 +873,8 @@ std::shared_ptr NetworkHelper::composeFakeQuantize(const s newFakeQuantize->get_levels(), newFakeQuantize->get_auto_broadcast()); replace_node(dequantization.convert, replacement); - replacement->set_friendly_name(newFakeQuantize->get_friendly_name()); + 
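        // The replacement FakeQuantize no longer takes its friendly name directly;
        // copyInfo transfers both the name and the runtime attributes from the fused nodes.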
//replacement->set_friendly_name(newFakeQuantize->get_friendly_name()); + copyInfo({ fakeQuantize, dequantization.convert }, replacement); NetworkHelper::setOutDataPrecisionForTypeRelaxed(replacement, dequantization.convert->output(0).get_element_type()); newFakeQuantize = replacement; } @@ -791,7 +893,8 @@ std::shared_ptr NetworkHelper::composeFakeQuantize(const s newFakeQuantize->get_levels(), newFakeQuantize->get_auto_broadcast()); replace_node(dequantization.subtract, replacement); - replacement->set_friendly_name(newFakeQuantize->get_friendly_name()); + //replacement->set_friendly_name(newFakeQuantize->get_friendly_name()); + copyInfo({ newFakeQuantize, dequantization.subtract }, replacement); newFakeQuantize = replacement; } @@ -827,7 +930,8 @@ std::shared_ptr NetworkHelper::composeFakeQuantize(const s newFakeQuantize->get_auto_broadcast()); replace_node(dequantization.multiply, replacement); - replacement->set_friendly_name(newFakeQuantize->get_friendly_name()); + //replacement->set_friendly_name(newFakeQuantize->get_friendly_name()); + copyInfo({ newFakeQuantize, dequantization.multiply }, replacement); newFakeQuantize = replacement; } @@ -872,6 +976,12 @@ std::tuple, std::shared_ptr> NetworkHelper::decompos } } + if ((!updatePrecision) && + std::all_of(scales.begin(), scales.end(), [](const float value) { return value == 1.f; }) && + std::all_of(shifts.begin(), shifts.end(), [](const float value) { return value == 0.f; })) { + return std::make_tuple(nullptr, nullptr); + } + std::shared_ptr shift = hasZeroPoint ? std::make_shared(deqPrecision, outputLow.get_shape(), shifts) : nullptr; @@ -980,7 +1090,8 @@ std::shared_ptr NetworkHelper::updateFakeQuantize( std::shared_ptr fq, element::Type precision, float min, - float max) { + float max, + const bool replace) { auto newMin = std::make_shared(fq->get_output_element_type(0), Shape{}, min); auto newMax = std::make_shared(fq->get_output_element_type(0), Shape{}, max); @@ -994,7 +1105,9 @@ std::shared_ptr NetworkHelper::updateFakeQuantize( fq->get_auto_broadcast()); NetworkHelper::setOutDataPrecision(newFQ, precision); - replace_node(fq, newFQ); + if (replace) { + replace_node(fq, newFQ); + } newFQ->set_friendly_name(fq->get_friendly_name()); return newFQ; @@ -1006,9 +1119,12 @@ FakeQuantizeDequantization NetworkHelper::makeDequantization( const ngraph::element::Type originalPrecision, const ngraph::PartialShape dataNodeOutputShape, element::Type precision, - const ngraph::element::Type deqPrecision) { - // TODO: we create input here! we really need it here? - const std::shared_ptr input = std::make_shared(precision, dataNodeOutputShape); + const ngraph::element::Type deqPrecision, + std::shared_ptr input) { + if (input == nullptr) { + // TODO: we create input here! we really need it here? 
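        // A placeholder Parameter is created only when the caller did not pass an existing
        // node through the new optional 'input' argument; callers that already have the data
        // node can supply it and the dequantization ops are built on top of it instead.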
+ input = std::make_shared(precision, dataNodeOutputShape); + } std::shared_ptr parent = input; std::shared_ptr convert; @@ -1016,7 +1132,7 @@ FakeQuantizeDequantization NetworkHelper::makeDequantization( convert = nullptr; } else { convert = std::make_shared( - input, + parent, deqPrecision); parent = convert; } @@ -1212,11 +1328,20 @@ FakeQuantizeDequantization NetworkHelper::getDequantization(const std::shared_pt return FakeQuantizeDequantization(dataNode, convert, subtract, subtractConvert, subtractConstant, multiply, multiplyConstant); } -FakeQuantizeDequantization NetworkHelper::getDequantizationBelow(const std::shared_ptr& node) { +FakeQuantizeDequantization NetworkHelper::getDequantizationBelow(const std::shared_ptr& node, const bool convertIsMandatory) { const Output dataNode = node->output(0); - std::shared_ptr lastNode = dataNode.get_target_inputs().begin()->get_node()->shared_from_this(); + const auto& targetInputs = dataNode.get_target_inputs(); + if (targetInputs.size() == 0ul) { + return FakeQuantizeDequantization(); + } + + std::shared_ptr lastNode = targetInputs.begin()->get_node()->shared_from_this(); const std::shared_ptr convert = as_type_ptr(lastNode); + if (convertIsMandatory && (convert == nullptr)) { + return FakeQuantizeDequantization(); + } + if (convert != nullptr) { if ((convert->input(0).get_element_type() != element::i8) && (convert->input(0).get_element_type() != element::u8) && (convert->output(0).get_element_type() != element::f32)) { @@ -1466,11 +1591,13 @@ NetworkHelper::InsertDequantizationResult NetworkHelper::moveDequantizationAfter dequantization.subtractConstant->output(0).get_element_type(); } - parent = std::make_shared( - parent, - dequantization.subtractConstant->output(0).get_element_type() == parentPrecision ? - dequantization.subtractConstant : - foldConvert(dequantization.subtractConstant, parentPrecision)); + parent = std::make_shared>( + std::vector{element::f32, element::f32}, std::vector{ element::f32 }, + ngraph::op::TemporaryReplaceOutputType(parent, element::f32).get(), + ngraph::op::TemporaryReplaceOutputType( + dequantization.subtractConstant->output(0).get_element_type() == parentPrecision ? 
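                // The Subtract is rebuilt as a TypeRelaxed operation: both inputs are treated
                // as f32 for execution with an f32 declared output, and the subtract constant
                // is folded to the parent precision only when its element type differs.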
+ dequantization.subtractConstant : + foldConvert(dequantization.subtractConstant, parentPrecision), element::f32).get()); ngraph::copy_runtime_info({ newOperation, parent }, parent); } else { parent = std::make_shared(parent, dequantization.subtractConvert); @@ -1594,8 +1721,8 @@ bool NetworkHelper::checkZeroPoint(const std::shared_ptr& node, const Data } } const auto subtractValues = subtractConst->cast_vector(); - if (std::any_of(subtractValues.begin(), subtractValues.end(), [min, max] (const float& val) { - return (val < min) || (val > max); })) { + if (std::any_of(subtractValues.begin(), subtractValues.end(), [min, max](const float& val) { + return (val < min) || (val > max); })) { return false; } } else if (is_type(node)) { @@ -1605,12 +1732,12 @@ bool NetworkHelper::checkZeroPoint(const std::shared_ptr& node, const Data min = dataPrecision.min - 0.5f; max = dataPrecision.max + 0.5f; const auto quantizationDetails = QuantizationDetails::getDetails(as_type_ptr(node)); - for (size_t i = 0; i < quantizationDetails.outputIntervalsCount; ++i) { + for (size_t i = 0; i < quantizationDetails.outputLowValues.size(); ++i) { float shift; if (quantizationDetails.outputHighValues[i] != quantizationDetails.outputLowValues[i]) { shift = (dataPrecision.min * quantizationDetails.outputHighValues[i] - - dataPrecision.max * quantizationDetails.outputLowValues[i]) / - (quantizationDetails.outputHighValues[i] - quantizationDetails.outputLowValues[i]); + dataPrecision.max * quantizationDetails.outputLowValues[i]) / + (quantizationDetails.outputHighValues[i] - quantizationDetails.outputLowValues[i]); } else { shift = 0.f; } @@ -1619,6 +1746,7 @@ bool NetworkHelper::checkZeroPoint(const std::shared_ptr& node, const Data } } } + return true; } @@ -1705,6 +1833,23 @@ bool NetworkHelper::isDQByDynamicDimension(const std::shared_ptr& layer, s return false; } -} // namespace low_precision -} // namespace pass -} // namespace ngraph +bool isDisabled(const std::shared_ptr& node) { + for (const auto& input : node->inputs()) { + auto precisionAttribute = getAttribute>(input); + if (precisionAttribute == nullptr) { + continue; + } + + assert(precisionAttribute->get() != nullptr); + assert(precisionAttribute->get()->sharedValue != nullptr); + + const auto& precisionRestrictions = precisionAttribute->get()->sharedValue->precisions; + if (precisionRestrictions.empty()) { + return true; + } + } + return false; +} +} // namespace low_precision +} // namespace pass +} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/src/normalize_l2.cpp b/inference-engine/src/low_precision_transformations/src/normalize_l2.cpp index 474602166751af..0ec9876e309a7d 100644 --- a/inference-engine/src/low_precision_transformations/src/normalize_l2.cpp +++ b/inference-engine/src/low_precision_transformations/src/normalize_l2.cpp @@ -9,6 +9,8 @@ #include #include +#include + #include "ngraph/type/element_type.hpp" #include "ngraph/type/element_type_traits.hpp" #include "low_precision/network_helper.hpp" @@ -18,6 +20,8 @@ using namespace ngraph; using namespace ngraph::pass; using namespace ngraph::pass::low_precision; +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::NormalizeL2Transformation, "NormalizeL2Transformation", 0); + namespace normalize_l2 { template @@ -35,6 +39,21 @@ std::shared_ptr createNewScalesConst(const ngraph::op::Con } // namespace normalize_l2 +NormalizeL2Transformation::NormalizeL2Transformation(const Params& params) : LayerTransformation(params) { + auto matcher = pattern::wrap_type({ 
pattern::wrap_type(), pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "NormalizeL2Transformation"); + this->register_matcher(m, callback); +} + bool NormalizeL2Transformation::canBeTransformed(const TransformationContext& context, std::shared_ptr operation) const { if (!LayerTransformation::canBeTransformed(context, operation)) { return false; @@ -79,17 +98,7 @@ bool NormalizeL2Transformation::canBeTransformed(const TransformationContext& co return true; } -void NormalizeL2Transformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const { - addPattern( - pass, - context, - make_op_pattern({ - make_op_label(), - make_op_label() - })); -} - -bool NormalizeL2Transformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) const { +bool NormalizeL2Transformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) { std::shared_ptr operation = m.get_match_root(); if (!canBeTransformed(context, operation)) { return false; diff --git a/inference-engine/src/low_precision_transformations/src/prelu.cpp b/inference-engine/src/low_precision_transformations/src/prelu.cpp index 797d2d1dbfb389..17827ef9f712c7 100644 --- a/inference-engine/src/low_precision_transformations/src/prelu.cpp +++ b/inference-engine/src/low_precision_transformations/src/prelu.cpp @@ -8,6 +8,8 @@ #include #include +#include + #include "low_precision/common/ie_lpt_exception.hpp" #include "low_precision/network_helper.hpp" @@ -15,14 +17,24 @@ namespace ngraph { namespace pass { namespace low_precision { -void PReluTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::PReluTransformation, "PReluTransformation", 0); + +PReluTransformation::PReluTransformation(const Params& params) : LayerTransformation(params) { + auto matcher = pattern::wrap_type({ pattern::wrap_type(), pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "PReluTransformation"); + this->register_matcher(m, callback); } -bool PReluTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool PReluTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { std::shared_ptr prelu = m.get_match_root(); if (!canBeTransformed(context, prelu)) { return false; diff --git a/inference-engine/src/low_precision_transformations/src/propagate_precisions.cpp b/inference-engine/src/low_precision_transformations/src/propagate_precisions.cpp new file mode 100644 index 00000000000000..4b15dd7e7b922f --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/propagate_precisions.cpp @@ -0,0 +1,29 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/propagate_precisions.hpp" + +#include + +#include +#include +#include "low_precision/rt_info/precisions_attribute.hpp" +#include "low_precision/propagate_through_precision_preserved.hpp" +#include 
"low_precision/propagate_to_input.hpp" + +using namespace ngraph; +using namespace ngraph::pass::low_precision; + +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::PropagatePrecisions, "PropagatePrecisions", 0); + +bool ngraph::pass::low_precision::PropagatePrecisions::run_on_function(std::shared_ptr f) { + ngraph::pass::Manager manager; + manager.set_per_pass_validation(false); + std::shared_ptr precisionsPropagation = manager.register_pass(); + precisionsPropagation->add_matcher>(AttributeSource::OutputPort); + precisionsPropagation->add_matcher>(); + precisionsPropagation->add_matcher>(); + manager.run_passes(f); + return false; +} diff --git a/inference-engine/src/low_precision_transformations/src/quantization_details.cpp b/inference-engine/src/low_precision_transformations/src/quantization_details.cpp index ed8ef754102384..ca97aae0dc3e2c 100644 --- a/inference-engine/src/low_precision_transformations/src/quantization_details.cpp +++ b/inference-engine/src/low_precision_transformations/src/quantization_details.cpp @@ -15,6 +15,8 @@ #include #include +#include "low_precision/lpt_itt.hpp" + #include #include @@ -27,130 +29,80 @@ QuantizationDetails::QuantizationDetails() inputLowValues({}), inputHighValues({}), outputLowValues({}), - outputHighValues({}), - inputIntervalsCount(0), - outputIntervalsCount(0), - outputChannelsCount(0) {} + outputHighValues({}) {} QuantizationDetails::QuantizationDetails(const QuantizationDetails& quantizationDetails) : levels(quantizationDetails.levels), inputLowValues(quantizationDetails.inputLowValues), inputHighValues(quantizationDetails.inputHighValues), outputLowValues(quantizationDetails.outputLowValues), - outputHighValues(quantizationDetails.outputHighValues), - inputIntervalsCount(quantizationDetails.inputIntervalsCount), - outputIntervalsCount(quantizationDetails.outputIntervalsCount), - outputChannelsCount(quantizationDetails.outputChannelsCount) {} + outputHighValues(quantizationDetails.outputHighValues) {} QuantizationDetails::QuantizationDetails(const size_t levels, const std::vector& inputLowValues, const std::vector& inputHighValues, const std::vector& outputLowValues, - const std::vector& outputHighValues, const size_t inputIntervalsCount, - const size_t outputIntervalsCount, const size_t outputChannelsCount) + const std::vector& outputHighValues) : levels(levels), inputLowValues(inputLowValues), inputHighValues(inputHighValues), outputLowValues(outputLowValues), - outputHighValues(outputHighValues), - inputIntervalsCount(inputIntervalsCount), - outputIntervalsCount(outputIntervalsCount), - outputChannelsCount(outputChannelsCount) {} + outputHighValues(outputHighValues) {} bool QuantizationDetails::outputLayoutIsSupported(std::shared_ptr quantize) { - if (!is_type(quantize->get_input_node_ptr(1)) || - !is_type(quantize->get_input_node_ptr(2)) || - !is_type(quantize->get_input_node_ptr(3)) || - !is_type(quantize->get_input_node_ptr(4))) { - return false; - } - - const size_t inputLowValuesSize = as_type_ptr(quantize->get_input_node_shared_ptr(1))->cast_vector().size(); - const size_t inputHighValuesSize = as_type_ptr(quantize->get_input_node_shared_ptr(2))->cast_vector().size(); - if (inputLowValuesSize != inputHighValuesSize) { - return false; - } - - const size_t outputLowValuesSize = as_type_ptr(quantize->get_input_node_shared_ptr(3))->cast_vector().size(); - const size_t outputHighValuesSize = as_type_ptr(quantize->get_input_node_shared_ptr(4))->cast_vector().size(); - if (outputLowValuesSize != outputHighValuesSize) { - return false; 
- } - - return true; + return is_type(quantize->get_input_node_ptr(1)) && + is_type(quantize->get_input_node_ptr(2)) && + is_type(quantize->get_input_node_ptr(3)) && + is_type(quantize->get_input_node_ptr(4)); } void QuantizationDetails::getInputIntervals( std::shared_ptr quantize, std::vector& inputLowValues, - std::vector& inputHighValues, - size_t& inputIntervalsCount) { + std::vector& inputHighValues) { std::shared_ptr inputLowLayer = as_type_ptr(quantize->get_input_node_shared_ptr(1)); - validate(inputLowLayer); const std::vector& inputLowBlobValues = getBlobValue(inputLowLayer); inputLowValues.insert(inputLowValues.end(), inputLowBlobValues.begin(), inputLowBlobValues.end()); std::shared_ptr inputHighLayer = as_type_ptr(quantize->get_input_node_shared_ptr(2)); - validate(inputHighLayer); const std::vector inputHighBlobValues = getBlobValue(inputHighLayer); inputHighValues.insert(inputHighValues.end(), inputHighBlobValues.begin(), inputHighBlobValues.end()); if (inputLowValues.size() != inputHighValues.size()) { THROW_IE_LPT_EXCEPTION(*quantize) << "Quantize input values sizes are not equal for layer " << quantize->get_friendly_name(); } - - inputIntervalsCount = inputLowValues.size(); } void QuantizationDetails::getOutputIntervals( std::shared_ptr quantize, std::vector& outputLowValues, - std::vector& outputHighValues, - size_t& outputIntervalsCount) { + std::vector& outputHighValues) { std::shared_ptr outputLowLayer = as_type_ptr(quantize->get_input_node_shared_ptr(3)); - validate(outputLowLayer); const std::vector& outputLowBlobValues = getBlobValue(outputLowLayer); outputLowValues.insert(outputLowValues.end(), outputLowBlobValues.begin(), outputLowBlobValues.end()); std::shared_ptr outputHighLayer = as_type_ptr(quantize->get_input_node_shared_ptr(4)); - validate(outputHighLayer); const std::vector outputHighBlobValues = getBlobValue(outputHighLayer); outputHighValues.insert(outputHighValues.end(), outputHighBlobValues.begin(), outputHighBlobValues.end()); if (outputLowValues.size() != outputHighValues.size()) { THROW_IE_LPT_EXCEPTION(*quantize) << "Quantize output values sizes are not equal for layer " << quantize->get_friendly_name(); } - - outputIntervalsCount = outputLowValues.size(); } - QuantizationDetails QuantizationDetails::getDetails(std::shared_ptr quantize) { - std::vector inputLowValues; - std::vector inputHighValues; - size_t inputIntervalsCount; - getInputIntervals(quantize, inputLowValues, inputHighValues, inputIntervalsCount); - - std::vector outputLowValues; - std::vector outputHighValues; - size_t outputIntervalsCount; - getOutputIntervals(quantize, outputLowValues, outputHighValues, outputIntervalsCount); - - const size_t outputChannelsCount = outputLowValues.size() == 1ul ? 
1ul : - NetworkHelper::getOutputChannelsCount(quantize, NetworkHelper::isConstantPath(quantize)); - if (!outputLayoutIsSupported(quantize)) { - THROW_IE_LPT_EXCEPTION(*quantize) << "Expected output channels count " << outputIntervalsCount << " but found " << outputChannelsCount; - } + const std::vector inputLowValues = as_type_ptr(quantize->get_input_node_shared_ptr(1))->cast_vector(); + const std::vector inputHighValues = as_type_ptr(quantize->get_input_node_shared_ptr(2))->cast_vector(); + + const std::vector outputLowValues = as_type_ptr(quantize->get_input_node_shared_ptr(3))->cast_vector(); + const std::vector outputHighValues = as_type_ptr(quantize->get_input_node_shared_ptr(4))->cast_vector(); return QuantizationDetails( - quantize->get_levels(), - inputLowValues, - inputHighValues, - outputLowValues, - outputHighValues, - inputIntervalsCount, - outputIntervalsCount, - outputChannelsCount); + quantize->get_levels(), + inputLowValues, + inputHighValues, + outputLowValues, + outputHighValues); } bool QuantizationDetails::hasNegativeOutput() const { @@ -181,63 +133,20 @@ float QuantizationDetails::maxInput(const size_t channel) const { return value; } -float QuantizationDetails::maxOutputHigh() const { - float output = getOutputHighValue(0); - for (size_t channel = 1; channel < outputIntervalsCount; ++channel) { - if (output < getOutputHighValue(channel)) { - output = getOutputHighValue(channel); - } - } - return output; -} - -float QuantizationDetails::minOutputLow() const { - float output = getOutputLowValue(0); - for (size_t channel = 1; channel < outputIntervalsCount; ++channel) { - if (output > getOutputLowValue(channel)) { - output = getOutputLowValue(channel); - } - } - return output; -} - -float QuantizationDetails::getInputLowValue(const size_t channel) const { - if ((inputIntervalsCount != 1) && (channel >= inputIntervalsCount)) { - THROW_TRANSFORMATION_EXCEPTION << "channel " << channel << " is out of bound, input channels count " << inputIntervalsCount; - } - const float value = inputLowValues.size() == 1 ? inputLowValues[0] : inputLowValues[channel]; - return value; -} - -float QuantizationDetails::getInputHighValue(const size_t channel) const { - if ((inputIntervalsCount != 1) && (channel >= inputIntervalsCount)) { - THROW_TRANSFORMATION_EXCEPTION << "channel " << channel << " is out of bound, input channels count " << inputIntervalsCount; - } - const float value = inputHighValues.size() == 1 ? inputHighValues[0] : inputHighValues[channel]; - return value; +float QuantizationDetails::getInputLowValue(const size_t index) const { + return inputLowValues.size() == 1ul ? inputLowValues[0] : inputLowValues[index]; } -float QuantizationDetails::getOutputLowValue(const size_t channel) const { - if ((outputIntervalsCount != 1) && (channel >= outputIntervalsCount)) { - THROW_TRANSFORMATION_EXCEPTION << "channel " << channel << " is out of bound, output channels count " - << outputIntervalsCount; - } - const float value = outputLowValues.size() == 1 ? outputLowValues[0] : outputLowValues[channel]; - return value; +float QuantizationDetails::getInputHighValue(const size_t index) const { + return inputHighValues.size() == 1ul ? 
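// For all four accessors a single-element vector is treated as a per-tensor value and
// returned for every index; otherwise the value stored at the requested index is returned,
// with no bounds checking now that the interval-count fields have been dropped.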
inputHighValues[0] : inputHighValues[index]; } -float QuantizationDetails::getOutputHighValue(const size_t channel) const { - if ((outputIntervalsCount != 1) && (channel >= outputIntervalsCount)) { - THROW_TRANSFORMATION_EXCEPTION << "channel " << channel << " is out of bound, output channels count " - << outputIntervalsCount; - } - const float value = outputHighValues.size() == 1 ? outputHighValues[0] : outputHighValues[channel]; - return value; +float QuantizationDetails::getOutputLowValue(const size_t index) const { + return outputLowValues.size() == 1ul ? outputLowValues[0] : outputLowValues[index]; } -void QuantizationDetails::validate(std::shared_ptr constantLayer) { - // nothing to validate - // TODO: remove? +float QuantizationDetails::getOutputHighValue(const size_t index) const { + return outputHighValues.size() == 1ul ? outputHighValues[0] : outputHighValues[index]; } std::vector QuantizationDetails::getBlobValue(std::shared_ptr constantLayer) { diff --git a/inference-engine/src/low_precision_transformations/src/reduce_base_transformation.cpp b/inference-engine/src/low_precision_transformations/src/reduce_base_transformation.cpp index d79be9f6e5416f..e178d94b98a090 100644 --- a/inference-engine/src/low_precision_transformations/src/reduce_base_transformation.cpp +++ b/inference-engine/src/low_precision_transformations/src/reduce_base_transformation.cpp @@ -13,7 +13,7 @@ namespace low_precision { ReduceBaseTransformation::ReduceBaseTransformation(const Params& params) : LayerTransformation(params) {} -bool ReduceBaseTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher& m) const { +bool ReduceBaseTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher& m) { if (!canBeTransformed(context, m.get_match_root())) { return false; } diff --git a/inference-engine/src/low_precision_transformations/src/reduce_max.cpp b/inference-engine/src/low_precision_transformations/src/reduce_max.cpp index e5c039d9fc2869..29e230314e72d9 100644 --- a/inference-engine/src/low_precision_transformations/src/reduce_max.cpp +++ b/inference-engine/src/low_precision_transformations/src/reduce_max.cpp @@ -5,18 +5,29 @@ #include "low_precision/reduce_max.hpp" #include #include +#include + #include "low_precision/network_helper.hpp" namespace ngraph { namespace pass { namespace low_precision { -ReduceMaxTransformation::ReduceMaxTransformation(const Params& params) : ReduceBaseTransformation(params) {} +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ReduceMaxTransformation, "ReduceMaxTransformation", 0); + +ReduceMaxTransformation::ReduceMaxTransformation(const Params& params) : ReduceBaseTransformation(params) { + auto matcher = pattern::wrap_type({ pattern::wrap_type(), pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; -void ReduceMaxTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const { - addPattern(pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); + auto m = std::make_shared(matcher, "ReduceMaxTransformation"); + this->register_matcher(m, callback); } bool ReduceMaxTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr reduce) const { diff --git a/inference-engine/src/low_precision_transformations/src/reduce_mean.cpp 
b/inference-engine/src/low_precision_transformations/src/reduce_mean.cpp index deb5b5237d1170..c91abbeb1ccc9e 100644 --- a/inference-engine/src/low_precision_transformations/src/reduce_mean.cpp +++ b/inference-engine/src/low_precision_transformations/src/reduce_mean.cpp @@ -5,18 +5,29 @@ #include "low_precision/reduce_mean.hpp" #include #include +#include + #include "low_precision/network_helper.hpp" namespace ngraph { namespace pass { namespace low_precision { -ReduceMeanTransformation::ReduceMeanTransformation(const Params& params) : ReduceBaseTransformation(params) {} +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ReduceMeanTransformation, "ReduceMeanTransformation", 0); + +ReduceMeanTransformation::ReduceMeanTransformation(const Params& params) : ReduceBaseTransformation(params) { + auto matcher = pattern::wrap_type({ pattern::wrap_type(), pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; -void ReduceMeanTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const { - addPattern(pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); + auto m = std::make_shared(matcher, "ReduceMeanTransformation"); + this->register_matcher(m, callback); } bool ReduceMeanTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr reduce) const { diff --git a/inference-engine/src/low_precision_transformations/src/reduce_min.cpp b/inference-engine/src/low_precision_transformations/src/reduce_min.cpp index 8e8d7ef031498d..1d0e9da5accddc 100644 --- a/inference-engine/src/low_precision_transformations/src/reduce_min.cpp +++ b/inference-engine/src/low_precision_transformations/src/reduce_min.cpp @@ -5,18 +5,29 @@ #include "low_precision/reduce_min.hpp" #include #include +#include + #include "low_precision/network_helper.hpp" namespace ngraph { namespace pass { namespace low_precision { -ReduceMinTransformation::ReduceMinTransformation(const Params& params) : ReduceBaseTransformation(params) {} +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ReduceMinTransformation, "ReduceMinTransformation", 0); + +ReduceMinTransformation::ReduceMinTransformation(const Params& params) : ReduceBaseTransformation(params) { + auto matcher = pattern::wrap_type({ pattern::wrap_type(), pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; -void ReduceMinTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const { - addPattern(pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); + auto m = std::make_shared(matcher, "ReduceMinTransformation"); + this->register_matcher(m, callback); } bool ReduceMinTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr reduce) const { diff --git a/inference-engine/src/low_precision_transformations/src/reduce_sum.cpp b/inference-engine/src/low_precision_transformations/src/reduce_sum.cpp index 5ad65d782186f4..7ffcb435bd0895 100644 --- a/inference-engine/src/low_precision_transformations/src/reduce_sum.cpp +++ b/inference-engine/src/low_precision_transformations/src/reduce_sum.cpp @@ -5,18 +5,29 @@ #include "low_precision/reduce_sum.hpp" #include #include +#include + #include 
"low_precision/network_helper.hpp" namespace ngraph { namespace pass { namespace low_precision { -ReduceSumTransformation::ReduceSumTransformation(const Params& params) : ReduceBaseTransformation(params) {} +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ReduceSumTransformation, "ReduceSumTransformation", 0); + +ReduceSumTransformation::ReduceSumTransformation(const Params& params) : ReduceBaseTransformation(params) { + auto matcher = pattern::wrap_type({ pattern::wrap_type(), pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; -void ReduceSumTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const { - addPattern(pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); + auto m = std::make_shared(matcher, "ReduceSumTransformation"); + this->register_matcher(m, callback); } bool ReduceSumTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr reduce) const { diff --git a/inference-engine/src/low_precision_transformations/src/relu.cpp b/inference-engine/src/low_precision_transformations/src/relu.cpp index 0a0b79bebad517..0c9f43c37e9487 100644 --- a/inference-engine/src/low_precision_transformations/src/relu.cpp +++ b/inference-engine/src/low_precision_transformations/src/relu.cpp @@ -8,6 +8,8 @@ #include #include +#include + #include "low_precision/common/ie_lpt_exception.hpp" #include "low_precision/network_helper.hpp" @@ -15,14 +17,24 @@ namespace ngraph { namespace pass { namespace low_precision { -void ReluTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addPattern( - pass, - context, - make_op_pattern({ make_op_label()})); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ReluTransformation, "ReluTransformation", 0); + +ReluTransformation::ReluTransformation(const Params& params) : LayerTransformation(params) { + auto matcher = pattern::wrap_type({ pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "ReluTransformation"); + this->register_matcher(m, callback); } -bool ReluTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool ReluTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { std::shared_ptr relu = m.get_match_root(); if (!canBeTransformed(context, relu)) { return false; diff --git a/inference-engine/src/low_precision_transformations/src/reshape.cpp b/inference-engine/src/low_precision_transformations/src/reshape.cpp index db751f58f2fb78..f478928537ee47 100644 --- a/inference-engine/src/low_precision_transformations/src/reshape.cpp +++ b/inference-engine/src/low_precision_transformations/src/reshape.cpp @@ -11,6 +11,8 @@ #include #include +#include + #include "low_precision/common/ie_lpt_exception.hpp" #include "low_precision/network_helper.hpp" @@ -18,11 +20,21 @@ namespace ngraph { namespace pass { namespace low_precision { -void ReshapeTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); 
+NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ReshapeTransformation, "ReshapeTransformation", 0); + +ReshapeTransformation::ReshapeTransformation(const Params& params) : LayerTransformation(params) { + auto matcher = pattern::wrap_type({ pattern::wrap_type(), pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "ReshapeTransformation"); + this->register_matcher(m, callback); } void reshapeDequantizationConstant(const std::shared_ptr& reshape) { @@ -154,7 +166,7 @@ void reshapeDequantizationConstant(const std::shared_ptr& resha } } -bool ReshapeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool ReshapeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { std::shared_ptr reshape = as_type_ptr(m.get_match_root()); if (NetworkHelper::isConstantPath(reshape)) { return false; @@ -204,6 +216,12 @@ bool ReshapeTransformation::canBeTransformed(const TransformationContext& contex return false; } + // TODO: LPT: to support current flow: #58269 + //if (((dequantization.subtractConstant != nullptr) && NetworkHelper::isScalarLike(dequantization.subtractConstant)) || + // ((dequantization.multiplyConstant != nullptr) && NetworkHelper::isScalarLike(dequantization.multiplyConstant))) { + // return true; + //} + const Shape subtractShape = dequantization.subtract == nullptr ? Shape{} : dequantization.subtractConstant->get_shape(); Shape subtractShapeWithBatch = subtractShape; const PartialShape inputPShape = op->get_input_partial_shape(0); diff --git a/inference-engine/src/low_precision_transformations/src/rt_info/avg_pool_precision_preserved_attribute.cpp b/inference-engine/src/low_precision_transformations/src/rt_info/avg_pool_precision_preserved_attribute.cpp new file mode 100644 index 00000000000000..3bafe518a91b01 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/rt_info/avg_pool_precision_preserved_attribute.cpp @@ -0,0 +1,27 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/rt_info/avg_pool_precision_preserved_attribute.hpp" + +#include +#include +#include + +using namespace ngraph; + +template class ngraph::VariantImpl; + +constexpr VariantTypeInfo VariantWrapper::type_info; + +void VariantWrapper::merge( + std::vector>>>& attributes) { +} + +std::string VariantWrapper::to_string() { + auto value = this->m_value; + std::stringstream ss; + ss << m_value->get_string(); + ss << "value: " << (value->sharedValue->value ? 
"true" : "false"); + return ss.str(); +} diff --git a/inference-engine/src/low_precision_transformations/src/rt_info/intervals_alignment_attribute.cpp b/inference-engine/src/low_precision_transformations/src/rt_info/intervals_alignment_attribute.cpp new file mode 100644 index 00000000000000..cb786a8af36e05 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/rt_info/intervals_alignment_attribute.cpp @@ -0,0 +1,216 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/rt_info/intervals_alignment_attribute.hpp" + +#include +#include +#include + +#include "low_precision/lpt_itt.hpp" +#include "low_precision/network_helper.hpp" + +using namespace ngraph; +using namespace ngraph::pass::low_precision; + +IntervalsAlignmentAttribute::IntervalsAlignmentAttribute( + const IntervalsAlignmentSharedValue::Interval combinedInterval, + size_t levels) : levels(levels) { + sharedValue = std::make_shared(combinedInterval, combinedInterval, levels); +} + +IntervalsAlignmentAttribute::IntervalsAlignmentAttribute( + const IntervalsAlignmentSharedValue::Interval combinedInterval, + const size_t levels, + const IntervalsAlignmentSharedValue::Interval minInterval, + const size_t minLevels) : levels(levels) { + sharedValue = std::make_shared(combinedInterval, minInterval, minLevels); +} + +template class ngraph::VariantImpl; + +constexpr VariantTypeInfo VariantWrapper::type_info; + +std::shared_ptr>> VariantWrapper::create( + const std::shared_ptr& node, + const AttributeParameters& params) { + if (!is_type(node)) { + return nullptr; + } + + auto fakeQuantize = as_type_ptr(node); + if (!QuantizationDetails::outputLayoutIsSupported(fakeQuantize) || !QuantizationDetails::isSupportedLevel(fakeQuantize->get_levels())) { + return nullptr; + } + + float lowInterval; + float highInterval; + { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "calculateIntervals"); + + FakeQuantizeDequantization dequantization; + { + const auto targetInputs = node->output(0).get_target_inputs(); + if (targetInputs.size() == 1ul) { + dequantization = NetworkHelper::getDequantizationBelow(node, true); + } + } + + const auto outLow = as_type_ptr(node->get_input_node_shared_ptr(3)); + const auto outHigh = as_type_ptr(node->get_input_node_shared_ptr(4)); + if (!NetworkHelper::isScalarLike(outLow) || !NetworkHelper::isScalarLike(outHigh)) { + return nullptr; + } + + if (dequantization.empty()) { + const std::vector lowIntervals = outLow->cast_vector(); + lowInterval = *std::min_element(lowIntervals.begin(), lowIntervals.end()); + + const std::vector highIntervals = outHigh->cast_vector(); + highInterval = *std::max_element(highIntervals.begin(), highIntervals.end()); + } else { + { + auto multiplyResult = dequantization.multiplyConstant == nullptr ? + node->get_input_node_ptr(3)->shared_from_this() : + fold( + foldConvert(node->get_input_node_ptr(3)->shared_from_this(), params.deqPrecision), + dequantization.multiplyConstant); + + auto multiplyResultConstant = as_type_ptr(multiplyResult); + auto intervals = multiplyResultConstant->cast_vector(); + lowInterval = *std::min_element(intervals.begin(), intervals.end()); + } + + { + auto multiplyResult = dequantization.multiplyConstant == nullptr ? 
+ node->get_input_node_ptr(4)->shared_from_this() : + fold( + foldConvert(node->get_input_node_ptr(4)->shared_from_this(), params.deqPrecision), + dequantization.multiplyConstant); + + auto multiplyResultConstant = as_type_ptr(multiplyResult); + auto intervals = multiplyResultConstant->cast_vector(); + highInterval = *std::max_element(intervals.begin(), intervals.end()); + } + } + + if (std::isinf(lowInterval) || std::isinf(highInterval)) { + return nullptr; + } + } + + { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "create"); + + assert(!std::isinf(lowInterval)); + assert(!std::isinf(highInterval)); + + auto& rtInfo = node->get_rt_info(); + const IntervalsAlignmentSharedValue::Interval interval{ lowInterval, highInterval }; + const auto attribute = std::make_shared<::ngraph::VariantWrapper>( + ngraph::pass::low_precision::make_shared_attribute( + interval, + fakeQuantize->get_levels())); + rtInfo[ngraph::VariantWrapper::type_info.name] = attribute; + + const std::vector outputLowValues = as_type_ptr(fakeQuantize->get_input_node_shared_ptr(3))->cast_vector(); + const std::vector outputHighValues = as_type_ptr(fakeQuantize->get_input_node_shared_ptr(4))->cast_vector(); + LayerTransformation::PrecisionDetails preferablePrecision = LayerTransformation::getPrecisionDetails( + fakeQuantize->get_levels(), + outputLowValues, + outputHighValues); + + if (preferablePrecision.precision != element::undefined) { + attribute->get()->sharedValue->preferablePrecisions.insert(preferablePrecision.precision); + } + +#ifdef LPT_DEBUG + attribute->get()->sharedValue->minLevelsOperation = node->get_friendly_name(); +#endif + + return attribute; + } +} + +void VariantWrapper::merge( + std::vector>>>& attributes) { + std::shared_ptr resultAttribute = get(); + for (const auto& attributeWrapper : attributes) { + auto attribute = attributeWrapper->get(); + + // TODO: LPT: copy/past: merge() + const auto& resultSharedValue = resultAttribute->sharedValue; + const auto& sharedValue = attribute->sharedValue; + if (resultAttribute->levels != attribute->levels) { + // TODO: LPT: not supported right now + resultAttribute->levels = 0ul; + resultSharedValue->minLevels = 0ul; + } + + if (resultSharedValue->combinedInterval.low > sharedValue->combinedInterval.low) { + resultSharedValue->combinedInterval.low = sharedValue->combinedInterval.low; + } + + if (resultSharedValue->combinedInterval.high < sharedValue->combinedInterval.high) { + resultSharedValue->combinedInterval.high = sharedValue->combinedInterval.high; + } + + assert(!std::isinf(resultSharedValue->combinedInterval.low)); + assert(!std::isinf(resultSharedValue->combinedInterval.high)); + + resultSharedValue->preferablePrecisions.insert(sharedValue->preferablePrecisions.begin(), sharedValue->preferablePrecisions.end()); + + const auto resultSize = std::abs(resultSharedValue->minInterval.high - resultSharedValue->minInterval.low); + const auto size = std::abs(sharedValue->minInterval.high - sharedValue->minInterval.low); + if (resultSize > size) { + resultSharedValue->minInterval = sharedValue->minInterval; + + float dequantizationMul; + float dequantizationSub; + float updatedOutputLowValue; + float updatedOutputHighValue; + + const size_t minLevels = NetworkHelper::calculateLevels( + 0.f, + DataPrecision::getMaxValue(resultAttribute->levels), + resultSharedValue->combinedInterval.low, + resultSharedValue->combinedInterval.high, + resultSharedValue->minInterval.low, + resultSharedValue->minInterval.high, + dequantizationMul, + dequantizationSub, + 
updatedOutputLowValue, + updatedOutputHighValue); + + resultSharedValue->minLevels = minLevels; + +#ifdef LPT_DEBUG + resultSharedValue->minLevelsOperation = sharedValue->minLevelsOperation; +#endif + } + } +} + +std::string VariantWrapper::to_string() { + std::stringstream preferablePrecisions; + preferablePrecisions << "{"; + size_t index = 0; + for (const auto& precision : m_value->sharedValue->preferablePrecisions) { + preferablePrecisions << (index > 0 ? ", " : "") << precision; + ++index; + } + preferablePrecisions << "}"; + + std::stringstream ss; + ss << m_value->get_string(); + ss << "levels: " + std::to_string(m_value->levels) << ", " << + "combined: { " << m_value->sharedValue->combinedInterval.low << ", " << m_value->sharedValue->combinedInterval.high << " }, " << + "min: { " << m_value->sharedValue->minInterval.low << ", " << m_value->sharedValue->minInterval.high << " }, " + "minLevels: " << m_value->sharedValue->minLevels << +#ifdef LPT_DEBUG + ", minLevelsOperation: " << m_value->sharedValue->minLevelsOperation << +#endif + ", preferablePrecisions: " << preferablePrecisions.str(); + return ss.str(); +} diff --git a/inference-engine/src/low_precision_transformations/src/rt_info/per_tensor_quantization_attribute.cpp b/inference-engine/src/low_precision_transformations/src/rt_info/per_tensor_quantization_attribute.cpp new file mode 100644 index 00000000000000..fe418173f2c524 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/rt_info/per_tensor_quantization_attribute.cpp @@ -0,0 +1,10 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/rt_info/per_tensor_quantization_attribute.hpp" + +using namespace ngraph; + +template class ngraph::VariantImpl; +constexpr VariantTypeInfo VariantWrapper::type_info; \ No newline at end of file diff --git a/inference-engine/src/low_precision_transformations/src/rt_info/precision_preserved_attribute.cpp b/inference-engine/src/low_precision_transformations/src/rt_info/precision_preserved_attribute.cpp new file mode 100644 index 00000000000000..8e8a9b0b62f04e --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/rt_info/precision_preserved_attribute.cpp @@ -0,0 +1,26 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/rt_info/precision_preserved_attribute.hpp" + +#include +#include + +using namespace ngraph; + +PrecisionPreservedAttribute::PrecisionPreservedAttribute(const bool value) { + sharedValue->value = value; +} + +template class ngraph::VariantImpl; + +constexpr VariantTypeInfo VariantWrapper::type_info; + +std::string VariantWrapper::to_string() { + auto& value = this->m_value; + std::stringstream ss; + ss << m_value->get_string(); + ss << "value: " << (value->sharedValue->value ? 
"true" : "false"); + return ss.str(); +} diff --git a/inference-engine/src/low_precision_transformations/src/rt_info/precisions_attribute.cpp b/inference-engine/src/low_precision_transformations/src/rt_info/precisions_attribute.cpp new file mode 100644 index 00000000000000..c69fc1d9b690d2 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/rt_info/precisions_attribute.cpp @@ -0,0 +1,80 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/rt_info/precisions_attribute.hpp" + +#include +#include +#include +#include +#include + +#include +#include "low_precision/network_helper.hpp" + +using namespace ngraph; + +// order defines default precision +const std::vector PrecisionsAttribute::defaultPrecisions = { ngraph::element::u8, ngraph::element::i8 }; + +PrecisionsAttribute::PrecisionsAttribute(const std::vector& precisions) { + sharedValue->precisions = precisions; +} + +template class ngraph::VariantImpl>; + +constexpr VariantTypeInfo VariantWrapper>::type_info; + +std::shared_ptr>> VariantWrapper>::create( + const std::shared_ptr& node, + const AttributeParameters& params) { + auto attribute = ngraph::pass::low_precision::make_shared_attribute(); + auto wrapper = std::make_shared>>(attribute); + + auto& rt = is_type(node) ? node->output(0).get_rt_info() : node->get_rt_info(); + rt[ngraph::VariantWrapper>::type_info.name] = wrapper; + return wrapper; +} + +void VariantWrapper>::merge( + std::vector>>>& attributes) { + auto& my = this->get()->sharedValue->precisions; + for (auto attribute : attributes) { + const auto& attributeValues = attribute->get()->sharedValue->precisions; + auto it = my.begin(); + while (it != my.end()) { + if (std::find(attributeValues.begin(), attributeValues.end(), *it) == attributeValues.end()) { + it = my.erase(it); + } else { + it++; + } + } + if (my.size() == 0ul) { + break; + } + } +} + +std::shared_ptr VariantWrapper>::init(const std::shared_ptr& node) { + return nullptr; +} + +std::string VariantWrapper>::to_string() { + std::stringstream ss; + + ss << m_value->get_string(); + + bool firstPrecision = true; + ss << "precisions: {"; + for (const auto& value : m_value->sharedValue->precisions) { + if (!firstPrecision) { + ss << ", "; + } + ss << value; + firstPrecision = false; + } + ss << "}"; + + return ss.str(); +} diff --git a/inference-engine/src/low_precision_transformations/src/rt_info/quantization_alignment_attribute.cpp b/inference-engine/src/low_precision_transformations/src/rt_info/quantization_alignment_attribute.cpp new file mode 100644 index 00000000000000..e02c8153b2c0d5 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/rt_info/quantization_alignment_attribute.cpp @@ -0,0 +1,90 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/rt_info/quantization_alignment_attribute.hpp" + +#include +#include +#include +#include + +#include +#include "low_precision/network_helper.hpp" + +using namespace ngraph; +using namespace ngraph::pass::low_precision; + +QuantizationAlignmentAttribute::QuantizationAlignmentAttribute(const bool hasToBeAligned) { + sharedValue = std::make_shared(hasToBeAligned); +} + +template class ngraph::VariantImpl; + +constexpr VariantTypeInfo VariantWrapper::type_info; + +std::shared_ptr VariantWrapper::init(const std::shared_ptr& node) { + return nullptr; +} + +std::shared_ptr>> VariantWrapper::create( + const std::shared_ptr& node, + const AttributeParameters& 
params) { + if (getAttribute>(node) != nullptr) { + return nullptr; + } + + if (!NetworkHelper::isPrecisionPreserved(node)) { + return nullptr; + } + + bool leastOneOperationIsFakeQuantize = false; + bool leastOneOperationIsNotFakeQuantize = false; + for (auto index = 0ul; index < node->get_input_size(); ++index) { + const auto& input = node->input(index); + auto inputNode = input.get_source_output().get_node_shared_ptr(); + + const auto dequantization = NetworkHelper::getDequantization(node, index); + if (!dequantization.empty() && + (is_type(dequantization.data.get_node())) && + is_type(dequantization.data.get_node()->get_input_node_ptr(0))) { + inputNode = dequantization.data.get_node()->get_input_node_shared_ptr(0); + } + + if (is_type(inputNode)) { + continue; + } + + if (!is_type(inputNode)) { + leastOneOperationIsNotFakeQuantize = true; + break; + } + + leastOneOperationIsFakeQuantize = true; + } + + if (leastOneOperationIsFakeQuantize && !leastOneOperationIsNotFakeQuantize) { + auto& rt = node->get_rt_info(); + const auto attribute = std::make_shared>( + make_shared_attribute()); + rt[ngraph::VariantWrapper::type_info.name] = attribute; + return attribute; + } + + return nullptr; +} + +void VariantWrapper::merge( + std::vector>>>& attributes) { + auto currentAttributte = get(); + for (const auto& attribute : attributes) { + currentAttributte->sharedValue->value = currentAttributte->sharedValue->value || attribute->get()->sharedValue->value; + } +} + +std::string VariantWrapper::to_string() { + std::stringstream ss; + ss << m_value->get_string(); + ss << "value: " << (m_value->sharedValue->value ? "true" : "false"); + return ss.str(); +} diff --git a/inference-engine/src/low_precision_transformations/src/rt_info/shared_value_attribute.cpp b/inference-engine/src/low_precision_transformations/src/rt_info/shared_value_attribute.cpp new file mode 100644 index 00000000000000..95cc5fa72eae79 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/rt_info/shared_value_attribute.cpp @@ -0,0 +1,16 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/rt_info/shared_value_attribute.hpp" + +#include +#include +#include +#include +#include + +#include +#include "low_precision/network_helper.hpp" + +using namespace ngraph; diff --git a/inference-engine/src/low_precision_transformations/src/shuffle_channels.cpp b/inference-engine/src/low_precision_transformations/src/shuffle_channels.cpp index 2ed3e54a86badb..129bcb23977547 100644 --- a/inference-engine/src/low_precision_transformations/src/shuffle_channels.cpp +++ b/inference-engine/src/low_precision_transformations/src/shuffle_channels.cpp @@ -8,21 +8,32 @@ #include #include +#include + #include "low_precision/network_helper.hpp" namespace ngraph { namespace pass { namespace low_precision { -ShuffleChannelsTransformation::ShuffleChannelsTransformation(const Params& params) : LayerTransformation(params) {} -void ShuffleChannelsTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const { - addPattern( - pass, - context, - make_op_pattern({ make_op_label() })); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ShuffleChannelsTransformation, "ShuffleChannelsTransformation", 0); + +ShuffleChannelsTransformation::ShuffleChannelsTransformation(const Params& params) : LayerTransformation(params) { + auto matcher = pattern::wrap_type({ pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + 
auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "ShuffleChannelsTransformation"); + this->register_matcher(m, callback); } -bool ShuffleChannelsTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher& m) const { +bool ShuffleChannelsTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher& m) { if (!canBeTransformed(context, m.get_match_root())) { return false; } diff --git a/inference-engine/src/low_precision_transformations/src/split.cpp b/inference-engine/src/low_precision_transformations/src/split.cpp index 919c6b5e87b185..a663fc64f0a2fa 100644 --- a/inference-engine/src/low_precision_transformations/src/split.cpp +++ b/inference-engine/src/low_precision_transformations/src/split.cpp @@ -4,21 +4,34 @@ #include "low_precision/split.hpp" #include "ngraph/node.hpp" + +#include + #include "low_precision/network_helper.hpp" #include "low_precision/common/dequantization_op.hpp" namespace ngraph { namespace pass { namespace low_precision { -SplitTransformation::SplitTransformation(const Params& params) : LayerTransformation(params) {} -void SplitTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const { - addPattern(pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::SplitTransformation, "SplitTransformation", 0); + +SplitTransformation::SplitTransformation(const Params& params) : LayerTransformation(params) { + auto matcher = pattern::wrap_type({ pattern::wrap_type(), pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "SplitTransformation"); + this->register_matcher(m, callback); } -bool SplitTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher& m) const { +bool SplitTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher& m) { if (!canBeTransformed(context, m.get_match_root())) { return false; } @@ -106,19 +119,20 @@ void SplitTransformation::updateOutputs( TransformationContext& context, std::vector> lastNodes, std::shared_ptr originalNode) const { - const size_t outputSize = context.function->get_output_size(); - if (outputSize == 1) { + //TODO: LPT: during refactoring update is not tested + if (lastNodes.size() == 1ul) { updateOutput(context, lastNodes[0], originalNode); } else { const std::string originalName = originalNode->get_friendly_name(); - for (size_t outIdx = 0; outIdx < lastNodes.size(); ++outIdx) { - for (size_t i = 0; i < outputSize; ++i) { - std::shared_ptr result = context.function->get_output_op(i); - std::shared_ptr outputNode = result->get_input_node_shared_ptr(0); - if (outputNode.get() == lastNodes[outIdx].get()) { - originalNode->set_friendly_name(originalName + LayerTransformation::originalLayerPostfix); - lastNodes[outIdx]->set_friendly_name(originalName + "." 
+ std::to_string(outIdx)); - break; + for (size_t i = 0; i < lastNodes.size(); ++i) { + const auto lastNode = lastNodes[i]; + for (auto output : lastNodes[i]->outputs()) { + for (auto input : output.get_target_inputs()) { + if (is_type(input.get_node())) { + originalNode->set_friendly_name(originalName + LayerTransformation::originalLayerPostfix); + lastNode->set_friendly_name(originalName + "." + std::to_string(i)); + break; + } } } } diff --git a/inference-engine/src/low_precision_transformations/src/squeeze.cpp b/inference-engine/src/low_precision_transformations/src/squeeze.cpp index 4203f8ce4f251c..8ecad0adea489a 100644 --- a/inference-engine/src/low_precision_transformations/src/squeeze.cpp +++ b/inference-engine/src/low_precision_transformations/src/squeeze.cpp @@ -8,23 +8,32 @@ #include #include +#include + #include "low_precision/network_helper.hpp" namespace ngraph { namespace pass { namespace low_precision { +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::SqueezeTransformation, "SqueezeTransformation", 0); + SqueezeTransformation::SqueezeTransformation(const Params& params) : LayerTransformation(params) { -} + auto matcher = pattern::wrap_type({ pattern::wrap_type(), pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; -void SqueezeTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); + auto m = std::make_shared(matcher, "SqueezeTransformation"); + this->register_matcher(m, callback); } -bool SqueezeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool SqueezeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { if (!canBeTransformed(context, m.get_match_root())) { return false; } diff --git a/inference-engine/src/low_precision_transformations/src/strided_slice.cpp b/inference-engine/src/low_precision_transformations/src/strided_slice.cpp index ea01d1e8b24715..5e34d1bf45b453 100644 --- a/inference-engine/src/low_precision_transformations/src/strided_slice.cpp +++ b/inference-engine/src/low_precision_transformations/src/strided_slice.cpp @@ -7,12 +7,15 @@ #include #include +#include #include "low_precision/network_helper.hpp" namespace ngraph { namespace pass { namespace low_precision { +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::StridedSliceTransformation, "StridedSliceTransformation", 0); + std::shared_ptr stridedSliceDeqConstant( const std::shared_ptr strSlice, const std::shared_ptr dequantizaitonConstant) { @@ -71,19 +74,22 @@ std::shared_ptr stridedSliceDeqConstant( return NetworkHelper::toScalarIfPossible(result); } -StridedSliceTransformation::StridedSliceTransformation(const Params& params) : LayerTransformation(params) {} +StridedSliceTransformation::StridedSliceTransformation(const Params& params) : LayerTransformation(params) { + auto matcher = ngraph::pattern::wrap_type(); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; -void StridedSliceTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const { - addPattern(pass, - context, - make_op_pattern({ - make_op_label(), - make_op_label(), - 
make_op_label(), - make_op_label() })); + auto m = std::make_shared(matcher, "StridedSliceTransformation"); + this->register_matcher(m, callback); } -bool StridedSliceTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher& m) const { +bool StridedSliceTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher& m) { if (!StridedSliceTransformation::canBeTransformed(context, m.get_match_root())) { return false; } diff --git a/inference-engine/src/low_precision_transformations/src/subgraph.cpp b/inference-engine/src/low_precision_transformations/src/subgraph.cpp deleted file mode 100644 index 4fd36f8d7e8b6c..00000000000000 --- a/inference-engine/src/low_precision_transformations/src/subgraph.cpp +++ /dev/null @@ -1,246 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include "low_precision/quantization_details.hpp" -#include "low_precision/common/ie_lpt_exception.hpp" -#include "low_precision/network_helper.hpp" - - -namespace ngraph { -namespace pass { -namespace low_precision { - -bool operationIsSupportedInConcat(const std::shared_ptr& node) { - // list of operations, which change channels, but supported in ConcatTransformation - if (ngraph::is_type(node) || - ngraph::is_type(node) || - ngraph::is_type(node)) { - return true; - } - - // operations, which change channels, usually don't support in ConcatTransformation - const auto inputs = node->input_values(); - for (const auto& input : inputs) { - if (ngraph::is_type(input.get_node())) { - continue; - } - - const PartialShape& in = input.get_partial_shape(); - const PartialShape& out = node->get_output_partial_shape(0); - if (in.rank().is_dynamic() || out.rank().is_dynamic()) { - return false; - } - - const auto inRank = in.rank().get_length(); - const auto outRank = out.rank().get_length(); - if (inRank < 2 || outRank < 2) { - return false; - } - - for (int i = 0; i < 2; ++i) { - if ((i >= inRank) || (i >= outRank)) { - // all previous dimensions are equal - return true; - } - if (in[i] != out[i]) { - return false; - } - } - } - - return true; -} - -Subgraph::Subgraph(ngraph::pass::ILayerTransformationsManager* layerTransformationsManager) : layerTransformationsManager(layerTransformationsManager) { -} - -bool Subgraph::fillSubgraphForQuantization( - const std::shared_ptr& fakeQuantize, - std::unordered_set& handledLayers) { - quantizationLayers.push_back(fakeQuantize); - handledLayers.insert(fakeQuantize->get_friendly_name()); - layers.emplace(fakeQuantize->get_friendly_name(), fakeQuantize); - - for (size_t index = 0; index < fakeQuantize->get_output_size(); ++index) { - const auto childInputs = fakeQuantize->get_output_target_inputs(index); - for (const auto childInput : childInputs) { - const std::shared_ptr child = childInput.get_node()->shared_from_this(); - if (handledLayers.find(child->get_friendly_name()) != handledLayers.end()) { - continue; - } - - const std::shared_ptr concatChild = ngraph::as_type_ptr(child); - if (concatChild != nullptr) { - if (!fillSubgraphForConcat(concatChild, handledLayers)) { - return false; - } - } else { - const std::shared_ptr fakeQuantizeChild = ngraph::as_type_ptr(child); - if (fakeQuantizeChild != nullptr) { - // - } else { - if (layerTransformationsManager->isPrecisionPreserved(child) && operationIsSupportedInConcat(child)) { - if (!fillSubgraphForIntermediate(child, handledLayers)) { - return false; 
- } - } - } - } - } - } - - return true; -} - -bool Subgraph::atLeastOneIsIntermediate(const std::shared_ptr& node) const { - for (size_t index = 0; index < node->get_output_size(); ++index) { - const auto childInputs = node->get_output_target_inputs(index); - for (const auto childInput : childInputs) { - auto child = childInput.get_node()->shared_from_this(); - if (as_type_ptr(child)) { - return true; - } - - if (!layerTransformationsManager->isPrecisionPreserved(child) || !operationIsSupportedInConcat(child)) { - // child branch is out of subgraph - continue; - } - - if (atLeastOneIsIntermediate(child)) { - return true; - } - } - } - return false; -} - -std::shared_ptr getFakeQuantize(const FakeQuantizeDequantization& dequantization) { - std::shared_ptr node = dequantization.data.get_node_shared_ptr(); - std::shared_ptr fakeQuantize = ngraph::as_type_ptr(node); - if (fakeQuantize != nullptr) { - return fakeQuantize; - } - - if (is_type(node)) { - fakeQuantize = ngraph::as_type_ptr(node->get_input_node_shared_ptr(0)); - } - return fakeQuantize; -} - -bool Subgraph::fill(const std::shared_ptr& layer, std::unordered_set& handledLayers) { - // if at least one parent is handled incorrectly then subgraph is not in low precision - for (size_t index = 0; index < layer->get_input_size(); ++index) { - const std::shared_ptr parent = layer->get_input_node_shared_ptr(index); - if (handledLayers.find(parent->get_friendly_name()) != handledLayers.end()) { - continue; - } - - const std::shared_ptr concatParent = ngraph::as_type_ptr(parent); - if (concatParent != nullptr) { - if (!fillSubgraphForConcat(concatParent, handledLayers)) { - return false; - } - } else { - const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(parent, 0, true); - const std::shared_ptr fakeQuantizeParent = dequantization.empty() ? 
- ngraph::as_type_ptr(parent) : - getFakeQuantize(dequantization); - if (fakeQuantizeParent != nullptr) { - if (!fillSubgraphForQuantization(fakeQuantizeParent, handledLayers)) { - // - } - } else { - const std::shared_ptr constant = ngraph::as_type_ptr(parent); - if (constant != nullptr) { - // - } else { - if (layerTransformationsManager->isPrecisionPreserved(parent) && operationIsSupportedInConcat(parent)) { - if (!fillSubgraphForIntermediate(parent, handledLayers)) { - return false; - } - } else { - return false; - } - } - } - } - } - - // TODO: if at least one child was handled correctly then subgraph is low precision - for (size_t index = 0; index < layer->get_output_size(); ++index) { - const auto childInputs = layer->get_output_target_inputs(index); - for (const auto childInput : childInputs) { - const std::shared_ptr child = childInput.get_node()->shared_from_this(); - - if (handledLayers.find(child->get_friendly_name()) != handledLayers.end()) { - continue; - } - - const std::shared_ptr concatChild = ngraph::as_type_ptr(child); - if (concatChild != nullptr) { - if (!fillSubgraphForConcat(concatChild, handledLayers)) { - return false; - } - } else { - // check if children branches between Concat operations - if (!atLeastOneIsIntermediate(child)) { - continue; - } - - const std::shared_ptr fakeQuantizeChild = ngraph::as_type_ptr(child); - if (fakeQuantizeChild != nullptr) { - // - } else if (layerTransformationsManager->isPrecisionPreserved(child) && operationIsSupportedInConcat(child)) { - if (!fillSubgraphForIntermediate(child, handledLayers)) { - return false; - } - } - } - } - } - - return true; -} - -bool Subgraph::fillSubgraphForIntermediate(const std::shared_ptr& intermediate, std::unordered_set& handledLayers) { - handledLayers.insert(intermediate->get_friendly_name()); - layers.emplace(intermediate->get_friendly_name(), intermediate); - - return fill(intermediate, handledLayers); -} - -bool Subgraph::empty() const { - return quantizationLayers.empty(); -} - -bool Subgraph::fillSubgraphForConcat(const std::shared_ptr& concat, std::unordered_set& handledLayers) { - const auto axis = concat->get_axis(); - const size_t normalizedAxis = ngraph::normalize_axis(concat->get_friendly_name(), axis, concat->get_output_partial_shape(0).rank()); - // supported only per-channel concat - if (normalizedAxis != 1ul) { - return false; - } - - concatLayers.push_back(concat); - handledLayers.insert(concat->get_friendly_name()); - layers.emplace(concat->get_friendly_name(), concat); - - std::shared_ptr node = concat; - return fill(node, handledLayers); -} - -} // namespace low_precision -} // namespace pass -} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/src/subtract.cpp b/inference-engine/src/low_precision_transformations/src/subtract.cpp index 2f86bfc97c7931..4c71e191c2f6e2 100644 --- a/inference-engine/src/low_precision_transformations/src/subtract.cpp +++ b/inference-engine/src/low_precision_transformations/src/subtract.cpp @@ -11,6 +11,9 @@ #include #include +#include +#include + #include "low_precision/common/ie_lpt_exception.hpp" #include "low_precision/network_helper.hpp" @@ -18,19 +21,27 @@ namespace ngraph { namespace pass { namespace low_precision { -void SubtractTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::SubtractTransformation, 
"SubtractTransformation", 0); + +SubtractTransformation::SubtractTransformation(const Params& params) : LayerTransformation(params) { + auto convert = pattern::wrap_type(); + auto multiply = pattern::wrap_type(); + auto subParent = std::make_shared(OutputVector{ convert, multiply }); + auto subtract = pattern::wrap_type({ subParent, pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); + auto m = std::make_shared(subtract, "SubtractTransformation"); + this->register_matcher(m, callback); } -bool SubtractTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool SubtractTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { std::shared_ptr subtract = as_type_ptr(m.get_match_root()); if (!canBeTransformed(context, subtract)) { return false; diff --git a/inference-engine/src/low_precision_transformations/src/subtract_multiply_to_multiply_add.cpp b/inference-engine/src/low_precision_transformations/src/subtract_multiply_to_multiply_add.cpp index f79021f93b8bae..f8554db8721ed9 100644 --- a/inference-engine/src/low_precision_transformations/src/subtract_multiply_to_multiply_add.cpp +++ b/inference-engine/src/low_precision_transformations/src/subtract_multiply_to_multiply_add.cpp @@ -8,6 +8,7 @@ #include #include +#include #include "low_precision/common/ie_lpt_exception.hpp" #include "low_precision/network_helper.hpp" #include "low_precision/common/dequantization_op.hpp" @@ -16,8 +17,21 @@ namespace ngraph { namespace pass { namespace low_precision { -void SubtractMultiplyToMultiplyAddTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addSingleNodePattern(pass, context); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::SubtractMultiplyToMultiplyAddTransformation, "SubtractMultiplyToMultiplyAddTransformation", 0); + +SubtractMultiplyToMultiplyAddTransformation::SubtractMultiplyToMultiplyAddTransformation(const Params& params) : LayerTransformation(params) { + auto matcher = pattern::wrap_type(); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "SubtractMultiplyToMultiplyAddTransformation"); + this->register_matcher(m, callback); } FakeQuantizeDequantization get(const std::shared_ptr node) { @@ -52,7 +66,7 @@ FakeQuantizeDequantization get(const std::shared_ptr node) { return FakeQuantizeDequantization(dataNode, convert, subtract, subtractConvert, subtractConstant, multiply, multiplyConstant); } -bool SubtractMultiplyToMultiplyAddTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool SubtractMultiplyToMultiplyAddTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { auto multiply = m.get_match_root(); if (!canBeTransformed(context, multiply)) { return false; diff --git a/inference-engine/src/low_precision_transformations/src/transformation_context.cpp b/inference-engine/src/low_precision_transformations/src/transformation_context.cpp index 22d8d3444682de..d5d21c7ecfcc9a 100644 --- 
a/inference-engine/src/low_precision_transformations/src/transformation_context.cpp +++ b/inference-engine/src/low_precision_transformations/src/transformation_context.cpp @@ -8,6 +8,8 @@ namespace ngraph { namespace pass { namespace low_precision { +TransformationContext::TransformationContext() : function(nullptr) {} + TransformationContext::TransformationContext(std::shared_ptr function) : function(function) { } diff --git a/inference-engine/src/low_precision_transformations/src/transformer.cpp b/inference-engine/src/low_precision_transformations/src/transformer.cpp deleted file mode 100644 index 6018c6f820f67b..00000000000000 --- a/inference-engine/src/low_precision_transformations/src/transformer.cpp +++ /dev/null @@ -1,504 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "low_precision/transformer.hpp" -#include "low_precision/network_helper.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ngraph_ops/type_relaxed.hpp" -#include "ngraph/pass/constant_folding.hpp" -#include "ngraph/opsets/opset6.hpp" - -#include "lpt_itt.h" - -// branch specific transformations -#include "low_precision/concat.hpp" -#include "low_precision/concat_multi_channels.hpp" - -// decomposition transformations -#include "low_precision/fake_quantize_decomposition.hpp" - -// general transformations -#include "low_precision/add.hpp" -#include "low_precision/avg_pool.hpp" -#include "low_precision/clamp.hpp" -#include "low_precision/convolution.hpp" -#include "low_precision/convolution_backprop_data.hpp" -#include "low_precision/depth_to_space.hpp" -#include "low_precision/fake_quantize.hpp" -#include "low_precision/group_convolution.hpp" -#include "low_precision/interpolate.hpp" -#include "low_precision/mat_mul.hpp" -#include "low_precision/max_pool.hpp" -#include "low_precision/multiply.hpp" -#include "low_precision/mvn.hpp" -#include "low_precision/normalize_l2.hpp" -#include "low_precision/prelu.hpp" -#include "low_precision/reduce_max.hpp" -#include "low_precision/reduce_mean.hpp" -#include "low_precision/reduce_min.hpp" -#include "low_precision/reduce_sum.hpp" -#include "low_precision/reshape.hpp" -#include "low_precision/relu.hpp" -#include "low_precision/shuffle_channels.hpp" -#include "low_precision/squeeze.hpp" -#include "low_precision/subtract.hpp" -#include "low_precision/split.hpp" -#include "low_precision/strided_slice.hpp" -#include "low_precision/transpose.hpp" -#include "low_precision/unsqueeze.hpp" -#include "low_precision/variadic_split.hpp" -#include "low_precision/split.hpp" - -// cleanup transformations -#include "low_precision/fuse_convert.hpp" -#include "low_precision/fold_convert.hpp" -#include "low_precision/fuse_fake_quantize.hpp" -#include "low_precision/fuse_subtract_to_fake_quantize.hpp" -#include "low_precision/fuse_multiply_to_fake_quantize.hpp" -#include "low_precision/multiply_to_group_convolution.hpp" -#include "low_precision/subtract_multiply_to_multiply_add.hpp" - -namespace ngraph { -namespace pass { -namespace low_precision { - -LowPrecisionTransformations::LowPrecisionTransformations( - const std::map& branchSpecificTransformations, - const std::map& decompositionTransformations, - const std::map& transformations, - const std::map>>& cleanupTransformations, - const std::vector& standaloneCleanupTransformations) : - branchSpecificTransformations(branchSpecificTransformations), - decompositionTransformations(decompositionTransformations), - 
transformations(transformations), - cleanupTransformations(cleanupTransformations), - standaloneCleanupTransformations(standaloneCleanupTransformations) {} - -void LowPrecisionTransformations::setUpdatePrecisions(const bool updatePrecisions) { - for (auto it = branchSpecificTransformations.begin(); it != branchSpecificTransformations.end(); ++it) { - it->second->setUpdatePrecisions(updatePrecisions); - } - for (auto it = transformations.begin(); it != transformations.end(); ++it) { - it->second->setUpdatePrecisions(updatePrecisions); - } -} - -void LowPrecisionTransformations::setQuantizedTensorAlignmentOnActivations( - const LayerTransformation::QuantizedTensorAlignment quantizedTensorAlignmentOnActivations) { - for (auto it = branchSpecificTransformations.begin(); it != branchSpecificTransformations.end(); ++it) { - it->second->setQuantizedTensorAlignmentOnActivations(quantizedTensorAlignmentOnActivations); - } - for (auto it = transformations.begin(); it != transformations.end(); ++it) { - it->second->setQuantizedTensorAlignmentOnActivations(quantizedTensorAlignmentOnActivations); - } -} - -void LowPrecisionTransformations::setQuantizedTensorAlignmentOnWeights( - const LayerTransformation::QuantizedTensorAlignment quantizedTensorAlignmentOnWeights) { - for (auto it = branchSpecificTransformations.begin(); it != branchSpecificTransformations.end(); ++it) { - it->second->setQuantizedTensorAlignmentOnWeights(quantizedTensorAlignmentOnWeights); - } - for (auto it = transformations.begin(); it != transformations.end(); ++it) { - it->second->setQuantizedTensorAlignmentOnWeights(quantizedTensorAlignmentOnWeights); - } -} - -std::vector LowPrecisionTransformations::find(const std::string& transformationKey) const { - auto it = branchSpecificTransformations.find(transformationKey); - std::vector res; - if (it != branchSpecificTransformations.end()) { - res.emplace_back(it->second); - } - - it = transformations.find(transformationKey); - if (it != transformations.end()) { - res.emplace_back(it->second); - } - - const auto it1 = cleanupTransformations.find(transformationKey); - if (it1 != cleanupTransformations.end()) { - for (const auto& transformation : it1->second) { - res.emplace_back(transformation.second); - } - } - - for (const auto& transformation : standaloneCleanupTransformations) { - if (transformation.typeName == transformationKey) { - res.emplace_back(transformation.transformation); - } - } - - return res; -} - -void LowPrecisionTransformations::setParamsManager(IParamsManager* paramsManager) noexcept { - setParamsManager(paramsManager, branchSpecificTransformations); - setParamsManager(paramsManager, decompositionTransformations); - setParamsManager(paramsManager, transformations); - setParamsManager(paramsManager, cleanupTransformations); - setParamsManager(paramsManager, standaloneCleanupTransformations); -} - -void LowPrecisionTransformations::setLayerTransformationsManager(ILayerTransformationsManager* layerTransformationsManager) noexcept { - setLayerTransformationsManager(layerTransformationsManager, branchSpecificTransformations); - setLayerTransformationsManager(layerTransformationsManager, decompositionTransformations); - setLayerTransformationsManager(layerTransformationsManager, transformations); - setLayerTransformationsManager(layerTransformationsManager, cleanupTransformations); - setLayerTransformationsManager(layerTransformationsManager, standaloneCleanupTransformations); -} - -void LowPrecisionTransformations::setParamsManager( - IParamsManager* paramsManager, - 
std::map& transformations) noexcept { - for (auto it : transformations) { - it.second->setParamsManager(paramsManager); - } -} - -void LowPrecisionTransformations::setParamsManager( - IParamsManager* paramsManager, - std::map>>& transformations) noexcept { - for (auto it : transformations) { - for (auto transform : it.second) { - transform.second->setParamsManager(paramsManager); - } - } -} - -void LowPrecisionTransformations::setParamsManager( - IParamsManager* paramsManager, - std::vector& transformations) noexcept { - for (auto it : transformations) { - it.transformation->setParamsManager(paramsManager); - } -} - -void LowPrecisionTransformations::setLayerTransformationsManager( - ILayerTransformationsManager* layerTransformationsManager, - std::map& transformations) noexcept { - for (auto it : transformations) { - it.second->setLayerTransformationsManager(layerTransformationsManager); - } -} - -void LowPrecisionTransformations::setLayerTransformationsManager( - ILayerTransformationsManager* layerTransformationsManager, - std::map < std::string, std::vector < std::pair> > & transformations) noexcept { - for (auto it : transformations) { - for (auto transform : it.second) { - transform.second->setLayerTransformationsManager(layerTransformationsManager); - } - } -} - -void LowPrecisionTransformations::setLayerTransformationsManager( - ILayerTransformationsManager* layerTransformationsManager, - std::vector& transformations) noexcept { - for (auto it : transformations) { - it.transformation->setLayerTransformationsManager(layerTransformationsManager); - } -} - -LowPrecisionTransformations LowPrecisionTransformer::getAllTransformations(const LayerTransformation::Params& params) { - using namespace pass::low_precision; - - auto transformer = LowPrecisionTransformations(). - addBranchSpecific(params). - - addDecomposition(params). - - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - - addCleanup(params). - addCleanup(params). - - addStandaloneCleanup(params). - addStandaloneCleanup(params). - addStandaloneCleanup(params). 
- addStandaloneCleanup(params); - - return transformer; -} - -bool LowPrecisionTransformer::isFunctionQuantized(const std::shared_ptr& function) { - std::set> handledNodes; - std::deque> nodes; - for (auto result : function->get_results()) { - nodes.push_front(result); - } - - while (!nodes.empty()) { - auto node = nodes.front(); - nodes.pop_front(); - - for (size_t i = 0; i < node->inputs().size(); ++i) { - auto parent = node->get_input_node_shared_ptr(i); - if (handledNodes.find(parent) != handledNodes.end()) { - continue; - } - - const std::shared_ptr fakeQuantize = as_type_ptr(parent); - if ((fakeQuantize != nullptr) && - QuantizationDetails::outputLayoutIsSupported(fakeQuantize) && - QuantizationDetails::isSupportedLevel(fakeQuantize->get_levels())) { - return true; - } - - nodes.push_front(parent); - handledNodes.insert(parent); - } - } - return false; -} - -LowPrecisionTransformer::LowPrecisionTransformer(): transformations(LowPrecisionTransformer::getAllTransformations()) {} - -template -void make_matcher_type_relaxed(ngraph::pass::GraphRewrite* transformation) { - using namespace ngraph; - - auto is_op_type = [](std::shared_ptr n) { - return !!as_type_ptr(n); - }; - - auto p_node = std::make_shared(element::f32, Shape{}, is_op_type); - - ngraph::graph_rewrite_callback callback = [](ngraph::pattern::Matcher &m) { - auto l_node = std::dynamic_pointer_cast(m.get_match_root()); - if (std::dynamic_pointer_cast(l_node)) { - return false; - } - if (!l_node) { - THROW_IE_LPT_EXCEPTION(*l_node) << "unexpected operation type"; - } - - std::vector inputPrecisions; - for (auto& inputs : l_node->inputs()) { - inputPrecisions.push_back(inputs.get_element_type()); - } - - std::vector outputPrecisions; - for (auto& output : l_node->outputs()) { - outputPrecisions.push_back(output.get_element_type()); - } - - auto replacement = std::make_shared>(*l_node, inputPrecisions, outputPrecisions); - - copy_runtime_info(l_node, replacement); - replace_node(l_node, replacement); - return true; - }; - - auto m = std::make_shared(p_node, "TypeRelaxedReplacer"); - NGRAPH_SUPPRESS_DEPRECATED_START - transformation->add_matcher(m, callback, ngraph::pass::PassProperty::CHANGE_DYNAMIC_STATE); - NGRAPH_SUPPRESS_DEPRECATED_END -} - -TypeRelaxedReplacer::TypeRelaxedReplacer() { - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); -} - -LowPrecisionTransformer::LowPrecisionTransformer(const LowPrecisionTransformations& transformations) - : transformations(transformations) {} - -void LowPrecisionTransformer::transform(std::shared_ptr network) { - if (!isFunctionQuantized(network)) { - return; - } - - OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::LPT_LT, "LowPrecisionTransformer", "transform"); - - ngraph::pass::ConstantFolding constantFolding; - constantFolding.run_on_function(network); - - transformations.setParamsManager(this); - transformations.setLayerTransformationsManager(this); - - 
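The legacy LowPrecisionTransformer::transform() driver being deleted here ran ConstantFolding and then pushed the transformations through several hand-rolled GraphRewrite stages. With matcher registration moved into the pass constructors, the same pipeline can be driven by the stock ngraph pass manager; a minimal usage sketch under that assumption follows, where ExampleMatcherPass is the illustrative pass from the earlier sketch and not part of this patch.

#include <memory>

#include <ngraph/function.hpp>
#include <ngraph/pass/constant_folding.hpp>
#include <ngraph/pass/manager.hpp>

// Runs an illustrative low-precision-style pipeline on an ngraph::Function.
void run_example_pipeline(const std::shared_ptr<ngraph::Function>& function) {
    ngraph::pass::Manager manager;
    manager.register_pass<ngraph::pass::ConstantFolding>();  // the deleted driver also folded constants first
    manager.register_pass<ExampleMatcherPass>();             // constructor-registered matcher pass
    manager.run_passes(function);
}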
TransformationContext context(network); - - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "TypeRelaxedReplacer"); - - // Extend necessary operations with polymorphic semantics - { - TypeRelaxedReplacer pass; - pass.run_on_function(network); - } - - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "BranchSpecificTransformations"); - - { - // Branch specific transformations - GraphRewrite pass; - registerAllMatchers(transformations.branchSpecificTransformations, pass, context); - pass.run_on_function(network); - } - - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FakeQuantizeDecomposition"); - - { - // Step #1: FakeQuantize decomposition transformation execution - GraphRewrite pass; - registerAllMatchers(transformations.decompositionTransformations, pass, context); - pass.run_on_function(network); - } - - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "LayerTransformations"); - - { - // Step #2: layer transformations execution - GraphRewrite pass; - registerAllMatchers(transformations.transformations, pass, context); - pass.run_on_function(network); - } - - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "CleanupTransformations"); - - { - // Step #3: cleanup transformations execution - GraphRewrite pass; - registerAllMatchers(transformations.cleanupTransformations, pass, context); - pass.run_on_function(network); - } - - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "StandaloneCleanupTransformations"); - - { - // Step #4: standalone cleanup transformations execution - - for (auto it : transformations.standaloneCleanupTransformations) { - GraphRewrite pass; - it.transformation->registerMatcherIn(pass, context); - pass.run_on_function(network); - } - } - - network->validate_nodes_and_infer_types(); -} - -std::vector LowPrecisionTransformer::getPrecisionsOnActivations(const Node& op) const noexcept { - const std::string operantionType = LowPrecisionTransformations::getType(op); - const std::vector transformation = transformations.find(operantionType); - if (transformation.empty()) { - return std::vector(); - } - std::vector precisions = transformation[0]->getPrecisionsOnActivations(); - - for (const auto& transform : transformation) { - precisions = NetworkHelper::precisionIntersection(precisions, transform->getPrecisionsOnActivations()); - } - return precisions; -} - -bool LowPrecisionTransformer::isQuantized(const std::shared_ptr& layer) const noexcept { - const std::string operantionType = LowPrecisionTransformations::getType(*layer); - const std::vector transformation = transformations.find(operantionType); - if (transformation.empty()) { - return false; - } - - for (const auto& transform : transformation) { - if (!transform->isQuantized(layer)) { - return false; - } - } - return true; -} - -bool LowPrecisionTransformer::isPrecisionPreserved(const std::shared_ptr& layer) const noexcept { - const std::string operantionType = LowPrecisionTransformations::getType(*layer); - const std::vector transformation = transformations.find(operantionType); - if (transformation.empty()) { - return false; - } - - for (const auto& transform : transformation) { - if (!transform->isPrecisionPreserved(layer)) { - return false; - } - } - return true; -} - -void LowPrecisionTransformer::registerAllMatchers( - std::map transformations, - GraphRewrite& pass, - TransformationContext& context) { - for (auto it : transformations) { - it.second->registerMatcherIn(pass, context); - } -} - -void LowPrecisionTransformer::registerAllMatchers( - std::map>> transformations, - GraphRewrite& pass, - TransformationContext& context) 
{ - for (auto it : transformations) { - for (auto transform : it.second) { - transform.second->registerMatcherIn(pass, context); - } - } -} - -} // namespace low_precision -} // namespace pass -} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/src/transparent_base_transformation.cpp b/inference-engine/src/low_precision_transformations/src/transparent_base_transformation.cpp index b8c75d43619b49..c89ca0e9144c67 100644 --- a/inference-engine/src/low_precision_transformations/src/transparent_base_transformation.cpp +++ b/inference-engine/src/low_precision_transformations/src/transparent_base_transformation.cpp @@ -15,7 +15,7 @@ using namespace ngraph; using namespace ngraph::pass; using namespace ngraph::pass::low_precision; -bool TransparentBaseTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool TransparentBaseTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { auto operation = m.get_match_root(); const std::shared_ptr dequantization = operation->input_value(0).get_node_shared_ptr(); // const std::shared_ptr dequantizationParent = dequantization->input_value(0).get_node_shared_ptr(); diff --git a/inference-engine/src/low_precision_transformations/src/transpose.cpp b/inference-engine/src/low_precision_transformations/src/transpose.cpp index de3cd40e0d5257..66f29a66ec88f9 100644 --- a/inference-engine/src/low_precision_transformations/src/transpose.cpp +++ b/inference-engine/src/low_precision_transformations/src/transpose.cpp @@ -7,6 +7,8 @@ #include #include +#include + #include "low_precision/common/ie_lpt_exception.hpp" #include "low_precision/network_helper.hpp" @@ -14,11 +16,21 @@ namespace ngraph { namespace pass { namespace low_precision { -void TransposeTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::TransposeTransformation, "TransposeTransformation", 0); + +TransposeTransformation::TransposeTransformation(const Params& params) : LayerTransformation(params) { + auto matcher = pattern::wrap_type({ pattern::wrap_type(), pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "TransposeTransformation"); + this->register_matcher(m, callback); } void transposeDequantizationConstant(std::shared_ptr& transpose) { @@ -74,7 +86,7 @@ void transposeDequantizationConstant(std::shared_ptr& transpose) { } } -bool TransposeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool TransposeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { std::shared_ptr transpose = m.get_match_root(); if (!canBeTransformed(context, transpose)) { return false; diff --git a/inference-engine/src/low_precision_transformations/src/unsqueeze.cpp b/inference-engine/src/low_precision_transformations/src/unsqueeze.cpp index b53341005d477a..b03046e2253357 100644 --- a/inference-engine/src/low_precision_transformations/src/unsqueeze.cpp +++ b/inference-engine/src/low_precision_transformations/src/unsqueeze.cpp @@ -8,23 +8,32 @@ #include #include +#include + #include "low_precision/network_helper.hpp" namespace ngraph { namespace pass 
{ namespace low_precision { +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::UnsqueezeTransformation, "UnsqueezeTransformation", 0); + UnsqueezeTransformation::UnsqueezeTransformation(const Params& params) : LayerTransformation(params) { -} + auto matcher = pattern::wrap_type({ pattern::wrap_type(), pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; -void UnsqueezeTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); + auto m = std::make_shared(matcher, "UnsqueezeTransformation"); + this->register_matcher(m, callback); } -bool UnsqueezeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool UnsqueezeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { if (!canBeTransformed(context, m.get_match_root())) { return false; } diff --git a/inference-engine/src/low_precision_transformations/src/variadic_split.cpp b/inference-engine/src/low_precision_transformations/src/variadic_split.cpp index 685219f27730d0..8cc9ba7caaadea 100644 --- a/inference-engine/src/low_precision_transformations/src/variadic_split.cpp +++ b/inference-engine/src/low_precision_transformations/src/variadic_split.cpp @@ -4,20 +4,33 @@ #include "low_precision/variadic_split.hpp" #include "ngraph/node.hpp" + +#include + #include "low_precision/network_helper.hpp" namespace ngraph { namespace pass { namespace low_precision { -VariadicSplitTransformation::VariadicSplitTransformation(const Params& params) : SplitTransformation(params) {} - -void VariadicSplitTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const { - addPattern(pass, - context, - make_op_pattern({ - make_op_label(), - make_op_label(), - make_op_label() })); + +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::VariadicSplitTransformation, "VariadicSplitTransformation", 0); + +VariadicSplitTransformation::VariadicSplitTransformation(const Params& params) : SplitTransformation(params) { + auto matcher = pattern::wrap_type({ + pattern::wrap_type(), + pattern::wrap_type(), + pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "VariadicSplitTransformation"); + this->register_matcher(m, callback); } } // namespace low_precision diff --git a/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp b/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp index c760f9a7bace13..402327f277ad74 100644 --- a/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp +++ b/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp @@ -42,9 +42,6 @@ bool WeightableLayerTransformation::canConvolutionBeTransformed(const Transforma if (dequantization.empty()) { const auto fqOnWeights = getFakeQuantizeOnWeights(layer); const auto dataPrecision = getDataPrecisionOnWeights(layer); - if ((!supportAsymmetricQuantization) && dataPrecision.hasZeroPoint) { - return false; - } if (!NetworkHelper::checkZeroPoint(fqOnWeights, 
dataPrecision)) { return false; } @@ -218,7 +215,7 @@ bool WeightableLayerTransformation::canBeTransformed(const TransformationContext return true; } -bool WeightableLayerTransformation::isQuantized(std::shared_ptr layer, bool reshapeIsRequired) const noexcept { +bool WeightableLayerTransformation::isQuantizedStatic(const std::shared_ptr& layer, const bool reshapeIsRequired) noexcept { FakeQuantizeDequantization dequantizationOnWeights; if (reshapeIsRequired) { const auto reshape = layer->get_input_node_shared_ptr(1); @@ -236,7 +233,9 @@ bool WeightableLayerTransformation::isQuantized(std::shared_ptr layer, boo const std::shared_ptr fq = as_type_ptr(layer->get_input_node_shared_ptr(1)); return NetworkHelper::isQuantizeSupported(fq); } else { - dequantizationOnWeights = NetworkHelper::getDequantization(layer, 1); + // TODO: update NetworkHelper API later + const std::shared_ptr op = const_cast(layer.get())->shared_from_this(); + dequantizationOnWeights = NetworkHelper::getDequantization(op, 1); } if (dequantizationOnWeights.empty()) { @@ -283,14 +282,21 @@ bool WeightableLayerTransformation::isPrecisionPreserved(std::shared_ptr l return false; } -void WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(const std::shared_ptr& node, const size_t outChannelsShapeIndex) const { +bool WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(const std::shared_ptr& node, const size_t outChannelsShapeIndex) const { const auto fq = getFakeQuantizeOnWeights(node); if (fq == nullptr) { - return; + // FakeQuantize has been decomposed already + return true; } const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(fq); - const DataPrecision dataPrecision = getDataPrecision(fq, quantizationDetails, true); + const auto precisionsAttribute = getAttributeFromOutput(fq); + const auto precisions = precisionsAttribute == nullptr ? 
+ PrecisionsAttribute::defaultPrecisions : + precisionsAttribute->get()->sharedValue->precisions; + + const DataPrecision dataPrecision = getDataPrecision(fq, quantizationDetails, precisions); + auto tuple = NetworkHelper::decomposeFakeQuantize( fq, dataPrecision.precision, @@ -302,9 +308,16 @@ void WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(const st outChannelsShapeIndex); std::shared_ptr fqOnWeights = std::get<0>(tuple); + // TODO: LPT: issue #58685 + if ((!updatePrecisions) && (fqOnWeights == nullptr)) { + return false; + } + if (as_type_ptr(fqOnWeights) == nullptr) { THROW_IE_LPT_EXCEPTION(*fqOnWeights) << "FakeQuantize on weights was not folded to constant"; } + + return true; } bool WeightableLayerTransformation::isGroup(const std::shared_ptr& layer) { @@ -327,7 +340,7 @@ bool WeightableLayerTransformation::isDepthwise(const std::shared_ptr& lay return (group == inputChannelsCount) && (inputChannelsCount == outputChannelsCount); } -std::shared_ptr WeightableLayerTransformation::getFakeQuantizeOnWeights(const std::shared_ptr& node) const { +std::shared_ptr WeightableLayerTransformation::getFakeQuantizeOnWeights(const std::shared_ptr& node) { auto fq = as_type_ptr(node->input_value(1).get_node_shared_ptr()); // TODO: temporary workaround if (fq == nullptr) { @@ -337,10 +350,38 @@ std::shared_ptr WeightableLayerTransformation::getFakeQuan return fq; } -DataPrecision WeightableLayerTransformation::getDataPrecisionOnWeights(const std::shared_ptr& node) const { +DataPrecision WeightableLayerTransformation::getDataPrecisionOnWeights(const std::shared_ptr& node) { const auto fq = getFakeQuantizeOnWeights(node); const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(fq); - return getDataPrecision(fq, quantizationDetails, true); + + const auto precisionsAttribute = getAttributeFromOutput(fq); + const auto precisions = precisionsAttribute == nullptr ? + PrecisionsAttribute::defaultPrecisions : + precisionsAttribute->get()->sharedValue->precisions; + + return getDataPrecision(fq, quantizationDetails, precisions); +} + +bool WeightableLayerTransformation::isAsymmetricOnWeights(const std::shared_ptr& node) { + const auto n = const_cast(node.get())->shared_from_this(); + + const auto reshapeFromWeights = ngraph::as_type_ptr(n->get_input_node_shared_ptr(1)); + const auto dequantization = reshapeFromWeights == nullptr ? + NetworkHelper::getDequantization(n, 1ul) : + NetworkHelper::getDequantization(reshapeFromWeights); + + if (dequantization.empty()) { + const auto dataPrecision = WeightableLayerTransformation::getDataPrecisionOnWeights(n); + if (dataPrecision.hasZeroPoint) { + return true; + } + } else { + if (dequantization.subtract != nullptr) { + return true; + } + } + + return false; } } // namespace low_precision diff --git a/inference-engine/src/mkldnn_plugin/CMakeLists.txt b/inference-engine/src/mkldnn_plugin/CMakeLists.txt index 453aff2d9737a2..6e066a4656c384 100644 --- a/inference-engine/src/mkldnn_plugin/CMakeLists.txt +++ b/inference-engine/src/mkldnn_plugin/CMakeLists.txt @@ -34,8 +34,7 @@ ie_mark_target_as_cc(${TARGET_NAME}) if(SELECTIVE_BUILD STREQUAL "ON") # After disabling a block of code, some variables might be unused. 
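The Transpose, Unsqueeze and VariadicSplit hunks earlier in this patch all apply the same migration: the registerMatcherIn() override is removed and the pattern plus callback are registered directly in the constructor. A minimal self-contained sketch of that registration style follows; the class name, the chosen pattern, the header paths and the trivial callback body are illustrative assumptions, not part of this patch (the real LPT classes derive from LayerTransformation rather than MatcherPass directly).

#include <memory>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>

class ExampleTransposeMatcher : public ngraph::pass::MatcherPass {
public:
    NGRAPH_RTTI_DECLARATION;
    ExampleTransposeMatcher() {
        // Match Transpose(any input, Constant), mirroring the wrap_type patterns used above.
        auto transpose = ngraph::pattern::wrap_type<ngraph::opset1::Transpose>(
            { ngraph::pattern::any_input(), ngraph::pattern::wrap_type<ngraph::opset1::Constant>() });

        ngraph::graph_rewrite_callback callback = [](ngraph::pattern::Matcher& m) {
            const auto op = m.get_match_root();
            (void)op;
            // A real transformation rewrites the matched subgraph here and returns true on change.
            return false;
        };

        this->register_matcher(std::make_shared<ngraph::pattern::Matcher>(transpose, "ExampleTransposeMatcher"), callback);
    }
};

NGRAPH_RTTI_DEFINITION(ExampleTransposeMatcher, "ExampleTransposeMatcher", 0);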
- if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR - CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$") + if(CMAKE_COMPILER_IS_GNUCXX OR OV_COMPILER_IS_CLANG) target_compile_options(${TARGET_NAME} PRIVATE -Wno-unused-variable) endif() endif() @@ -46,8 +45,10 @@ target_link_libraries(${TARGET_NAME} PRIVATE mkldnn inference_engine_lp_transformations) target_include_directories(${TARGET_NAME} PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR} - $) + ${CMAKE_CURRENT_SOURCE_DIR}) + +target_include_directories(${TARGET_NAME} SYSTEM PRIVATE + $) # Cross compiled function # TODO: The same for proposal, proposalONNX, topk @@ -64,15 +65,16 @@ ie_add_api_validator_post_build_step(TARGET ${TARGET_NAME}) # add test object library add_library(${TARGET_NAME}_obj OBJECT ${SOURCES} ${HEADERS}) -target_link_libraries(${TARGET_NAME}_obj PUBLIC mkldnn) +link_system_libraries(${TARGET_NAME}_obj PUBLIC mkldnn) target_include_directories(${TARGET_NAME}_obj PRIVATE $ $ $ $ PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} - $ - $) + $) + +target_include_directories(${TARGET_NAME}_obj SYSTEM PUBLIC $) set_ie_threading_interface_for(${TARGET_NAME}_obj) diff --git a/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.cpp b/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.cpp new file mode 100644 index 00000000000000..6041e1f3f7b63e --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.cpp @@ -0,0 +1,247 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "cpu_blocked_memory_desc.h" +#include "mkldnn_memory.h" +#include "utils/cpu_utils.hpp" + +using namespace MKLDNNPlugin; + +BlockedMemoryDesc::BlockedMemoryDesc(InferenceEngine::Precision prc, const std::vector& dims) : MemoryDesc(dims, Blocked) , precision(prc) { + order.resize(dims.size()); + std::iota(order.begin(), order.end(), 0); + blockedDims = dims; + offsetPadding = 0; + offsetPaddingToData.resize(dims.size(), 0); + strides.resize(order.size()); + strides[strides.size() - 1] = 1; + for (size_t i = 2; i <= order.size(); i++) { + strides[strides.size() - i] = strides[strides.size() - (i - 1)] * blockedDims[blockedDims.size() - (i - 1)]; + } +} + +BlockedMemoryDesc::BlockedMemoryDesc(InferenceEngine::Precision prc, const std::vector& dims, const std::vector& blockedDims, + const std::vector& order, size_t offsetPadding, const std::vector& offsetPaddingToData, + const std::vector& strides) : MemoryDesc(dims, Blocked), precision(prc) { + if (std::any_of(order.begin(), order.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) { + IE_THROW() << "BlockedMemoryDesc do not support undefined order."; + } + + if (std::any_of(blockedDims.begin() + dims.size(), blockedDims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) { + IE_THROW() << "BlockedMemoryDesc doesn't support undefined blockedDims."; + } + + this->order = order; + this->blockedDims = blockedDims; + this->offsetPadding = offsetPadding; + + if (offsetPaddingToData.empty() && !order.empty()) { + this->offsetPaddingToData.resize(order.size()); + this->offsetPaddingToData[order.size() - 1] = 0; + for (size_t i = 2; i <= order.size(); i++) { + this->offsetPaddingToData[order.size() - i] = 0; + } + } else { + this->offsetPaddingToData = offsetPaddingToData; + } + + if (strides.empty() && !order.empty()) { + if (std::any_of(this->blockedDims.begin(), this->blockedDims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) { + this->strides.resize(order.size(), Shape::UNDEFINED_DIM); + } else { + 
this->strides.resize(order.size()); + this->strides[order.size() - 1] = 1; + for (size_t i = 2; i <= order.size(); i++) { + this->strides[order.size() - i] = this->strides[order.size() - (i - 1)] * this->blockedDims[blockedDims.size() - (i - 1)]; + } + } + } else { + this->strides = strides; + } + + if (!everyone_is(this->order.size(), this->blockedDims.size(), this->offsetPaddingToData.size(), this->strides.size())) { + IE_THROW() << "Order, blocked dims, offset padding to data and strides must have equals size"; + } +} + +bool BlockedMemoryDesc::isDefined() const { + bool defined = true; + defined = defined && std::none_of(blockedDims.cbegin(), blockedDims.cend(), [](size_t val) { return val == Shape::UNDEFINED_DIM; }); + defined = defined && std::none_of(strides.cbegin(), strides.cend(), [](size_t val) { return val == Shape::UNDEFINED_DIM; }); + defined = defined && std::none_of(order.cbegin(), order.cend(), [](size_t val) { return val == Shape::UNDEFINED_DIM; }); + defined = defined && std::none_of(offsetPaddingToData.cbegin(), offsetPaddingToData.cend(), [](size_t val) { return val == Shape::UNDEFINED_DIM; }); + defined = defined && offsetPadding != Shape::UNDEFINED_DIM; + + return defined; +} + +bool BlockedMemoryDesc::isCompatible(const MemoryDesc& rhs) const { + const MemoryDesc* pRhs = &rhs; + if (auto blockingDesc = dynamic_cast(pRhs)) { + return isCompatible(*blockingDesc); + } else if (auto mkldnnDesc = dynamic_cast(pRhs)) { + return mkldnnDesc->isCompatible(*this); + } else { + return false; + } +} + +bool BlockedMemoryDesc::isCompatible(const BlockedMemoryDesc& rhs) const { + if (this->getShape() != rhs.getShape() || this->getPrecision() != rhs.getPrecision()) + return false; + + if (!dimsEqualWeak(this->getBlockDims(), rhs.getBlockDims())) { + return false; + } + + if (!dimsEqualWeak(this->getOffsetPaddingToData(), rhs.getOffsetPaddingToData())) { + return false; + } + + // this check needed to avoid inserting unnecessary reorders if the memory is used in place and the batch size is equal to 1 + size_t skipAxis = this->getShape().getRank() > 0 && this->getShape().getDims().front() == 1 ? 0 : + Shape::UNDEFINED_DIM; //ignore batch axis if batch size == 1 + if (!dimsEqualWeak(this->getStrides(), rhs.getStrides(), skipAxis)) { + return false; + } + + if (!dimsEqualWeak(this->getOrder(), rhs.getOrder())) { + return false; + } + + return dimsEqualWeak(this->getOffsetPadding(), rhs.getOffsetPadding()); +} + +bool BlockedMemoryDesc::isCompatible(const MKLDNNMemoryDesc& rhs) const { + return rhs.isCompatible(*this); +} + +size_t BlockedMemoryDesc::getMemSizeImp() const { + int64_t e_size = getOffsetPadding() + 1; // size in bytes (from begin of data to last element) + for (int j = 0; j < getBlockDims().size(); j++) + e_size += (getBlockDims()[j] - 1) * getStrides()[j]; + + + e_size *= getPrecision() == InferenceEngine::Precision::BIN ? 1 : getPrecision().size(); + + return e_size; +} + +size_t BlockedMemoryDesc::getOffset(const InferenceEngine::SizeVector& v) const { + InferenceEngine::SizeVector off_v = v; + + size_t n_blocked_dims = order.size(); + if (blockedDims.size() != n_blocked_dims || strides.size() != n_blocked_dims) { + IE_THROW() << "Cannot calculate offset. 
Incorrect primitive descriptor!"; + } + InferenceEngine::SizeVector blockedShift(n_blocked_dims); + for (size_t i = 1; i <= n_blocked_dims; i++) { + blockedShift[n_blocked_dims - i] = off_v[order[n_blocked_dims - i]] % blockedDims[n_blocked_dims - i]; + off_v[order[n_blocked_dims - i]] /= blockedDims[n_blocked_dims - i]; + } + size_t offset = getOffsetPadding(); + for (size_t d = 0; d < n_blocked_dims; ++d) { + const size_t p = blockedShift[d] + getOffsetPaddingToData()[d]; + offset += p * strides[d]; + } + return offset; +} + +size_t BlockedMemoryDesc::getElementOffset(size_t elemNumber) const { + // TODO [DS]: rewrite to support dynamic shapes + auto& dims = shape.getStaticDims(); + size_t n_dims = dims.size(); + InferenceEngine::SizeVector pos(n_dims); + for (size_t rd = 1; rd <= n_dims; ++rd) { + const size_t d = n_dims - rd; + const size_t cur_dim = dims[d]; + pos[d] = elemNumber % cur_dim; + elemNumber /= cur_dim; + } + return getOffset(pos); +} + +bool BlockedMemoryDesc::hasLayoutType(LayoutType layoutType) const { + switch (layoutType) { + case LayoutType::ncsp: + return isPlainFormat(); + case LayoutType::nspc: + return isTailCFormat(); + case LayoutType::nCsp8c: + return isBlockedCFormat(8); + case LayoutType::nCsp16c: + return isBlockedCFormat(16); + default: + return false; + } +} + +bool BlockedMemoryDesc::isPlainFormat() const { + if (shape.getRank() != order.size()) { + return false; + } + for (size_t i = 0; i < order.size(); ++i) { + if (order[i] != i) { + return false; + } + } + return true; +} + +bool BlockedMemoryDesc::isBlockedCFormat(size_t blk_size) const { + if ((order.size() - shape.getRank()) != 1) { + return false; + } + for (size_t i = 0; i < order.size() - 1; ++i) { + if (order[i] != i) { + return false; + } + } + if (order.back() != 1) { + return false; + } + if (blockedDims.back() != blk_size) { + return false; + } + return true; +} + +bool BlockedMemoryDesc::isTailCFormat() const { + if (shape.getRank() < 3) { + return false; + } + if (shape.getRank() != order.size()) { + return false; + } + if (!std::is_sorted(order.begin(), --order.end())) { + return false; + } + if (order.back() != 1) { + return false; + } + return true; +} + +std::string BlockedMemoryDesc::serializeFormat() const { + std::stringstream result; + char startLetter = 'a'; + std::unordered_map mapAxisBlockSize; + for (size_t i = shape.getRank(); i < order.size(); ++i) { + mapAxisBlockSize.insert({order[i], blockedDims[i]}); + } + + for (size_t i = 0; i < shape.getRank(); ++i) { + char nextLetter = startLetter + order[i]; + if (mapAxisBlockSize.count(i)) { + nextLetter = toupper(nextLetter); + } + result << nextLetter; + } + + for (auto& item : mapAxisBlockSize) { + result << item.second << char(startLetter + item.first); + } + + return result.str(); +} diff --git a/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.h b/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.h new file mode 100644 index 00000000000000..2c5b8a7d53cbdb --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.h @@ -0,0 +1,100 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "cpu_memory_desc.h" + +namespace MKLDNNPlugin { + +class MKLDNNMemoryDesc; + +class BlockedMemoryDesc : public MemoryDesc { +public: + BlockedMemoryDesc(InferenceEngine::Precision prc, const std::vector& dims); + + BlockedMemoryDesc(InferenceEngine::Precision prc, const std::vector& dims, const std::vector& blockedDims, + const std::vector& 
order, size_t offsetPadding = 0, const std::vector& offsetPaddingToData = {}, + const std::vector& strides = {}); + + MemoryDescPtr clone() const override { + return MKLDNNPlugin::make_unique(*this); + } + + bool isDefined() const override; + + bool isCompatible(const MemoryDesc& rhs) const override; + + bool isCompatible(const BlockedMemoryDesc& rhs) const; + + bool isCompatible(const MKLDNNMemoryDesc& rhs) const; + + InferenceEngine::Precision getPrecision() const override { + return precision; + } + + void setPrecision(InferenceEngine::Precision prc) override { + precision = std::move(prc); + } + + const std::vector& getBlockDims() const { + return blockedDims; + } + + /** + * @brief Returns the vector of order + * + * @return order + */ + const std::vector& getOrder() const { + return order; + } + + /** + * @brief Returns the per-dimension offset vector + * + * @return offsets + */ + const std::vector& getOffsetPaddingToData() const { + return offsetPaddingToData; + } + /** + * @brief Returns the offset to the current memory block + * + * @return offset + */ + size_t getOffsetPadding() const { + return offsetPadding; + } + + /** + * @brief Returns strides for each dimension + * + * @return strides + */ + const std::vector& getStrides() const { + return strides; + } + + bool hasLayoutType(LayoutType layoutType) const override; + + std::string serializeFormat() const override; + +private: + size_t getElementOffset(size_t elemNumber) const override; + size_t getMemSizeImp() const override; + size_t getOffset(const InferenceEngine::SizeVector& v) const; + bool isPlainFormat() const; + bool isBlockedCFormat(size_t blk_size) const; + bool isTailCFormat() const; + +private: + InferenceEngine::Precision precision; + std::vector blockedDims; + std::vector strides; + std::vector order; + std::vector offsetPaddingToData; + size_t offsetPadding; +}; +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/cpu_memory_desc.h b/inference-engine/src/mkldnn_plugin/cpu_memory_desc.h new file mode 100644 index 00000000000000..31d2b4b2091f00 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/cpu_memory_desc.h @@ -0,0 +1,110 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include "cpu_shape.h" +#include "utils/general_utils.h" + +namespace MKLDNNPlugin { + +enum MemoryDescType { + Blocked, + Mkldnn +}; + +enum class LayoutType : unsigned { + nspc, // general per channels format + ncsp, // general planar + nCsp8c, // general channels blocked by 8 + nCsp16c // general channels blocked by 16 +}; + +class MemoryDesc { +public: + MemoryDescType getType() const { + return type; + } + + const Shape& getShape() const { + return shape; + } + + virtual ~MemoryDesc() = default; + + virtual InferenceEngine::Precision getPrecision() const = 0; + + virtual void setPrecision(InferenceEngine::Precision prc) = 0; + + virtual std::unique_ptr clone() const = 0; + + virtual bool isCompatible(const MemoryDesc& rhs) const = 0; + + // Checks that all dimensions, offsets, strides, etc are defined (!= UNDEFINED_DIM) + virtual bool isDefined() const = 0; + + virtual bool hasLayoutType(LayoutType layoutType) const = 0; + + virtual std::string serializeFormat() const = 0; + + /** + * @brief Get minimal required memory size in bytes. 
+ * @return return minimal required memory size in bytes or UNDEFINED_SIZE in case undefined descriptor + */ + size_t getCurrentSize() const { + size_t retVal = UNDEFINED_SIZE; + if (isDefined()) { + retVal = getMemSizeImp(); + } + return retVal; + } + + template ::value && !std::is_reference::value, int>::type = 0, + typename std::enable_if::value, int>::type = 0> + T* as() { + T* casted = dynamic_cast(this); + if (!casted) + IE_THROW() << "Cannot dynamically cast MemoryDesc"; + return casted; + } + + template ::value && !std::is_reference::value, int>::type = 0, + typename std::enable_if::value, int>::type = 0> + const T* as() const { + const T* casted = dynamic_cast(this); + if (!casted) + IE_THROW() << "Cannot dynamically cast MemoryDesc"; + return casted; + } + + static constexpr size_t UNDEFINED_SIZE = std::numeric_limits::max(); + +protected: + MemoryDesc(const Shape& shape, MemoryDescType type) + : shape(shape), type(type) {} + + MemoryDesc(const std::vector& dims, MemoryDescType type) + : shape(dims), type(type) {} + + virtual size_t getMemSizeImp() const = 0; + + // Get offset to the n'th element. Returns physical index of the element by the logical one considering padding, layout, blocking etc. + virtual size_t getElementOffset(size_t elemNumber) const = 0; + + MemoryDescType type; + Shape shape; + + friend class BlobDumper; + // WA: optimizedNspc2Ncsp used getElementOffset inside implementation + friend class MKLDNNSplitNode; +}; + +using MemoryDescPtr = std::unique_ptr; +using MemoryDescConstPtr = std::unique_ptr; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.cpp b/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.cpp new file mode 100644 index 00000000000000..cc04db7f26f0a6 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.cpp @@ -0,0 +1,395 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "cpu_memory_desc.h" +#include "cpu_memory_desc_utils.h" +#include "mkldnn_memory.h" +#include "utils/general_utils.h" +#include "utils/cpu_utils.hpp" +#include +#include +#include +#include + +using namespace mkldnn; +using namespace MKLDNNPlugin; +using namespace InferenceEngine; + +namespace MKLDNNPlugin { + +/** + * Convert to BlockedDescriptor + * + * mkl: IOhw_4i16o4i dims {32, 64, 128, 128} + * strides // the order of outer dims is encoded here + * inner_blks 4 16 4 + * inner_idxs 1 0 1 + * + * IE tensor desc has more expressive ability. Any oneDNN blocked tensor can be covreted. + * How to convert into IE representation: + * 0. Detect a new_outer_order of outer_dims via descending strides. + * 1. IE strides : concatenate strides in new_outer_order and inner strides. + * 2. IE dims : concatenate outer dims in new_outer_order with auto padding and inner blocks + * 3. IE order : concatenate new_outer_order and inner_idxs + */ +BlockedMemoryDesc MemoryDescUtils::convertToBlockedDescriptor(const MKLDNNMemoryDesc& inpDesc) { + mkldnn::memory::desc desc = inpDesc; + const auto dims = desc.dims(); + + if (desc.data.format_kind != dnnl_blocked) + IE_THROW() << "Conversion is not possible"; + + const auto &blk_desc = desc.data.format_desc.blocking; + + const size_t outer_ndims = dims.size(); + const size_t inner_ndims = blk_desc.inner_nblks; + const size_t total_ndims = outer_ndims + inner_ndims; + + // strides of inner dims. 
In case of 4i16o4i will be {64, 4, 1} + std::vector inner_strides(inner_ndims, 1); + for (size_t i = 1; i < blk_desc.inner_nblks; i++) { + inner_strides[blk_desc.inner_nblks - 1 - i] = inner_strides[blk_desc.inner_nblks - i] * blk_desc.inner_blks[blk_desc.inner_nblks - i]; + } + + // total inner block size. in case of 4i16o4i will be {16, 16, 1, 1} + std::vector total_block_per_dim(outer_ndims, 1); + for (int i = 0; i < inner_ndims; i++) { + total_block_per_dim[blk_desc.inner_idxs[i]] *= blk_desc.inner_blks[i]; + } + std::vector outer_block_dims(std::begin(dims), std::begin(dims) + outer_ndims); + for (size_t i = 0; i < outer_block_dims.size(); i++) { + outer_block_dims[i] = div_up(outer_block_dims[i], total_block_per_dim[i]); + } + + // order of outer dims. In case of IOhw_ will be {1, 0, 2, 3} + std::vector outer_order(outer_ndims); + std::iota(outer_order.begin(), outer_order.end(), 0); + std::sort(outer_order.begin(), outer_order.end(), + [&blk_desc, &outer_block_dims] (size_t ind_l, size_t ind_r) { + return (blk_desc.strides[ind_l] > blk_desc.strides[ind_r]) || + (blk_desc.strides[ind_l] == blk_desc.strides[ind_r] && outer_block_dims[ind_l] > outer_block_dims[ind_r]); + }); + + // IE blocked order + // [new_outer_order] U [inner_idxs] + SizeVector ie_blk_order(total_ndims, 0); + std::copy(outer_order.begin(), outer_order.end(), ie_blk_order.begin()); + std::copy(blk_desc.inner_idxs, blk_desc.inner_idxs + blk_desc.inner_nblks, ie_blk_order.begin() + dims.size()); + + // IE blocked strides + // [outer_strides via new_outer_order] U [inner_strides] + SizeVector ie_blk_strides(total_ndims, 0); + std::copy(inner_strides.rbegin(), inner_strides.rend(), ie_blk_strides.rbegin()); + std::transform(outer_order.begin(), outer_order.end(), ie_blk_strides.begin(), + [&] (size_t i) { return blk_desc.strides[i]; }); + + // IE blocked dims + // [dims via new_outer_order with auto pad] U [inner_blk_dims] + SizeVector ie_blk_dims(total_ndims, 0); + std::copy(blk_desc.inner_blks, blk_desc.inner_blks + blk_desc.inner_nblks, + ie_blk_dims.end() - blk_desc.inner_nblks); + std::transform(outer_order.begin(), outer_order.end(), ie_blk_dims.begin(), + [&] (size_t i) { return outer_block_dims[i]; }); + + // IE offset padded to data. Same as for oneDNN + SizeVector ie_blk_offset_to_data {desc.data.padded_offsets, desc.data.padded_offsets + desc.data.ndims}; + size_t ie_blk_offset0 = desc.data.offset0; + + // TODO: The tensor desc implementation allow to specify offset_to_data for inner blocked dims. + // Which is not obvious behavior. It required offset_to_data.size == total_ndims, so will + // fill it with zero. 
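For readers following the conversion recipe in the comment above, here is a worked trace for the same IOhw_4i16o4i example, written as comments; the numbers match the documentation of the reverse conversion further below and are illustrative only.

// Worked trace for IOhw_4i16o4i, dims {32, 64, 128, 128}:
//   oneDNN view: inner_blks = {4, 16, 4}, inner_idxs = {1, 0, 1}
//   step 0: sort outer dims by descending strides -> new_outer_order = {1, 0, 2, 3}
//   step 1: IE strides = outer strides in that order, then inner strides
//                      = {8388608, 4194304, 32768, 256} + {64, 4, 1}
//   step 2: IE dims    = padded outer dims in that order, then inner blocks
//                      = {4, 2, 128, 128} + {4, 16, 4}
//   step 3: IE order   = new_outer_order, then inner_idxs
//                      = {1, 0, 2, 3, 1, 0, 1}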
+ ie_blk_offset_to_data.insert(ie_blk_offset_to_data.end(), inner_ndims, 0); + + BlockedMemoryDesc res(MKLDNNMemory::convertToIePrec(desc.data_type()), SizeVector {begin(dims), end(dims)}, ie_blk_dims, + ie_blk_order, ie_blk_offset0, ie_blk_offset_to_data, ie_blk_strides); + return res; +} + + +InferenceEngine::TensorDesc MemoryDescUtils::convertToTensorDesc(const MemoryDesc& desc) { + if (auto blockingDesc = dynamic_cast(&desc)) { + return InferenceEngine::TensorDesc(blockingDesc->getPrecision(), blockingDesc->getShape().getStaticDims(), + {blockingDesc->getBlockDims(), blockingDesc->getOrder(), blockingDesc->getOffsetPadding(), + blockingDesc->getOffsetPaddingToData(), blockingDesc->getStrides()}); + } else if (auto mkldnnDesc = dynamic_cast(&desc)) { + auto blockingDesc = convertToBlockedDescriptor(*mkldnnDesc); + return InferenceEngine::TensorDesc(blockingDesc.getPrecision(), blockingDesc.getShape().getStaticDims(), + {blockingDesc.getBlockDims(), blockingDesc.getOrder(), blockingDesc.getOffsetPadding(), + blockingDesc.getOffsetPaddingToData(), blockingDesc.getStrides()}); + } + + IE_THROW() << "Cannot convert MemoryDesc to InferenceEngine::TensorDesc"; + + return InferenceEngine::TensorDesc(); +} + +MKLDNNMemoryDesc MemoryDescUtils::convertToMKLDNNMemoryDesc(const MemoryDesc& desc) { + if (MemoryDescType::Blocked == desc.getType()) { + return convertToMKLDNNMemoryDesc(*(desc.as())); + } else if (MemoryDescType::Mkldnn == desc.getType()) { + return *(desc.as()); + } else { + IE_THROW() << "Cannot convert MemoryDesc to MKLDNNMemoryDesc"; + } +} + +MKLDNNMemoryDesc MemoryDescUtils::convertToMKLDNNMemoryDesc(const BlockedMemoryDesc& desc) { + dnnl_memory_desc_t mkldnnDesc; + + // scalar case + if (desc.getShape().getRank() == 0) { + mkldnn::memory::desc convertedDesc; + convertedDesc.data.format_kind = dnnl_blocked; + convertedDesc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(desc.getPrecision())); + convertedDesc.data.ndims = 1; + convertedDesc.data.dims[0] = 1; + convertedDesc.data.padded_dims[0] = 1; + convertedDesc.data.format_desc.blocking.strides[0] = 1; + convertedDesc.data.padded_offsets[0] = 0; + convertedDesc.data.offset0 = desc.getOffsetPadding(); + return MKLDNNMemoryDesc(convertedDesc); + } + + auto dims = desc.getShape().getStaticDims(); + + auto ie_blkdDims = desc.getBlockDims(); + auto ie_order = desc.getOrder(); + auto ie_offsetsToData = desc.getOffsetPaddingToData(); + auto ie_strides = desc.getStrides(); + + size_t outer_ndims = dims.size(); + size_t inner_ndims = ie_order.size() - dims.size(); + + bool is_descending_strides = true; + for (int i = 1; i < ie_strides.size(); i++) { + is_descending_strides &= (ie_strides[i-1] >= ie_strides[i]); + } + + // TODO: That's strong constrains and can be mitigated. IE::TensorDesc allow to transpose blocked dims + // and may be we can achieve correct "descending strides" form which allow conversion. 
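The conversion constraints discussed in the comment above can be sanity-checked against the IOhw_4i16o4i example used earlier; the trace below is illustrative only and not part of the patch.

// Conversion checks evaluated on IOhw_4i16o4i
// (strides {8388608, 4194304, 32768, 256, 64, 4, 1}, blocked dims {4, 2, 128, 128, 4, 16, 4}, 4 outer dims):
//   descending strides       : 8388608 >= 4194304 >= ... >= 4 >= 1        -> ok
//   outer order permutation  : {1, 0, 2, 3} covers each outer dim once    -> ok
//   dense inner blocks       : 64 == 4 * 16 and 4 == 1 * 4                -> ok
//   zero inner pad offsets   : trailing offsetPaddingToData entries are 0 -> ok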
+ if (!is_descending_strides) + IE_THROW() << "Unsupported case for conversion"; + + std::vector outer_order(outer_ndims, outer_ndims + 1); // outer_order[i] is index of stride for i-th dimension + for (size_t i = 0; i < outer_ndims; i++) { + outer_order[ie_order[i]] = i; + } + bool outer_is_correct_permutation_of_n = + std::find(outer_order.begin(), outer_order.end(), outer_ndims + 1) == outer_order.end(); + + if (!outer_is_correct_permutation_of_n) + IE_THROW() << "Unsupported case for conversion"; + + bool inner_block_are_dense = one_of(ie_strides.back(), 0, 1); // stride 1 - is dense case, 0 - broad casted + for (int i = outer_ndims; i < ie_strides.size() - 1; i++) { + inner_block_are_dense &= (ie_strides[i] == ie_strides[i+1] * ie_blkdDims[i+1]); + } + + if (!inner_block_are_dense) + IE_THROW() << "Unsupported case for conversion"; + + bool inner_pad_offsets_is_zero = std::all_of(ie_offsetsToData.begin() + outer_ndims, ie_offsetsToData.end(), + [](size_t pad) { return pad == 0; }); + + if (!inner_pad_offsets_is_zero) + IE_THROW() << "Unsupported case for conversion"; + + // Fill general memory desc fields + mkldnnDesc.format_kind = dnnl_blocked; + mkldnnDesc.extra.flags = 0; + mkldnnDesc.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(desc.getPrecision())); + mkldnnDesc.ndims = dims.size(); + mkldnnDesc.offset0 = desc.getOffsetPadding(); + std::copy(dims.begin(), dims.end(), mkldnnDesc.dims); + std::copy(ie_offsetsToData.begin(), ie_offsetsToData.begin() + outer_ndims, mkldnnDesc.padded_offsets); + std::fill(mkldnnDesc.padded_dims, mkldnnDesc.padded_dims + outer_ndims, 1); + for (size_t i = 0; i < ie_order.size(); i++) { + auto idx = ie_order[i]; + mkldnnDesc.padded_dims[idx] *= ie_blkdDims[i]; + } + + // Fill blocking desc + auto &dnn_blk_desc = mkldnnDesc.format_desc.blocking; + dnn_blk_desc.inner_nblks = inner_ndims; + std::copy(ie_blkdDims.end() - inner_ndims, ie_blkdDims.end(), dnn_blk_desc.inner_blks); + std::copy(ie_order.end() - inner_ndims, ie_order.end(), dnn_blk_desc.inner_idxs); + for (size_t i = 0; i < outer_ndims; i++) { + dnn_blk_desc.strides[i] = ie_strides[outer_order[i]]; + } + + return MKLDNNMemoryDesc(mkldnnDesc); +} + + +/** + * Construct from IE::TensorDesc + * @param tDesc + * + * IE IOhw_4i16o4i dims(N) = {32, 64, 128, 128} + * blockedDims {4, 2, 128, 128, 4, 16, 4} // total dims(inner, outermost, auto blocked/padded). Generally sorted by strides. + * strides {8388608, 4194304, 32768, 256, 64, 4, 1} // strides for blockedDims, growing sequence + * order {1, 0, 2, 3, 1, 0, 1} // matching to original dims + * + * All vectors blockedDims/strides/order have same size equals total num of internal blocked dims(inner_dims + outer_dims) + * + * Tensor descriptor filing is not deterministic. It allows any permutation of index which keeps order of + * real dims spliting. + * for {1, 0, 2, 3, 1, 0, 1} we can swap elements [1] <=> [4] + * but not [0]<=>[4] because it breacke spliting original dims into internal blocked dims + * Normalization of representation: Make strides growing but keep layout same as original. Not all + * layout allow us to meet normalize form of tensor desc. + * + * Limitation of conversion first N elements of order should be permutation of [0,1,2 ... 
N] + */ +MKLDNNMemoryDesc MemoryDescUtils::convertToMKLDNNMemoryDesc(const InferenceEngine::TensorDesc& tDesc) { + mkldnn::memory::desc mkldnnDesc({}, mkldnn::memory::data_type::undef, mkldnn::memory::format_tag::undef); + auto dims = tDesc.getDims(); + + // TODO: implicit conversion of dims is no good... + if (tDesc.getLayout() == Layout::SCALAR) { + mkldnnDesc.data.format_kind = dnnl_blocked; + mkldnnDesc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(tDesc.getPrecision())); + mkldnnDesc.data.ndims = 1; + mkldnnDesc.data.dims[0] = 1; + mkldnnDesc.data.padded_dims[0] = 1; + mkldnnDesc.data.format_desc.blocking.strides[0] = 1; + mkldnnDesc.data.padded_offsets[0] = 0; + mkldnnDesc.data.offset0 = tDesc.getBlockingDesc().getOffsetPadding(); + return MKLDNNMemoryDesc(mkldnnDesc); + } + + if (tDesc.getLayout() == Layout::ANY) { + mkldnnDesc.data.format_kind = dnnl_format_kind_any; + mkldnnDesc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(tDesc.getPrecision())); + mkldnnDesc.data.ndims = dims.size(); + std::copy(dims.begin(), dims.end(), mkldnnDesc.data.dims); + std::copy(dims.begin(), dims.end(), mkldnnDesc.data.padded_dims); + mkldnnDesc.data.offset0 = tDesc.getBlockingDesc().getOffsetPadding(); + std::fill(mkldnnDesc.data.padded_offsets, mkldnnDesc.data.padded_offsets + dims.size(), 0); + return MKLDNNMemoryDesc(mkldnnDesc); + } + + auto ie_blkdDims = tDesc.getBlockingDesc().getBlockDims(); + auto ie_order = tDesc.getBlockingDesc().getOrder(); + auto ie_offsetsToData = tDesc.getBlockingDesc().getOffsetPaddingToData(); + auto ie_strides = tDesc.getBlockingDesc().getStrides(); + + size_t outer_ndims = dims.size(); + size_t inner_ndims = ie_order.size() - dims.size(); + + bool is_descending_strides = true; + for (int i = 1; i < ie_strides.size(); i++) { + is_descending_strides &= (ie_strides[i-1] >= ie_strides[i]); + } + + // TODO: That's strong constrains and can be mitigated. IE::TensorDesc allow to transpose blocked dims + // and may be we can achieve correct "descending strides" form which allow conversion. 
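As a usage note for the conversion utilities defined in this file, a minimal sketch is shown below; only the MemoryDescUtils functions and the BlockedMemoryDesc/MKLDNNMemoryDesc types come from this patch, while the header paths and the NCHW example descriptor are assumptions.

#include <ie_layouts.h>
#include "cpu_blocked_memory_desc.h"
#include "cpu_memory_desc_utils.h"
#include "mkldnn_memory.h"

void conversionExample() {
    using namespace MKLDNNPlugin;

    // Plain dense FP32 NCHW tensor descriptor.
    InferenceEngine::TensorDesc tdesc(InferenceEngine::Precision::FP32,
                                      {1, 3, 224, 224},
                                      InferenceEngine::Layout::NCHW);

    // TensorDesc -> oneDNN-style descriptor, then back to an IE-style blocked view.
    MKLDNNMemoryDesc mkldnnDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(tdesc);
    BlockedMemoryDesc blockedDesc = MemoryDescUtils::convertToBlockedDescriptor(mkldnnDesc);

    // For a dense NCHW layout both views describe the same memory,
    // so they are expected to compare as compatible.
    bool compatible = blockedDesc.isCompatible(mkldnnDesc);
    (void)compatible;
}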
+ if (!is_descending_strides) + IE_THROW() << "Unsupported case for conversion"; + + std::vector outer_order(outer_ndims, outer_ndims + 1); // outer_order[i] is index of stride for i-th dimension + for (size_t i = 0; i < outer_ndims; i++) { + outer_order[ie_order[i]] = i; + } + bool outer_is_correct_permutation_of_n = + std::find(outer_order.begin(), outer_order.end(), outer_ndims + 1) == outer_order.end(); + + if (!outer_is_correct_permutation_of_n) + IE_THROW() << "Unsupported case for conversion"; + + bool inner_block_are_dense = one_of(ie_strides.back(), 0, 1); // stride 1 - is dense case, 0 - broad casted + for (int i = outer_ndims; i < ie_strides.size() - 1; i++) { + inner_block_are_dense &= (ie_strides[i] == ie_strides[i+1] * ie_blkdDims[i+1]); + } + + if (!inner_block_are_dense) + IE_THROW() << "Unsupported case for conversion"; + + bool inner_pad_offsets_is_zero = std::all_of(ie_offsetsToData.begin() + outer_ndims, ie_offsetsToData.end(), + [](size_t pad) { return pad == 0; }); + + if (!inner_pad_offsets_is_zero) + IE_THROW() << "Unsupported case for conversion"; + + // Fill general memory desc fields + mkldnnDesc.data.format_kind = dnnl_blocked; + mkldnnDesc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(tDesc.getPrecision())); + mkldnnDesc.data.ndims = dims.size(); + mkldnnDesc.data.offset0 = tDesc.getBlockingDesc().getOffsetPadding(); + std::copy(dims.begin(), dims.end(), mkldnnDesc.data.dims); + std::copy(ie_offsetsToData.begin(), ie_offsetsToData.begin() + outer_ndims, mkldnnDesc.data.padded_offsets); + std::fill(mkldnnDesc.data.padded_dims, mkldnnDesc.data.padded_dims + outer_ndims, 1); + for (size_t i = 0; i < ie_order.size(); i++) { + auto idx = ie_order[i]; + mkldnnDesc.data.padded_dims[idx] *= ie_blkdDims[i]; + } + + // Fill blocking desc + auto &dnn_blk_desc = mkldnnDesc.data.format_desc.blocking; + dnn_blk_desc.inner_nblks = inner_ndims; + std::copy(ie_blkdDims.end() - inner_ndims, ie_blkdDims.end(), dnn_blk_desc.inner_blks); + std::copy(ie_order.end() - inner_ndims, ie_order.end(), dnn_blk_desc.inner_idxs); + for (size_t i = 0; i < outer_ndims; i++) { + dnn_blk_desc.strides[i] = ie_strides[outer_order[i]]; + } + + return MKLDNNMemoryDesc(mkldnnDesc); +} + +BlockedMemoryDesc MemoryDescUtils::convertToBlockedDescriptor(const MemoryDesc &desc) { + if (desc.getType() == MemoryDescType::Blocked) { + return *(desc.as()); + } else if (desc.getType() == MemoryDescType::Mkldnn) { + return MemoryDescUtils::convertToBlockedDescriptor(*(desc.as())); + } else { + IE_THROW() << "Cannot convert to blocked memory descriptor. 
Unsupported memory desc type"; + } +} + +MemoryDescPtr MemoryDescUtils::applyUndefinedOffset(const MKLDNNMemoryDesc& desc) { + if (desc.getFormatKind() != dnnl_format_kind_t::dnnl_blocked) + IE_THROW() << "applyUndefinedOffset doesn't support not dnnl_blocked MKLDNNMemoryDesc"; + + mkldnn::memory::desc retDesc = desc; + retDesc.data.offset0 = Shape::UNDEFINED_DIM; + return MKLDNNPlugin::make_unique(retDesc); +} + +MemoryDescPtr MemoryDescUtils::applyUndefinedOffset(const BlockedMemoryDesc &desc) { + std::vector strides; + std::vector offsetPaddingToData; + + strides.resize(desc.getBlockDims().size(), Shape::UNDEFINED_DIM); + offsetPaddingToData.resize(desc.getBlockDims().size(), 0); + size_t offsetPadding = Shape::UNDEFINED_DIM; + + return MKLDNNPlugin::make_unique(desc.getPrecision(), desc.getShape().getDims(), desc.getBlockDims(), + desc.getOrder(), offsetPadding, offsetPaddingToData, strides); +} + +MemoryDescPtr MemoryDescUtils::resetOffset(const MemoryDesc* desc) { + if (MemoryDescType::Blocked == desc->getType()) { + auto blockedDesc = desc->as(); + return MKLDNNPlugin::make_unique(blockedDesc->getPrecision(), blockedDesc->getShape().getDims(), + blockedDesc->getBlockDims(), blockedDesc->getOrder()); + } else if (MemoryDescType::Mkldnn == desc->getType()) { + auto mkldnnDesc = desc->as(); + mkldnn::memory::desc retDesc = *mkldnnDesc; + retDesc.data.offset0 = 0; + return MKLDNNPlugin::make_unique(retDesc); + } else { + IE_THROW() << "resetOffset support Blocked and Mkldnn descpriptors only"; + } +} + +InferenceEngine::Blob::Ptr MemoryDescUtils::interpretAsBlob(const MKLDNNMemory &mem) { + // TODO [DS]: Rewrite when IE is moved to the new TensorDescriptor + auto& memDesc = mem.GetDesc(); + InferenceEngine::TensorDesc desc = convertToTensorDesc(memDesc); + + desc = InferenceEngine::TensorDesc(desc.getPrecision(), memDesc.getShape().getStaticDims(), desc.getBlockingDesc()); + return MKLDNNPlugin::isEmptyTensorDesc(desc) ? 
make_blob_with_precision(desc) : make_blob_with_precision(desc, mem.GetData()); +} + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.h b/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.h new file mode 100644 index 00000000000000..5cc6b0fc1038c7 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.h @@ -0,0 +1,88 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +namespace MKLDNNPlugin { +class MKLDNNMemoryDesc; +class BlockedMemoryDesc; +class MKLDNNMemory; + +class MemoryDescUtils { +public: + /** + * @brief Converts MemoryDesc to InferenceEngine::TensorDesc + * @param desc MemoryDesc to be converted + * @return converted InferenceEngine::TensorDesc + */ + static InferenceEngine::TensorDesc convertToTensorDesc(const MemoryDesc& desc); + + /** + * @brief Converts MemoryDesc to MKLDNNMemoryDesc + * @param desc MemoryDesc to be converted + * @return converted MKLDNNMemoryDesc + */ + static MKLDNNMemoryDesc convertToMKLDNNMemoryDesc(const MemoryDesc& desc); + + /** + * @brief Converts BlockedMemoryDesc to MKLDNNMemoryDesc + * @param desc BlockedMemoryDesc to be converted + * @return converted MKLDNNMemoryDesc + */ + static MKLDNNMemoryDesc convertToMKLDNNMemoryDesc(const BlockedMemoryDesc& desc); + + /** + * @brief Converts InferenceEngine::TensorDesc to MKLDNNMemoryDesc + * @param desc InferenceEngine::TensorDesc to be converted + * @return converted MKLDNNMemoryDesc + */ + static MKLDNNMemoryDesc convertToMKLDNNMemoryDesc(const InferenceEngine::TensorDesc& desc); + + /** + * @brief Converts MemoryDesc to BlockedMemoryDesc + * @param desc MemoryDesc to be converted + * @return converted BlockedMemoryDesc + */ + static BlockedMemoryDesc convertToBlockedDescriptor(const MemoryDesc& desc); + + /** + * @brief Converts MKLDNNMemoryDesc to BlockedMemoryDesc + * @param desc MKLDNNMemoryDesc to be converted + * @return converted BlockedMemoryDesc + */ + static BlockedMemoryDesc convertToBlockedDescriptor(const MKLDNNMemoryDesc& inpDesc); + + /** + * @brief Creates MKLDNNMemoryDesc with offset0 of UNDEFINED_DIM size + * @param desc modifiable MKLDNNMemoryDesc + * @return pointer to MKLDNNMemoryDesc + */ + static MemoryDescPtr applyUndefinedOffset(const MKLDNNMemoryDesc& desc); + + /** + * @brief Creates BlockedMemoryDesc with offsetPadding, strides of UNDEFINED_DIM size and offsetPaddingToData of 0 size + * @param desc modifiable BlockedMemoryDesc + * @return pointer to BlockedMemoryDesc + */ + static MemoryDescPtr applyUndefinedOffset(const BlockedMemoryDesc& desc); + + /** + * @brief Creates MemoryDesc with offsetPadding of 0 size + * @param desc modifiable MemoryDesc + * @return pointer to MemoryDesc + */ + static MemoryDescPtr resetOffset(const MemoryDesc* desc); + + /** + * @brief Creates InferenceEngine::Blob from MKLDNNMemory + * @param desc MKLDNNMemory from which will be created InferenceEngine::Blob + * @return pointer to InferenceEngine::Blob + */ + static InferenceEngine::Blob::Ptr interpretAsBlob(const MKLDNNMemory& mem); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/cpu_shape.h b/inference-engine/src/mkldnn_plugin/cpu_shape.h new file mode 100644 index 00000000000000..fd063c2dc18c13 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/cpu_shape.h @@ -0,0 +1,159 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include 
"perf_count.h" +#include +#include +#include +#include +#include "mkldnn_dims.h" + +namespace MKLDNNPlugin { + +class Shape { +public: + Shape() = default; + + explicit Shape(const ngraph::PartialShape& shape) { + minDims = shape.get_min_shape(); + maxDims = shape.get_max_shape(); + type = shape.is_static() ? ShapeType::Static : ShapeType::Dynamic; + + initDims(); + } + + explicit Shape(const InferenceEngine::SizeVector& shape) { + minDims = shape; + maxDims = shape; + type = ShapeType::Static; + + initDims(); + } + + /** + * @brief + * for static shape + * maxDims = [2, 3, 4, 5] + * minDims = [2, 3, 4, 5] + * dims = [2, 3, 4, 5] + * @return return lower bound of shape = [2, 3, 4, 5] + * for dynamic shape + * maxDims = [6, 6, 6, 6] + * minDims = [1, 1, 1, 1] + * dims = [UNDEFINED_DIM, UNDEFINED_DIM, UNDEFINED_DIM, UNDEFINED_DIM] + * @return return lower bound of shape = [1, 1, 1, 1] + */ + const std::vector& getMinDims() const { + return minDims; + } + + /** + * @brief + * for static shape + * maxDims = [2, 3, 4, 5] + * minDims = [2, 3, 4, 5] + * dims = [2, 3, 4, 5] + * @return return upper bound of shape = [2, 3, 4, 5] + * for dynamic shape + * maxDims = [6, 6, 6, 6] + * minDims = [1, 1, 1, 1] + * dims = [UNDEFINED_DIM, UNDEFINED_DIM, UNDEFINED_DIM, UNDEFINED_DIM] + * @return return upper bound of shape = [6, 6, 6, 6] + */ + const std::vector& getMaxDims() const { + return maxDims; + } + + /** + * @brief return defined shape or throw exception for dynamic case + * @return return shape + */ + const std::vector& getStaticDims() const { + if (type != ShapeType::Static) { + IE_THROW() << "Cannot get dims for non static shape"; + } + + return minDims; + } + + /** + * @brief + * for static shape + * maxDims = [2, 3, 4, 5] + * minDims = [2, 3, 4, 5] + * dims = [2, 3, 4, 5] + * @return return defined shape = [2, 3, 4, 5] + * for dynamic shape + * maxDims = [2, 3, 6, 6] + * minDims = [2, 3, 1, 1] + * dims = [2, 3, UNDEFINED_DIM, UNDEFINED_DIM] + * @return return shape with defined and undefined dims = [2, 3, UNDEFINED_DIM, UNDEFINED_DIM] + */ + const std::vector& getDims() const { + return dims; + } + bool isStatic() const { + return type == ShapeType::Static; + } + + size_t getRank() const { + return minDims.size(); + } + + size_t getElementsCount() const { + if (type != ShapeType::Static) { + IE_THROW() << "Cannot get elements count for non static shape"; + } + + size_t size = 1; + + for (int i = 0; i < minDims.size(); i++) { + size *= minDims[i]; + } + + return size; + } + + ngraph::PartialShape toPartialShape() const { + std::vector nGraphDims; + nGraphDims.reserve(minDims.size()); + for (int i = 0; i < minDims.size(); i++) { + nGraphDims.emplace_back(minDims[i], maxDims[i]); + } + return ngraph::PartialShape(nGraphDims); + } + + bool operator == (const Shape& rhs) const { + return minDims == rhs.minDims && maxDims == rhs.maxDims; + } + + bool operator != (const Shape& rhs) const { + return !(*this == rhs); + } + + enum : size_t { + UNDEFINED_DIM = 0xffffffffffffffff + }; + +private: + void initDims() { + dims.resize(minDims.size()); + for (int i = 0; i < minDims.size(); i++) { + dims[i] = minDims[i] == maxDims[i] ? 
minDims[i] : UNDEFINED_DIM; + } + } + + enum class ShapeType { + Static, + Dynamic + } type {ShapeType::Static}; + + std::vector minDims; + std::vector maxDims; + std::vector dims; +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/cpu_types.h b/inference-engine/src/mkldnn_plugin/cpu_types.h index e5bc8af0b5c745..7c820c4db50ccf 100644 --- a/inference-engine/src/mkldnn_plugin/cpu_types.h +++ b/inference-engine/src/mkldnn_plugin/cpu_types.h @@ -16,6 +16,7 @@ enum Type { Deconvolution, Lrn, Pooling, + AdaptivePooling, FullyConnected, Softmax, Split, @@ -85,7 +86,9 @@ enum Type { ExperimentalDetectronPriorGridGenerator, ExperimentalDetectronGenerateProposalsSingleImage, ExtractImagePatches, - NonMaxSuppression + NonMaxSuppression, + MatrixNms, + MulticlassNms }; enum Algorithm { @@ -95,6 +98,10 @@ enum Algorithm { PoolingMax, PoolingAvg, + // Adaptive pooling algorithms + AdaptivePoolingMax, + AdaptivePoolingAvg, + // Convolution algorithms ConvolutionCommon, ConvolutionGrouped, diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp index 1415dc1ae95e20..34261b1ac87121 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp @@ -6,7 +6,6 @@ #include "mkldnn_node.h" #include "mkldnn_extension_utils.h" #include -#include "utils/cpu_utils.hpp" using namespace mkldnn; namespace MKLDNNPlugin { @@ -29,7 +28,7 @@ const MKLDNNNodePtr MKLDNNEdge::getChild() const { } bool MKLDNNEdge::isUseExternalMemory() const { - return externalMemoryPtr; + return useExternalMemory; } bool MKLDNNEdge::isDropped() const { @@ -77,7 +76,7 @@ bool MKLDNNEdge::needReorder() { int inNumber = getInputNum(); bool in_place = inPlace(); bool childCanChangeMem = childSPD->getConfig().outConfs.empty(); - for (const auto conf : childSPD->getConfig().outConfs) { + for (const auto& conf : childSPD->getConfig().outConfs) { if (conf.inPlace == outNumber && outNumber >= 0) childCanChangeMem = true; } @@ -89,7 +88,7 @@ bool MKLDNNEdge::needReorder() { int outNumber = edge->getOutputNum(); if (childSPD->getConfig().outConfs.empty()) count++; - for (const auto conf : childSPD->getConfig().outConfs) { + for (const auto& conf : childSPD->getConfig().outConfs) { if (conf.inPlace == outNumber) count++; } @@ -114,7 +113,7 @@ bool MKLDNNEdge::needReorder() { outNumber >= 0 && outNumber < childSPD->getConfig().inConfs.size() && childSPD->getConfig().inConfs[outNumber].inPlace >= 0) canBeInPlaceConflicts = true; } - return canBeInPlaceConflicts || !MKLDNNExtensionUtils::initTensorsAreEqual(getInputDesc(), getOutputDesc()); + return canBeInPlaceConflicts || !getInputDesc().isCompatible(getOutputDesc()); } void MKLDNNEdge::reuse(MKLDNNMemoryPtr ptr) { @@ -124,35 +123,6 @@ void MKLDNNEdge::reuse(MKLDNNMemoryPtr ptr) { status = Status::Allocated; } -const InferenceEngine::TensorDesc& MKLDNNEdge::getInputDescRO() const { - return inputDesc; -} - -InferenceEngine::TensorDesc MKLDNNEdge::getInputDesc() { - if (inputDesc.getLayout() == InferenceEngine::Layout::ANY) { - inputDesc = getSpecifiedInputDesc({}); - } - return inputDesc; -} - -const InferenceEngine::TensorDesc& MKLDNNEdge::getOutputDescRO() const { - return outputDesc; -} - -InferenceEngine::TensorDesc MKLDNNEdge::getOutputDesc() { - if (outputDesc.getLayout() == InferenceEngine::Layout::ANY) { - outputDesc = getSpecifiedOutputDesc({}); - } - return outputDesc; -} - -InferenceEngine::TensorDesc MKLDNNEdge::getDesc() { - if 
(!MKLDNNExtensionUtils::initTensorsAreEqual(getInputDesc(), getOutputDesc())) - IE_THROW() << "Cannot get descriptor for edge: " << getParent()->getName() << "->" - << getChild()->getName(); - return getInputDesc(); -} - int MKLDNNEdge::getInputNum() const { return parent_port; } @@ -168,45 +138,29 @@ void MKLDNNEdge::allocate(const void* mem_ptr) { if (memoryPtr) IE_THROW() << "Unexpected behaviour: status == NeedAllocation but memory is already allocated."; - auto inputDesc = getInputDesc(); - auto outputDesc = getOutputDesc(); - if (!MKLDNNExtensionUtils::initTensorsAreEqual(outputDesc, inputDesc) || - (inputDesc.getDims().size() > 0 && inputDesc.getDims()[0] != 1 && - (inputDesc.getPrecision() != outputDesc.getPrecision() || - inputDesc.getBlockingDesc() != outputDesc.getBlockingDesc()))) - IE_THROW() << "Cannot allocate memory. Nodes have primitive descriptors with different formats."; - if (inputDesc.getLayout() == InferenceEngine::Layout::ANY) - IE_THROW() << "Cannot get input descriptor!"; + auto& inputDesc = getInputDesc(); + auto& outputDesc = getOutputDesc(); + if (!inputDesc.isDefined() || !outputDesc.isDefined()) + IE_THROW() << "Cannot allocate memory for undefined descriptors."; + if (!inputDesc.isCompatible(outputDesc)) + IE_THROW() << "Cannot allocate memory for incompatible descriptors."; auto parentPtr = getParent(); memoryPtr.reset(new MKLDNNMemory(parentPtr->getEngine())); - memoryPtr->Create(MKLDNNMemoryDesc(inputDesc), mem_ptr, false); // no pads zeroing + + memoryPtr->Create(inputDesc, mem_ptr, false); // no pads zeroing status = Status::Allocated; } -std::string MKLDNNEdge::name() { - auto tensorDescToStr = [](InferenceEngine::TensorDesc const & desc) { - std::string name = desc.getPrecision().name(); - - auto blockingDesc = desc.getBlockingDesc(); - auto dims = blockingDesc.getBlockDims(); - - if (!dims.empty()) { - name += "["; - for (size_t i = 1; i < dims.size(); ++i) { - name += std::to_string(dims[i - 1]) + ","; - } - name += std::to_string(dims.back()) + "]"; - } - - return name; - }; - +std::string MKLDNNEdge::name() const { auto parentPtr = getParent(); auto childPtr = getChild(); - return parentPtr->getName() + std::to_string(parent_port) + tensorDescToStr(getInputDesc()) - + "<->" + childPtr->getName() + std::to_string(child_port); + std::stringstream result; + + result << parentPtr->getName() << " port " << parent_port << " <-> " << childPtr->getName() << " port " << child_port; + + return result.str(); } void MKLDNNEdge::externalAllocate(MKLDNNWeightsSharing::Ptr weightsCache) { @@ -221,7 +175,7 @@ void MKLDNNEdge::externalAllocate(MKLDNNWeightsSharing::Ptr weightsCache) { auto ptr = weightsCache->findOrCreate(name(), alloc, false); memoryPtr = *ptr; - externalMemoryPtr = true; + useExternalMemory = true; status = Status::Allocated; } else { allocate(); @@ -242,10 +196,13 @@ void MKLDNNEdge::changeStatus(MKLDNNEdge::Status state) { status = state; } -const MKLDNNDims& MKLDNNEdge::getDims() { - if (!dims.ndims()) { - MKLDNNDims outDims; - MKLDNNDims inDims; +// TODO [DS]: remove while DynamicShapes migration +// TODO [DS]: How should we validate shape compatibility? +// TODO [DS]: Why do we allow uninitialized shape? 
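To make the semantics of the new Shape wrapper introduced above concrete, here is a minimal usage sketch (not part of the patch). It assumes it is compiled inside the plugin so that "cpu_shape.h", "ie_common.h" and the ngraph headers are on the include path; the header names and the test values are illustrative assumptions only.

```cpp
#include <cassert>
#include <vector>
#include <ie_common.h>               // InferenceEngine::SizeVector (assumed include path)
#include <ngraph/partial_shape.hpp>
#include "cpu_shape.h"               // the Shape class added by this patch

using MKLDNNPlugin::Shape;

int main() {
    // Static shape: minDims == maxDims == dims, element count is defined.
    Shape staticShape(InferenceEngine::SizeVector{2, 3, 4, 5});
    assert(staticShape.isStatic());
    assert(staticShape.getStaticDims() == (std::vector<size_t>{2, 3, 4, 5}));
    assert(staticShape.getElementsCount() == 2 * 3 * 4 * 5);

    // Dynamic shape: the bounds are kept in minDims/maxDims, and positions where the
    // bounds differ are reported as Shape::UNDEFINED_DIM by getDims().
    ngraph::PartialShape ps{2, 3, ngraph::Dimension(1, 6), ngraph::Dimension(1, 6)};
    Shape dynamicShape(ps);
    assert(!dynamicShape.isStatic());
    assert(dynamicShape.getMinDims() == (std::vector<size_t>{2, 3, 1, 1}));
    assert(dynamicShape.getMaxDims() == (std::vector<size_t>{2, 3, 6, 6}));
    assert(dynamicShape.getDims()[0] == 2 && dynamicShape.getDims()[2] == Shape::UNDEFINED_DIM);
    // getStaticDims() and getElementsCount() would throw here, because the shape
    // is not fully defined.
    return 0;
}
```

For a static shape the lower and upper bounds coincide, which is why getStaticDims() can simply return the lower bound.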
+const Shape& MKLDNNEdge::getShape() { + if (!shape.getRank()) { + Shape inShape; + Shape outShape; auto childPtr = getChild(); auto parentPtr = getParent(); @@ -254,8 +211,8 @@ const MKLDNNDims& MKLDNNEdge::getDims() { IE_THROW() << "Error cannot find input data for " << child.lock()->getName() << " from " << parent.lock()->getName(); } - if (inNum < childPtr->inDims.size()) { - outDims = childPtr->inDims[inNum]; + if (inNum < childPtr->inputShapes.size()) { + outShape = childPtr->inputShapes[inNum]; } int outNum = getInputNum(); @@ -263,84 +220,34 @@ const MKLDNNDims& MKLDNNEdge::getDims() { IE_THROW() << "Error cannot find output data for " << parent.lock()->getName() << " to " << child.lock()->getName(); } - if (outNum >= parentPtr->outDims.size()) + if (outNum >= parentPtr->outputShapes.size()) outNum = 0; - if (outNum < parentPtr->outDims.size()) { - inDims = parentPtr->outDims[outNum]; + if (outNum < parentPtr->outputShapes.size()) { + inShape = parentPtr->outputShapes[outNum]; } - if (inDims.ndims() && outDims.ndims() && inDims.ndims() != outDims.ndims() && inDims.size() != outDims.size()) + if (inShape.getRank() && outShape.getRank() && inShape.getRank() != outShape.getRank() && inShape.getElementsCount() != outShape.getElementsCount()) IE_THROW() << "Nodes " << getParent()->getName() << " and " << getChild()->getName() << " have incompatible dimensions!"; - if (outDims.ndims() != 0) { - dims = outDims; - } else if (inDims.ndims() != 0) { - dims = inDims; + if (outShape.getRank() != 0) { + shape = outShape; + } else if (inShape.getRank() != 0) { + shape = inShape; } else { - dims = MKLDNNDims({(size_t)1}); + shape = Shape(InferenceEngine::SizeVector({1})); } - if (!(outDims.ndims() == 0 && inDims.ndims() == 0) && !dims.ndims()) + if (!(outShape.getRank() == 0 && inShape.getRank() == 0) && !shape.getRank()) IE_THROW() << "Cannot detect right dims for nodes " << getParent()->getName() << " and " << getChild()->getName(); } - return dims; -} - -bool MKLDNNEdge::nodeCanChangeDesc(const MKLDNNNodePtr &node) const { - PrimitiveDescInfo * selectedPd = node->getSelectedPrimitiveDescriptor(); - if (selectedPd == nullptr) - IE_THROW() << "Primitive descriptor for node " << node->getName() << " is not selected."; - - for (auto &inputDesc : selectedPd->getConfig().inConfs) { - if (inputDesc.desc.getLayout() != InferenceEngine::Layout::ANY) { - return true; - } - } - for (auto &outDesc : selectedPd->getConfig().outConfs) { - if (outDesc.desc.getLayout() != InferenceEngine::Layout::ANY) { - return true; - } - } - - MKLDNNDims inputDims; - for (size_t i = 0; i < node->getParentEdges().size(); i++) { - if (inputDims.size() == 1 && inputDims.ndims() == 0) { - inputDims = node->getParentEdgeAt(i)->getDims(); - continue; - } - - if (inputDims.ndims() != node->getParentEdgeAt(i)->getDims().ndims()) { - return true; - } - } - for (size_t i = 0; i < node->getChildEdges().size(); i++) { - if (inputDims.size() == 1 && inputDims.ndims() == 0) { - inputDims = node->getChildEdgeAt(i)->getDims(); - continue; - } - - if (inputDims.ndims() != node->getChildEdgeAt(i)->getDims().ndims()) { - return true; - } - } - - return false; + return shape; } -/// In we have {any, any, any} -> {any} or {any} -> {any, any, any} or {any} -> {any} it means that -/// layer doesn't change memory format -/// We don't support {any, any, nchw} -> {any} -InferenceEngine::TensorDesc MKLDNNEdge::getSpecifiedInputDesc(std::map formats, size_t enterCountUp, size_t enterCountDown) { - InferenceEngine::TensorDesc inDesc; - - if 
(inputDesc.getLayout() != InferenceEngine::Layout::ANY) { - return inputDesc; - } - +const MemoryDesc& MKLDNNEdge::getInputDesc() const { auto parentPtr = getParent(); if (parentPtr->getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << "Primitive descriptor for node " << parentPtr->getName() << " is not selected."; @@ -349,248 +256,48 @@ InferenceEngine::TensorDesc MKLDNNEdge::getSpecifiedInputDesc(std::mapgetName() << "."; - if (inputIdx >= parentPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size()) - inputIdx = 0; - inDesc = parentPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[inputIdx].desc; - - if (inDesc.getLayout() != InferenceEngine::Layout::ANY) { - return inDesc; - } - - bool isFormatChanging = nodeCanChangeDesc(parentPtr); - - if (!isFormatChanging && inputIdx < parentPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs.size() && - parentPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs[inputIdx].desc.getLayout() != InferenceEngine::Layout::ANY) { - inDesc = parentPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs[inputIdx].desc; - parentPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[inputIdx].desc = inDesc; - return inDesc; - } + auto& outConfs = parentPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs; + if (outConfs.empty()) + IE_THROW() << "Node " << parentPtr->getName() << " has empty output config list."; - for (size_t i = 0; i < parentPtr->getChildEdges().size(); i++) { - auto childEdge = parentPtr->getChildEdgeAt(i); - auto child = childEdge->getChild(); - int childIdx = childEdge->getOutputNum(); - if (!child->getSelectedPrimitiveDescriptor() || childIdx < 0 || - childEdge->getDims().ndims() != getDims().ndims()) { - continue; - } - if (child->getSelectedPrimitiveDescriptor()->getConfig().inConfs.size() <= childIdx) - childIdx = 0; - memory::format_tag childInDesc = MKLDNNMemoryDesc(child->getSelectedPrimitiveDescriptor()->getConfig().inConfs[childIdx].desc).getFormat(); - if (childInDesc != memory::format_tag::any && childInDesc != memory::format_tag::undef) { - if (formats.find(childInDesc) == formats.end()) - formats[childInDesc] = 1; - else - formats[childInDesc] += 1; - continue; - } - if (nodeCanChangeDesc(child)) - continue; - - if (enterCountUp < 2) { - childInDesc = MKLDNNMemoryDesc(childEdge->getSpecifiedOutputDesc(formats, enterCountUp, ++enterCountDown)).getFormat(); - if (childInDesc != memory::format_tag::any && childInDesc != memory::format_tag::undef) { - if (formats.find(childInDesc) == formats.end()) - formats[childInDesc] = 1; - else - formats[childInDesc] += 1; - } - } - } - - if (!isFormatChanging) { - for (size_t i = 0; i < parentPtr->getParentEdges().size(); i++) { - auto parentEdge = parentPtr->getParentEdgeAt(i); - auto parent = parentEdge->getParent(); - int parentIdx = parentEdge->getInputNum(); - if (!parent->getSelectedPrimitiveDescriptor() || parentIdx < 0 || - parentEdge->getDims().ndims() != getDims().ndims()) { - continue; - } - if (parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size() <= parentIdx) { - parentIdx = 0; - } - memory::format_tag parentOutDesc = MKLDNNMemoryDesc(parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs[parentIdx].desc).getFormat(); - if (parentOutDesc != memory::format_tag::any && parentOutDesc != memory::format_tag::undef) { - if (formats.find(parentOutDesc) == formats.end()) - formats[parentOutDesc] = 1; - else - formats[parentOutDesc] += 1; - continue; - } - if (nodeCanChangeDesc(parent)) - continue; - - if 
(enterCountUp < 2) { - parentOutDesc = MKLDNNMemoryDesc(parentEdge->getSpecifiedInputDesc(formats, ++enterCountUp, enterCountDown)).getFormat(); - if (parentOutDesc != memory::format_tag::any && parentOutDesc != memory::format_tag::undef) { - if (formats.find(parentOutDesc) == formats.end()) - formats[parentOutDesc] = 1; - else - formats[parentOutDesc] += 1; - } - } - } - } - - size_t maxFormatCount = 0; - memory::format_tag desc = MKLDNNMemory::GetPlainFormat(getDims()); - for (auto &it : formats) { - if (maxFormatCount < it.second && MKLDNNMemory::isConsistant(getDims(), it.first)) { - maxFormatCount = it.second; - desc = it.first; - } - } - - auto inDataType = MKLDNNMemoryDesc(parentPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[inputIdx].desc).getDataType(); - parentPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[inputIdx].desc = MKLDNNMemoryDesc(getDims(), inDataType, desc); - if (!isFormatChanging && inputIdx < parentPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs.size() && - parentPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs[inputIdx].desc.getLayout() == InferenceEngine::Layout::ANY) { - parentPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs[inputIdx].desc = - MKLDNNExtensionUtils::getUninitTensorDesc(MKLDNNMemoryDesc(getDims(), inDataType, desc)); - } + if (inputIdx >= outConfs.size()) + inputIdx = 0; - return MKLDNNMemoryDesc(getDims(), inDataType, desc); + return *(outConfs[inputIdx].desc); } -InferenceEngine::TensorDesc MKLDNNEdge::getSpecifiedOutputDesc(std::map formats, size_t enterCountUp, size_t enterCountDown) { - InferenceEngine::TensorDesc outDesc; - - if (outputDesc.getLayout() != InferenceEngine::Layout::ANY) { - return outputDesc; - } - +const MemoryDesc& MKLDNNEdge::getOutputDesc() const { auto childPtr = getChild(); - auto parentPtr = getParent(); if (childPtr->getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << "Primitive descriptor for node " << childPtr->getName() << " is not selected."; int outputIdx = getOutputNum(); - int inputIdx = getInputNum(); if (outputIdx < 0) { IE_THROW() << "Edge cannot be found for node" << childPtr->getName() << "."; } - if (outputIdx >= childPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs.size()) - outputIdx = 0; - outDesc = childPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs[outputIdx].desc; - - if (outDesc.getLayout() != InferenceEngine::Layout::ANY) { - return outDesc; - } - - if (inputIdx >= parentPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size()) - inputIdx = 0; - - bool isFormatChanging = nodeCanChangeDesc(childPtr); - - if ((!isFormatChanging && outputIdx < childPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size() && - childPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[outputIdx].desc.getLayout() != InferenceEngine::Layout::ANY) || - (isFormatChanging && inputIdx >= 0 && - parentPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[inputIdx].desc.getLayout() != InferenceEngine::Layout::ANY)) { - auto inputDataType = childPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs[outputIdx].desc.getPrecision(); - if (!isFormatChanging) - outDesc = childPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[outputIdx].desc; - else - outDesc = parentPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[inputIdx].desc; - childPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs[outputIdx].desc = InferenceEngine::TensorDesc(inputDataType, getDims().ToSizeVector(), - 
{outDesc.getBlockingDesc().getBlockDims(), - outDesc.getBlockingDesc().getOrder()}); - return childPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs[outputIdx].desc; - } - - for (size_t i = 0; i < childPtr->getParentEdges().size(); i++) { - auto parentEdge = childPtr->getParentEdgeAt(i); - auto parent = parentEdge->getParent(); - int parentIdx = parentEdge->getInputNum(); - if (!parent->getSelectedPrimitiveDescriptor() || parentIdx < 0 || - parentEdge->getDims().ndims() != getDims().ndims()) { - continue; - } - if (parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size() <= parentIdx) { - parentIdx = 0; - } - memory::format_tag parentOutDesc = MKLDNNMemoryDesc(parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs[parentIdx].desc).getFormat(); - if (parentOutDesc != memory::format_tag::any && parentOutDesc != memory::format_tag::undef) { - if (formats.find(parentOutDesc) == formats.end()) - formats[parentOutDesc] = 1; - else - formats[parentOutDesc] += 1; - continue; - } - if (nodeCanChangeDesc(parent)) - continue; - - if (enterCountDown < 2) { - parentOutDesc = MKLDNNMemoryDesc(parentEdge->getSpecifiedInputDesc(formats, ++enterCountUp, enterCountDown)).getFormat(); - if (parentOutDesc != memory::format_tag::any && parentOutDesc != memory::format_tag::undef) { - if (formats.find(parentOutDesc) == formats.end()) - formats[parentOutDesc] = 1; - else - formats[parentOutDesc] += 1; - } - } - } - - if (!isFormatChanging) { - for (size_t i = 0; i < childPtr->getChildEdges().size(); i++) { - auto childEdge = childPtr->getChildEdgeAt(i); - auto child = childEdge->getChild(); - int childIdx = childEdge->getOutputNum(); - if (!child->getSelectedPrimitiveDescriptor() || childIdx < 0 || - childEdge->getDims().ndims() != getDims().ndims()) { - continue; - } - if (child->getSelectedPrimitiveDescriptor()->getConfig().inConfs.size() <= childIdx) { - childIdx = 0; - } - memory::format_tag childInDesc = MKLDNNMemoryDesc(child->getSelectedPrimitiveDescriptor()->getConfig().inConfs[childIdx].desc).getFormat(); - if (childInDesc != memory::format_tag::any && childInDesc != memory::format_tag::undef) { - if (formats.find(childInDesc) == formats.end()) - formats[childInDesc] = 1; - else - formats[childInDesc] += 1; - continue; - } - if (nodeCanChangeDesc(child)) - continue; + auto& inConfs = childPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs; + if (inConfs.empty()) + IE_THROW() << "Node " << childPtr->getName() << " has empty input config list."; - if (enterCountDown < 2) { - childInDesc = MKLDNNMemoryDesc(childEdge->getSpecifiedOutputDesc(formats, enterCountUp, ++enterCountDown)).getFormat(); - if (childInDesc != memory::format_tag::any && childInDesc != memory::format_tag::undef) { - if (formats.find(childInDesc) == formats.end()) - formats[childInDesc] = 1; - else - formats[childInDesc] += 1; - } - } - } - } + if (outputIdx >= inConfs.size()) + outputIdx = 0; - size_t maxFormatCount = 0; - memory::format_tag format = MKLDNNMemory::GetPlainFormat(getDims()); - for (auto &it : formats) { - if (maxFormatCount < it.second && MKLDNNMemory::isConsistant(getDims(), it.first)) { - maxFormatCount = it.second; - format = it.first; - } - } + return *(inConfs[outputIdx].desc); +} - auto inDataType = MKLDNNMemoryDesc(childPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs[getOutputNum()].desc).getDataType(); - childPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs[outputIdx].desc = MKLDNNMemoryDesc(getDims(), inDataType, format); - if (!isFormatChanging && 
outputIdx < childPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size() && - childPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[outputIdx].desc.getLayout() == InferenceEngine::Layout::ANY) { - childPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[outputIdx].desc = - MKLDNNExtensionUtils::getUninitTensorDesc(MKLDNNMemoryDesc(getDims(), inDataType, format)); - } +const MemoryDesc& MKLDNNEdge::getDesc() const { + if (!getInputDesc().isCompatible(getOutputDesc())) + IE_THROW() << "Cannot get descriptor for edge: " << getParent()->getName() << "->" + << getChild()->getName(); - return childPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[outputIdx].desc; + return getInputDesc(); } const MKLDNNMemory &MKLDNNEdge::getMemory() { if (status == Status::NotAllocated) { memoryPtr.reset(new MKLDNNMemory(getParent()->getEngine())); - memoryPtr->Create(MKLDNNMemoryDesc(getDesc()), getSharedEdge()->getMemoryPtr()->GetData()); + memoryPtr->Create(getDesc(), getSharedEdge()->getMemoryPtr()->GetData()); memoryFromEdge.reset(); changeStatus(Status::Allocated); } @@ -601,7 +308,7 @@ const MKLDNNMemory &MKLDNNEdge::getMemory() { MKLDNNMemoryPtr &MKLDNNEdge::getMemoryPtr() { if (status == Status::NotAllocated) { memoryPtr.reset(new MKLDNNMemory(getParent()->getEngine())); - memoryPtr->Create(MKLDNNMemoryDesc(getDesc()), getSharedEdge()->getMemoryPtr()->GetData()); + memoryPtr->Create(getDesc(), getSharedEdge()->getMemoryPtr()->GetData()); memoryFromEdge.reset(); changeStatus(Status::Allocated); } @@ -609,19 +316,6 @@ MKLDNNMemoryPtr &MKLDNNEdge::getMemoryPtr() { return memoryPtr; } -InferenceEngine::Blob::Ptr MKLDNNEdge::getBlob() { - if (!memoryPtr) - IE_THROW() << "Cannot get blob! Edge isn't initialized."; - InferenceEngine::TensorDesc desc = getDesc(); - - if (desc.getLayout() == InferenceEngine::Layout::ANY) - desc = InferenceEngine::TensorDesc(desc.getPrecision(), dims.ToSizeVector(), desc.getLayout()); - else - desc = InferenceEngine::TensorDesc(desc.getPrecision(), dims.ToSizeVector(), desc.getBlockingDesc()); - - return isEmptyTensorDesc(desc) ? make_blob_with_precision(desc) : make_blob_with_precision(desc, memoryPtr->GetData()); -} - void MKLDNNEdge::sharedMemFrom(const MKLDNNEdgePtr &edge) { memoryFromEdge = edge; status = Status::NotAllocated; @@ -633,7 +327,7 @@ void MKLDNNEdge::validate() { getMemory(); getParent(); getChild(); - getDims(); + getShape(); if (status != Status::Allocated) { IE_THROW() << "Error memory is not allocated!"; @@ -644,8 +338,7 @@ void MKLDNNEdge::validate() { MKLDNNEdgePtr MKLDNNEdge::getSharedEdge() const { auto memoryFromEdgePtr = memoryFromEdge.lock(); if (!memoryFromEdgePtr) { - IE_THROW() << "Cannot get memory ptr for edge(" << getParent()->getName() << "->" - << getChild()->getName() << "). The pointer on the edge with memory is empty!"; + IE_THROW() << "Cannot get memory ptr for edge( " << name() << " ). 
The pointer on the edge with memory is empty!"; } return memoryFromEdgePtr; } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_edge.h b/inference-engine/src/mkldnn_plugin/mkldnn_edge.h index 63e2a16414d94f..5e6f4d23542f9f 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_edge.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_edge.h @@ -5,11 +5,9 @@ #pragma once #include -#include -#include "mkldnn_memory.h" -#include "mkldnn_dims.h" +#include "cpu_shape.h" +#include "cpu_memory_desc.h" #include "mkldnn_weights_cache.hpp" -#include "mkldnn/ie_mkldnn.h" #include #include @@ -53,10 +51,7 @@ class MKLDNNEdge { const std::shared_ptr getParent() const; const std::shared_ptr getChild() const; - InferenceEngine::Blob::Ptr getBlob(); - InferenceEngine::TensorDesc getDesc(); - - const MKLDNNDims &getDims(); + const Shape &getShape(); const MKLDNNMemory& getMemory(); MKLDNNMemoryPtr& getMemoryPtr(); @@ -73,34 +68,23 @@ class MKLDNNEdge { MKLDNNEdgePtr getSharedEdge() const; MKLDNNEdgePtr getSharedEdge(std::nothrow_t) const; - const InferenceEngine::TensorDesc& getInputDescRO() const; - const InferenceEngine::TensorDesc& getOutputDescRO() const; - private: - std::string name(); + std::string name() const; std::weak_ptr parent; std::weak_ptr child; int parent_port; int child_port; - bool externalMemoryPtr = false; + bool useExternalMemory = false; MKLDNNEdgeWeakPtr memoryFromEdge; - MKLDNNDims dims; + Shape shape; MKLDNNMemoryPtr memoryPtr; Status status = Status::Uninitialized; - InferenceEngine::TensorDesc getInputDesc(); - InferenceEngine::TensorDesc getOutputDesc(); - InferenceEngine::TensorDesc getSpecifiedInputDesc(std::map formats, - size_t enterCountUp = 1, size_t enterCountDown = 0); - InferenceEngine::TensorDesc getSpecifiedOutputDesc(std::map formats, - size_t enterCountUp = 0, size_t enterCountDown = 1); - - InferenceEngine::TensorDesc inputDesc; - InferenceEngine::TensorDesc outputDesc; - - bool nodeCanChangeDesc(const std::shared_ptr& node) const; + const MemoryDesc& getInputDesc() const; + const MemoryDesc& getOutputDesc() const; + const MemoryDesc& getDesc() const; enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2, LOOK_BOTH = LOOK_UP | LOOK_DOWN, LOOK_NO_RECURRENT = 4 }; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp index 2d7d4e5e6b61e2..d1c851645b1d78 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp @@ -32,7 +32,7 @@ uint8_t MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type dataType) } } -memory::data_type MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision prec) { +memory::data_type MKLDNNExtensionUtils::IEPrecisionToDataType(const InferenceEngine::Precision& prec) { switch (prec) { case InferenceEngine::Precision::FP32: return memory::data_type::f32; @@ -47,6 +47,8 @@ memory::data_type MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::P return memory::data_type::u8; case InferenceEngine::Precision::BIN: return memory::data_type::bin; + case InferenceEngine::Precision::UNSPECIFIED: + return memory::data_type::undef; default: { IE_THROW() << "The plugin does not support " << prec.name(); } @@ -67,155 +69,18 @@ InferenceEngine::Precision MKLDNNExtensionUtils::DataTypeToIEPrecision(memory::d return InferenceEngine::Precision::U8; case memory::data_type::bin: return InferenceEngine::Precision::BIN; + case memory::data_type::undef: + return 
InferenceEngine::Precision::UNSPECIFIED; default: { IE_THROW() << "Unsupported data type."; } } } -InferenceEngine::TensorDesc MKLDNNExtensionUtils::getUninitTensorDesc(const InferenceEngine::TensorDesc &desc) { - std::vector notInitArr; - std::vector zeroArr; - for (size_t i = 0; i < desc.getBlockingDesc().getBlockDims().size(); i++) { - notInitArr.push_back(std::numeric_limits::max()); - zeroArr.push_back(0); - } - // MKLDNN doesn't support offset_padding_to_data[i] != 0 (assert(src_d_blk.offset_padding_to_data[d] == 0);) - return desc.getLayout() == InferenceEngine::Layout::ANY ? desc : - InferenceEngine::TensorDesc(desc.getPrecision(), desc.getDims(), - {desc.getBlockingDesc().getBlockDims(), desc.getBlockingDesc().getOrder(), - std::numeric_limits::max(), zeroArr, notInitArr}); -} - -bool MKLDNNExtensionUtils::initTensorsAreEqual(const InferenceEngine::TensorDesc &desc1, const InferenceEngine::TensorDesc &desc2) { - if (desc1.getDims() != desc2.getDims() || desc1.getPrecision() != desc2.getPrecision()) - return false; - if (desc1.getLayout() == InferenceEngine::Layout::SCALAR && desc2.getLayout() == InferenceEngine::Layout::SCALAR) - return true; - if (desc1.getLayout() == InferenceEngine::Layout::ANY || desc2.getLayout() == InferenceEngine::Layout::ANY) - return true; - bool batch1 = desc1.getDims()[0] == 1; - const auto& in1Block = desc1.getBlockingDesc(); - const auto& in2Block = desc2.getBlockingDesc(); - size_t uninitNum = std::numeric_limits::max(); - if (in1Block.getBlockDims().size() != in2Block.getBlockDims().size()) - return false; - for (size_t i = 0; i < in1Block.getBlockDims().size(); i++) { - if (in1Block.getBlockDims()[i] != in2Block.getBlockDims()[i] && - in1Block.getBlockDims()[i] != uninitNum && in2Block.getBlockDims()[i] != uninitNum) - return false; - if (in1Block.getOffsetPaddingToData()[i] != in2Block.getOffsetPaddingToData()[i] && - in1Block.getOffsetPaddingToData()[i] != uninitNum && in2Block.getOffsetPaddingToData()[i] != uninitNum) - return false; - if (i >= batch1 && in1Block.getStrides()[i] != in2Block.getStrides()[i] && - in1Block.getStrides()[i] != uninitNum && in2Block.getStrides()[i] != uninitNum) - return false; - if (in1Block.getOrder()[i] != in2Block.getOrder()[i] && - in1Block.getOrder()[i] != uninitNum && in2Block.getOrder()[i] != uninitNum) - return false; - } - return !(in1Block.getOffsetPadding() != in2Block.getOffsetPadding() && - in1Block.getOffsetPadding() != uninitNum && in2Block.getOffsetPadding() != uninitNum); -} - -PartialBlkDesc PartialBlkDesc::makePlain(const InferenceEngine::SizeVector &dims) { - PartialBlkDesc res; - res.outer_order.resize(dims.size()); - std::iota(res.outer_order.begin(), res.outer_order.end(), 0); - return res; -} - -PartialBlkDesc PartialBlkDesc::makeCBlocked(const InferenceEngine::SizeVector &dims, size_t block_size) { - PartialBlkDesc res; - res.outer_order.resize(dims.size()); - std::iota(res.outer_order.begin(), res.outer_order.end(), 0); - res.inner_blk_size = {block_size}; - res.inner_blk_idxes = {1}; - return res; -} - - -PartialBlkDesc PartialBlkDesc::makeTailC(const InferenceEngine::SizeVector &dims) { - PartialBlkDesc res = makePlain(dims); - if (dims.size() > 2) { - auto itr = res.outer_order.begin() + 1; - std::rotate(itr, itr + 1, res.outer_order.end()); - } - return res; -} - -PartialBlkDesc PartialBlkDesc::extractFrom(const InferenceEngine::TensorDesc &desc) { - if (desc.getLayout() == InferenceEngine::ANY) - IE_THROW() << "Cannot extract partial blocked descriptor for `ANY` layout"; - - const auto 
&dims = desc.getDims(); - const auto &blk = desc.getBlockingDesc(); - const auto &blk_dims = blk.getBlockDims(); - const auto &blk_order = blk.getOrder(); - - PartialBlkDesc res; - res.outer_order = {blk_order.begin(), blk_order.begin() + dims.size()}; - res.inner_blk_idxes = {blk_order.begin() + dims.size(), blk_order.end()}; - res.inner_blk_size = {blk_dims.begin() + dims.size(), blk_dims.end()}; - - return res; -} - -bool PartialBlkDesc::isAutoExtendedWith(const InferenceEngine::SizeVector &dims) const { - auto tmp_dims = dims; - for (int i = 0; i < inner_blk_size.size(); i++) { - auto idx = inner_blk_idxes[i]; - auto blk = inner_blk_size[i]; - if (tmp_dims[idx] % blk == 0) - tmp_dims[idx] /= blk; - else - return true; - } - return false; -} - -bool PartialBlkDesc::operator == (const PartialBlkDesc& it) const { - return std::tie(this->inner_blk_idxes, - this->inner_blk_size, - this->outer_order) == - std::tie(it.inner_blk_idxes, - it.inner_blk_size, - it.outer_order); +InferenceEngine::SizeVector MKLDNNExtensionUtils::convertToSizeVector(const mkldnn::memory::dims& dims) { + return InferenceEngine::SizeVector(dims.begin(), dims.end()); } -// Lexicographical compare of content -bool PartialBlkDesc::operator < (const PartialBlkDesc& it) const { - return std::tie(this->inner_blk_idxes, - this->inner_blk_size, - this->outer_order) < - std::tie(it.inner_blk_idxes, - it.inner_blk_size, - it.outer_order); -} - -std::string MKLDNNExtensionUtils::getReorderArgs(const InferenceEngine::TensorDesc &parentDesc, const InferenceEngine::TensorDesc &childDesc) { - std::string inArgs, outArgs; - if (parentDesc.getPrecision() != childDesc.getPrecision()) { - inArgs += (inArgs.empty() ? "" : "_") + std::string(parentDesc.getPrecision().name()); - outArgs += (outArgs.empty() ? "" : "_") + std::string(childDesc.getPrecision().name()); - } - auto fmt_tag_src = MKLDNNMemoryDesc(parentDesc).getFormat(); - auto fmt_tag_dst = MKLDNNMemoryDesc(childDesc).getFormat(); - if (fmt_tag_src != fmt_tag_dst || one_of(mkldnn::memory::format_tag::undef, fmt_tag_src, fmt_tag_dst)) { - inArgs += (inArgs.empty() ? "" : "_") + MKLDNNMemory::formatToString(fmt_tag_src); - outArgs += (outArgs.empty() ? "" : "_") + MKLDNNMemory::formatToString(fmt_tag_dst); - } - return inArgs + "_" + outArgs; -} - -InferenceEngine::Precision MKLDNNExtensionUtils::getMaxPrecision(std::vector precisions) { - if (!precisions.empty()) { - std::sort(precisions.begin(), precisions.end(), - [](const InferenceEngine::Precision &lhs, const InferenceEngine::Precision &rhs) { - return lhs.size() > rhs.size(); - }); - return precisions[0]; - } - - return InferenceEngine::Precision::UNSPECIFIED; +std::vector MKLDNNExtensionUtils::convertToDnnlDims(const InferenceEngine::SizeVector& dims) { + return std::vector(dims.begin(), dims.end());; } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h index 95e14a7afa2cb3..8e7f9a1b3742e7 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h @@ -11,77 +11,17 @@ #include #include "mkldnn.hpp" -#include "mkldnn_memory.h" +#include "cpu_memory_desc.h" namespace MKLDNNPlugin { - -/** - * Partial tensor descriptor - * - * Represent a classes of layout. As example Plain, TailC, CBlocked and other. - * - * The tensor are in one layout family if they have same PartialBlkDesc. - * - * Any tensor will have same PartialBlkDesc as it subview tensor. 
- * - * PartialBlkDesc plus Dims allow to reconstruct real tensorDesc (dense representation). - */ -class PartialBlkDesc { -public: - /** - * Check if this partial blocking desc will lead to additional zero padding - * for real tensor with provided dims - * - * Example: dims [2, 3, 8, 8] with blocking by 16 for second dim. Will lead - * to effective dims [2, 16, 8, 8] with zeroing all values - * [:, 3:16, :, :] - * - * @param dims to check on zero auto padding - * @return true if provided dims will use auto padding. Otherwise false. - */ - bool isAutoExtendedWith(const InferenceEngine::SizeVector &dims) const; - - /** - * Construct PartialBlkDesc from provided TensorDesc - * - * PartialBlkDesc has less expressiveness power so some information from TensorDesc will be dropped. - * The different TensorDesc object will has equal PartialBlkDesc. - * - * @param desc to extract PartialBlkDesc information about kind of layout - * @return PartialBlkDesc object corresponds layout described in desc - */ - static PartialBlkDesc extractFrom(const InferenceEngine::TensorDesc &desc); - - /** Construct plain PartialBlkDesc based on dims information */ - static PartialBlkDesc makePlain(const InferenceEngine::SizeVector &dims); - - /** Construct blocked Channel PartialBlkDesc based on dims information */ - static PartialBlkDesc makeCBlocked(const InferenceEngine::SizeVector &dims, size_t block_size); - - /** Construct per Channel PartialBlkDesc based on dims information */ - static PartialBlkDesc makeTailC(const InferenceEngine::SizeVector &dims); - - /** Compare operators. Allow to use it as key for std::map */ - bool operator == (const PartialBlkDesc& it) const; - bool operator < (const PartialBlkDesc& it) const; - -private: - PartialBlkDesc() = default; - InferenceEngine::SizeVector outer_order; - InferenceEngine::SizeVector inner_blk_size; - InferenceEngine::SizeVector inner_blk_idxes; -}; - class MKLDNNExtensionUtils { public: static uint8_t sizeOfDataType(mkldnn::memory::data_type dataType); - static mkldnn::memory::data_type IEPrecisionToDataType(InferenceEngine::Precision prec); + static mkldnn::memory::data_type IEPrecisionToDataType(const InferenceEngine::Precision& prec); static InferenceEngine::Precision DataTypeToIEPrecision(mkldnn::memory::data_type dataType); - static InferenceEngine::TensorDesc getUninitTensorDesc(const InferenceEngine::TensorDesc& desc); - static bool initTensorsAreEqual(const InferenceEngine::TensorDesc &desc1, const InferenceEngine::TensorDesc &desc2); - static std::string getReorderArgs(const InferenceEngine::TensorDesc &parentDesc, const InferenceEngine::TensorDesc &childDesc); - static InferenceEngine::Precision getMaxPrecision(std::vector precisions); + static InferenceEngine::SizeVector convertToSizeVector(const mkldnn::memory::dims& dims); + static std::vector convertToDnnlDims(const InferenceEngine::SizeVector& dims); }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp index 9d10ad036da759..b222126aa98165 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp @@ -39,22 +39,14 @@ #include "utils/node_dumper.h" #include "utils/ngraph_utils.hpp" #include "utils/cpu_utils.hpp" +#include "cpu_memory_desc_utils.h" #include #include #include #include #include -#include - -/***************************************************** - * Debug capability - * - PRINT_GRAPH_INFO : Define it to enable printing - * 
additional information to std output. - * - * @todo Align with CPU_DEBUG_CAPS implementation - *****************************************************/ -// #define PRINT_GRAPH_INFO +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -94,7 +86,7 @@ void MKLDNNGraph::Replicate(const std::shared_ptr &subgr this->reuse_io_tensors = false; isQuantizedFlag = (config.lpTransformsMode == Config::On) && - ngraph::pass::low_precision::LowPrecisionTransformer::isFunctionQuantized(subgraph); + ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(subgraph); // Map data object onto producer node std::map, std::pair> op2node; @@ -168,7 +160,7 @@ void MKLDNNGraph::Replicate(const std::shared_ptr &subgr auto parentNode = portInfo.first; auto port = portInfo.second; const auto nodeName = std::string("stub_") + std::to_string(unusedOutput.get_index()) + "_" + parentNode->getName(); - const MKLDNNNodePtr outNode = std::make_shared(parentNode->outDims[port].ToSizeVector(), + const MKLDNNNodePtr outNode = std::make_shared(parentNode->outputShapes[port], parentNode->getOriginalOutputPrecisionAtPort(port), nodeName, "Result", getEngine(), weightsCache); MKLDNNEdgePtr edge(new MKLDNNEdge(parentNode, outNode, port, 0)); @@ -192,7 +184,7 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana } isQuantizedFlag = (config.lpTransformsMode == Config::On) && - ngraph::pass::low_precision::LowPrecisionTransformer::isFunctionQuantized(func); + ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(func); auto orderedOps = func->get_ordered_ops(); @@ -269,7 +261,7 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana auto parentNode = op2node[unusedOutput.get_node_shared_ptr()]; const auto port = unusedOutput.get_index(); const auto nodeName = std::string("stub_") + std::to_string(unusedOutput.get_index()) + "_" + parentNode->getName(); - const MKLDNNNodePtr outNode = std::make_shared(parentNode->outDims[port].ToSizeVector(), + const MKLDNNNodePtr outNode = std::make_shared(parentNode->outputShapes[port], parentNode->getOriginalOutputPrecisionAtPort(port), nodeName, "Result", getEngine(), weightsCache); MKLDNNEdgePtr edge(new MKLDNNEdge(parentNode, outNode, port, 0)); @@ -306,15 +298,15 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana // Loading mean images for (const auto& input : inputsInfo) { - MKLDNNDims outDims; - if (!inputNodesMap[input.first]->getChildEdgeAt(0)->getDims().ndims()) { - outDims = MKLDNNDims(InferenceEngine::SizeVector(1, 1)); + Shape outShape; + if (!inputNodesMap[input.first]->outputShapes.front().getRank()) { + outShape = Shape(SizeVector({1, 1})); } else { - outDims = inputNodesMap[input.first]->getChildEdgeAt(0)->getDims(); + outShape = inputNodesMap[input.first]->outputShapes.front(); } InputInfo::Ptr ii = inputsInfo[input.first]; if (ii && ii->getPreProcess().getNumberOfChannels()) { - _normalizePreprocMap[input.first].Load(outDims, ii); + _normalizePreprocMap[input.first].Load(outShape, ii); } } } @@ -347,6 +339,8 @@ void MKLDNNGraph::InitGraph() { graphNode->cleanup(); } #endif + ExtractConstantNodes(); + ExecuteConstantNodesOnly(); } @@ -390,6 +384,16 @@ void MKLDNNGraph::InitOptimalPrimitiveDescriptors() { } } +void MKLDNNGraph::ExtractConstantNodes() { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::ExtractConstantNodes"); + for (auto& graphNode : graphNodes) { + if (graphNode->isConstant()) + constantGraphNodes.emplace_back(graphNode); + else + 
mutableGraphNodes.emplace_back(graphNode); + } +} + void MKLDNNGraph::ExecuteConstantNodesOnly() { OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::ExecuteConstantNodesOnly"); mkldnn::stream stream(eng); @@ -418,10 +422,7 @@ void MKLDNNGraph::ExecuteConstantNodesOnly() { return std::make_tuple(hasExternalInvalidEdges, hasLocalAllocatedEdges, outputs); }; - for (auto &graphNode : graphNodes) { - if (!graphNode->isConstant()) - continue; - + for (auto &graphNode : constantGraphNodes) { if (weightsCache) { auto sharedOutputs = acquireSharedOutputs(graphNode); @@ -437,9 +438,9 @@ void MKLDNNGraph::ExecuteConstantNodesOnly() { } } -static bool isReorderAvailable(const TensorDesc& parentDesc, const TensorDesc& childDesc, const mkldnn::engine& eng) { - memory::desc dstMemDesc = MKLDNNMemoryDesc(childDesc); - memory::desc srcMemDesc = MKLDNNMemoryDesc(parentDesc); +static bool isReorderAvailable(const MemoryDesc& parentDesc, const MemoryDesc& childDesc, const mkldnn::engine& eng) { + memory::desc dstMemDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(childDesc); + memory::desc srcMemDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(parentDesc);; mkldnn::primitive_attr attr; dnnl_primitive_desc_t result = nullptr; @@ -471,14 +472,14 @@ void MKLDNNGraph::InitEdges() { if (edge->getInputDesc().getPrecision() != edge->getOutputDesc().getPrecision() && !isReorderAvailable(edge->getInputDesc(), edge->getOutputDesc(), this->getEngine())) { // If we are here, then we need to insert Convert, because there are no reorders that support such type conversion - const auto inDesc = edge->getInputDesc(); - const auto outDesc = edge->getOutputDesc(); + const auto& inDesc = edge->getInputDesc(); + const auto& outDesc = edge->getOutputDesc(); std::string convertName = edge->getParent()->getName() + "_" + inDesc.getPrecision().name() + "_" + outDesc.getPrecision().name(); - auto convertNode = std::make_shared(inDesc.getDims(), inDesc.getPrecision(), outDesc.getPrecision(), convertName, - this->getEngine(), this->weightsCache); + auto convertNode = std::make_shared(inDesc.getShape().getStaticDims(), inDesc.getPrecision(), outDesc.getPrecision(), + convertName, this->getEngine(), this->weightsCache); convertNode->setDescs(inDesc, outDesc); InsertNode(edge, convertNode, true); @@ -492,7 +493,7 @@ void MKLDNNGraph::InitEdges() { if (insertReorder) { std::string basicLayerName = edge->getParent()->getName() + "_" + - MKLDNNExtensionUtils::getReorderArgs(edge->getInputDesc(), edge->getOutputDesc()) + "_" + + MKLDNNReorderNode::getReorderArgs(edge->getInputDesc(), edge->getOutputDesc()) + "_" + edge->getChild()->getName(); std::string layerName = basicLayerName; int idx = 0; @@ -601,22 +602,10 @@ void MKLDNNGraph::AllocateWithReuse() { int e_start = edge->getParent()->execIndex; int e_finish = edge->getChild()->execIndex; - const BlockingDesc block_desk = edge->getDesc().getBlockingDesc(); - - int64_t e_size = block_desk.getOffsetPadding() + 1; // size in bytes (from begin of data to last element) - for (int j = 0; j < block_desk.getBlockDims().size(); j++) - e_size += (block_desk.getBlockDims()[j] - 1) * block_desk.getStrides()[j]; - - // In some cases computational formula above doesn't work properly (e.g. for OhIw8o4i layout). - // This WA allows to limit the size of allocated memory from below. 
- // TODO: need to properly investigate the root cause of incorrect computations - int64_t min_size = 1; - for (int64_t dim : block_desk.getBlockDims()) { - min_size *= dim; + int64_t e_size = edge->getDesc().getCurrentSize(); // size in bytes (from the beginning of data to the last element) + if (e_size == MemoryDesc::UNDEFINED_SIZE) { + IE_THROW() << "Can not allocate memory since the size is undefined."; } - e_size = std::max(e_size, min_size); - - e_size *= edge->getDesc().getPrecision() == Precision::BIN ? 1 : edge->getDesc().getPrecision().size(); box.start = std::min(e_start, box.start); box.finish = std::max(e_finish, box.finish); @@ -650,7 +639,7 @@ void MKLDNNGraph::AllocateWithReuse() { size_t total_size = static_cast(memSolver.solve()) * alignment; memWorkspace = std::make_shared(eng); - memWorkspace->Create(MKLDNNMemoryDesc(TensorDesc(Precision::I8, {total_size}, Layout::C))); + memWorkspace->Create(MKLDNNMemoryDesc({total_size}, mkldnn::memory::data_type::s8)); if (edge_clusters.empty()) return; @@ -710,13 +699,11 @@ void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine:: auto input = inputNodesMap.find(name); if (input != inputNodesMap.end()) { - MKLDNNDims outDims = input->second->getChildEdgeAt(0)->getDims(); - const void *ext_data_ptr = in->cbuffer(); void *inter_data_ptr = input->second->getChildEdgeAt(0)->getMemory().GetData(); if (ext_data_ptr != inter_data_ptr) { - auto ext_tdesc = MKLDNNMemoryDesc {in->getTensorDesc()}; + auto ext_tdesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(in->getTensorDesc()); auto ext_mem = MKLDNNMemory(eng); ext_mem.Create(ext_tdesc, ext_data_ptr, false); @@ -727,7 +714,8 @@ void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine:: // todo: make sure 'name' exists in this map... 
if (_normalizePreprocMap.find(name) != _normalizePreprocMap.end()) { if (in->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32) { - _normalizePreprocMap[name].NormalizeImage(outDims, reinterpret_cast(inter_data_ptr), + _normalizePreprocMap[name].NormalizeImage(input->second->getChildEdgeAt(0)->getShape(), + reinterpret_cast(inter_data_ptr), in->getTensorDesc().getLayout()); } else { IE_THROW() << "Mean image of type " << in->getTensorDesc().getPrecision().name() << " is unsupported"; @@ -775,7 +763,7 @@ void MKLDNNGraph::PullOutputData(const BlobMap &out) { MB_to_process = std::min(config.batchLimit, MB_to_process); size_t size_to_copy = intr_blob.GetElementsCount() * MB_to_process / MB; - const auto actualDesc = node->getParentEdgeAt(0)->getDesc(); + const auto actualDesc = MemoryDescUtils::convertToTensorDesc(node->getParentEdgeAt(0)->getDesc()); const auto expectedDesc = ext_blob->getTensorDesc(); // TODO [NM]: need to create universal reorder which will be detect cases when we really need to use it @@ -790,7 +778,7 @@ void MKLDNNGraph::PullOutputData(const BlobMap &out) { } if (actualDesc.getBlockingDesc() != expectedDesc.getBlockingDesc() && !isScalarOutput) { - auto outBlobDesc = MKLDNNMemoryDesc{expectedDesc}; + auto outBlobDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(expectedDesc); auto outBloMem = MKLDNNMemory(eng); outBloMem.Create(outBlobDesc, ext_blob_ptr, false); @@ -810,24 +798,30 @@ void MKLDNNGraph::Infer(MKLDNNInferRequest* request, int batch) { ENABLE_CPU_DEBUG_CAP(NodeDumper nd(config.debugCaps, infer_count)); - for (int i = 0; i < graphNodes.size(); i++) { - if (request != nullptr) { +#ifdef CPU_DEBUG_CAPS + for (const auto& node : constantGraphNodes) { + if (request != nullptr) request->ThrowIfCanceled(); - } - PERF(graphNodes[i]); + ENABLE_CPU_DEBUG_CAP(nd.dumpInputBlobs(node)); + ENABLE_CPU_DEBUG_CAP(nd.dumpOutputBlobs(node)); + } +#endif + + for (const auto& node : mutableGraphNodes) { + PERF(config.collectPerfCounters, node); + if (request != nullptr) + request->ThrowIfCanceled(); if (batch > 0) - graphNodes[i]->setDynamicBatchLim(batch); + node->setDynamicBatchLim(batch); - ENABLE_CPU_DEBUG_CAP(nd.dumpInputBlobs(graphNodes[i])); + ENABLE_CPU_DEBUG_CAP(nd.dumpInputBlobs(node)); - if (!graphNodes[i]->isConstant()) { - OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, graphNodes[i]->profiling.execute); - graphNodes[i]->execute(stream); - } + OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, node->profiling.execute); + node->execute(stream); - ENABLE_CPU_DEBUG_CAP(nd.dumpOutputBlobs(graphNodes[i])); + ENABLE_CPU_DEBUG_CAP(nd.dumpOutputBlobs(node)); } if (infer_count != -1) infer_count++; @@ -889,7 +883,7 @@ void MKLDNNGraph::SortTopologically() { // Make first N (N == port_num) edge indexes are matched with port index for (auto &node : graphNodes) { { - int port_num = node->inDims.size(); + int port_num = node->inputShapes.size(); std::vector res(port_num); for (int i = 0; i < node->parentEdges.size(); i++) { @@ -903,7 +897,7 @@ void MKLDNNGraph::SortTopologically() { node->parentEdges = {res.begin(), res.end()}; } { - int port_num = node->outDims.size(); + int port_num = node->outputShapes.size(); std::vector res(port_num); for (int i = 0; i < node->childEdges.size(); i++) { @@ -965,16 +959,20 @@ Config MKLDNNGraph::getProperty() const { return config; } -void MKLDNNGraph::getInputBlobs(InferenceEngine::BlobMap &resp) { - for (auto &it : inputNodesMap) { - resp[it.first] = it.second->getChildEdgeAt(0)->getBlob(); +Blob::Ptr 
MKLDNNGraph::getInputBlob(const std::string& name) { + auto itr = inputNodesMap.find(name); + if (itr != inputNodesMap.end()) { + return MemoryDescUtils::interpretAsBlob(itr->second->getChildEdgeAt(0)->getMemory()); } + return nullptr; } -void MKLDNNGraph::getOutputBlobs(InferenceEngine::BlobMap &resp) { - for (auto &it : outputNodesMap) { - resp[it.first] = it.second->getParentEdgeAt(0)->getBlob(); +Blob::Ptr MKLDNNGraph::getOutputBlob(const std::string& name) { + auto itr = outputNodesMap.find(name); + if (itr != outputNodesMap.end()) { + return MemoryDescUtils::interpretAsBlob(itr->second->getParentEdgeAt(0)->getMemory()); } + return nullptr; } void MKLDNNGraph::RemoveEdge(MKLDNNEdgePtr& edge) { @@ -1084,7 +1082,7 @@ void MKLDNNGraph::DropDWConvNode(const MKLDNNNodePtr &node) { MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, parentConv, inNum, outNum)); graphEdges.push_back(newEdge); parent->addEdge(newEdge); - parentConv->inDims.push_back(newEdge->getDims()); + parentConv->inputShapes.push_back(Shape(newEdge->getShape())); } } @@ -1116,15 +1114,14 @@ void MKLDNNGraph::RemoveDroppedEdges() { } } -MKLDNNNodePtr MKLDNNGraph::InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const TensorDesc& inDesc, const TensorDesc& outDesc, - bool isOptimized, InferenceEngine::Blob::Ptr scales) { +MKLDNNNodePtr MKLDNNGraph::InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const MemoryDesc& inDesc, const MemoryDesc& outDesc, + bool isOptimized) { MKLDNNNodePtr newReorder(new MKLDNNReorderNode(layerName, getEngine(), weightsCache)); auto *reorderPtr = dynamic_cast(newReorder.get()); if (reorderPtr == nullptr) { IE_THROW() << "MKLDNNGraph::InsertReorder: Cannot cast to MKLDNNReorderNode"; } reorderPtr->setDescs(inDesc, outDesc); - reorderPtr->_scales = scales; reorderPtr->setOptimized(isOptimized); InsertNode(edge, newReorder, true); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h index 213fb3b0d54603..50ccd0be4f0132 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h @@ -44,8 +44,8 @@ class MKLDNNGraph { void setProperty(const std::map &properties); Config getProperty() const; - void getInputBlobs(InferenceEngine::BlobMap &in_map); - void getOutputBlobs(InferenceEngine::BlobMap &out_map); + InferenceEngine::Blob::Ptr getInputBlob(const std::string& name); + InferenceEngine::Blob::Ptr getOutputBlob(const std::string& name); template void CreateGraph(NET &network, @@ -115,17 +115,17 @@ class MKLDNNGraph { * @param layerName * Reorder layer name * @param inDesc - * input tensor descriptor + * input memory descriptor * @param outDesc - * output tensor descriptor + * output memory descriptor * @param isOptimized * optimization flag; if isOptimized is true then Reorder node does nothing * @param scales * pointer to the blob containing scales * @return pointer to the new Reorder node. */ - MKLDNNNodePtr InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const InferenceEngine::TensorDesc& inDesc, - const InferenceEngine::TensorDesc& outDesc, bool isOptimized = false, InferenceEngine::Blob::Ptr scales = nullptr); + MKLDNNNodePtr InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const MemoryDesc& inDesc, + const MemoryDesc& outDesc, bool isOptimized = false); /** * @brief Insert MKLDNNNode at the edge-specified location. 
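Since the map-filling getInputBlobs()/getOutputBlobs() accessors are replaced by per-name getInputBlob()/getOutputBlob() that return nullptr for unknown names, callers now look blobs up individually and handle the missing-name case themselves. A hypothetical caller-side sketch (not part of the patch; the helper name, include paths and error text are assumptions):

```cpp
#include <string>
#include <vector>
#include <ie_blob.h>                 // InferenceEngine::Blob, BlobMap (assumed include path)
#include "mkldnn_graph.h"

// Collect named input blobs, mirroring what the removed getInputBlobs(BlobMap&)
// used to do for all inputs at once.
InferenceEngine::BlobMap collectInputBlobs(MKLDNNPlugin::MKLDNNGraph& graph,
                                           const std::vector<std::string>& inputNames) {
    InferenceEngine::BlobMap blobs;
    for (const auto& name : inputNames) {
        if (auto blob = graph.getInputBlob(name))      // nullptr when the name is unknown
            blobs[name] = blob;
        else
            IE_THROW() << "Graph has no input named " << name;
    }
    return blobs;
}
```

getOutputBlob() follows the same contract on the output side, so an analogous helper covers graph outputs.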
@@ -218,6 +218,7 @@ class MKLDNNGraph { void Allocate(); void AllocateWithReuse(); void CreatePrimitives(); + void ExtractConstantNodes(); void ExecuteConstantNodesOnly(); friend class MKLDNNInferRequest; @@ -225,6 +226,11 @@ class MKLDNNGraph { friend InferenceEngine::CNNNetwork dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph); private: + // these node pointers (from graphNodes) are to avoid regular checking for + // constant node in ExecuteConstantNodesOnly and Infer methods + std::vector constantGraphNodes; + std::vector mutableGraphNodes; + void EnforceBF16(); }; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp index ac4bfff6b6d2f0..909a5083f71242 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp @@ -10,7 +10,6 @@ #include #include "ngraph/ngraph.hpp" #include "utils/debug_capabilities.h" - #include #include #include @@ -46,11 +45,11 @@ std::map extract_node_metadata(const MKLDNNNodePtr &no std::string outputPrecisionsStr; if (!node->getChildEdges().empty()) { - outputPrecisionsStr = node->getChildEdgeAt(0)->getDesc().getPrecision().name(); + outputPrecisionsStr = node->getChildEdgeAt(0)->getMemory().GetDesc().getPrecision().name(); bool isAllEqual = true; for (size_t i = 1; i < node->getChildEdges().size(); i++) { - if (node->getChildEdgeAt(i-1)->getDesc().getPrecision() != node->getChildEdgeAt(i)->getDesc().getPrecision()) { + if (node->getChildEdgeAt(i - 1)->getMemory().GetDesc().getPrecision() != node->getChildEdgeAt(i)->getMemory().GetDesc().getPrecision()) { isAllEqual = false; break; } @@ -59,12 +58,12 @@ std::map extract_node_metadata(const MKLDNNNodePtr &no // If all output precisions are the same, we store the name only once if (!isAllEqual) { for (size_t i = 1; i < node->getChildEdges().size(); i++) - outputPrecisionsStr += "," + std::string(node->getChildEdgeAt(i)->getDesc().getPrecision().name()); + outputPrecisionsStr += "," + std::string(node->getChildEdgeAt(i)->getMemory().GetDesc().getPrecision().name()); } } else { // Branch to correctly handle output nodes if (!node->getParentEdges().empty()) { - outputPrecisionsStr = node->getParentEdgeAt(0)->getDesc().getPrecision().name(); + outputPrecisionsStr = node->getParentEdgeAt(0)->getMemory().GetDesc().getPrecision().name(); } } serialization_info[ExecGraphInfoSerialization::OUTPUT_PRECISIONS] = outputPrecisionsStr; @@ -73,12 +72,11 @@ std::map extract_node_metadata(const MKLDNNNodePtr &no auto outDescs = node->getSelectedPrimitiveDescriptor()->getConfig().outConfs; if (!outDescs.empty()) { - auto fmt0 = MKLDNNMemoryDesc(outDescs[0].desc).getFormat(); - outputLayoutsStr = mkldnn::utils::fmt2str(fmt0); + outputLayoutsStr = outDescs[0].desc->serializeFormat(); bool isAllEqual = true; for (size_t i = 1; i < outDescs.size(); i++) { - if (MKLDNNMemoryDesc(outDescs[i - 1].desc).getFormat() != MKLDNNMemoryDesc(outDescs[i].desc).getFormat()) { + if (outDescs[i - 1].desc->serializeFormat() != outDescs[i].desc->serializeFormat()) { isAllEqual = false; break; } @@ -87,8 +85,7 @@ std::map extract_node_metadata(const MKLDNNNodePtr &no // If all output layouts are the same, we store the name only once if (!isAllEqual) { for (size_t i = 1; i < outDescs.size(); i++) { - auto fmt = MKLDNNMemoryDesc(outDescs[i].desc).getFormat(); - outputLayoutsStr += "," + std::string(mkldnn::utils::fmt2str(fmt)); + outputLayoutsStr += "," + outDescs[i].desc->serializeFormat(); } } } 
else { @@ -163,10 +160,8 @@ InferenceEngine::CNNNetwork dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph auto meta_data = extract_node_metadata(node); std::shared_ptr return_node; if (is_input) { - auto desc = node->getChildEdgeAt(0)->getDesc(); - auto param = std::make_shared( - details::convertPrecision(desc.getPrecision()), - ngraph::PartialShape(desc.getDims())); + auto& desc = node->getChildEdgeAt(0)->getMemory().GetDesc(); + auto param = std::make_shared(details::convertPrecision(desc.getPrecision()), desc.getShape().toPartialShape()); return_node = param; params.push_back(param); } else if (is_output) { @@ -177,10 +172,8 @@ InferenceEngine::CNNNetwork dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph get_inputs(node), node->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size()); for (size_t port = 0; port < return_node->get_output_size(); ++port) { - auto desc = node->getChildEdgeAt(port)->getDesc(); - return_node->set_output_type(port, - details::convertPrecision(desc.getPrecision()), - ngraph::PartialShape(desc.getDims())); + auto& desc = node->getChildEdgeAt(port)->getMemory().GetDesc(); + return_node->set_output_type(port, details::convertPrecision(desc.getPrecision()), desc.getShape().toPartialShape()); } } @@ -237,18 +230,19 @@ void serializeToXML(const MKLDNNGraph &graph, const std::string& path) { void serializeToCout(const MKLDNNGraph &graph) { for (const auto& node : graph.GetNodes()) { std::cout << "name: " << node->getName() << " [ "; - if (!node->getParentEdges().empty()) { - const auto& parentEdge = *(node->getParentEdges()[0].lock()); - const auto& prnt_out_desc = parentEdge.getOutputDescRO(); - std::cout << "in: " << prnt_out_desc.getPrecision().name() - << "/l=" << prnt_out_desc.getLayout() - << "; "; - } - if (!node->getChildEdges().empty()) { - const auto& childEdge = *(node->getChildEdges()[0].lock()); - const auto& chld_in_desc = childEdge.getInputDescRO(); - std::cout << "out: " << chld_in_desc.getPrecision().name() - << "/l=" << chld_in_desc.getLayout(); + auto nodeDesc = node->getSelectedPrimitiveDescriptor(); + if (nodeDesc) { + auto& inConfs = nodeDesc->getConfig().inConfs; + if (!inConfs.empty()) { + std::cout << "in: " << inConfs.front().desc->getPrecision().name() + << "/l=" << inConfs.front().desc->serializeFormat() + << "; "; + } + auto& outConfs = nodeDesc->getConfig().outConfs; + if (!outConfs.empty()) { + std::cout << "out: " << outConfs.front().desc->getPrecision().name() + << "/l=" << outConfs.front().desc->serializeFormat(); + } } std::cout << " ]" << std::endl; } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp index 9811b683ad19e3..9cbc9b79aeb983 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp @@ -43,6 +43,7 @@ #include #include "mkldnn_itt.h" +#include "cpu_memory_desc_utils.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -165,15 +166,15 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndBias(MKLDNNGraph &graph) { if (biasNode->getType() != Input || !biasNode->isConstant() || biasNode->getChildEdges().size() != 1) return false; - auto convOutDims = parentNode->getChildEdgesAtPort(0)[0]->getDims().ToSizeVector(); - auto biasDims = getNormalizedDimsBySize(biasNode->getChildEdgesAtPort(0)[0]->getDims().ToSizeVector(), + auto convOutDims = parentNode->getChildEdgesAtPort(0)[0]->getShape().getDims(); + auto biasDims = 
getNormalizedDimsBySize(biasNode->getChildEdgesAtPort(0)[0]->getShape().getDims(), convOutDims.size()); // TODO [NM]: Legacy ConvBias fusion transformation supports both per-tensor (via explicit broadcasing) and per-channel cases. // Most of the real models contain per-channel bias, so we need to reavaluate the need to support per-tensor variant. if (convOutDims.size() != biasDims.size() || biasDims.size() < 2) return false; - if (biasDims[0] != 1 || biasDims[1] != convOutDims[1]) + if (biasDims[0] != 1 || !dimsEqualStrong(biasDims[1], convOutDims[1])) return false; for (int i = 2; i < biasDims.size(); i++) { @@ -249,8 +250,8 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndBias(MKLDNNGraph &graph) { graphEdges.push_back(newEdge); parent->addEdge(newEdge); - parent->outDims[inNum] = MKLDNNDims({parentEltwise->outDims[0][1]}); - parentEltwise->inDims.push_back(parent->outDims[0]); + parent->outputShapes[inNum] = Shape(SizeVector{parentEltwise->outputShapes[0].getStaticDims()[1]}); + parentEltwise->inputShapes.push_back(parent->outputShapes[0]); } } @@ -299,17 +300,17 @@ void MKLDNNGraphOptimizer::FuseDeconvolutionAndSimpleOperation(MKLDNNGraph &grap void MKLDNNGraphOptimizer::FuseMultiplyAndAdd(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSutableSecondInput = [](MKLDNNNodePtr node, MKLDNNDims dataDims) { + auto isSutableSecondInput = [](MKLDNNNodePtr node, SizeVector dataDims) { if (node->getType() != Input || !node->isConstant()) return false; - auto secondInputDims = node->outDims[0]; - if (secondInputDims.ndims() != dataDims.ndims() || secondInputDims.ndims() < 2) + auto secondInputDims = node->outputShapes[0].getDims(); + if (secondInputDims.size() != dataDims.size() || secondInputDims.size() < 2) return false; - if (secondInputDims[0] != 1 || secondInputDims[1] != dataDims[1]) + if (secondInputDims[0] != 1 || !dimsEqualStrong(secondInputDims[1], dataDims[1])) return false; - for (size_t i = 2; i < secondInputDims.ndims(); i++) { + for (size_t i = 2; i < secondInputDims.size(); i++) { if (secondInputDims[i] != 1) return false; } @@ -322,14 +323,14 @@ void MKLDNNGraphOptimizer::FuseMultiplyAndAdd(MKLDNNGraph &graph) { node->getParentEdges().size() != 2 || node->getChildEdges().size() != 1) return false; - return isSutableSecondInput(node->getParentEdgesAtPort(1)[0]->getParent(), node->getParentEdgesAtPort(0)[0]->getDims()); + return isSutableSecondInput(node->getParentEdgesAtPort(1)[0]->getParent(), node->getParentEdgesAtPort(0)[0]->getShape().getDims()); }; auto isSutableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { if (childNode->getAlgorithm() != EltwiseAdd || !childNode->getFusedWith().empty() || childNode->getParentEdges().size() != 2) return false; - return isSutableSecondInput(childNode->getParentEdgesAtPort(1)[0]->getParent(), childNode->getParentEdgesAtPort(0)[0]->getDims()); + return isSutableSecondInput(childNode->getParentEdgesAtPort(1)[0]->getParent(), childNode->getParentEdgesAtPort(0)[0]->getShape().getDims()); }; auto parent = graphNodes.begin(); @@ -397,7 +398,7 @@ void MKLDNNGraphOptimizer::FuseMultiplyAndAdd(MKLDNNGraph &graph) { graphEdges.push_back(newEdge); parent->addEdge(newEdge); - parentEltwise->inDims.push_back(parent->outDims[0]); + parentEltwise->inputShapes.push_back(parent->outputShapes[0]); } } @@ -416,9 +417,9 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { bool retVal = false; if (node->getType() == Convolution) { if (auto convNode = std::dynamic_pointer_cast(node)) { - auto 
ndims = convNode->getParentEdgeAt(0)->getDims().ndims(); + auto rank = convNode->getParentEdgeAt(0)->getShape().getRank(); // int8 depthwise convolution does not support fusing zero points in 3D case - if (implication(convNode->isDepthWise(), ndims == 4)) { + if (implication(convNode->isDepthWise(), rank == 4)) { retVal = true; } } @@ -431,8 +432,12 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { if (convNode == nullptr) IE_THROW() << "Cannot get convolution node " << node->getName(); - int IC = node->getParentEdgesAtPort(0)[0]->getDims()[1]; - int OC = node->getChildEdgesAtPort(0)[0]->getDims()[1]; + int IC = node->getParentEdgesAtPort(0)[0]->getShape().getDims()[1]; + int OC = node->getChildEdgesAtPort(0)[0]->getShape().getDims()[1]; + + if (Shape::UNDEFINED_DIM == IC || Shape::UNDEFINED_DIM == OC) { + return false; + } if (parent0->getType() == Eltwise) { if (!parent0->getFusedWith().empty() || !parent1->getFusedWith().empty()) @@ -456,15 +461,15 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { if (arg0->getOriginalOutputPrecisionAtPort(0) != Precision::U8) return false; - if (parent0->getParentEdgesAtPort(1)[0]->getDims().size() < 2) { + if (parent0->getParentEdgesAtPort(1)[0]->getShape().getRank() < 2) { return false; } - auto zpDims = parent0->getParentEdgesAtPort(1)[0]->getDims(); - if (zpDims[0] != 1 || zpDims[1] != IC) + auto zpDims = parent0->getParentEdgesAtPort(1)[0]->getShape().getDims(); + if (zpDims[0] != 1 || !dimsEqualStrong(zpDims[1], IC)) return false; - for (int i = 2; i < zpDims.ndims(); i++) { + for (int i = 2; i < zpDims.size(); i++) { if (zpDims[i] != 1) return false; } @@ -485,7 +490,12 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { if (zeroPointsData == nullptr) IE_THROW() << "zeroPointsBlob has not allocated buffer"; - for (int j = 0; j < parent0->getParentEdgesAtPort(1)[0]->getDims()[1]; j++) { + auto zeroPointDataSize = parent0->getParentEdgesAtPort(1)[0]->getShape().getDims()[1]; + if (Shape::UNDEFINED_DIM == zeroPointDataSize) { + return false; + } + + for (int j = 0; j < zeroPointDataSize; j++) { convNode->inputZeroPoints.push_back(zeroPointsData[j]); } } else { @@ -524,11 +534,13 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { ptrdiff_t G = convNode->getGroupNum(); const int groupOffset = convNode->getAlgorithm() == ConvolutionGrouped ? 1 : 0; - ptrdiff_t OC = weightsConstant->outDims[0][0 + groupOffset]; - ptrdiff_t IC = weightsConstant->outDims[0][1 + groupOffset]; - ptrdiff_t KD = weightsConstant->outDims[0].ndims() == (5 + groupOffset) ? weightsConstant->outDims[0][weightsConstant->outDims[0].ndims() - 3] : 1; - ptrdiff_t KH = weightsConstant->outDims[0][weightsConstant->outDims[0].ndims() - 2]; - ptrdiff_t KW = weightsConstant->outDims[0][weightsConstant->outDims[0].ndims() - 1]; + auto& weightsConstantDims = weightsConstant->outputShapes[0].getStaticDims(); + + ptrdiff_t OC = weightsConstantDims[0 + groupOffset]; + ptrdiff_t IC = weightsConstantDims[1 + groupOffset]; + ptrdiff_t KD = weightsConstantDims.size() == (5 + groupOffset) ? 
weightsConstantDims[weightsConstantDims.size() - 3] : 1; + ptrdiff_t KH = weightsConstantDims[weightsConstantDims.size() - 2]; + ptrdiff_t KW = weightsConstantDims[weightsConstantDims.size() - 1]; for (size_t g = 0; g < G; g++) { for (size_t oc = 0; oc < OC; oc++) { @@ -588,7 +600,7 @@ void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &gra auto& graphNodes = graph.GetNodes(); auto isSutableParentNode = [](MKLDNNNodePtr node) { - return node->getType() == FullyConnected && node->getChildEdges().size() == 1 && node->getParentEdgeAt(0)->getDims().ndims() != 3; + return node->getType() == FullyConnected && node->getChildEdges().size() == 1 && node->getParentEdgeAt(0)->getShape().getRank() != 3; }; auto parent = graphNodes.begin(); @@ -653,12 +665,12 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) { const auto &strides = conv->getStride(); const auto &paddings = conv->getPaddingL(); - const auto &inDims = node->getParentEdgeAt(0)->getDims(); - const auto &outDims = node->getChildEdgeAt(0)->getDims(); + const auto &inDims = node->getParentEdgeAt(0)->getShape().getDims(); + const auto &outDims = node->getChildEdgeAt(0)->getShape().getDims(); bool isSupportedParams = conv->getGroupNum() == 1 && - inDims.ndims() == 4 && - inDims[inDims.ndims() - 1] == outDims[outDims.ndims() - 1] && - inDims[inDims.ndims() - 2] == outDims[outDims.ndims() - 2] && + inDims.size() == 4 && + dimsEqualStrong(inDims[inDims.size() - 1], outDims[outDims.size() - 1]) && + dimsEqualStrong(inDims[inDims.size() - 2], outDims[outDims.size() - 2]) && is1x1Convolution(conv) && // TODO [oneDNN] : fusing is permitted only with 1x1 convolutions everyone_is(1, strides[strides.size() - 1], strides[strides.size() - 2]) && everyone_is(0, paddings[paddings.size() - 1], paddings[paddings.size() - 2]) && @@ -702,8 +714,8 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) { const auto weightRank = convChild->getWeightDims().size(); const auto stridesSize = convChild->getStride().size(); - bool isSupportedParams = convChild->outDims[0][1] == convChild->getGroupNum() && - convChild->outDims[0][1] != 1 && + bool isSupportedParams = dimsEqualStrong(convChild->outputShapes[0].getDims()[1], convChild->getGroupNum()) && + convChild->outputShapes[0].getDims()[1] != 1 && everyone_is(3, convChild->getWeightDims()[weightRank - 1], convChild->getWeightDims()[weightRank - 2]) && everyone_is(1, convChild->getPaddingL()[stridesSize - 1], convChild->getPaddingL()[stridesSize - 2]) && everyone_is(1, convChild->getPaddingR()[stridesSize - 1], convChild->getPaddingR()[stridesSize - 2]) && @@ -711,14 +723,18 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) { convChild->getStride()[stridesSize - 1] == convChild->getStride()[stridesSize - 2] && withBias && one_of(convChild->getStride()[stridesSize - 1], 1, 2) && - childNode->getChildEdgeAt(0)->getDims().ndims() == 4; + childNode->getChildEdgeAt(0)->getShape().getRank() == 4; return isSupportedParams; }; auto isFusingWorthwhile = [&](const MKLDNNNodePtr &parentNode, const MKLDNNNodePtr &childNode) { - auto inDims = childNode->inDims[0]; - auto outDims = childNode->outDims[0]; + if (!childNode->inputShapes[0].isStatic() || !childNode->outputShapes[0].isStatic()) { + return false; + } + + auto inDims = childNode->inputShapes[0].getStaticDims(); + auto outDims = childNode->outputShapes[0].getStaticDims(); int elemSize = childNode->getOriginalOutputPrecisionAtPort(0).size(); int L3_cache_size = 
utils::get_cache_size(3, false); @@ -1076,9 +1092,9 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG if (mergedConv->fusedWith.size() > 0 && (mergedConv->fusedWith[0]->getType() == Convolution || mergedConv->fusedWith[0]->getType() == BinaryConvolution)) { // Merged with DW_conv. Shape may change - mergedConv->inDims.push_back(mergedConv->fusedWith[0]->outDims[0]); + mergedConv->inputShapes.push_back(mergedConv->fusedWith[0]->outputShapes[0]); } else { - mergedConv->inDims.push_back(mergedConv->outDims[0]); + mergedConv->inputShapes.push_back(mergedConv->outputShapes[0]); } size_t childIdx = 0lu; @@ -1352,7 +1368,7 @@ void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) { graphEdges.push_back(newEdge); parent->addEdge(newEdge); - parent->outDims[inNum] = child->inDims[outNum]; + parent->outputShapes[inNum] = child->inputShapes[outNum]; } } else { MKLDNNEdgePtr &remEdge = p_edge; @@ -1373,7 +1389,7 @@ void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) { graphEdges.push_back(newEdge); parent->addEdge(newEdge); - parentNode->inDims.push_back(parent->outDims[0]); + parentNode->inputShapes.push_back(parent->outputShapes[0]); } } @@ -1400,16 +1416,6 @@ void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) { if (nn == nullptr) IE_THROW() << "Cannot get reorder layer " << nextNode->getName(); - auto scales = n->_scales; - - if (n->_scales != nullptr && nn->_scales != nullptr) { - IE_THROW() << "Merging scales of two subsequent reorders is unsupported yet"; - } else { - if (scales == nullptr) { - scales = nn->_scales; - } - } - MKLDNNNodePtr p = n->getParentEdgeAt(0)->getParent(); MKLDNNNodePtr c = nn->getChildEdgeAt(0)->getChild(); @@ -1430,7 +1436,7 @@ void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) { std::string layerName = edge->getParent()->getName() + "_ScaleReorder_" + edge->getChild()->getName(); - graph.InsertReorder(edge, layerName, n->getInput(), nn->getOutput(), false, scales); + graph.InsertReorder(edge, layerName, n->getInput(), nn->getOutput(), false); graph.GetEdges().erase(std::remove(graph.GetEdges().begin(), graph.GetEdges().end(), edge), graph.GetEdges().end()); } } @@ -1448,8 +1454,8 @@ void MKLDNNGraphOptimizer::FuseBroadcastAndEltwise(MKLDNNGraph &graph) { MKLDNNNodePtr& broadcastNode = graphNode; MKLDNNNodePtr eltwiseNode = broadcastNode->getChildEdgeAt(0)->getChild(); - eltwiseNode->inDims[broadcastNode->getChildEdgeAt(0)->getOutputNum()] - = broadcastNode->getParentEdgeAt(0)->getDims(); + eltwiseNode->inputShapes[broadcastNode->getChildEdgeAt(0)->getOutputNum()] + = broadcastNode->getParentEdgeAt(0)->getShape(); auto& edges = graph.GetEdges(); for (size_t i = 1lu; i < broadcastNode->getParentEdges().size(); i++) { @@ -1673,9 +1679,14 @@ void MKLDNNGraphOptimizer::MergeTransposeAndReorder(MKLDNNGraph &graph) { } auto& transposeOrder = transposeNode->getOrder(); - auto& layoutOrder = transposeNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc.getBlockingDesc().getOrder(); - auto& inOrder = reorderNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc.getBlockingDesc().getOrder(); - auto& outOrder = reorderNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc.getBlockingDesc().getOrder(); + auto layoutOrder = MemoryDescUtils::convertToBlockedDescriptor( + *transposeNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc).getOrder(); + + auto inBlockedDesc = 
MemoryDescUtils::convertToBlockedDescriptor(*reorderNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc); + auto outBlockedDesc = MemoryDescUtils::convertToBlockedDescriptor(*reorderNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc); + + auto& inOrder = inBlockedDesc.getOrder(); + auto& outOrder = outBlockedDesc.getOrder(); if (transposeOrder.size() != layoutOrder.size() || layoutOrder.size() != inOrder.size() || inOrder.size() != outOrder.size()) { return false; @@ -1751,18 +1762,18 @@ void MKLDNNGraphOptimizer::MergeTransposeAndReorder(MKLDNNGraph &graph) { graph.DropNode(parentNode); graph.DropNode(childNode); - auto inDesc = parentNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc; - auto outDesc = childNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc; + auto& inDesc = parentNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc; + auto& outDesc = childNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc; - auto inPrec = inDesc.getPrecision(); - auto outPrec = outDesc.getPrecision(); + auto inPrec = inDesc->getPrecision(); + auto outPrec = outDesc->getPrecision(); - auto reorderInDesc = TensorDesc(inDesc); - auto reorderOutDesc = TensorDesc(outDesc); - reorderOutDesc.setPrecision(inPrec); + auto reorderInDesc = inDesc->clone(); + auto reorderOutDesc = outDesc->clone(); + reorderOutDesc->setPrecision(inPrec); std::string reorderlayerName = parentParentNode->getName() + "_" + - MKLDNNExtensionUtils::getReorderArgs(reorderInDesc, reorderOutDesc) + "_" + "fake"; + MKLDNNReorderNode::getReorderArgs(*reorderInDesc, *reorderOutDesc) + "_" + "fake"; MKLDNNEdgePtr edge; for (auto &childEdge : parentParentNode->getChildEdges()) { @@ -1775,17 +1786,17 @@ void MKLDNNGraphOptimizer::MergeTransposeAndReorder(MKLDNNGraph &graph) { IE_THROW() << "Transpose node '" << parentNode->getName() << "' has invalid edges."; } - auto reorderNode = graph.InsertReorder(edge, reorderlayerName, reorderInDesc, reorderOutDesc, true); + auto reorderNode = graph.InsertReorder(edge, reorderlayerName, *reorderInDesc, *reorderOutDesc, true); // case 2 if (inPrec != outPrec) { - auto reorderInDesc2 = TensorDesc(reorderOutDesc); - auto reorderOutDesc2 = TensorDesc(outDesc); + auto reorderInDesc2 = reorderOutDesc->clone(); + auto reorderOutDesc2 = outDesc->clone(); std::string reorderLayerName2 = reorderNode->getName() + "_" + - MKLDNNExtensionUtils::getReorderArgs(reorderInDesc2, reorderOutDesc2) + "_" + childChildNode->getName(); + MKLDNNReorderNode::getReorderArgs(*reorderInDesc2, *reorderOutDesc2) + "_" + childChildNode->getName(); - graph.InsertReorder(reorderNode->getChildEdgeAt(0), reorderLayerName2, reorderInDesc2, reorderOutDesc2, false); + graph.InsertReorder(reorderNode->getChildEdgeAt(0), reorderLayerName2, *reorderInDesc2, *reorderOutDesc2, false); } }; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp index 738604a6f0a6ac..9aa0fff3fa55c4 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp @@ -213,8 +213,6 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std:: InferenceEngine::Blob::Ptr data; if (graph->hasInputWithName(name)) { - InferenceEngine::BlobMap blobs; - graph->getInputBlobs(blobs); // ROI blob is returned only if it was set previously. 
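The MergeTransposeAndReorder rewrite above replaces the merged Transpose + Reorder pair in two steps: first a "fake" reorder whose output descriptor is cloned from the target but pinned to the input precision (layout change only), then, only when the precisions differ, a second genuine reorder that performs the conversion. A condensed sketch of that control flow, using illustrative stand-in types (Desc, Edge and insertReorder are not the plugin's real signatures):

#include <string>

struct Desc {
    std::string layoutTag;   // e.g. "nchw" vs "nChw16c"
    std::string precision;   // e.g. "FP32" vs "BF16"
};

struct Edge {};

Edge* insertReorder(Edge* at, const Desc& /*in*/, const Desc& /*out*/, bool /*optimized*/) {
    return at;  // stub: a real implementation would splice a Reorder node into the graph
}

void replaceTransposePlusReorder(Edge* edge, const Desc& inDesc, const Desc& outDesc) {
    // Step 1: one layout-only reorder covering the merged Transpose + Reorder pair;
    // its output precision is forced to the input precision so no conversion happens here.
    Desc step1Out = outDesc;
    step1Out.precision = inDesc.precision;
    Edge* next = insertReorder(edge, inDesc, step1Out, /*optimized=*/true);

    // Step 2: only when precisions differ, append a real converting reorder.
    if (inDesc.precision != outDesc.precision)
        insertReorder(next, step1Out, outDesc, /*optimized=*/false);
}

int main() { return 0; }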
auto it = _preProcData.find(name); if (it != _preProcData.end()) { @@ -223,7 +221,12 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std:: } if (_inputs.find(name) == _inputs.end()) { - InferenceEngine::TensorDesc desc = blobs[name]->getTensorDesc(); + auto pBlob = graph->getInputBlob(name); + if (!pBlob) { + IE_THROW() << "MKLDNN graph doesn't contain input node with name: " << name; + } + + InferenceEngine::TensorDesc desc = pBlob->getTensorDesc(); if (_networkInputs.find(name) != _networkInputs.end()) { InferenceEngine::Layout l = _networkInputs[name]->getLayout(); @@ -235,7 +238,7 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std:: _inputs[name] = make_blob_with_precision(desc); _inputs[name]->allocate(); - if (blobs[name]->getTensorDesc() == desc && + if (pBlob->getTensorDesc() == desc && graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end() && !graph->getProperty().batchLimit) { externalPtr[name] = _inputs[name]->buffer(); } @@ -258,9 +261,12 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std:: } if (graph->hasOutputWithName(name)) { - InferenceEngine::BlobMap blobs; - graph->getOutputBlobs(blobs); if (_outputs.find(name) == _outputs.end()) { + auto pBlob = graph->getOutputBlob(name); + if (!pBlob) { + IE_THROW() << "MKLDNN graph doesn't contain output node with name: " << name; + } + if (!data) { InferenceEngine::TensorDesc desc = _networkOutputs[name]->getTensorDesc(); desc.setPrecision(normalizeToSupportedPrecision(desc.getPrecision())); @@ -275,7 +281,7 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std:: data = make_blob_with_precision(desc); data->allocate(); } else { - const auto& expectedTensorDesc = blobs[name]->getTensorDesc(); + const auto& expectedTensorDesc = pBlob->getTensorDesc(); if (expectedTensorDesc.getPrecision() != data->getTensorDesc().getPrecision()) { IE_THROW(ParameterMismatch) << "Network input and output use the same name: " << name << " but expect blobs with different precision: " @@ -295,7 +301,7 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std:: } _outputs[name] = data; - if (!externalPtr.count(name) && data->getTensorDesc() == blobs[name]->getTensorDesc() && !graph->getProperty().batchLimit) { + if (!externalPtr.count(name) && data->getTensorDesc() == pBlob->getTensorDesc() && !graph->getProperty().batchLimit) { externalPtr[name] = data->buffer(); } } @@ -366,12 +372,12 @@ void MKLDNNPlugin::MKLDNNInferRequest::SetBlob(const std::string& name, const In IE_THROW(ParameterMismatch) << "Failed to set input blob. Blocking descriptor mismatch."; } - InferenceEngine::BlobMap blobs; - graph->getInputBlobs(blobs); - if (blobs.find(name) == blobs.end()) + auto pBlob = graph->getInputBlob(name); + if (!pBlob) { IE_THROW() << "MKLDNN graph doesn't contain input node with name: " << name; + } - if (data->getTensorDesc() == blobs.at(name)->getTensorDesc() && + if (data->getTensorDesc() == pBlob->getTensorDesc() && graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end() && !graph->getProperty().batchLimit) { externalPtr[name] = data->buffer(); } else if (externalPtr.find(name) != externalPtr.end()) { @@ -404,12 +410,11 @@ void MKLDNNPlugin::MKLDNNInferRequest::SetBlob(const std::string& name, const In IE_THROW(ParameterMismatch) << "Failed to set output blob. 
Blocking descriptor mismatch."; } - InferenceEngine::BlobMap blobs; - graph->getOutputBlobs(blobs); - if (blobs.find(name) == blobs.end()) + auto pBlob = graph->getOutputBlob(name); + if (!pBlob) IE_THROW() << "MKLDNN graph doesn't contain output node with name: " << name; - if (data->getTensorDesc() == blobs.at(name)->getTensorDesc() && + if (data->getTensorDesc() == pBlob->getTensorDesc() && !graph->getProperty().batchLimit) { externalPtr[name] = data->buffer(); } else if (externalPtr.find(name) != externalPtr.end()) { @@ -435,6 +440,7 @@ void MKLDNNPlugin::MKLDNNInferRequest::changeDefaultPtr() { auto& child = input->second->getChildEdgeAt(i)->getChild(); if (child->isConstant()) canBeInPlace = false; + auto* concat = dynamic_cast(child.get()); if (canBeInPlace && concat && concat->isOptimized()) canBeInPlace = false; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp index 6d82ccf3e229d7..a6a64120f00172 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp @@ -2,23 +2,24 @@ // SPDX-License-Identifier: Apache-2.0 // -#include #include -#include #include #include #include -#include #include "utils/general_utils.h" #include #include +#include #include "mkldnn_memory.h" #include "mkldnn_extension_utils.h" #include "nodes/common/cpu_memcpy.h" #include "nodes/common/cpu_convert.h" #include "mkldnn/ie_mkldnn.h" +#include "cpu_shape.h" +#include "cpu_memory_desc_utils.h" +#include "mkldnn_extension_utils.h" using namespace InferenceEngine; using namespace mkldnn; @@ -54,7 +55,7 @@ void MKLDNNMemory::Create(const memory::dims& dims, memory::data_type data_type, format = memory::format_tag::any; } - memory::desc desc = MKLDNNMemoryDesc({dims}, data_type, format); + memory::desc desc = MKLDNNMemoryDesc(MKLDNNExtensionUtils::convertToSizeVector(dims), data_type, format); Create(desc, data); } @@ -89,10 +90,16 @@ void MKLDNNMemory::Create(const mkldnn::memory::desc& desc, const void *data, bo } } +void MKLDNNMemory::Create(const MemoryDesc &desc, const void *data, bool pads_zeroing) { + pMemDesc = desc.clone(); + Create(mkldnn::memory::desc(MemoryDescUtils::convertToMKLDNNMemoryDesc(desc)), data, pads_zeroing); +} + + void MKLDNNMemory::reorderData(const MKLDNNMemory &input, const MKLDNNMemory &output, size_t size) { if (size != 0) IE_ASSERT(size <= output.GetDescriptor().get_size()); - if (input.GetDesc() == output.GetDesc()) { + if (input.GetDescriptor() == output.GetDescriptor()) { auto srcPtr = static_cast(input.GetPtr()); auto dstPtr = static_cast(output.GetPtr()); @@ -118,7 +125,7 @@ void MKLDNNMemory::reorderData(const MKLDNNMemory &input, const MKLDNNMemory &ou MKLDNNExtensionUtils::DataTypeToIEPrecision(output.GetDataType()), input.GetElementsCount()); MKLDNNMemory tmpMem(output.eng); - tmpMem.Create(input.GetDims(), output.GetDataType(), input.GetDesc().getFormat(), tmpBuff.data()); + tmpMem.Create(input.GetDims(), output.GetDataType(), input.GetMKLDNNDesc().getFormat(), tmpBuff.data()); pReorder = std::unique_ptr(new mkldnn::reorder(tmpMem.GetPrimitive(), output.GetPrimitive())); srcMemoryPtr = tmpMem.prim; @@ -189,8 +196,8 @@ void MKLDNNMemory::FillZero() { memset(dataPtr, 0, GetSize()); } -memory::format_tag MKLDNNMemory::GetPlainFormat(const memory::dims& dims) { - switch (dims.size()) { +memory::format_tag MKLDNNMemory::GetPlainFormatByRank(size_t rank) { + switch (rank) { case 0: case 1: return memory::format_tag::a; @@ -222,11 +229,6 @@ 
InferenceEngine::Layout MKLDNNMemory::GetPlainLayout(const memory::dims& dims) { } } -bool MKLDNNMemory::isConsistant(const mkldnn::memory::dims& dims, mkldnn::memory::format_tag format) { - memory::desc attempt(dims, memory::data_type::f32, format, true); - return static_cast(attempt); -} - Precision MKLDNNMemory::convertToIePrec(memory::data_type dataType) { return MKLDNNExtensionUtils::DataTypeToIEPrecision(dataType); } @@ -262,6 +264,42 @@ std::string MKLDNNMemory::formatToString(memory::format_tag fmt) { return mkldnn::utils::fmt2str(fmt); } +void *MKLDNNMemory::GetPtr() const { + auto ptr = static_cast(GetData()); + auto md = GetDescriptor().data; + mkldnn::impl::memory_desc_wrapper wrapper(md); + ptr += wrapper.offset0() * wrapper.data_type_size(); + return ptr; +} + +template<> +MKLDNNMemoryDesc MKLDNNMemory::GetDescWithType() const { + if (auto descPtr = dynamic_cast(pMemDesc.get())) { + return *descPtr; + } else { + switch (pMemDesc->getType()) { + case (MemoryDescType::Blocked): + return MemoryDescUtils::convertToMKLDNNMemoryDesc(*(pMemDesc->as())); + default: + IE_THROW() << "Can not convert unsupported memory descriptor"; + } + } +} + +template<> +BlockedMemoryDesc MKLDNNMemory::GetDescWithType() const { + if (auto descPtr = dynamic_cast(pMemDesc.get())) { + return *descPtr; + } else { + switch (pMemDesc->getType()) { + case (MemoryDescType::Mkldnn): + return MemoryDescUtils::convertToBlockedDescriptor(*(pMemDesc->as())); + default: + IE_THROW() << "Can not convert unsupported memory descriptor"; + } + } +} + bool MKLDNNMemoryDesc::operator==(const MKLDNNMemoryDesc &rhs) const { return this->desc == rhs.desc; } @@ -274,51 +312,42 @@ MKLDNNMemoryDesc::operator mkldnn::memory::desc() const { return desc; } -MKLDNNMemoryDesc::MKLDNNMemoryDesc(const mkldnn::memory::dims& dims, mkldnn::memory::data_type dataType, - mkldnn::memory::format_tag format): desc(dims, dataType, mkldnn::memory::format_tag::any) { +MKLDNNMemoryDesc::MKLDNNMemoryDesc(const mkldnn::memory::desc& desc) : + MemoryDesc(Shape(MKLDNNExtensionUtils::convertToSizeVector(desc.dims())), Mkldnn), desc(desc) { + if (desc.data.format_kind == dnnl::impl::format_kind::any) + IE_THROW(Unexpected) << "Memory format any is prohibited!"; +} + +MKLDNNMemoryDesc::MKLDNNMemoryDesc(const std::vector& _dims, mkldnn::memory::data_type dataType, mkldnn::memory::format_tag format) + : MemoryDesc(Shape(_dims), Mkldnn) { + if (format == memory::format_tag::any) + IE_THROW(Unexpected) << "Memory format any is prohibited!"; if (format != memory::format_tag::undef) { - if (format == memory::format_tag::x && dims.size() == 0) { + if (format == memory::format_tag::x && _dims.size() == 0) { desc = mkldnn::memory::desc(mkldnn::memory::dims(1, 1), dataType, format); } else { - desc = mkldnn::memory::desc(dims, dataType, format); + desc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(_dims), dataType, format); } } else { // Trying to create plain descriptor // This WA is needed since memory::format_tag doesn't contain plain tag for tensors with rank > 6D - mkldnn::memory::dims strides(dims.size(), 1); - for (int d = dims.size() - 2; d >= 0; d--) { - strides[d] = strides[d + 1] * dims[d + 1]; + mkldnn::memory::dims strides(_dims.size(), 1); + for (int d = _dims.size() - 2; d >= 0; d--) { + strides[d] = strides[d + 1] * _dims[d + 1]; } - desc = mkldnn::memory::desc(dims, dataType, strides); + desc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(_dims), dataType, strides); } } -MKLDNNMemoryDesc::MKLDNNMemoryDesc(const 
mkldnn::memory::dims& dims, mkldnn::memory::data_type dataType) : desc() { - const auto ndims = dims.size(); +MKLDNNMemoryDesc::MKLDNNMemoryDesc(const std::vector& _dims, mkldnn::memory::data_type dataType) + : MemoryDesc(Shape(_dims), Mkldnn), desc() { + const auto ndims = _dims.size(); mkldnn::memory::dims plain_strides(ndims, 1); for (size_t i = 1; i < ndims; i++) { - plain_strides[ndims - i -1] = plain_strides[ndims - i] * dims[ndims - i]; - } - desc = {dims, dataType, plain_strides}; -} - -size_t MKLDNNMemoryDesc::GetElementSize() const { - const auto type = desc.data_type(); - switch (type) { - case memory::data_type::f16 : - case memory::data_type::bf16 : - return 2; - case memory::data_type::f32 : - case memory::data_type::s32 : - return 4; - case memory::data_type::s8 : - case memory::data_type::u8 : - case memory::data_type::bin : - return 1; - default: - IE_THROW() << "Unknown data type"; + plain_strides[ndims - i -1] = plain_strides[ndims - i] * _dims[ndims - i]; } + desc = {MKLDNNExtensionUtils::convertToDnnlDims(_dims), dataType, plain_strides}; } static const std::map> form_tags_by_ndims { @@ -677,32 +706,92 @@ bool MKLDNNMemoryDesc::isTailCFormat() const { return is_tailc_strides; } +bool MKLDNNMemoryDesc::blocksExtended() const { + for (int i = 0; i < desc.data.ndims; i++) { + if (desc.data.dims[i] != desc.data.padded_dims[i]) + return true; + } + return false; +} + +size_t MKLDNNMemoryDesc::getMemSizeImp() const { + return desc.get_size(); +} + +size_t MKLDNNMemoryDesc::getElementOffset(size_t elemNumber) const { + mkldnn::impl::memory_desc_wrapper wrapped(desc.data); + return wrapped.off_l(elemNumber); +} + +bool MKLDNNMemoryDesc::isCompatible(const MemoryDesc &rhs) const { + if (MemoryDescType::Blocked == rhs.getType()) { + return isCompatible(*(rhs.as())); + } else if (MemoryDescType::Mkldnn == rhs.getType()) { + return isCompatible(*(rhs.as())); + } else { + return false; + } +} + +bool MKLDNNMemoryDesc::isCompatible(const MKLDNNMemoryDesc &rhs) const { + using namespace dnnl; + using namespace impl; + using namespace dnnl::impl::utils; + if (this->desc == rhs.desc) { + return true; + } + mkldnn::impl::memory_desc_wrapper wrappedThis(this->desc.data); + mkldnn::impl::memory_desc_wrapper wrappedRhs(rhs.desc.data); + if (one_of(wrappedThis.format_kind(), format_kind::undef, format_kind::any)) + return false; + if (wrappedThis.is_wino_desc() || wrappedThis.is_rnn_packed_desc()) return false; + + const auto &blk = wrappedThis.blocking_desc(); + const auto &r_blk = wrappedRhs.blocking_desc(); + + int stride_start = wrappedThis.ndims() >0 && wrappedThis.dims()[0] == 1 ? 1 : 0; //ignore batch axis stride if batch size == 1 + + // Here is a slightly modified version of mkldnn::impl::memory_desc_wrapper::similar_to() call able to skip specific strides check. 
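The isCompatible check above compares oneDNN blocking-descriptor fields directly: per-dimension outer strides plus inner_nblks / inner_blks / inner_idxs for the inner blocking. For a concrete feel of those fields, the small probe below can be compiled against a oneDNN 2.x installation (the plugin's mkldnn namespace is an alias of dnnl); the numbers in the comments are what the nChw16c convention implies and are shown for illustration only:

#include <iostream>
#include <oneapi/dnnl/dnnl.hpp>

int main() {
    using tag = dnnl::memory::format_tag;
    using dt = dnnl::memory::data_type;

    // Logical NCHW dims {1, 32, 4, 4} stored as nChw16c: physically [N][C/16][H][W][16c].
    dnnl::memory::desc md({1, 32, 4, 4}, dt::f32, tag::nChw16c);
    const auto& blk = md.data.format_desc.blocking;

    // Expected by convention: inner_nblks = 1, inner_blks[0] = 16, inner_idxs[0] = 1
    // (one 16-wide inner block over dimension 1, i.e. channels), and outer strides
    // in elements: N = 512, C = 256, H = 64, W = 16.
    std::cout << "inner_nblks = " << blk.inner_nblks
              << ", inner_blks[0] = " << blk.inner_blks[0]
              << ", inner_idxs[0] = " << blk.inner_idxs[0] << '\n';
    for (int d = 0; d < md.data.ndims; ++d)
        std::cout << "strides[" << d << "] = " << blk.strides[d] << '\n';
    return 0;
}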
+ return wrappedThis.ndims() == wrappedRhs.ndims() + && wrappedThis.format_kind() == wrappedRhs.format_kind() + && wrappedThis.data_type() == wrappedRhs.data_type() + && array_cmp(wrappedThis.dims(), wrappedRhs.dims(), wrappedThis.ndims()) + && array_cmp(blk.strides + stride_start, r_blk.strides + stride_start, wrappedThis.ndims() - stride_start) + && blk.inner_nblks == r_blk.inner_nblks + && array_cmp(blk.inner_blks, r_blk.inner_blks, blk.inner_nblks) + && array_cmp(blk.inner_idxs, r_blk.inner_idxs, blk.inner_nblks) + && array_cmp(wrappedThis.padded_dims(), wrappedRhs.padded_dims(), wrappedRhs.ndims()) + && array_cmp(wrappedThis.padded_offsets(), wrappedRhs.padded_offsets(), wrappedThis.ndims()) + && dimsEqualWeak(wrappedThis.offset0(), wrappedRhs.offset0()); +} + + /** - * Convert to IE::TensorDesc + * Check compatibility with BlockedMemoryDesc * * mkl: IOhw_4i16o4i dims {32, 64, 128, 128} * strides // the order of outer dims is encoded here * inner_blks 4 16 4 * inner_idxs 1 0 1 * - * IE tensor desc has more expressive ability. Any oneDNN blocked tensor can be covreted. - * How to convert into IE representation: + * BlockedMemoryDesc desc has more expressive ability. + * How to check compatibility with BlockedMemoryDesc representation: * 0. Detect a new_outer_order of outer_dims via descending strides. - * 1. IE strides : concatenate strides in new_outer_order and inner strides. - * 2. IE dims : concatenate outer dims in new_outer_order with auto padding and inner blocks - * 3. IE order : concatenate new_outer_order and inner_idxs + * 1. BlockedMemoryDesc strides : concatenate strides in new_outer_order and inner strides. + * 2. BlockedMemoryDesc dims : concatenate outer dims in new_outer_order with auto padding and inner blocks + * 3. BlockedMemoryDesc order : concatenate new_outer_order and inner_idxs */ -MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const { - const auto dims = desc.dims(); - if (desc.data.format_kind == dnnl_format_kind_any) - return TensorDesc { - MKLDNNMemory::convertToIePrec(desc.data_type()), - SizeVector {begin(dims), end(dims)}, - Layout::ANY}; +bool MKLDNNMemoryDesc::isCompatible(const BlockedMemoryDesc &rhs) const { + if (this->getShape() != rhs.getShape() || this->getPrecision() != rhs.getPrecision()) { + return false; + } + + const auto dims = desc.dims(); - if (desc.data.format_kind != dnnl_blocked) - IE_THROW() << "Conversion is not possible"; + if (desc.data.format_kind != dnnl_blocked) { + return false; + } const auto &blk_desc = desc.data.format_desc.blocking; @@ -731,174 +820,99 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const { std::iota(outer_order.begin(), outer_order.end(), 0); std::sort(outer_order.begin(), outer_order.end(), [&blk_desc, &outer_block_dims] (size_t ind_l, size_t ind_r) { - return (blk_desc.strides[ind_l] > blk_desc.strides[ind_r]) || - (blk_desc.strides[ind_l] == blk_desc.strides[ind_r] && outer_block_dims[ind_l] > outer_block_dims[ind_r]); - }); + return (blk_desc.strides[ind_l] > blk_desc.strides[ind_r]) || + (blk_desc.strides[ind_l] == blk_desc.strides[ind_r] && outer_block_dims[ind_l] > outer_block_dims[ind_r]); + }); - // IE blocked order + // blocked order // [new_outer_order] U [inner_idxs] - SizeVector ie_blk_order(total_ndims, 0); - std::copy(outer_order.begin(), outer_order.end(), ie_blk_order.begin()); - std::copy(blk_desc.inner_idxs, blk_desc.inner_idxs + blk_desc.inner_nblks, ie_blk_order.begin() + dims.size()); - - // IE blocked strides - // [outer_strides via new_outer_order] U 
[inner_strides] - SizeVector ie_blk_strides(total_ndims, 0); - std::copy(inner_strides.rbegin(), inner_strides.rend(), ie_blk_strides.rbegin()); - std::transform(outer_order.begin(), outer_order.end(), ie_blk_strides.begin(), - [&] (size_t i) { return blk_desc.strides[i]; }); - - // IE blocked dims - // [dims via new_outer_order with auto pad] U [inner_blk_dims] - SizeVector ie_blk_dims(total_ndims, 0); - std::copy(blk_desc.inner_blks, blk_desc.inner_blks + blk_desc.inner_nblks, - ie_blk_dims.end() - blk_desc.inner_nblks); - std::transform(outer_order.begin(), outer_order.end(), ie_blk_dims.begin(), - [&] (size_t i) { return outer_block_dims[i]; }); + SizeVector blk_order(total_ndims, 0); + std::copy(outer_order.begin(), outer_order.end(), blk_order.begin()); + std::copy(blk_desc.inner_idxs, blk_desc.inner_idxs + blk_desc.inner_nblks, blk_order.begin() + dims.size()); - // IE offset padded to data. Same as for oneDNN - SizeVector ie_blk_offset_to_data {desc.data.padded_offsets, desc.data.padded_offsets + desc.data.ndims}; - size_t ie_blk_offset0 = desc.data.offset0; - - // TODO: The tensor desc implementation allow to specify offset_to_data for inner blocked dims. - // Which is not obvious behavior. It required offset_to_data.size == total_ndims, so will - // fill it with zero. - ie_blk_offset_to_data.insert(ie_blk_offset_to_data.end(), inner_ndims, 0); - - - BlockingDesc ie_blk_desc { ie_blk_dims, - ie_blk_order, - ie_blk_offset0, - ie_blk_offset_to_data, - ie_blk_strides }; - TensorDesc res { - MKLDNNMemory::convertToIePrec(desc.data_type()), - SizeVector {begin(dims), end(dims)}, - ie_blk_desc }; - // TODO: BLOCKED is the most common layout which covers all other permute layout like NHWC. - // But for some cases we have to specify it more correctly.. may be.. or just keep - // auto detected layout in constructor of TensorDesc. - return res; -} - -/** - * Construct from IE::TensorDesc - * @param tDesc - * - * IE IOhw_4i16o4i dims(N) = {32, 64, 128, 128} - * blockedDims {4, 2, 128, 128, 4, 16, 4} // total dims(inner, outermost, auto blocked/padded). Generally sorted by strides. - * strides {8388608, 4194304, 32768, 256, 64, 4, 1} // strides for blockedDims, growing sequence - * order {1, 0, 2, 3, 1, 0, 1} // matching to original dims - * - * All vectors blockedDims/strides/order have same size equals total num of internal blocked dims(inner_dims + outer_dims) - * - * Tensor descriptor filing is not deterministic. It allows any permutation of index which keeps order of - * real dims spliting. - * for {1, 0, 2, 3, 1, 0, 1} we can swap elements [1] <=> [4] - * but not [0]<=>[4] because it breacke spliting original dims into internal blocked dims - * Normalization of representation: Make strides growing but keep layout same as original. Not all - * layout allow us to meet normalize form of tensor desc. - * - * Limitation of conversion first N elements of order should be permutation of [0,1,2 ... N] - */ -MKLDNNMemoryDesc::MKLDNNMemoryDesc(const TensorDesc& tDesc): - desc({}, mkldnn::memory::data_type::undef, mkldnn::memory::format_tag::undef) { - auto dims = tDesc.getDims(); - - // TODO: implicit conversion of dims is no good... 
- if (tDesc.getLayout() == Layout::SCALAR) { - desc.data.format_kind = dnnl_blocked; - desc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(tDesc.getPrecision())); - desc.data.ndims = 1; - desc.data.dims[0] = 1; - desc.data.padded_dims[0] = 1; - desc.data.format_desc.blocking.strides[0] = 1; - desc.data.padded_offsets[0] = 0; - desc.data.offset0 = tDesc.getBlockingDesc().getOffsetPadding(); - return; - } - - if (tDesc.getLayout() == Layout::ANY) { - desc.data.format_kind = dnnl_format_kind_any; - desc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(tDesc.getPrecision())); - desc.data.ndims = dims.size(); - std::copy(dims.begin(), dims.end(), desc.data.dims); - std::copy(dims.begin(), dims.end(), desc.data.padded_dims); - desc.data.offset0 = tDesc.getBlockingDesc().getOffsetPadding(); - std::fill(desc.data.padded_offsets, desc.data.padded_offsets + dims.size(), 0); - return; + if (!dimsEqualWeak(blk_order, rhs.getOrder())) { + return false; } - auto ie_blkdDims = tDesc.getBlockingDesc().getBlockDims(); - auto ie_order = tDesc.getBlockingDesc().getOrder(); - auto ie_offsetsToData = tDesc.getBlockingDesc().getOffsetPaddingToData(); - auto ie_strides = tDesc.getBlockingDesc().getStrides(); - - size_t outer_ndims = dims.size(); - size_t inner_ndims = ie_order.size() - dims.size(); + //TODO [DS]: undefined offset is also used now as an indicator of undefined strides + if (desc.data.offset0 != Shape::UNDEFINED_DIM) { + // blocked strides + // [outer_strides via new_outer_order] U [inner_strides] + SizeVector blk_strides(total_ndims, 0); + std::copy(inner_strides.rbegin(), inner_strides.rend(), blk_strides.rbegin()); + std::transform(outer_order.begin(), outer_order.end(), blk_strides.begin(), + [&](size_t i) { return blk_desc.strides[i]; }); - bool is_descending_strides = true; - for (int i = 1; i < ie_strides.size(); i++) { - is_descending_strides &= (ie_strides[i-1] >= ie_strides[i]); + size_t skipAxis = this->getShape().getRank() > 0 && this->getShape().getDims().front() == 1 ? 0 : + Shape::UNDEFINED_DIM; //ignore batch axis if batch size == 1 + if (!dimsEqualWeak(blk_strides, rhs.getStrides(), skipAxis)) { + return false; + } } - // TODO: That's strong constrains and can be mitigated. IE::TensorDesc allow to transpose blocked dims - // and may be we can achieve correct "descending strides" form which allow conversion. 
- if (!is_descending_strides) - IE_THROW() << "Unsupported case for conversion"; + // blocked dims + // [dims via new_outer_order with auto pad] U [inner_blk_dims] + SizeVector blk_dims(total_ndims, 0); + std::copy(blk_desc.inner_blks, blk_desc.inner_blks + blk_desc.inner_nblks, + blk_dims.end() - blk_desc.inner_nblks); + std::transform(outer_order.begin(), outer_order.end(), blk_dims.begin(), + [&] (size_t i) { return outer_block_dims[i]; }); - std::vector outer_order(outer_ndims, outer_ndims + 1); // outer_order[i] is index of stride for i-th dimension - for (size_t i = 0; i < outer_ndims; i++) { - outer_order[ie_order[i]] = i; + if (!dimsEqualWeak(blk_dims, rhs.getBlockDims())) { + return false; } - bool outer_is_correct_permutation_of_n = - std::find(outer_order.begin(), outer_order.end(), outer_ndims + 1) == outer_order.end(); - - if (!outer_is_correct_permutation_of_n) - IE_THROW() << "Unsupported case for conversion"; - bool inner_block_are_dense = one_of(ie_strides.back(), 0, 1); // stride 1 - is dense case, 0 - broad casted - for (int i = outer_ndims; i < ie_strides.size() - 1; i++) { - inner_block_are_dense &= (ie_strides[i] == ie_strides[i+1] * ie_blkdDims[i+1]); + // offset padded to data. Same as for oneDNN + SizeVector blk_offset_to_data {desc.data.padded_offsets, desc.data.padded_offsets + desc.data.ndims}; + // TODO: The BlockedMemoryDesc implementation allow to specify offset_to_data for inner blocked dims. + // Which is not obvious behavior. It required offset_to_data.size == total_ndims, so will + // fill it with zero. + blk_offset_to_data.insert(blk_offset_to_data.end(), inner_ndims, 0); + if (!dimsEqualWeak(blk_offset_to_data, rhs.getOffsetPaddingToData())) { + return false; } - if (!inner_block_are_dense) - IE_THROW() << "Unsupported case for conversion"; - - bool inner_pad_offsets_is_zero = std::all_of(ie_offsetsToData.begin() + outer_ndims, ie_offsetsToData.end(), - [](size_t pad) { return pad == 0; }); - - if (!inner_pad_offsets_is_zero) - IE_THROW() << "Unsupported case for conversion"; + return dimsEqualWeak(desc.data.offset0, rhs.getOffsetPadding()); +} - // Fill general memory desc fields - desc.data.format_kind = dnnl_blocked; - desc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(tDesc.getPrecision())); - desc.data.ndims = dims.size(); - desc.data.offset0 = tDesc.getBlockingDesc().getOffsetPadding(); - std::copy(dims.begin(), dims.end(), desc.data.dims); - std::copy(ie_offsetsToData.begin(), ie_offsetsToData.begin() + outer_ndims, desc.data.padded_offsets); - std::fill(desc.data.padded_dims, desc.data.padded_dims + outer_ndims, 1); - for (size_t i = 0; i < ie_order.size(); i++) { - auto idx = ie_order[i]; - desc.data.padded_dims[idx] *= ie_blkdDims[i]; +bool MKLDNNMemoryDesc::hasLayoutType(LayoutType layoutType) const { + switch (layoutType) { + case LayoutType::ncsp: + return isPlainFormat(); + case LayoutType::nspc: + return isTailCFormat(); + case LayoutType::nCsp8c: + return isBlockedCFormat(8); + case LayoutType::nCsp16c: + return isBlockedCFormat(16); + default: + return false; } +} - // Fill blocking desc - auto &dnn_blk_desc = desc.data.format_desc.blocking; - dnn_blk_desc.inner_nblks = inner_ndims; - std::copy(ie_blkdDims.end() - inner_ndims, ie_blkdDims.end(), dnn_blk_desc.inner_blks); - std::copy(ie_order.end() - inner_ndims, ie_order.end(), dnn_blk_desc.inner_idxs); - for (size_t i = 0; i < outer_ndims; i++) { - dnn_blk_desc.strides[i] = ie_strides[outer_order[i]]; +std::string MKLDNNMemoryDesc::serializeFormat() const { + if 
(desc.data.format_kind == dnnl_format_kind_wino) { + switch (desc.data.format_desc.wino_desc.wino_format) { + case dnnl_wino_memory_format_t::dnnl_wino_wei_aaOIoi: return "wino_aaOIoi"; + case dnnl_wino_memory_format_t::dnnl_wino_wei_aaOio: return "wino_aaOio"; + case dnnl_wino_memory_format_t::dnnl_wino_wei_aaOBiOo: return "wino_aaOBiOo"; + case dnnl_wino_memory_format_t::dnnl_wino_wei_OBaaIBOIio: return "wino_OBaaIBOIio"; + default: return "wino_undef"; + } } + auto fmt = getFormat(); + return mkldnn::utils::fmt2str(fmt); } -bool MKLDNNMemoryDesc::blocksExtended() const { - for (int i = 0; i < desc.data.ndims; i++) { - if (desc.data.dims[i] != desc.data.padded_dims[i]) - return true; - } - return false; +bool MKLDNNMemoryDesc::isDefined() const { + return desc.data.offset0 != Shape::UNDEFINED_DIM; +} + +InferenceEngine::Precision MKLDNNMemoryDesc::getPrecision() const { + return MKLDNNExtensionUtils::DataTypeToIEPrecision(desc.data_type()); +} + +void MKLDNNMemoryDesc::setPrecision(InferenceEngine::Precision prc) { + desc.data.data_type = static_cast(MKLDNNExtensionUtils::IEPrecisionToDataType(prc)); } } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory.h b/inference-engine/src/mkldnn_plugin/mkldnn_memory.h index 5de42240dbadc4..d4cf4fc634b9bf 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_memory.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_memory.h @@ -6,13 +6,18 @@ #include "ie_layouts.h" #include "mkldnn_dims.h" +#include "cpu_memory_desc.h" +#include "mkldnn_extension_utils.h" #include #include +#include +#include #include #include #include #include +#include /** * @file contains a concept classes to work with memory/tensor/blob abstractions on plugin level. @@ -34,20 +39,15 @@ namespace MKLDNNPlugin { * Represent internal plugin abstraction of tensor description * */ -class MKLDNNMemoryDesc { +class MKLDNNMemoryDesc : public MemoryDesc { public: - /** Empty constructor - doesn't define any tensor representation */ - MKLDNNMemoryDesc(): desc() {} - /** Construct a tensor desc with plain layout format (like ND C array) */ - MKLDNNMemoryDesc(const mkldnn::memory::dims& dims, mkldnn::memory::data_type dataType); + MKLDNNMemoryDesc(const std::vector& _dims, mkldnn::memory::data_type dataType); /** Construct a tensor desc with specified layout format tag. 
Any and Undef is not supported */ - MKLDNNMemoryDesc(const mkldnn::memory::dims& dims, mkldnn::memory::data_type dataType, mkldnn::memory::format_tag format); - - explicit MKLDNNMemoryDesc(const InferenceEngine::TensorDesc& tDesc); - explicit MKLDNNMemoryDesc(const mkldnn::memory::desc& desc): desc(desc) {} + MKLDNNMemoryDesc(const std::vector& _dims, mkldnn::memory::data_type dataType, mkldnn::memory::format_tag format); + explicit MKLDNNMemoryDesc(const mkldnn::memory::desc& desc); /** * Try to define original format tag use on creation @@ -60,8 +60,6 @@ class MKLDNNMemoryDesc { return static_cast(desc.data.data_type); } - size_t GetElementSize() const; - MKLDNNDims getDims() const { return MKLDNNDims(desc.data.dims, desc.data.ndims); } @@ -75,15 +73,38 @@ class MKLDNNMemoryDesc { bool operator != (const MKLDNNMemoryDesc& rhs) const; operator mkldnn::memory::desc() const; - operator InferenceEngine::TensorDesc() const; + bool isSame(mkldnn::memory::format_tag fmt) const; + + dnnl_format_kind_t getFormatKind() const { + return desc.data.format_kind; + } + + std::unique_ptr clone() const override { + return MKLDNNPlugin::make_unique(*this); + } + + bool hasLayoutType(LayoutType layoutType) const override; + std::string serializeFormat() const override; + + bool isDefined() const override; + + InferenceEngine::Precision getPrecision() const override; + + void setPrecision(InferenceEngine::Precision prc) override; + + bool isCompatible(const MemoryDesc& rhs) const override; + bool isCompatible(const BlockedMemoryDesc& rhs) const; + bool isCompatible(const MKLDNNMemoryDesc& rhs) const; + +private: + size_t getElementOffset(size_t elemNumber) const override; + size_t getMemSizeImp() const override; bool isPlainFormat() const; bool isBlockedCFormat(size_t blk_size = UNREACHABLE_DIM) const; bool isTailCFormat() const; - bool isSame(mkldnn::memory::format_tag fmt) const; - private: static constexpr size_t UNREACHABLE_DIM = std::numeric_limits::max(); mkldnn::memory::desc desc; @@ -94,6 +115,12 @@ class MKLDNNMemory { public: explicit MKLDNNMemory(const mkldnn::engine& eng); + MKLDNNMemory(const MKLDNNMemory&) = delete; + MKLDNNMemory& operator= (const MKLDNNMemory&) = delete; + + MKLDNNMemory(MKLDNNMemory&&) = default; + MKLDNNMemory& operator= (MKLDNNMemory&&) = default; + const mkldnn::memory& GetPrimitive() const { return *prim; } @@ -106,10 +133,15 @@ class MKLDNNMemory { return prim->get_desc(); } - const MKLDNNMemoryDesc GetDesc() const { - return MKLDNNMemoryDesc {prim->get_desc()}; + const MemoryDesc& GetDesc() const { + return *pMemDesc; } + template ::value && !std::is_reference::value, int>::type = 0, + typename std::enable_if::value, int>::type = 0> + T GetDescWithType() const; + /** * Return handler of buffer. Real data may starts from some other offset * @return @@ -126,12 +158,7 @@ class MKLDNNMemory { * Like a GetData() but offset is applied. 
* @return */ - void* GetPtr() const { - auto ptr = static_cast(GetData()); - ptr += GetDescriptor().data.offset0 * GetDesc().GetElementSize(); - return ptr; - } - + void* GetPtr() const; mkldnn::memory::data_type GetDataType() const { return static_cast(GetDescriptor().data.data_type); @@ -145,19 +172,15 @@ class MKLDNNMemory { return {std::begin(data.dims), std::begin(data.dims) + data.ndims}; } - void Create(const mkldnn::memory::dims& dims, mkldnn::memory::data_type data_type, mkldnn::memory::format_tag format, - const void* data = nullptr); - - void Create(const mkldnn::memory::desc& desc, const void* data = nullptr, bool pads_zeroing = true); + void Create(const MemoryDesc& desc, const void* data = nullptr, bool pads_zeroing = true); // Like a plain format void SetData(mkldnn::memory::data_type dataType, mkldnn::memory::format_tag format, const void* data, size_t size, bool ftz = true) const; void SetData(const MKLDNNMemory& memory, size_t size = 0, bool ftz = true) const; void FillZero(); - static mkldnn::memory::format_tag GetPlainFormat(const mkldnn::memory::dims& dims); + static mkldnn::memory::format_tag GetPlainFormatByRank(size_t rank); static InferenceEngine::Layout GetPlainLayout(const mkldnn::memory::dims& dims); - static bool isConsistant(const mkldnn::memory::dims& dims, mkldnn::memory::format_tag format); static mkldnn::memory::format_tag Convert(const InferenceEngine::Layout layout); static InferenceEngine::Precision convertToIePrec(mkldnn::memory::data_type dataType); static mkldnn::memory::data_type convertToDataType(const InferenceEngine::Precision &precision); @@ -167,6 +190,17 @@ class MKLDNNMemory { static void reorderData(const MKLDNNMemory& input, const MKLDNNMemory& output, size_t size = 0); private: + void Create(const mkldnn::memory::dims& dims, mkldnn::memory::data_type data_type, mkldnn::memory::format_tag format, + const void* data = nullptr); + + void Create(const mkldnn::memory::desc& desc, const void* data = nullptr, bool pads_zeroing = true); + + const MKLDNNMemoryDesc GetMKLDNNDesc() const { + return MKLDNNMemoryDesc(prim->get_desc()); + } + +private: + MemoryDescPtr pMemDesc; std::shared_ptr prim; mkldnn::engine eng; }; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory_state.h b/inference-engine/src/mkldnn_plugin/mkldnn_memory_state.h index aaddd7e4575f86..3cbe768370cd01 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_memory_state.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_memory_state.h @@ -8,6 +8,7 @@ #include "blob_factory.hpp" #include "mkldnn_memory.h" #include "nodes/common/cpu_memcpy.h" +#include "cpu_memory_desc_utils.h" #include @@ -17,7 +18,7 @@ class MKLDNNVariableState : public InferenceEngine::IVariableStateInternal { public: MKLDNNVariableState(std::string name, MKLDNNMemoryPtr storage) : InferenceEngine::IVariableStateInternal{name} { - state = make_blob_with_precision(MKLDNNMemoryDesc(storage->GetDescriptor())); + state = make_blob_with_precision(MemoryDescUtils::convertToTensorDesc(storage->GetDesc())); state->allocate(); cpu_memcpy(state->buffer(), storage->GetData(), storage->GetSize()); } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp index e46c7a7b0bdf9e..7e29589caf9a21 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp @@ -55,6 +55,7 @@ #include "utils/general_utils.h" #include "utils/cpu_utils.hpp" #include "nodes/common/cpu_convert.h" +#include 
"cpu_memory_desc_utils.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -72,6 +73,8 @@ static const InferenceEngine::details::caseless_unordered_map { "FullyConnected", FullyConnected }, { "MaxPool", Pooling }, { "AvgPool", Pooling }, + { "AdaptiveMaxPool", AdaptivePooling}, + { "AdaptiveAvgPool", AdaptivePooling}, { "Add", Eltwise }, { "Subtract", Eltwise }, { "Multiply", Eltwise }, @@ -223,7 +226,9 @@ static const InferenceEngine::details::caseless_unordered_map { "ExperimentalDetectronPriorGridGenerator", ExperimentalDetectronPriorGridGenerator}, { "ExperimentalDetectronGenerateProposalsSingleImage", ExperimentalDetectronGenerateProposalsSingleImage}, { "ExtractImagePatches", ExtractImagePatches}, - { "NonMaxSuppressionIEInternal", NonMaxSuppression} + { "NonMaxSuppressionIEInternal", NonMaxSuppression}, + { "MatrixNms", MatrixNms}, + { "MulticlassNms", MulticlassNms} }; Type TypeFromName(const std::string type) { @@ -248,20 +253,16 @@ MKLDNNNode::MKLDNNNode(const std::shared_ptr& op, const mkldnn::en type(TypeFromName(op->get_type_name())), profiling(op->get_friendly_name()) { algorithm = Algorithm::Undefined; fusingPort = -1; - const std::string errorPrefix = "Ngraph operation " + std::string(op->get_type_name()) + " with name " + op->get_friendly_name(); - for (size_t i = 0; i < op->get_input_size(); i++) { - if (op->get_input_partial_shape(i).is_dynamic()) - IE_THROW() << errorPrefix << " has dynamic input shape on " << i << " port, but CPU plug-in supports only static shape"; - } - for (size_t i = 0; i < op->get_output_size(); i++) { - if (op->get_output_partial_shape(i).is_dynamic()) - IE_THROW() << errorPrefix << " has dynamic output shape on " << i << " port, but CPU plug-in supports only static shape"; - } for (size_t i = 0; i < op->get_input_size(); i++) { - const auto &shape = op->get_input_shape(i); - inDims.emplace_back(ngraph::is_scalar(shape) ? ngraph::Shape{1} : shape); + const auto &shape = op->get_input_partial_shape(i); + + bool isScalar = false; + if (shape.rank().is_static()) { + isScalar = shape.rank().get_length() == 0; + } + inputShapes.emplace_back(isScalar ? ngraph::PartialShape{1} : shape); originalInputPrecisions.emplace_back(details::convertPrecision(op->get_input_element_type(i))); } @@ -270,8 +271,13 @@ MKLDNNNode::MKLDNNNode(const std::shared_ptr& op, const mkldnn::en IE_THROW() << "Node with type '" << typeStr << "' and name '" << name << "' does not have any outputs."; } for (size_t i = 0; i < op->get_output_size(); i++) { - const auto &shape = op->get_output_shape(i); - outDims.emplace_back(ngraph::is_scalar(shape) ? ngraph::Shape{1} : shape); + const auto &shape = op->get_output_partial_shape(i); + + bool isScalar = false; + if (shape.rank().is_static()) { + isScalar = shape.rank().get_length() == 0; + } + outputShapes.emplace_back(isScalar ? 
ngraph::PartialShape{1} : shape); originalOutputPrecisions.emplace_back(details::convertPrecision(op->get_output_element_type(i))); } } @@ -418,9 +424,10 @@ void MKLDNNNode::selectPreferPrimitiveDescriptor(const std::vector= parent_spd->getConfig().outConfs.size()) { inNum = 0; } - if (MKLDNNExtensionUtils::initTensorsAreEqual( - getSupportedPrimitiveDescriptors()[i].getConfig().inConfs[j].desc, - parent_spd->getConfig().outConfs[inNum].desc)) { + auto& curDesc = getSupportedPrimitiveDescriptors()[i].getConfig().inConfs[j].desc; + auto& parentDesc = parent_spd->getConfig().outConfs[inNum].desc; + + if (curDesc->isCompatible(*parentDesc)) { equalsLocalFormatCount++; } } @@ -455,9 +462,9 @@ bool MKLDNNNode::canBeInPlace() const { return false; } - MKLDNNDims dims = getParentEdgeAt(0)->getDims(); + auto inShape = getParentEdgeAt(0)->getShape(); for (size_t cIdx = 0; cIdx < getChildEdges().size(); cIdx++) { - if (getChildEdgeAt(cIdx)->getDims() != dims) { + if (getChildEdgeAt(cIdx)->getShape() != inShape) { return false; } } @@ -465,7 +472,7 @@ bool MKLDNNNode::canBeInPlace() const { } void MKLDNNNode::resolveNotAllocatedEdges() { - const PrimitiveDescInfo *selected_pd = getSelectedPrimitiveDescriptor(); + const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor(); if (!selected_pd) IE_THROW() << "Cannot find selected primitive descriptor for node: " << getName(); for (size_t i = 0; i < getParentEdges().size() && i < selected_pd->getConfig().inConfs.size(); i++) { @@ -476,7 +483,7 @@ void MKLDNNNode::resolveNotAllocatedEdges() { auto * memPtr = reinterpret_cast(parentEdge->getMemory().GetData()); parentEdge->getMemoryPtr().reset(new MKLDNNMemory(getEngine())); - parentEdge->getMemoryPtr()->Create(MKLDNNMemoryDesc(selected_pd->getConfig().inConfs[i].desc), memPtr); + parentEdge->getMemoryPtr()->Create(*selected_pd->getConfig().inConfs[i].desc, memPtr); parentEdge->changeStatus(MKLDNNEdge::Status::Allocated); } @@ -488,7 +495,7 @@ void MKLDNNNode::resolveNotAllocatedEdges() { auto * memPtr = reinterpret_cast(childEdge->getMemory().GetData()); childEdge->getMemoryPtr().reset(new MKLDNNMemory(getEngine())); - childEdge->getMemoryPtr()->Create(MKLDNNMemoryDesc(selected_pd->getConfig().outConfs[i].desc), memPtr); + childEdge->getMemoryPtr()->Create(*selected_pd->getConfig().outConfs[i].desc, memPtr); childEdge->changeStatus(MKLDNNEdge::Status::Allocated); } @@ -543,14 +550,14 @@ std::string MKLDNNNode::getPrimitiveDescriptorType() { // it is mixed precision. 
if (selectedPrimitiveDesc) { if (!selectedPrimitiveDesc->getConfig().inConfs.empty()) { - if (selectedPrimitiveDesc->getConfig().inConfs[0].desc.getPrecision() != InferenceEngine::Precision::U8) { - str_type += "_" + std::string(selectedPrimitiveDesc->getConfig().inConfs[0].desc.getPrecision().name()); + if (selectedPrimitiveDesc->getConfig().inConfs[0].desc->getPrecision() != InferenceEngine::Precision::U8) { + str_type += "_" + std::string(selectedPrimitiveDesc->getConfig().inConfs[0].desc->getPrecision().name()); } else { str_type += "_I8"; } } else { - if (selectedPrimitiveDesc->getConfig().outConfs[0].desc.getPrecision() != InferenceEngine::Precision::U8) { - str_type += "_" + std::string(selectedPrimitiveDesc->getConfig().outConfs[0].desc.getPrecision().name()); + if (selectedPrimitiveDesc->getConfig().outConfs[0].desc->getPrecision() != InferenceEngine::Precision::U8) { + str_type += "_" + std::string(selectedPrimitiveDesc->getConfig().outConfs[0].desc->getPrecision().name()); } else { str_type += "_I8"; } @@ -579,7 +586,7 @@ const MKLDNNEdgePtr MKLDNNNode::getChildEdgeAt(size_t idx) const { } const std::vector MKLDNNNode::getParentEdgesAtPort(size_t idx) const { - if (idx >= inDims.size()) + if (idx >= inputShapes.size()) IE_THROW() << "Node " << getName() << " contains less input ports than " << idx; std::vector res; @@ -593,7 +600,7 @@ const std::vector MKLDNNNode::getParentEdgesAtPort(size_t idx) co } const std::vector MKLDNNNode::getChildEdgesAtPort(size_t idx) const { - if (idx >= outDims.size()) + if (idx >= outputShapes.size()) IE_THROW() << "Node " << getName() << " contains less output ports than " << idx; std::vector res; @@ -607,18 +614,18 @@ const std::vector MKLDNNNode::getChildEdgesAtPort(size_t idx) con } -std::vector MKLDNNNode::getAvailableFormatsForDims(const MKLDNNDims &dims) const { - if (dims.ndims() == 0) +std::vector MKLDNNNode::getAvailableFormatsForDims(const Shape &dims) const { + if (dims.getRank() == 0) return {memory::format_tag::x}; - else if (dims.ndims() == 1) + else if (dims.getRank() == 1) return {memory::format_tag::x}; - else if (dims.ndims() == 2) + else if (dims.getRank() == 2) return {memory::format_tag::nc}; - else if (dims.ndims() == 3) + else if (dims.getRank() == 3) return {memory::format_tag::tnc, memory::format_tag::ntc}; - else if (dims.ndims() == 4) + else if (dims.getRank() == 4) return {memory::format_tag::nchw, memory::format_tag::nChw8c, memory::format_tag::nChw16c}; - else if (dims.ndims() == 5) + else if (dims.getRank() == 5) return {memory::format_tag::ncdhw, memory::format_tag::nCdhw8c, memory::format_tag::nCdhw16c}; return {memory::format_tag::any}; } @@ -637,22 +644,22 @@ void MKLDNNNode::initSupportedPrimitiveDescriptors() { auto itpd = desc.createPrimitiveDescriptorIterator(engine); while (static_cast(itpd)) { - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; for (size_t i = 0; i < descInputNumbers(desc); i++) { - InferenceEngine::DataConfig dataConfig; - dataConfig.inPlace = -1; - dataConfig.constant = false; - dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(getSrcMemDesc(itpd, i)); - config.inConfs.push_back(dataConfig); + PortConfig portConfig; + portConfig.inPlace = -1; + portConfig.constant = false; + portConfig.desc = MemoryDescUtils::applyUndefinedOffset(*getSrcMemDesc(itpd, i)); + config.inConfs.push_back(portConfig); } for (size_t i = 0; i < descOutputNumbers(desc); i++) { - InferenceEngine::DataConfig dataConfig; - dataConfig.inPlace = canBeInPlace() ? 
0 : -1; - dataConfig.constant = false; - dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(getDstMemDesc(itpd, i)); - config.outConfs.push_back(dataConfig); + PortConfig portConfig; + portConfig.inPlace = canBeInPlace() ? 0 : -1; + portConfig.constant = false; + portConfig.desc = MemoryDescUtils::applyUndefinedOffset(*getDstMemDesc(itpd, i)); + config.outConfs.push_back(portConfig); } impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str()); @@ -665,15 +672,12 @@ void MKLDNNNode::initSupportedPrimitiveDescriptors() { void MKLDNNNode::filterSupportedPrimitiveDescriptors() { // Compare by partial layout descriptor (without particular strides values) - auto areCompatible = [](const TensorDesc& tdesc, mkldnn::memory::format_tag fmt) { - TensorDesc fmt_tdesc = MKLDNNMemoryDesc{ - MKLDNNDims(tdesc.getDims()), - MKLDNNExtensionUtils::IEPrecisionToDataType(tdesc.getPrecision()), - fmt}; - - auto tmp_partial_tdesc = PartialBlkDesc::extractFrom(fmt_tdesc); - auto actual_partial_tdesc = PartialBlkDesc::extractFrom(tdesc); - return tmp_partial_tdesc == actual_partial_tdesc; + auto areCompatible = [](const MemoryDesc& desc, mkldnn::memory::format_tag fmt) -> bool { + MKLDNNMemoryDesc fmt_tdesc = MKLDNNMemoryDesc{desc.getShape().getStaticDims(), + MKLDNNExtensionUtils::IEPrecisionToDataType(desc.getPrecision()), + fmt}; + + return desc.isCompatible(fmt_tdesc); }; if (!inputMemoryFormatsFilter.empty() || !outputMemoryFormatsFilter.empty()) { @@ -685,11 +689,11 @@ void MKLDNNNode::filterSupportedPrimitiveDescriptors() { bool isSuitableDesc = true; for (int i = 0; i < inputMemoryFormatsFilter.size(); i++) { - const bool matched = areCompatible(config.inConfs[i].desc, inputMemoryFormatsFilter[i]); + const bool matched = areCompatible(*config.inConfs[i].desc, inputMemoryFormatsFilter[i]); isSuitableDesc &= matched; } for (int i = 0; i < outputMemoryFormatsFilter.size(); i++) { - const bool matched = areCompatible(config.outConfs[i].desc, outputMemoryFormatsFilter[i]); + const bool matched = areCompatible(*config.outConfs[i].desc, outputMemoryFormatsFilter[i]); isSuitableDesc &= matched; } if (!isSuitableDesc) { @@ -701,22 +705,22 @@ void MKLDNNNode::filterSupportedPrimitiveDescriptors() { } } -void MKLDNNNode::initDescriptor(const InferenceEngine::LayerConfig &config) { +void MKLDNNNode::initDescriptor(const NodeConfig& config) { auto* selectedPD = getSelectedPrimitiveDescriptor(); if (!selectedPD) { return; } - std::vector inDescs; + std::vector inDescs; for (const auto& inConf : config.inConfs) - inDescs.push_back(inConf.desc); - std::vector outDescs; + inDescs.push_back(inConf.desc.get()); + std::vector outDescs; for (const auto& outConf : config.outConfs) - outDescs.push_back(outConf.desc); - createDescriptor({inDescs}, {outDescs}); + outDescs.push_back(outConf.desc.get()); + createDescriptor(inDescs, outDescs); std::shared_ptr attr = initPrimitiveAttr(); - InferenceEngine::LayerConfig rightConfig = selectedPD->getConfig(); + NodeConfig rightConfig = selectedPD->getConfig(); size_t selected_count = 0; for (size_t j = 0; j < descs.size(); j++) { const auto &desc = descs[j]; @@ -727,10 +731,10 @@ void MKLDNNNode::initDescriptor(const InferenceEngine::LayerConfig &config) { itpd = desc.createPrimitiveDescriptorIterator(engine, *(attr.get())); } while (static_cast(itpd)) { - InferenceEngine::LayerConfig cfg; + NodeConfig cfg; cfg.dynBatchSupport = true; for (size_t i = 0; i < descInputNumbers(desc); i++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace 
= canBeInPlace() ? 0 : -1; dataConfig.constant = false; dataConfig.desc = getSrcMemDesc(itpd, i); @@ -738,7 +742,7 @@ void MKLDNNNode::initDescriptor(const InferenceEngine::LayerConfig &config) { } for (size_t i = 0; i < descOutputNumbers(desc); i++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; dataConfig.desc = getDstMemDesc(itpd, i); @@ -768,23 +772,21 @@ void MKLDNNNode::initDescriptor(const InferenceEngine::LayerConfig &config) { return; for (size_t i = 0; i < selectedConfig.inConfs.size(); i++) { - if (selectedConfig.inConfs[i].desc.getLayout() != InferenceEngine::Layout::ANY && - !MKLDNNExtensionUtils::initTensorsAreEqual(selectedConfig.inConfs[i].desc, config.inConfs[i].desc)) + if (!selectedConfig.inConfs[i].desc->isCompatible(*config.inConfs[i].desc)) IE_THROW() << "Incorrect descriptor for node: " << getName(); } for (size_t i = 0; i < selectedConfig.outConfs.size(); i++) { - if (selectedConfig.outConfs[i].desc.getLayout() != InferenceEngine::Layout::ANY && - !MKLDNNExtensionUtils::initTensorsAreEqual(selectedConfig.outConfs[i].desc, config.outConfs[i].desc)) + if (!selectedConfig.outConfs[i].desc->isCompatible(*config.outConfs[i].desc)) IE_THROW() << "Incorrect descriptor for node: " << getName(); } rightConfig = config; } - selectedPD->getConfig() = rightConfig; + selectedPD->setConfig(rightConfig); } -void MKLDNNNode::prepareMemory(const PrimitiveDescInfo *selected_pd, mkldnn::primitive_desc_iterator& itpd) { +void MKLDNNNode::prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_desc_iterator& itpd) { for (size_t i = 0; i < getChildEdges().size(); i++) { auto &dstMemPtr = getChildEdgeAt(i)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr()) @@ -806,7 +808,8 @@ void MKLDNNNode::prepareMemory(const PrimitiveDescInfo *selected_pd, mkldnn::pri const auto &internalBlob = internalBlobs[i]; auto create = [&] () { - auto newDesc = MKLDNNMemoryDesc(internalBlob->getTensorDesc()); + // TODO [DS]: internal blobs should be removed or rewritten using Memory object + auto newDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(internalBlob->getTensorDesc()); MKLDNNMemory memory{ engine }; memory.Create(newDesc, internalBlob->buffer()); @@ -947,119 +950,60 @@ const std::vector& MKLDNNNode::getPrimitivesPriority() { return implPriorities; } -bool MKLDNNNode::isUninitTensorDesc(const InferenceEngine::TensorDesc& desc) const { - if (desc.getLayout() == InferenceEngine::Layout::ANY) - return true; - - if (desc.getBlockingDesc().getOffsetPadding() == std::numeric_limits::max()) - return true; - - for (size_t i = 0; i < desc.getBlockingDesc().getOrder().size(); i++) { - if (desc.getBlockingDesc().getOffsetPaddingToData()[i] == std::numeric_limits::max() || - desc.getBlockingDesc().getStrides()[i] == std::numeric_limits::max()) - return true; - } - - return false; -} - -InferenceEngine::TensorDesc MKLDNNNode::getConfiguredInputDesc(const InferenceEngine::LayerConfig& config, size_t idx) const { - if (!isUninitTensorDesc(config.inConfs[idx].desc)) - return config.inConfs[idx].desc; - +std::unique_ptr MKLDNNNode::getDefinedInputDesc(const NodeConfig &config, size_t idx) const { int num = getParentEdgeAt(idx)->getInputNum(); auto *selectedPD = getParentEdgeAt(idx)->getParent()->getSelectedPrimitiveDescriptor(); if (!selectedPD) IE_THROW() << "Cannot get selected primitive descriptor for node: " << getParentEdgeAt(idx)->getParent()->getName(); - if (selectedPD->getConfig().outConfs.size() <= num) - num = 0; 
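
For context on what `MemoryDesc::isDefined()` now encapsulates: the `isUninitTensorDesc` helper removed above treated a descriptor as uninitialized when its layout was `ANY` or when any offset or stride was still the `max()` sentinel. Reconstructed with the `<size_t>` template arguments that this diff rendering dropped, the old check read roughly as follows (legacy `InferenceEngine::TensorDesc` API only):

```cpp
#include <ie_layouts.h>
#include <limits>

bool isUninitTensorDesc(const InferenceEngine::TensorDesc& desc) {
    // Layout ANY means "no concrete layout chosen yet".
    if (desc.getLayout() == InferenceEngine::Layout::ANY)
        return true;

    const auto& blk = desc.getBlockingDesc();
    // max() offsets/strides are the sentinel for "value not computed yet".
    if (blk.getOffsetPadding() == std::numeric_limits<size_t>::max())
        return true;

    for (size_t i = 0; i < blk.getOrder().size(); i++) {
        if (blk.getOffsetPaddingToData()[i] == std::numeric_limits<size_t>::max() ||
            blk.getStrides()[i] == std::numeric_limits<size_t>::max())
            return true;
    }
    return false;
}
```
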
+ if (config.inConfs[idx].desc->isDefined()) { + return config.inConfs[idx].desc->clone(); + } if (config.inConfs[idx].inPlace >= 0) { - return getConfiguredOutputDesc(config, static_cast(config.inConfs[idx].inPlace)); + return getDefinedOutputDesc(config, static_cast(config.inConfs[idx].inPlace)); } if (num >= 0) { auto parentConf = selectedPD->getConfig().outConfs[num]; - parentConf.desc.setPrecision(config.inConfs[idx].desc.getPrecision()); - if (isUninitTensorDesc(parentConf.desc) && parentConf.inPlace >= 0) + parentConf.desc->setPrecision(config.inConfs[idx].desc->getPrecision()); + if (!parentConf.desc->isDefined() && parentConf.inPlace >= 0) getParentEdgeAt(idx)->getParent()->initOptimalPrimitiveDescriptor(); parentConf = getParentEdgeAt(idx)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num]; - if (!isUninitTensorDesc(parentConf.desc) && - MKLDNNExtensionUtils::initTensorsAreEqual(parentConf.desc, config.inConfs[idx].desc)) { - return parentConf.desc; - } - - if (config.inConfs[idx].desc.getLayout() == InferenceEngine::Layout::ANY && - parentConf.desc.getLayout() != InferenceEngine::Layout::ANY) { - return InferenceEngine::TensorDesc(parentConf.desc.getPrecision(), - parentConf.desc.getDims(), { - parentConf.desc.getBlockingDesc().getBlockDims(), - parentConf.desc.getBlockingDesc().getOrder() - }); + if (parentConf.desc->isDefined() && parentConf.desc->isCompatible(*config.inConfs[idx].desc)) { + return parentConf.desc->clone(); } } - if (config.inConfs[idx].desc.getLayout() != InferenceEngine::Layout::ANY) { - return InferenceEngine::TensorDesc(config.inConfs[idx].desc.getPrecision(), - config.inConfs[idx].desc.getDims(), { - config.inConfs[idx].desc.getBlockingDesc().getBlockDims(), - config.inConfs[idx].desc.getBlockingDesc().getOrder() - }); - } - - return InferenceEngine::TensorDesc(config.inConfs[idx].desc.getPrecision(), - config.inConfs[idx].desc.getDims(), - InferenceEngine::TensorDesc::getLayoutByDims(config.inConfs[idx].desc.getDims())); + return MemoryDescUtils::resetOffset(config.inConfs[idx].desc.get()); } -InferenceEngine::TensorDesc MKLDNNNode::getConfiguredOutputDesc(const InferenceEngine::LayerConfig& config, size_t idx) const { - if (!isUninitTensorDesc(config.outConfs[idx].desc)) - return config.outConfs[idx].desc; - +std::unique_ptr MKLDNNNode::getDefinedOutputDesc(const NodeConfig &config, size_t idx) const { int num = getChildEdgeAt(idx)->getOutputNum(); auto *selectedPD = getChildEdgeAt(idx)->getChild()->getSelectedPrimitiveDescriptor(); if (!selectedPD) IE_THROW() << "Cannot get selected primitive descriptor for node: " << getChildEdgeAt(idx)->getChild()->getName(); - if (selectedPD->getConfig().inConfs.size() <= num) - num = 0; + if (config.outConfs[idx].desc->isDefined()) { + return config.outConfs[idx].desc->clone(); + } if (config.outConfs[idx].inPlace >= 0) { - return getConfiguredInputDesc(config, static_cast(config.outConfs[idx].inPlace)); + return getDefinedInputDesc(config, static_cast(config.outConfs[idx].inPlace)); } if (num >= 0) { auto childConf = selectedPD->getConfig().inConfs[num]; - childConf.desc.setPrecision(config.outConfs[idx].desc.getPrecision()); - if (isUninitTensorDesc(childConf.desc) && childConf.inPlace >= 0) + childConf.desc->setPrecision(config.outConfs[idx].desc->getPrecision()); + if (!childConf.desc->isDefined() && childConf.inPlace >= 0) getChildEdgeAt(idx)->getChild()->initOptimalPrimitiveDescriptor(); childConf = 
getChildEdgeAt(idx)->getChild()->getSelectedPrimitiveDescriptor()->getConfig().inConfs[num]; - if (!isUninitTensorDesc(childConf.desc) && - MKLDNNExtensionUtils::initTensorsAreEqual(childConf.desc, config.outConfs[idx].desc)) { - return childConf.desc; + if (childConf.desc->isDefined() && childConf.desc->isCompatible(*config.outConfs[idx].desc)) { + return childConf.desc->clone(); } - if (config.outConfs[idx].desc.getLayout() == InferenceEngine::Layout::ANY && - childConf.desc.getLayout() != InferenceEngine::Layout::ANY) { - return InferenceEngine::TensorDesc(childConf.desc.getPrecision(), - childConf.desc.getDims(), { - childConf.desc.getBlockingDesc().getBlockDims(), - childConf.desc.getBlockingDesc().getOrder() - }); - } - } - - if (config.outConfs[idx].desc.getLayout() != InferenceEngine::Layout::ANY) { - return InferenceEngine::TensorDesc(config.outConfs[idx].desc.getPrecision(), - config.outConfs[idx].desc.getDims(), { - config.outConfs[idx].desc.getBlockingDesc().getBlockDims(), - config.outConfs[idx].desc.getBlockingDesc().getOrder() - }); } - return InferenceEngine::TensorDesc(config.outConfs[idx].desc.getPrecision(), - config.outConfs[idx].desc.getDims(), - InferenceEngine::TensorDesc::getLayoutByDims(config.outConfs[idx].desc.getDims())); + return MemoryDescUtils::resetOffset(config.outConfs[idx].desc.get()); } void MKLDNNNode::initOptimalPrimitiveDescriptor() { @@ -1067,17 +1011,13 @@ void MKLDNNNode::initOptimalPrimitiveDescriptor() { if (selected_pd == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; auto config = selected_pd->getConfig(); - if (!isInitConfig(config)) { + if (!isConfigDefined(config)) { for (size_t i = 0; i < config.inConfs.size(); i++) { - // TensorDescriptor constructor which is called inside getConfiguredInputDesc incorrectly computes offset field. - // What's why MKLDNNMemoryDesc routine is used to reinitialize TD with expected offset values. - config.inConfs[i].desc = MKLDNNMemoryDesc(getConfiguredInputDesc(config, i)); + config.inConfs[i].desc = getDefinedInputDesc(config, i); } for (size_t i = 0; i < config.outConfs.size(); i++) { - // TensorDescriptor constructor which is called inside getConfiguredOutputDesc incorrectly computes offset field. - // What's why MKLDNNMemoryDesc routine is used to reinitialize TD with expected offset values. 
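
The control flow that replaces the long `TensorDesc` branches above is easier to see with toy types. The sketch below is only an analogy for how `initOptimalPrimitiveDescriptor` now treats ports: descriptors that are already defined are kept, anything else is replaced by a concrete clone derived from the neighbouring edge (faked here by `makeDefined`); none of these toy types belong to the plugin.

```cpp
#include <memory>
#include <vector>

// Toy stand-ins for MemoryDesc / PortConfig / NodeConfig, used only to show the shape
// of the new logic; the real classes live in the CPU plugin.
struct Desc {
    bool defined = false;
    virtual ~Desc() = default;
    std::unique_ptr<Desc> clone() const { return std::make_unique<Desc>(*this); }
    bool isDefined() const { return defined; }
};

struct PortCfg { std::unique_ptr<Desc> desc; };
struct NodeCfg { std::vector<PortCfg> inConfs, outConfs; };

// Stand-in for getDefinedInputDesc/getDefinedOutputDesc: the real code consults the
// connected node's selected primitive descriptor before falling back to resetOffset().
std::unique_ptr<Desc> makeDefined(const Desc& d) {
    auto out = d.clone();
    out->defined = true;
    return out;
}

void initOptimalConfig(NodeCfg& cfg) {
    for (auto& p : cfg.inConfs)
        if (!p.desc->isDefined()) p.desc = makeDefined(*p.desc);
    for (auto& p : cfg.outConfs)
        if (!p.desc->isDefined()) p.desc = makeDefined(*p.desc);
}

int main() {
    NodeCfg cfg;
    cfg.inConfs.push_back({std::make_unique<Desc>()});
    cfg.outConfs.push_back({std::make_unique<Desc>()});
    initOptimalConfig(cfg);
    // Both ports now hold defined descriptors, analogous to isConfigDefined() returning true.
    return cfg.inConfs[0].desc->isDefined() && cfg.outConfs[0].desc->isDefined() ? 0 : 1;
}
```
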
- config.outConfs[i].desc = MKLDNNMemoryDesc(getConfiguredOutputDesc(config, i)); + config.outConfs[i].desc = getDefinedOutputDesc(config, i); } initDescriptor(config); @@ -1086,38 +1026,22 @@ void MKLDNNNode::initOptimalPrimitiveDescriptor() { } } -bool MKLDNNNode::isInitConfig(const InferenceEngine::LayerConfig& config) const { +bool MKLDNNNode::isConfigDefined(const NodeConfig &config) const { for (const auto& configs : {config.inConfs, config.outConfs}) { for (const auto &dc : configs) { - if (isUninitTensorDesc(dc.desc)) + if (!dc.desc->isDefined()) return false; } } return true; } -MKLDNNMemoryDesc MKLDNNNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { - InferenceEngine::TensorDesc desc = MKLDNNMemoryDesc(primitive_desc_it.src_desc(idx)); - if (desc.getLayout() == InferenceEngine::Layout::ANY) - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getParentEdgeAt(idx)->getDims().ToSizeVector(), - desc.getLayout())); - else - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getParentEdgeAt(idx)->getDims().ToSizeVector(), - desc.getBlockingDesc())); +std::unique_ptr MKLDNNNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + return MKLDNNPlugin::make_unique(primitive_desc_it.src_desc(idx)); } -MKLDNNMemoryDesc MKLDNNNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { - InferenceEngine::TensorDesc desc = MKLDNNMemoryDesc(primitive_desc_it.dst_desc(idx)); - if (desc.getLayout() == InferenceEngine::Layout::ANY) - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getChildEdgeAt(idx)->getDims().ToSizeVector(), - desc.getLayout())); - else - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getChildEdgeAt(idx)->getDims().ToSizeVector(), - desc.getBlockingDesc())); +std::unique_ptr MKLDNNNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + return MKLDNNPlugin::make_unique(primitive_desc_it.dst_desc(idx)); } int MKLDNNNode::batchToProcess() { @@ -1126,15 +1050,15 @@ int MKLDNNNode::batchToProcess() { int MKLDNNNode::getMaxBatch() { // FIXME: batch != 0 dims number - if (!inDims.empty()) { - if (inDims[0].ndims()) - return inDims[0][0]; + if (!inputShapes.empty()) { + if (inputShapes[0].getRank()) + return static_cast(inputShapes[0].getStaticDims()[0]); else return 1; } - if (!outDims.empty() && outDims[0].ndims()) { - if (outDims[0].ndims()) - return outDims[0][0]; + if (!outputShapes.empty()) { + if (outputShapes[0].getRank()) + return static_cast(outputShapes[0].getStaticDims()[0]); else return 1; } @@ -1323,12 +1247,12 @@ bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const } const auto isBroadcastableToDataInput = [&]() { - const auto dataShape = getParentEdgeAt(fusingPort)->getDims().ToSizeVector(); + const auto dataShape = getParentEdgeAt(fusingPort)->getShape().getStaticDims(); for (size_t i = 0; i < getParentEdges().size(); i++) { if (i == fusingPort) continue; - auto weightShape = getParentEdgeAt(i)->getDims().ToSizeVector(); - if (!isPerTensorOrPerChannelBroadcastable(dataShape, weightShape)) + auto weightShape = getParentEdgeAt(i)->getShape().getStaticDims(); + if (getParentEdgesAtPort(i)[0]->getParent()->getChildEdges().size() != 1 || !isPerTensorOrPerChannelBroadcastable(dataShape, weightShape)) return false; } return true; @@ -1351,7 +1275,11 @@ bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) 
const bool MKLDNNNode::canFuseSimpleOperation(const MKLDNNNodePtr& node) const { if (node->getType() == FakeQuantize) { - return node->getAlgorithm() != FQBinarization; + bool ret = node->getAlgorithm() != FQBinarization; + for (size_t i = 1; i < node->getParentEdges().size(); i++) { + ret &= node->getParentEdgesAtPort(i)[0]->getParent()->getChildEdges().size() == 1; + } + return ret; } else if (node->getType() == Eltwise) { return one_of(node->getAlgorithm(), EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseTanh, EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven, @@ -1396,7 +1324,7 @@ void MKLDNNNode::fillScalesAndShifts(const MKLDNNNode *parentNode, std::vector(outDims[0][outDims[0].ndims() > 1 ? 1 : 0]); + const size_t bufferSize = static_cast(outputShapes[0].getStaticDims()[outputShapes[0].getRank() > 1 ? 1 : 0]); if (align == -1) { align = bufferSize; } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.h b/inference-engine/src/mkldnn_plugin/mkldnn_node.h index 29618d51fdbaf5..77dab59e904d09 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.h @@ -26,8 +26,10 @@ #include #include #include -#include +#include #include "cpu_types.h" +#include "cpu_shape.h" +#include "cpu_memory_desc.h" namespace MKLDNNPlugin { @@ -54,6 +56,8 @@ static std::string NameFromType(Type type) { return "Lrn"; case Pooling: return "Pooling"; + case AdaptivePooling: + return "AdaptivePooling"; case FullyConnected: return "FullyConnected"; case MatMul: @@ -192,89 +196,101 @@ static std::string NameFromType(Type type) { return "ExtractImagePatches"; case NonMaxSuppression: return "NonMaxSuppression"; + case MatrixNms: + return "MatrixNms"; + case MulticlassNms: + return "MulticlassNms"; default: return "Unknown"; } } -class PrimitiveDescInfo { +class PortConfigurator { public: - PrimitiveDescInfo(const InferenceEngine::LayerConfig& conf, impl_desc_type type): config(conf) { - implementationType = type; - } + PortConfigurator(MKLDNNPlugin::LayoutType blockedDescType, InferenceEngine::Precision prc, const Shape& shape, + bool constant = false, int inPlace = -1) : + blockedDescCreator(getBlockedDescCreator(blockedDescType)), prc(prc), shape(shape), constant(constant), inPlace(inPlace) {} - PrimitiveDescInfo(const InferenceEngine::LayerConfig& conf, impl_desc_type type, const std::vector& outFmts): config(conf) { - implementationType = type; - outputLayouts = outFmts; + PortConfigurator(MKLDNNPlugin::LayoutType blockedDescType, InferenceEngine::Precision prc = InferenceEngine::Precision::UNSPECIFIED, + bool constant = false, int inPlace = -1) : + blockedDescCreator(getBlockedDescCreator(blockedDescType)), prc(prc), constant(constant), inPlace(inPlace) {} + + MKLDNNPlugin::BlockedDescCreator::CreatorConstPtr blockedDescCreator; + const InferenceEngine::Precision prc; + const Shape shape; + bool constant = false; + int inPlace = -1; + +private: + static MKLDNNPlugin::BlockedDescCreator::CreatorConstPtr getBlockedDescCreator(MKLDNNPlugin::LayoutType blockedDescType) { + auto& creators = MKLDNNPlugin::BlockedDescCreator::getCommonCreators(); + if (creators.find(blockedDescType) == creators.end()) { + IE_THROW() << "Cannot find tensor descriptor creator"; + } + return creators.at(blockedDescType); } +}; - PrimitiveDescInfo(const InferenceEngine::LayerConfig& conf, impl_desc_type type, mkldnn::memory::format_tag outFmt): config(conf) { - implementationType = type; +struct PortConfig { + 
PortConfig() = default; - setOutputLayouts(outFmt); + PortConfig(const PortConfig& rhs) { + this->constant = rhs.constant; + this->inPlace = rhs.inPlace; + if (rhs.desc) { + this->desc = rhs.desc->clone(); + } } - PrimitiveDescInfo(const PrimitiveDescInfo &descInfo) = default; - PrimitiveDescInfo(PrimitiveDescInfo &&descInfo) = default; + PortConfig& operator=(const PortConfig& rhs) { + this->constant = rhs.constant; + this->inPlace = rhs.inPlace; + if (rhs.desc) { + this->desc = rhs.desc->clone(); + } + return *this; + } - PrimitiveDescInfo &operator=(const PrimitiveDescInfo &descInfo) = default; + PortConfig(PortConfig&& rhs) = default; + PortConfig& operator=(PortConfig&& rhs) = default; - const InferenceEngine::LayerConfig getConfig() const { - return config; + // TODO [DS]: better to make private and const + bool constant = false; + int inPlace = -1; + std::unique_ptr desc; +}; + +struct NodeConfig { + bool dynBatchSupport = false; + std::vector inConfs; + std::vector outConfs; +}; + +class NodeDesc { +public: + NodeDesc(const NodeConfig& conf, impl_desc_type type): config(conf) { + implementationType = type; } - InferenceEngine::LayerConfig& getConfig() { + + const NodeConfig& getConfig() const { return config; } - impl_desc_type getImplementationType() const { - return implementationType; + void setConfig(const NodeConfig& config) { + this->config = config; } - const std::vector& getOutputLayouts() const { - return outputLayouts; + impl_desc_type getImplementationType() const { + return implementationType; } void setImplementationType(impl_desc_type type) { implementationType = type; } - void setOutputLayouts(mkldnn::memory::format_tag outFmt) { - outputLayouts.clear(); - - for (int i = 0; i < config.outConfs.size(); i++) { - outputLayouts.push_back(outFmt); - } - } - private: - InferenceEngine::LayerConfig config; + NodeConfig config; impl_desc_type implementationType; - std::vector outputLayouts; -}; - -class DataConfigurator { -public: - DataConfigurator(MKLDNNPlugin::TensorDescCreatorTypes tensorDescType, InferenceEngine::Precision prc, const InferenceEngine::SizeVector& shape, - bool constant = false, int inplace = -1) : - tensorDescCreator(getTensorDescCreator(tensorDescType)), prc(prc), shape(shape), constant(constant), inplace(inplace) {} - - DataConfigurator(MKLDNNPlugin::TensorDescCreatorTypes tensorDescType, InferenceEngine::Precision prc = InferenceEngine::Precision::UNSPECIFIED, - bool constant = false, int inplace = -1) : - tensorDescCreator(getTensorDescCreator(tensorDescType)), prc(prc), shape({}), constant(constant), inplace(inplace) {} - - const MKLDNNPlugin::TensorDescCreator::CreatorConstPtr tensorDescCreator; - const InferenceEngine::Precision prc = InferenceEngine::Precision::UNSPECIFIED; - const InferenceEngine::SizeVector shape; - const bool constant = false; - const int inplace = -1; -private: - static MKLDNNPlugin::TensorDescCreator::CreatorConstPtr getTensorDescCreator(MKLDNNPlugin::TensorDescCreatorTypes tensorDescType) { - auto& creators = MKLDNNPlugin::TensorDescCreator::getCommonCreators(); - if (creators.find(tensorDescType) == creators.end()) { - IE_THROW() << "Cannot find tensor descriptor creator"; - } - return creators.at(tensorDescType); - } }; class MKLDNNNode { @@ -420,18 +436,18 @@ class MKLDNNNode { return type; } - const std::vector& getSupportedPrimitiveDescriptors() const { + const std::vector& getSupportedPrimitiveDescriptors() const { return supportedPrimitiveDescriptors; } - inline const PrimitiveDescInfo* 
getSelectedPrimitiveDescriptor() const { + inline const NodeDesc* getSelectedPrimitiveDescriptor() const { if (selectedPrimitiveDescriptorIndex < 0 || selectedPrimitiveDescriptorIndex >= supportedPrimitiveDescriptors.size()) return nullptr; return &supportedPrimitiveDescriptors[selectedPrimitiveDescriptorIndex]; } - inline PrimitiveDescInfo* getSelectedPrimitiveDescriptor() { + inline NodeDesc* getSelectedPrimitiveDescriptor() { if (selectedPrimitiveDescriptorIndex < 0 || selectedPrimitiveDescriptorIndex >= supportedPrimitiveDescriptors.size()) return nullptr; @@ -467,9 +483,10 @@ class MKLDNNNode { virtual void initOptimalPrimitiveDescriptor(); virtual void getSupportedDescriptors() = 0; - virtual void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) {} - virtual void initDescriptor(const InferenceEngine::LayerConfig& config); + // TODO [DS]: Should be moved into Node derivative class + virtual void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) {} + virtual void initDescriptor(const NodeConfig& config); virtual bool created() const = 0; virtual bool created(const MKLDNNExtensionManager::Ptr& extMgr) { return created(); @@ -483,23 +500,19 @@ class MKLDNNNode { template PD createPrimitiveDescriptor(const mkldnn::primitive_attr &attr = mkldnn::primitive_attr()) { - auto descsEqual = [](const std::vector& srcDescs, - const std::vector& selectedDescs) { + auto descsCompatible = [](const std::vector& srcDescs, + const std::vector& selectedDescs) { if (srcDescs.empty() && selectedDescs.empty()) return true; if (srcDescs.empty() || selectedDescs.empty()) return false; for (size_t i = 0; i < srcDescs.size() && i < selectedDescs.size(); i++) { - if (!(srcDescs[i].getBlockingDesc() == selectedDescs[i].desc.getBlockingDesc() && - srcDescs[i].getPrecision() == selectedDescs[i].desc.getPrecision() && - srcDescs[i].getDims() == selectedDescs[i].desc.getDims()) && - srcDescs[i].getLayout() != InferenceEngine::Layout::ANY) - return false; + return srcDescs[i]->isCompatible(*selectedDescs[i].desc); } return true; }; - const PrimitiveDescInfo *selected_pd = getSelectedPrimitiveDescriptor(); + const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) IE_THROW() << "Preferable primitive descriptor is not set for node " << getName() << "."; @@ -507,19 +520,19 @@ class MKLDNNNode { auto itpd = desc.createPrimitiveDescriptorIterator(engine, attr); while (static_cast(itpd)) { - std::vector srcDescs; + std::vector srcDescs; for (size_t i = 0; i < descInputNumbers(desc); i++) srcDescs.push_back(getSrcMemDesc(itpd, i)); - std::vector dstDescs; + std::vector dstDescs; for (size_t i = 0; i < descOutputNumbers(desc); i++) dstDescs.push_back(getDstMemDesc(itpd, i)); impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str()); if (impl_type == selected_pd->getImplementationType() && - descsEqual(srcDescs, selected_pd->getConfig().inConfs) && - descsEqual(dstDescs, selected_pd->getConfig().outConfs)) { + descsCompatible(srcDescs, selected_pd->getConfig().inConfs) && + descsCompatible(dstDescs, selected_pd->getConfig().outConfs)) { prepareMemory(selected_pd, itpd); PD prim_desc = createPd(desc); return {itpd.get()}; @@ -646,10 +659,10 @@ class MKLDNNNode { virtual int getMaxBatch(); - virtual InferenceEngine::TensorDesc getConfiguredInputDesc(const InferenceEngine::LayerConfig& config, size_t idx) const; - virtual InferenceEngine::TensorDesc getConfiguredOutputDesc(const InferenceEngine::LayerConfig& config, size_t idx) 
const; - virtual MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx); - virtual MKLDNNMemoryDesc getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx); + virtual std::unique_ptr getDefinedInputDesc(const NodeConfig &config, size_t idx) const; + virtual std::unique_ptr getDefinedOutputDesc(const NodeConfig &config, size_t idx) const; + virtual std::unique_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx); + virtual std::unique_ptr getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx); /** * @brief Appends new item into ops list with the information on how the node should be executed as post operation. @@ -663,8 +676,8 @@ class MKLDNNNode { GetPrimitiveMemoryFormatFunc; std::vector internalBlobDesc; - std::vector inDims; - std::vector outDims; + std::vector inputShapes; + std::vector outputShapes; std::vector fusedWith; std::vector mergedWith; @@ -689,12 +702,11 @@ class MKLDNNNode { ConstantType constant = ConstantType::Unknown; std::vector internalBlobs; std::vector internalBlobMemory; - std::vector supportedPrimitiveDescriptors; + std::vector supportedPrimitiveDescriptors; std::unordered_map primArgs; MKLDNNPrimitive prim; std::vector descs; - InferenceEngine::Blob::Ptr ext_scales; MKLDNNWeightsSharing::Ptr weightCache; Algorithm algorithm = Algorithm::Undefined; @@ -706,14 +718,13 @@ class MKLDNNNode { friend class MKLDNNGraphOptimizer; friend class NodeDumper; - bool isUninitTensorDesc(const InferenceEngine::TensorDesc& desc) const; - bool isInitConfig(const InferenceEngine::LayerConfig& config) const; void selectPreferPrimitiveDescriptor(const std::vector& priority, bool ignoreConstInputs); + bool isConfigDefined(const NodeConfig &config) const; virtual bool canBeInPlace() const; virtual const std::vector& getPrimitivesPriority(); - virtual std::vector getAvailableFormatsForDims(const MKLDNNDims& dims) const; + virtual std::vector getAvailableFormatsForDims(const Shape& dims) const; int batchToProcess(); InferenceEngine::Layout getWeightsLayoutByDims(InferenceEngine::SizeVector dims, bool isGrouped); @@ -730,42 +741,39 @@ class MKLDNNNode { */ virtual std::vector getOutputPrecisions() const; - void addSupportedPrimDesc(const std::vector& inDataConfigurators, - const std::vector& outDataConfigurators, + void addSupportedPrimDesc(const std::vector& inPortConfigs, + const std::vector& outPortConfigs, impl_desc_type implType, bool dynBatchSupport = false) { - auto fill_port = [] (const DataConfigurator& dataConfigurator, const InferenceEngine::SizeVector& dims, - InferenceEngine::Precision prc, std::vector& port) -> bool { - // In order to simplify particular node initialization logic we just don't add config in case target shape is not supported by tensorDescCreator. - // This should be suitable for major of scenarios since almost all nodes add `ncsp` tensorDescCreator which supports any shape rank. - if (dims.size() < dataConfigurator.tensorDescCreator->getMinimalRank()) + auto fill_port = [] (const PortConfigurator& portConfigurator, const Shape& shape, + InferenceEngine::Precision prc, std::vector& port) -> bool { + // In order to simplify particular node initialization logic we just don't add config in case target shape is not supported by blockedDescCreator. + // This should be suitable for major of scenarios since almost all nodes add `ncsp` blockedDescCreator which supports any shape rank. 
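
Node implementations drive the helper above through `addSupportedPrimDesc`. As a usage reference, adapted from the adaptive pooling node added later in this patch (the node name, layouts, and precisions are example values, not a prescribed configuration), a call inside a node's `initSupportedPrimitiveDescriptors()` looks like:

```cpp
// Inside SomeNode::initSupportedPrimitiveDescriptors() — each brace-initialized entry
// builds a PortConfigurator{LayoutType, Precision[, Shape, constant, inPlace]}.
addSupportedPrimDesc(
    // inputs: data tensor in a channel-blocked layout, second input kept planar i32
    {{LayoutType::nCsp16c, Precision::FP32},
     {LayoutType::ncsp,    Precision::I32}},
    // outputs: result in the same blocked layout as the data input
    {{LayoutType::nCsp16c, Precision::FP32}},
    impl_desc_type::unknown);
```

When a `PortConfigurator` carries no explicit shape, the corresponding edge shape is taken from the graph, and `Precision::UNSPECIFIED` falls back to the node's original port precision, which is why most call sites can stay this terse.
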
+ if (shape.getRank() < portConfigurator.blockedDescCreator->getMinimalRank()) return false; - InferenceEngine::DataConfig dataConfig; - dataConfig.inPlace = dataConfigurator.inplace; - dataConfig.constant = dataConfigurator.constant; - - dataConfig.desc = dataConfigurator.tensorDescCreator->createDesc(prc, dims); + PortConfig portConfig; + portConfig.inPlace = portConfigurator.inPlace; + portConfig.constant = portConfigurator.constant; + portConfig.desc = portConfigurator.blockedDescCreator->createUniqueDesc(prc, shape.getStaticDims()); - port.push_back(dataConfig); + port.push_back(std::move(portConfig)); return true; }; - InferenceEngine::LayerConfig config; - for (size_t i = 0; i < inDataConfigurators.size(); i++) { - auto dims = inDataConfigurators[i].shape.empty() ? getParentEdgesAtPort(i)[0]->getDims().ToSizeVector() : inDataConfigurators[i].shape; - auto prc = inDataConfigurators[i].prc == InferenceEngine::Precision::UNSPECIFIED ? getOriginalInputPrecisionAtPort(i) - : inDataConfigurators[i].prc; - if (!fill_port(inDataConfigurators[i], dims, prc, config.inConfs)) + NodeConfig config; + for (size_t i = 0; i < inPortConfigs.size(); i++) { + auto shape = inPortConfigs[i].shape.getRank() == 0 ? getParentEdgesAtPort(i)[0]->getShape() : inPortConfigs[i].shape; + auto prc = inPortConfigs[i].prc == InferenceEngine::Precision::UNSPECIFIED ? getOriginalInputPrecisionAtPort(i) : inPortConfigs[i].prc; + if (!fill_port(inPortConfigs[i], shape, prc, config.inConfs)) return; } - for (size_t i = 0; i < outDataConfigurators.size(); i++) { - auto dims = outDataConfigurators[i].shape.empty() ? getChildEdgesAtPort(i)[0]->getDims().ToSizeVector() : outDataConfigurators[i].shape; - auto prc = outDataConfigurators[i].prc == InferenceEngine::Precision::UNSPECIFIED ? getOriginalOutputPrecisionAtPort(i) - : outDataConfigurators[i].prc; - if (!fill_port(outDataConfigurators[i], dims, prc, config.outConfs)) + for (size_t i = 0; i < outPortConfigs.size(); i++) { + auto dims = outPortConfigs[i].shape.getRank() == 0 ? getChildEdgesAtPort(i)[0]->getShape() : outPortConfigs[i].shape; + auto prc = outPortConfigs[i].prc == InferenceEngine::Precision::UNSPECIFIED ? 
getOriginalOutputPrecisionAtPort(i) : outPortConfigs[i].prc; + if (!fill_port(outPortConfigs[i], dims, prc, config.outConfs)) return; } @@ -811,7 +819,7 @@ class MKLDNNNode { return PD(*selected_desc_ptr, engine); } - void prepareMemory(const PrimitiveDescInfo *selected_pd, mkldnn::primitive_desc_iterator& itpd); + void prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_desc_iterator& itpd); enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2 }; ConstantType checkConstant(LOOK look, std::vector& checkNodes); }; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp index 733d785d5940b1..c7907aa55692b2 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp @@ -26,6 +26,7 @@ #include "transformations/common_optimizations/convert_quantize_dequantize.hpp" #include #include +#include #include #include #include @@ -56,7 +57,10 @@ #include #include #include +#include +#include #include +#include #include #include #include @@ -74,13 +78,12 @@ #include #include -#include -#include -#include +#include #include #include #include -#include +#include +#include #include #include @@ -88,6 +91,7 @@ #include "nodes/mkldnn_mvn_node.h" #include "nodes/mkldnn_fake_quantize_node.h" +#include "nodes/mkldnn_normalize_node.h" #include "ngraph_transformations/convert_to_cpu_specific_opset.hpp" #if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64) @@ -121,7 +125,7 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { const bool useLpt = (conf.lpTransformsMode == Config::LPTransformsMode::On) && - ngraph::pass::low_precision::LowPrecisionTransformer::isFunctionQuantized(nGraphFunc); + ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(nGraphFunc); if (useLpt) { manager.register_pass( std::vector{ ngraph::element::i8, ngraph::element::u8, ngraph::element::i4, ngraph::element::u4 }); @@ -166,6 +170,9 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { manager.register_pass(); manager.register_pass(); manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); manager.register_pass(); if (useLpt) { @@ -278,6 +285,13 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { return node->input_value(0).get_partial_shape().rank().get_length() > 5; }); + auto normalizeL2FusionCallback = [](const_node_ptr &node) -> bool { + std::string errorMsg; + return !MKLDNNNormalizeL2Node::isSupportedOperation(node, errorMsg); + }; + pass_config->set_callback(normalizeL2FusionCallback); + pass_config->set_callback(normalizeL2FusionCallback); + // List of enabled/disabled transformations pass_config->disable(); pass_config->disable(); @@ -293,6 +307,7 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { pass_config->disable(); pass_config->disable(); pass_config->disable(); + pass_config->disable(); pass_config->enable(); pass_config->enable(); @@ -313,30 +328,45 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { if (useLpt) { OV_ITT_SCOPE(FIRST_INFERENCE, MKLDNNPlugin::itt::domains::MKLDNN_LT, "LowPrecisionTransformations"); - ngraph::pass::Manager manager; - auto lptPrerequisites = manager.register_pass(); - const std::vector supportedTypes = { ngraph::element::i8, ngraph::element::u8 }; - lptPrerequisites->add_matcher(supportedTypes); - lptPrerequisites->add_matcher(supportedTypes); - 
lptPrerequisites->add_matcher(); - manager.run_passes(nGraphFunc); - - auto params = LayerTransformation::Params( - true, // updatePrecisions - LayerTransformation::QuantizedTensorAlignment::UpdateLevel, // quantizedTensorAlignmentOnActivations - LayerTransformation::QuantizedTensorAlignment::None, // quantizedTensorAlignmentOnWeights - true); // supportAsymmetricQuantization - LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params) - .add( - LayerTransformation::Params(params).setPrecisionsOnActivations({ngraph::element::u8}).setSupportAsymmetricQuantization(true)) - .add( - LayerTransformation::Params(params).setPrecisionsOnActivations({ ngraph::element::u8 }).setSupportAsymmetricQuantization(true)) - .addStandaloneCleanup( - LayerTransformation::Params(params).setPrecisionsOnActivations({ ngraph::element::u8 })) - .add( - LayerTransformation::Params(params).setSupportAsymmetricQuantization(false))); - - transformer.transform(nGraphFunc); + auto supportedPrecisions = std::vector({ + OperationPrecisionRestriction::create({ + {0, {ngraph::element::u8}}, + {1, {ngraph::element::i8}}, + }), + OperationPrecisionRestriction::create({ + {0, {ngraph::element::u8, ngraph::element::i8}}, + {1, {ngraph::element::i8}} + }), + OperationPrecisionRestriction::create({ + {0, {ngraph::element::u8}}, + {1, {ngraph::element::i8}} + }), + OperationPrecisionRestriction::create({ + {0, {ngraph::element::u8}}, + {1, {ngraph::element::i8}}, + }), + }); + + auto perTensorQuantization = std::vector({ + OperationPerTensorQuantizationRestriction::create({0}), + OperationPerTensorQuantizationRestriction::create({0}) + }); + + ngraph::pass::Manager lptManager; + lptManager.register_pass(supportedPrecisions, perTensorQuantization); + lptManager.get_pass_config()->set_callback([](const_node_ptr& node) -> bool { + if (const auto mulitply = std::dynamic_pointer_cast(node)) { + return !MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolution(mulitply); + } + return false; + }); + lptManager.get_pass_config()->set_callback([](const_node_ptr& node) -> bool { + return LayerTransformation::isAsymmetricQuantization(node) || WeightableLayerTransformation::isAsymmetricOnWeights(node); + }); + lptManager.get_pass_config()->set_callback([](const_node_ptr& node) -> bool { + return MultiplyToGroupConvolutionTransformation::isDynamicOrScalar(node); + }); + lptManager.run_passes(nGraphFunc); } ngraph::pass::Manager postLPTPassManager; diff --git a/inference-engine/src/mkldnn_plugin/nodes/base.hpp b/inference-engine/src/mkldnn_plugin/nodes/base.hpp deleted file mode 100644 index b611c8eb0a4ba6..00000000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/base.hpp +++ /dev/null @@ -1,162 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include "nodes/list.hpp" -#include "common/tensor_desc_creator.h" -#include "ngraph/descriptor/tensor.hpp" -#include -#include "cpu_types.h" - -#include -#include - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -class ExtLayerBase: public ILayerExecImpl { -public: - StatusCode getSupportedConfigurations(std::vector& conf, ResponseDesc *resp) noexcept override { - if (!errorMsg.empty()) { - if (resp) { - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - } - return GENERAL_ERROR; - } - conf = confs; - return OK; - } - - StatusCode init(LayerConfig& config, ResponseDesc *resp) noexcept override { - for (auto& input : config.inConfs) { - for (auto& 
offset : input.desc.getBlockingDesc().getOffsetPaddingToData()) { - if (offset) { - return GENERAL_ERROR; - } - } - if (input.desc.getBlockingDesc().getOffsetPadding()) { - return GENERAL_ERROR; - } - } - for (auto& output : config.outConfs) { - for (auto& offset : output.desc.getBlockingDesc().getOffsetPaddingToData()) { - if (offset) { - return GENERAL_ERROR; - } - } - if (output.desc.getBlockingDesc().getOffsetPadding()) { - return GENERAL_ERROR; - } - } - return OK; - } - -protected: - MKLDNNPlugin::Algorithm getAlgorithm() const { - return algorithm; - } - MKLDNNPlugin::Algorithm algorithm; - - class DataConfigurator { - public: - DataConfigurator(MKLDNNPlugin::TensorDescCreatorTypes tensorDescType, Precision prc = Precision::UNSPECIFIED, bool constant = false, int inplace = -1) : - tensorDescCreator(getTensorDescCreator(tensorDescType)), prc(prc), constant(constant), inplace(inplace) {} - - DataConfigurator(const MKLDNNPlugin::TensorDescCreator::CreatorConstPtr& tensorDescCreator, Precision prc = Precision::UNSPECIFIED, - bool constant = false, int inplace = -1) : tensorDescCreator(tensorDescCreator), prc(prc), constant(constant), inplace(inplace) {} - - const MKLDNNPlugin::TensorDescCreator::CreatorConstPtr tensorDescCreator; - const bool constant = false; - const int inplace = -1; - const Precision prc = Precision::UNSPECIFIED; // By default ngraph node precision is used - private: - static MKLDNNPlugin::TensorDescCreator::CreatorConstPtr getTensorDescCreator(MKLDNNPlugin::TensorDescCreatorTypes tensorDescType) { - auto& creators = MKLDNNPlugin::TensorDescCreator::getCommonCreators(); - if (creators.find(tensorDescType) == creators.end()) { - IE_THROW() << "Cannot find tensor descriptor creator"; - } - return creators.at(tensorDescType); - } - }; - - void addConfig(const std::shared_ptr& op, - const std::vector& inDataConfigurators, - const std::vector& outDataConfigurators, - bool dynBatchSupport = false) { - LayerConfig config; - - if (inDataConfigurators.size() != op->get_input_size()) - IE_THROW() << "Cannot add config for operation " << op->get_friendly_name() << ". Incorrect number of inputs: " << - "expected: " << op->get_input_size() << ", provided: " << inDataConfigurators.size(); - if (outDataConfigurators.size() != op->get_output_size()) - IE_THROW() << "Cannot add config for operation " << op->get_friendly_name() << ". Incorrect number of outputs: " << - "expected: " << op->get_output_size() << ", provided: " << outDataConfigurators.size(); - - auto fill_port = [] (const DataConfigurator& dataConfigurator, const ngraph::descriptor::Tensor& tensor, std::vector& port) -> bool { - // In order to simplify particular node initialization logic we just don't add config in case target shape is not supported by tensorDescCreator. - // This should be suitable for major of scenarios since almost all nodes add `ncsp` tensorDescCreator which supports any shape rank. - if (tensor.get_shape().size() < dataConfigurator.tensorDescCreator->getMinimalRank()) - return false; - - auto precision = dataConfigurator.prc != Precision::UNSPECIFIED ? 
dataConfigurator.prc : details::convertPrecision(tensor.get_element_type()); - - DataConfig dataConfig; - dataConfig.inPlace = dataConfigurator.inplace; - dataConfig.constant = dataConfigurator.constant; - dataConfig.desc = dataConfigurator.tensorDescCreator->createDesc(precision, tensor.get_shape()); - - port.push_back(dataConfig); - - return true; - }; - - for (size_t i = 0; i < inDataConfigurators.size(); i++) - if (!fill_port(inDataConfigurators[i], op->get_input_tensor(i), config.inConfs)) - return; - - for (size_t i = 0; i < outDataConfigurators.size(); i++) - if (!fill_port(outDataConfigurators[i], op->get_output_tensor(i), config.outConfs)) - return; - - config.dynBatchSupport = dynBatchSupport; - confs.push_back(config); - } - - std::string errorMsg; - std::vector confs; -}; - -template -class ImplFactory : public ILayerImplFactory { -public: - explicit ImplFactory(const std::shared_ptr& op) : ngraphOp(op) {} - - // First implementation has more priority than next - StatusCode getImplementations(std::vector& impls, ResponseDesc *resp) noexcept override { - try { - impls.push_back(ILayerImpl::Ptr(new IMPL(ngraphOp))); - } catch (const InferenceEngine::Exception& ex) { - strncpy(resp->msg, ex.what(), sizeof(resp->msg) - 1); - IE_SUPPRESS_DEPRECATED_START - return ex.getStatus() != OK ? ex.getStatus() : GENERAL_ERROR; - IE_SUPPRESS_DEPRECATED_END - } - return OK; - } -protected: - const std::shared_ptr ngraphOp; -}; - -#define REG_FACTORY_FOR(__prim, __type) \ - void __prim ## __type(MKLDNNExtensions * extInstance) { \ - using namespace MKLDNNPlugin; \ - extInstance->layersFactory.registerNodeIfRequired(MKLDNNPlugin, __type, OV_PP_TOSTRING(__type), ImplFactory<__prim>); \ - } - -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/common/tensor_desc_creator.cpp b/inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.cpp similarity index 57% rename from inference-engine/src/mkldnn_plugin/nodes/common/tensor_desc_creator.cpp rename to inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.cpp index 0467d205fb71b1..85566b3833ac6b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/common/tensor_desc_creator.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "tensor_desc_creator.h" +#include "blocked_desc_creator.h" #include using namespace InferenceEngine; @@ -11,19 +11,19 @@ using namespace MKLDNNPlugin; namespace { constexpr size_t channelsPos = 1lu; -class PlainFormatCreator : public TensorDescCreator { +class PlainFormatCreator : public BlockedDescCreator { public: - virtual InferenceEngine::TensorDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const { + BlockedMemoryDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const override { SizeVector order(srcDims.size()); std::iota(order.begin(), order.end(), 0); - return TensorDesc(precision, srcDims, {srcDims, order}); + return BlockedMemoryDesc(precision, srcDims, srcDims, order); } - virtual size_t getMinimalRank() const { return 0lu; } + size_t getMinimalRank() const override { return 0lu; } }; -class PerChannelCreator : public TensorDescCreator { +class PerChannelCreator : public BlockedDescCreator { public: - virtual InferenceEngine::TensorDesc createDesc(const InferenceEngine::Precision &precision, const 
InferenceEngine::SizeVector &srcDims) const { + BlockedMemoryDesc createDesc(const InferenceEngine::Precision &precision, const InferenceEngine::SizeVector &srcDims) const override { SizeVector order(srcDims.size()); std::iota(order.begin(), order.end(), 0); SizeVector blkDims = srcDims; @@ -37,15 +37,15 @@ class PerChannelCreator : public TensorDescCreator { moveElementBack(blkDims, channelsPos); } - return TensorDesc(precision, srcDims, {blkDims, order}); + return BlockedMemoryDesc(precision, srcDims, blkDims, order); } - virtual size_t getMinimalRank() const { return 3lu; } + size_t getMinimalRank() const override { return 3lu; } }; -class ChannelBlockedCreator : public TensorDescCreator { +class ChannelBlockedCreator : public BlockedDescCreator { public: ChannelBlockedCreator(size_t blockSize) : _blockSize(blockSize) {} - virtual InferenceEngine::TensorDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const { + BlockedMemoryDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const override { if (srcDims.size() < 2) { IE_THROW() << "Can't create blocked tensor descriptor!"; } @@ -55,28 +55,30 @@ class ChannelBlockedCreator : public TensorDescCreator { order.push_back(channelsPos); SizeVector blkDims = srcDims; - blkDims[channelsPos] = blkDims[channelsPos] / _blockSize + (blkDims[channelsPos] % _blockSize ? 1 : 0); + if (Shape::UNDEFINED_DIM != blkDims[channelsPos]) { + blkDims[channelsPos] = blkDims[channelsPos] / _blockSize + (blkDims[channelsPos] % _blockSize ? 1 : 0); + } blkDims.push_back(_blockSize); - return TensorDesc(precision, srcDims, {blkDims, order}); + return BlockedMemoryDesc(precision, srcDims, blkDims, order); } - virtual size_t getMinimalRank() const { return 3lu; } + size_t getMinimalRank() const override { return 3lu; } private: size_t _blockSize; }; } // namespace -const TensorDescCreator::CreatorsMap& TensorDescCreator::getCommonCreators() { - static const CreatorsMap map{ { TensorDescCreatorTypes::nspc, CreatorConstPtr(new PerChannelCreator) }, - { TensorDescCreatorTypes::nCsp8c, CreatorConstPtr(new ChannelBlockedCreator(8)) }, - { TensorDescCreatorTypes::nCsp16c, CreatorConstPtr(new ChannelBlockedCreator(16)) }, - { TensorDescCreatorTypes::ncsp, CreatorConstPtr(new PlainFormatCreator) } }; +const BlockedDescCreator::CreatorsMap& BlockedDescCreator::getCommonCreators() { + static const CreatorsMap map{ { LayoutType::nspc, CreatorConstPtr(new PerChannelCreator) }, + { LayoutType::nCsp8c, CreatorConstPtr(new ChannelBlockedCreator(8)) }, + { LayoutType::nCsp16c, CreatorConstPtr(new ChannelBlockedCreator(16)) }, + { LayoutType::ncsp, CreatorConstPtr(new PlainFormatCreator) } }; return map; } std::pair -TensorDescCreator::makeFilteredRange(const CreatorsMap &map, unsigned int rank) { +BlockedDescCreator::makeFilteredRange(const CreatorsMap &map, unsigned int rank) { auto rankFilter = [rank](const CreatorsMap::value_type& item) { if (item.second->getMinimalRank() > rank) { return false; @@ -90,7 +92,7 @@ TensorDescCreator::makeFilteredRange(const CreatorsMap &map, unsigned int rank) } std::pair -TensorDescCreator::makeFilteredRange(const CreatorsMap& map, unsigned rank, const std::vector& supportedTypes) { +BlockedDescCreator::makeFilteredRange(const CreatorsMap& map, unsigned rank, const std::vector& supportedTypes) { unsigned bitMask = 0ul; for (auto& item : supportedTypes) { bitMask |= 1 << static_cast(item); @@ -112,7 +114,7 @@ 
TensorDescCreator::makeFilteredRange(const CreatorsMap& map, unsigned rank, cons } std::pair -TensorDescCreator::makeFilteredRange(const CreatorsMap &map, TensorDescCreator::Predicate predicate) { +BlockedDescCreator::makeFilteredRange(const CreatorsMap &map, BlockedDescCreator::Predicate predicate) { auto first = CreatorsMapFilterConstIterator(std::move(predicate), map.begin(), map.end()); auto last = first.end(); return std::make_pair(first, last); diff --git a/inference-engine/src/mkldnn_plugin/nodes/common/tensor_desc_creator.h b/inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.h similarity index 75% rename from inference-engine/src/mkldnn_plugin/nodes/common/tensor_desc_creator.h rename to inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.h index 4fda57fcb2fe85..f53524288e4e7c 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/common/tensor_desc_creator.h +++ b/inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.h @@ -4,25 +4,19 @@ #pragma once -#include #include +#include "cpu_shape.h" +#include "cpu_blocked_memory_desc.h" namespace MKLDNNPlugin { -enum class TensorDescCreatorTypes : unsigned { - nspc, // general per channels format - ncsp, // general planar - nCsp8c, // general channels blocked by 8 - nCsp16c // general channels blocked by 16 -}; - class CreatorsMapFilterConstIterator; -class TensorDescCreator { +class BlockedDescCreator { public: - typedef std::shared_ptr CreatorPtr; - typedef std::shared_ptr CreatorConstPtr; - typedef std::map CreatorsMap; + typedef std::shared_ptr CreatorPtr; + typedef std::shared_ptr CreatorConstPtr; + typedef std::map CreatorsMap; typedef std::function Predicate; public: @@ -30,17 +24,20 @@ class TensorDescCreator { static std::pair makeFilteredRange(const CreatorsMap &map, unsigned rank); static std::pair - makeFilteredRange(const CreatorsMap& map, unsigned rank, const std::vector& supportedTypes); + makeFilteredRange(const CreatorsMap& map, unsigned rank, const std::vector& supportedTypes); static std::pair makeFilteredRange(const CreatorsMap& map, Predicate predicate); - virtual InferenceEngine::TensorDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const = 0; + virtual BlockedMemoryDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const = 0; + std::unique_ptr createUniqueDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const { + return MKLDNNPlugin::make_unique(createDesc(precision, srcDims)); + } virtual size_t getMinimalRank() const = 0; - virtual ~TensorDescCreator() = default; + virtual ~BlockedDescCreator() = default; }; class CreatorsMapFilterConstIterator { public: - typedef TensorDescCreator::CreatorsMap::const_iterator Iterator; + typedef BlockedDescCreator::CreatorsMap::const_iterator Iterator; typedef std::iterator_traits::value_type value_type; typedef std::iterator_traits::reference reference; typedef std::iterator_traits::pointer pointer; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_adaptive_pooling.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_adaptive_pooling.cpp new file mode 100644 index 00000000000000..4bf60d6eb21f4a --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_adaptive_pooling.cpp @@ -0,0 +1,264 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "mkldnn_adaptive_pooling.h" +#include "ie_parallel.hpp" +#include 
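// (Editor's illustrative note, not part of the patch.) Opset8 AdaptiveAvgPool/AdaptiveMaxPool
// follow PyTorch-style semantics: each output index oh in [0, OH) reads the input window
// [floor(oh*IH/OH), ceil((oh+1)*IH/OH)). A hypothetical helper expressing just that mapping:
static inline void adaptiveWindow(int oh, int OH, int IH, int &hStart, int &hEnd) {
    hStart = (oh * IH) / OH;                 // floor for non-negative operands
    hEnd   = ((oh + 1) * IH + OH - 1) / OH;  // ceil
}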
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace MKLDNNPlugin; +using namespace InferenceEngine; +using namespace mkldnn; +using namespace mkldnn::impl::cpu::x64; + +bool MKLDNNAdaptivePoolingNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + if (one_of(op->get_type_info(), ngraph::op::v8::AdaptiveAvgPool::type_info)) { + auto adaPool = std::dynamic_pointer_cast(op); + if (!adaPool) { + errorMessage = "Only opset8 AdaptiveAvgPooling operation is supported"; + return false; + } + } else if (one_of(op->get_type_info(), ngraph::op::v8::AdaptiveMaxPool::type_info)) { + auto adaPool = std::dynamic_pointer_cast(op); + if (!adaPool) { + errorMessage = "Only opset8 AdaptiveMaxPooling operation is supported"; + return false; + } + } else { + errorMessage = "Unsupported Adaptive pooling mode"; + return false; + } + } catch (...) { + return false; + } + return true; +} + +MKLDNNAdaptivePoolingNode::MKLDNNAdaptivePoolingNode(const std::shared_ptr& op, const mkldnn::engine& eng, + MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (isSupportedOperation(op, errorMessage)) { + errorPrefix = "Adaptive Pooling layer with name '" + getName() + "' "; + } else { + IE_THROW(NotImplemented) << errorMessage; + } + if (one_of(op->get_type_info(), ngraph::op::v8::AdaptiveAvgPool::type_info)) { + algorithm = Algorithm::AdaptivePoolingAvg; + } else if (one_of(op->get_type_info(), ngraph::op::v8::AdaptiveMaxPool::type_info)) { + algorithm = Algorithm::AdaptivePoolingMax; + } +} + +void MKLDNNAdaptivePoolingNode::getSupportedDescriptors() { + if (!descs.empty()) + return; + + if (getParentEdges().size() != 2) + IE_THROW() << errorPrefix << "has incorrect number of input edges: " << getParentEdges().size(); + if (getChildEdges().size() != (algorithm == AdaptivePoolingMax ? 2 : 1)) + IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getParentEdges().size(); + + auto parentDims = getParentEdgeAt(0)->getShape().getStaticDims(); + auto childDims = getChildEdgeAt(0)->getShape().getStaticDims(); + + spatialDimsCount = parentDims.size() - 2; + if (!one_of(spatialDimsCount, 1, 2, 3)) { + IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getParentEdgeAt(0)->getShape().getRank(); + } + + if (getParentEdgeAt(1)->getShape().getRank() != 1) { + IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getShape().getRank(); + } + + if (getChildEdgeAt(0)->getShape().getRank() != getParentEdgeAt(0)->getShape().getRank()) { + IE_THROW() << errorPrefix << "must keep data rank"; + } +} + +void MKLDNNAdaptivePoolingNode::initSupportedPrimitiveDescriptors() { + if (!supportedPrimitiveDescriptors.empty()) + return; + + // we supports only fp32 currently + precision = Precision::FP32; + + InferenceEngine::LayerConfig config; + config.dynBatchSupport = false; + config.inConfs.resize(2); + config.outConfs.resize((algorithm == Algorithm::AdaptivePoolingAvg ? 
1 : 2)); + + std::vector dataFormats{ LayoutType::ncsp }; + if (getParentEdgeAt(0)->getShape().getStaticDims()[1] != 1) { + dataFormats.push_back(LayoutType::nspc); + dataFormats.push_back(LayoutType::nCsp16c); + dataFormats.push_back(LayoutType::nCsp8c); + } + for (const auto &df : dataFormats) { + if (algorithm == Algorithm::AdaptivePoolingAvg) { + addSupportedPrimDesc({{df, precision}, {LayoutType::ncsp, Precision::I32}}, + {{df, precision}}, + impl_desc_type::unknown); + } else { + addSupportedPrimDesc({{df, precision}, {LayoutType::ncsp, Precision::I32}}, + {{df, precision}, {LayoutType::ncsp, Precision::I32}}, + impl_desc_type::unknown); + } + } +} + +void MKLDNNAdaptivePoolingNode::execute(mkldnn::stream strm) { + auto inputPrec = getParentEdgeAt(0)->getMemory().GetDescriptor().data.data_type; + auto outputPrec = getChildEdgeAt(0)->getMemory().GetDescriptor().data.data_type; + if (!(inputPrec == mkldnn_f32 && outputPrec == mkldnn_f32)) + IE_THROW() << errorPrefix << "doesn't support demanded precisions"; + + auto &srcMemory0 = getParentEdgeAt(0)->getMemory(); + auto &srcMemory1 = getParentEdgeAt(1)->getMemory(); + int *indexDst = nullptr; + + if (algorithm == Algorithm::AdaptivePoolingMax) { + indexDst = reinterpret_cast(getChildEdgeAt(1)->getMemoryPtr()->GetPtr()); + } + + auto srcBlockDesc = srcMemory0.GetDescriptor().data.format_desc.blocking; + + int blockSize = srcBlockDesc.inner_nblks > 0 ? srcBlockDesc.inner_blks[0] : 1; + auto isPlainFmt = srcMemory0.GetDesc().hasLayoutType(LayoutType::ncsp); + auto isTailCFmt = srcMemory0.GetDesc().hasLayoutType(LayoutType::nspc); + + const auto *src = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); + const auto *srcPooledSpatialShapes = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); + auto *dst = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + + if (srcMemory1.GetElementsCount() != spatialDimsCount) + IE_THROW() << errorPrefix << "has input spatial dimension (" << srcMemory1.GetElementsCount() + << ") inconsistent with pooling vector size (" << spatialDimsCount << ")"; + + auto inputDimVector = srcMemory0.GetDims(); + const int N = static_cast(inputDimVector[0]); + const int C = static_cast(inputDimVector[1]); + const int ID = static_cast(spatialDimsCount == 3 ? inputDimVector[2] : 1); + const int IH = static_cast(spatialDimsCount >= 2 ? inputDimVector[spatialDimsCount] : 1); + const int IW = static_cast(inputDimVector[spatialDimsCount + 1]); + + const int OD = static_cast(spatialDimsCount == 3 ? srcPooledSpatialShapes[0] : 1); + const int OH = static_cast(spatialDimsCount >= 2 ? srcPooledSpatialShapes[spatialDimsCount - 2] : 1); + const int OW = static_cast(srcPooledSpatialShapes[spatialDimsCount - 1]); + + const int iHW = IH * IW; + const int oDHW = OD * OH * OW, oHW = OH * OW; + + const int chPadding = srcMemory0.GetDescriptor().data.padded_dims[1]; + const int blockCount = (isTailCFmt ? 1 : chPadding / blockSize); + auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); + if (!selectedPrimitiveDescriptor) + IE_THROW() << errorPrefix << "doesn't have primitive descriptors."; + auto config = selectedPrimitiveDescriptor->getConfig(); + auto srcStrides = getParentEdgesAtPort(0)[0]->getMemory().GetDescWithType().getStrides(); + auto dstStrides = getChildEdgesAtPort(0)[0]->getMemory().GetDescWithType().getStrides(); + + // unified strides array + const size_t tailDimsOffset = (isTailCFmt ? -1 : 0); + const size_t inStrides[5] = { + srcStrides[0], + (isTailCFmt ? 
1 : srcStrides[1]), + (spatialDimsCount == 3 ? srcStrides[2 + tailDimsOffset] : 0), + (spatialDimsCount >= 2 ? srcStrides[spatialDimsCount + tailDimsOffset] : 0), + srcStrides[spatialDimsCount + 1 + tailDimsOffset] }; + const size_t outStrides[5] = { + dstStrides[0], + (isTailCFmt ? 1 : dstStrides[1]), + (spatialDimsCount == 3 ? dstStrides[2 + tailDimsOffset] : 0), + (spatialDimsCount >= 2 ? dstStrides[spatialDimsCount + tailDimsOffset] : 0), + dstStrides[spatialDimsCount + 1 + tailDimsOffset] }; + + std::function pool; + auto poolMax = [&] (const float *srcData, float *dstData, int od, int oh, int ow, size_t spatIndOff) { + size_t dStart, dEnd, hStart, hEnd, wStart, wEnd; + setBinBorders(&dStart, &dEnd, od, ID, OD); + setBinBorders(&hStart, &hEnd, oh, IH, OH); + setBinBorders(&wStart, &wEnd, ow, IW, OW); + float res = srcData[dStart * inStrides[2] + hStart * inStrides[3] + wStart * inStrides[4]]; // initial max value + int resIndex = dStart * iHW + hStart * IW + wStart; // initial max index + for (size_t pixD = dStart; pixD < dEnd; pixD++) { + for (size_t pixH = hStart; pixH < hEnd; pixH++) { + for (size_t pixW = wStart; pixW < wEnd; pixW++) { + float curr = srcData[pixD * inStrides[2] + pixH * inStrides[3] + pixW * inStrides[4]]; + resIndex = (res < curr ? pixD * iHW + pixH * IW + pixW : resIndex); + res = std::max(res, curr); + } + } + } + *dstData = res; + indexDst[spatIndOff * oDHW + od * oHW + oh * OW + ow] = resIndex; + }; + auto poolAvg = [&] (const float *srcData, float *dstData, int od, int oh, int ow, size_t spatIndOff) { + size_t dStart, dEnd, hStart, hEnd, wStart, wEnd; + setBinBorders(&dStart, &dEnd, od, ID, OD); + setBinBorders(&hStart, &hEnd, oh, IH, OH); + setBinBorders(&wStart, &wEnd, ow, IW, OW); + auto binSize = (dEnd - dStart) * (hEnd - hStart) * (wEnd - wStart); + if (binSize == 0) + IE_THROW() << errorPrefix << "has empty bin"; + float sum = 0; + for (size_t pixD = dStart; pixD < dEnd; pixD++) { + for (size_t pixH = hStart; pixH < hEnd; pixH++) { + for (size_t pixW = wStart; pixW < wEnd; pixW++) { + float curr = srcData[pixD * inStrides[2] + pixH * inStrides[3] + pixW * inStrides[4]]; + sum = sum + curr; + } + } + } + *dstData = sum / binSize; + }; + + if (algorithm == Algorithm::AdaptivePoolingMax) { + pool = poolMax; + } else { + pool = poolAvg; + } + + parallel_for5d(N, blockCount, OD, OH, OW, + [&](int n, int blkIdx, int od, int oh, int ow) { + auto srcData = src + n * inStrides[0] + blkIdx * inStrides[1]; + auto dstData = dst + n * outStrides[0] + blkIdx * outStrides[1] + + od * outStrides[2] + oh * outStrides[3] + ow * outStrides[4]; + int cStart = 0, cEnd = C, inResidual = 0, outResidual = 0; + if (!isTailCFmt) { + cStart = blkIdx * blockSize; + cEnd = (blkIdx == blockCount - 1 ? 
C : cStart + blockSize); + } + for (int c = cStart; c < cEnd; c++) { + if (isTailCFmt) { + inResidual = c * inStrides[1]; + outResidual = c * outStrides[1]; + } else if (!isPlainFmt) { + inResidual = outResidual = c % blockSize; + } + pool(srcData + inResidual, dstData + outResidual, od, oh, ow, n * C + c); + }}); +} + +bool MKLDNNAdaptivePoolingNode::created() const { + return getType() == AdaptivePooling; +} + +void MKLDNNAdaptivePoolingNode::createPrimitive() {} + +inline void MKLDNNAdaptivePoolingNode::setBinBorders(size_t *startPtr, size_t *endPtr, size_t idx, size_t inputLength, size_t outputLength) { + *(startPtr) = idx * inputLength / outputLength; + *(endPtr) = ceil(static_cast((idx + 1) * inputLength) / outputLength); +} + +REG_MKLDNN_PRIM_FOR(MKLDNNAdaptivePoolingNode, AdaptivePooling) diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_adaptive_pooling.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_adaptive_pooling.h new file mode 100644 index 00000000000000..386e57f4dcf01f --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_adaptive_pooling.h @@ -0,0 +1,35 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include + +namespace MKLDNNPlugin { + +class MKLDNNAdaptivePoolingNode : public MKLDNNNode { +public: + MKLDNNAdaptivePoolingNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + + void getSupportedDescriptors() override; + void initSupportedPrimitiveDescriptors() override; + void createPrimitive() override; + void execute(mkldnn::stream strm) override; + bool created() const override; + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + +private: + int spatialDimsCount; + InferenceEngine::Precision precision = InferenceEngine::Precision::FP32; + inline void setBinBorders(size_t *startPtr, size_t *endPtr, size_t idx, size_t inputLength, size_t outputLength); + + std::string errorPrefix; +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.cpp index e2616f43c99dd4..8700a70c5b6450 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.cpp @@ -10,7 +10,7 @@ #include "utils/bfloat16.hpp" #include #include "mkldnn_batch_to_space_node.h" -#include +#include #include using namespace MKLDNNPlugin; @@ -67,32 +67,32 @@ void MKLDNNBatchToSpaceNode::initSupportedPrimitiveDescriptors() { if (supported_precision_sizes.find(precision.size()) == supported_precision_sizes.end()) IE_THROW() << errorPrefix << " has unsupported precision: " << precision.name(); - addSupportedPrimDesc({{TensorDescCreatorTypes::nspc, precision}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}}, - {{TensorDescCreatorTypes::nspc, precision}}, + addSupportedPrimDesc({{LayoutType::nspc, precision}, + {LayoutType::ncsp}, + {LayoutType::ncsp}, + {LayoutType::ncsp}}, + {{LayoutType::nspc, precision}}, impl_desc_type::ref_any); - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, precision}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}}, - {{TensorDescCreatorTypes::ncsp, precision}}, + addSupportedPrimDesc({{LayoutType::ncsp, precision}, + {LayoutType::ncsp}, + 
{LayoutType::ncsp}, + {LayoutType::ncsp}}, + {{LayoutType::ncsp, precision}}, impl_desc_type::ref_any); if (inDims[1] % 8 == 0) { - addSupportedPrimDesc({{TensorDescCreatorTypes::nCsp8c, precision}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}}, - {{TensorDescCreatorTypes::nCsp8c, precision}}, + addSupportedPrimDesc({{LayoutType::nCsp8c, precision}, + {LayoutType::ncsp}, + {LayoutType::ncsp}, + {LayoutType::ncsp}}, + {{LayoutType::nCsp8c, precision}}, impl_desc_type::ref_any); } if (inDims[1] % 16 == 0) { - addSupportedPrimDesc({{TensorDescCreatorTypes::nCsp16c, precision}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}}, - {{TensorDescCreatorTypes::nCsp16c, precision}}, + addSupportedPrimDesc({{LayoutType::nCsp16c, precision}, + {LayoutType::ncsp}, + {LayoutType::ncsp}, + {LayoutType::ncsp}}, + {{LayoutType::nCsp16c, precision}}, impl_desc_type::ref_any); } } @@ -112,15 +112,16 @@ void MKLDNNBatchToSpaceNode::batchToSpaceKernel() { const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - const auto layout = getParentEdgeAt(0)->getDesc().getLayout(); - const bool blocked = layout != NCHW && layout != NCDHW && layout != NHWC && layout != NDHWC; + auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + + const bool blocked = srcDesc.hasLayoutType(LayoutType::nCsp8c) || srcDesc.hasLayoutType(LayoutType::nCsp16c); const auto dimsSize = inDims.size(); auto inShape5D = getShape5D(inDims); auto outShape5D = getShape5D(outDims); auto blockShape = getShape5D(blockShapeIn); - if (layout == NHWC || layout == NDHWC) { + if (srcDesc.hasLayoutType(LayoutType::nspc) && one_of(srcDesc.getShape().getRank(), 4, 5)) { inShape5D.push_back(inShape5D[1]); inShape5D.erase(inShape5D.begin() + 1); outShape5D.push_back(outShape5D[1]); @@ -129,9 +130,11 @@ void MKLDNNBatchToSpaceNode::batchToSpaceKernel() { blockShape.erase(blockShape.begin() + 1); } - const size_t blockSize = blocked ? getChildEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims().back() : 1lu; - const size_t blockCountInput = getParentEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims()[1]; - const size_t blockCountOutput = getChildEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims()[1]; + auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + + const size_t blockSize = blocked ? dstDesc.getBlockDims().back() : 1lu; + const size_t blockCountInput = srcDesc.getBlockDims()[1]; + const size_t blockCountOutput = dstDesc.getBlockDims()[1]; const auto blockRemainder = inShape5D[1] % blockSize; const auto lastBlock = blockRemainder == 0 ? blockSize : blockRemainder; @@ -166,7 +169,7 @@ void MKLDNNBatchToSpaceNode::batchToSpaceKernel() { oAdd[2] = dimsSize == 5 ? bIdx % blockShapeIn[2] - cropsBeginIn[2] : 0lu; bIdx = dimsSize == 5 ? 
bIdx / blockShapeIn[2] : bIdx; oAdd[1] = bIdx % blockShapeIn[1] - cropsBeginIn[1]; - if (layout == NHWC || layout == NDHWC) { + if (srcDesc.hasLayoutType(LayoutType::nspc) && one_of(srcDesc.getShape().getRank(), 4, 5)) { oAdd.push_back(oAdd[1]); oAdd.erase(oAdd.begin() + 1); } @@ -221,12 +224,13 @@ void MKLDNNBatchToSpaceNode::batchToSpaceKernel() { } void MKLDNNBatchToSpaceNode::execute(mkldnn::stream strm) { - switch (getParentEdgeAt(0)->getDesc().getPrecision().size()) { + switch (getParentEdgeAt(0)->getMemory().GetDesc().getPrecision().size()) { case 1: batchToSpaceKernel::value_type>(); break; case 2: batchToSpaceKernel::value_type>(); break; case 4: batchToSpaceKernel::value_type>(); break; default: - IE_THROW() << "BatchToSpace layer does not support precision '" + std::string(getParentEdgeAt(0)->getDesc().getPrecision().name()) + "'"; + IE_THROW() << "BatchToSpace layer does not support precision '" << + std::string(getParentEdgeAt(0)->getMemory().GetDesc().getPrecision().name()) << "'"; } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.h index cab89df7dc6a85..353ea634511dc3 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.h @@ -24,6 +24,10 @@ class MKLDNNBatchToSpaceNode : public MKLDNNNode { static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; +private: + template + void batchToSpaceKernel(); + private: InferenceEngine::SizeVector inDims; InferenceEngine::SizeVector outDims; @@ -31,9 +35,6 @@ class MKLDNNBatchToSpaceNode : public MKLDNNNode { std::vector cropsBeginIn; std::string errorPrefix; - - template - void batchToSpaceKernel(); }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp index 517066d6f32806..183bc158ff2399 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp @@ -942,16 +942,16 @@ void MKLDNNBinaryConvolutionNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << "has incorrect number of output edges"; - if (getParentEdgeAt(0)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getParentEdgeAt(0)->getDims().ndims(); + if (getParentEdgeAt(0)->getShape().getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getParentEdgeAt(0)->getShape().getRank(); } - if (getParentEdgeAt(1)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getDims().ndims(); + if (getParentEdgeAt(1)->getShape().getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getShape().getRank(); } - if (getChildEdgeAt(0)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getDims().ndims(); + if (getChildEdgeAt(0)->getShape().getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getShape().getRank(); } } @@ -961,7 +961,7 @@ void MKLDNNBinaryConvolutionNode::initSupportedPrimitiveDescriptors() { setPostOps(attr); - InferenceEngine::LayerConfig config; + NodeConfig config; 
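// Note on the binary convolution hunk below: the hard-coded OIhw16o32i / OIhw8o32i
// weight format tags are replaced by an explicitly constructed BlockedMemoryDesc.
// A condensed sketch of the block-dims computation it performs; OC, IC, KH, KW, blk
// and isAvx512 are illustrative placeholders, div_up is the existing plugin helper:
//
//     size_t blk = isAvx512 ? 16 : 8;                         // outer-channel block size
//     std::vector<size_t> weiDims  = {OC, IC, KH, KW};        // plain weight dims
//     std::vector<size_t> blkDims  = {div_up(OC, blk), div_up(IC, 32), KH, KW, blk, 32};
//     std::vector<size_t> weiOrder = {0, 1, 2, 3, 0, 1};      // O and I each blocked once more
//
// i.e. the same OIhw{blk}o32i layout as before, expressed through the layout-agnostic
// descriptor API instead of a oneDNN format tag.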
config.dynBatchSupport = false; config.inConfs.resize(2); config.inConfs[0].constant = false; @@ -975,26 +975,38 @@ void MKLDNNBinaryConvolutionNode::initSupportedPrimitiveDescriptors() { if (implType != impl_desc_type::ref) { // optimzed implementation - auto outputDataType = withBinarization ? memory::data_type::bin : memory::data_type::f32; - auto weiFormat = implType == impl_desc_type::jit_avx512 ? memory::format_tag::OIhw16o32i : memory::format_tag::OIhw8o32i; // auto weiFormat = implType == impl_desc_type::jit_avx512 ? memory::format_tag::OhIw16o32i : memory::format_tag::OhIw8o32i; - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), memory::data_type::bin, memory::format_tag::nhwc); - config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(1)->getDims(), memory::data_type::bin, weiFormat); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nhwc); + //activation + auto nspcCreator = BlockedDescCreator::getCommonCreators().at(LayoutType::nspc); + config.inConfs[0].desc = nspcCreator->createUniqueDesc(Precision::BIN, getParentEdgeAt(0)->getShape().getStaticDims()); + + //weights + size_t weiFirstDimBlockSize = implType == impl_desc_type::jit_avx512 ? 16 : 8; //memory::format_tag::OIhw16o32i : memory::format_tag::OIhw8o32i; + auto weiDims = getParentEdgeAt(1)->getShape().getStaticDims(); + std::vector weiBlockDims = {div_up(weiDims[0], weiFirstDimBlockSize), div_up(weiDims[1], 32), + weiDims[2], weiDims[3], weiFirstDimBlockSize, 32}; + std::vector weiOrder = {0, 1, 2, 3, 0, 1}; + + config.inConfs[1].desc = MKLDNNPlugin::make_unique(Precision::BIN, weiDims, weiBlockDims, weiOrder); + + //result + auto outputPrecision = withBinarization ? Precision::BIN : Precision::FP32; + config.outConfs[0].desc = nspcCreator->createUniqueDesc(outputPrecision, getChildEdgeAt(0)->getShape().getStaticDims()); if (withSum) { config.inConfs.push_back(config.outConfs[0]); config.outConfs[0].inPlace = 2; } - supportedPrimitiveDescriptors.push_back({config, implType, memory::format_tag::nhwc}); + supportedPrimitiveDescriptors.push_back({config, implType}); } else { // reference implementation - auto weiFormat = group > 1 ? 
memory::format_tag::goihw : memory::format_tag::oihw; + auto weiCreator = BlockedDescCreator::getCommonCreators().at(LayoutType::ncsp); + auto nspcCreator = BlockedDescCreator::getCommonCreators().at(LayoutType::nspc); - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), memory::data_type::bin, memory::format_tag::nhwc); - config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(1)->getDims(), memory::data_type::bin, weiFormat); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), memory::data_type::f32, memory::format_tag::nhwc); - supportedPrimitiveDescriptors.push_back({config, implType, memory::format_tag::nhwc}); + config.inConfs[0].desc = nspcCreator->createUniqueDesc(Precision::BIN, getParentEdgeAt(0)->getShape().getStaticDims()); + config.inConfs[1].desc = weiCreator->createUniqueDesc(Precision::BIN, getParentEdgeAt(1)->getShape().getStaticDims()); + config.outConfs[0].desc = nspcCreator->createUniqueDesc(Precision::FP32, getChildEdgeAt(0)->getShape().getStaticDims()); + supportedPrimitiveDescriptors.push_back({config, implType}); } } @@ -1003,11 +1015,9 @@ void MKLDNNBinaryConvolutionNode::createPrimitive() { if (!selectedPrimitiveDescriptor) IE_THROW() << "CPU binary convolution with name '" << getName() << "' doesn't have primitive descriptors."; - auto config = selectedPrimitiveDescriptor->getConfig(); - - auto srcDims = config.inConfs[0].desc.getDims(); - auto weiDims = config.inConfs[1].desc.getDims(); - auto dstDims = config.outConfs[0].desc.getDims(); + auto srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); + auto weiDims = getParentEdgeAt(1)->getShape().getStaticDims(); + auto dstDims = getChildEdgeAt(0)->getShape().getStaticDims(); auto implType = selectedPrimitiveDescriptor->getImplementationType(); @@ -1061,9 +1071,12 @@ void MKLDNNBinaryConvolutionNode::createPrimitive() { jcp.nb_oc_blocking = nstl::min(implType == impl_desc_type::jit_sse42 ? 2 : implType == impl_desc_type::jit_avx2 ? 4 : 6, jcp.nb_oc); - jcp.dst_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(config.outConfs[0].desc.getPrecision()); - jcp.typesize_in = config.inConfs[0].desc.getPrecision() == Precision::BIN ? 1 : config.inConfs[0].desc.getPrecision().size(); - jcp.typesize_out = config.outConfs[0].desc.getPrecision() == Precision::BIN ? 1 : config.outConfs[0].desc.getPrecision().size(); + auto srcPrecision = getParentEdgeAt(0)->getMemory().GetDesc().getPrecision(); + auto dstPrecision = getChildEdgeAt(0)->getMemory().GetDesc().getPrecision(); + + jcp.dst_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(dstPrecision); + jcp.typesize_in = srcPrecision == Precision::BIN ? 1 : srcPrecision.size(); + jcp.typesize_out = dstPrecision == Precision::BIN ? 
1 : dstPrecision.size(); int r_pad_no_tail = nstl::max(0, (jcp.ow - jcp.ur_w_tail - 1) * jcp.stride_w + (jcp.kw - 1) * (jcp.dilate_w + 1) - (jcp.iw + jcp.l_pad - 1)); @@ -1093,7 +1106,11 @@ bool MKLDNNBinaryConvolutionNode::canFuse(const MKLDNNNodePtr& node) const { return false; if (node->getType() == FakeQuantize) { - return node->getAlgorithm() == FQBinarization; + bool ret = node->getAlgorithm() == FQBinarization; + for (size_t i = 1; i < node->getParentEdges().size(); i++) { + ret &= node->getParentEdgesAtPort(i)[0]->getParent()->getChildEdges().size() == 1; + } + return ret; } else { return canFuseSimpleOperation(node); } @@ -1277,30 +1294,28 @@ void MKLDNNBinaryConvolutionNode::execute(mkldnn::stream strm) { auto weights = reinterpret_cast(weightsMemory->GetPtr()); auto dst = reinterpret_cast(dstMemory->GetPtr()); - auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); - if (!selectedPrimitiveDescriptor) - IE_THROW() << "CPU binary convolution with name '" << getName() << "' doesn't have primitive descriptors."; - - auto config = selectedPrimitiveDescriptor->getConfig(); - - auto srcBlockDesc = config.inConfs[0].desc.getBlockingDesc(); - std::vector srcStride(srcBlockDesc.getStrides().size()); + auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + std::vector srcStride(srcDesc.getStrides().size()); for (int i = 0; i < srcStride.size(); i++) { - srcStride[srcBlockDesc.getOrder()[i]] = srcBlockDesc.getStrides()[i]; + srcStride[srcDesc.getOrder()[i]] = srcDesc.getStrides()[i]; } - auto weiBlockDesc = config.inConfs[1].desc.getBlockingDesc(); - std::vector weightsStride(config.inConfs[1].desc.getDims().size()); + auto weiDesc = getParentEdgeAt(1)->getMemory().GetDescWithType(); + std::vector weightsStride(weiDesc.getShape().getRank()); for (int i = 0; i < weightsStride.size(); i++) { - weightsStride[weiBlockDesc.getOrder()[i]] = weiBlockDesc.getStrides()[i]; + weightsStride[weiDesc.getOrder()[i]] = weiDesc.getStrides()[i]; } - auto dstBlockDesc = config.outConfs[0].desc.getBlockingDesc(); - std::vector dstStride(dstBlockDesc.getStrides().size()); + auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + std::vector dstStride(dstDesc.getStrides().size()); for (int i = 0; i < dstStride.size(); i++) { - dstStride[dstBlockDesc.getOrder()[i]] = dstBlockDesc.getStrides()[i]; + dstStride[dstDesc.getOrder()[i]] = dstDesc.getStrides()[i]; } + auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); + if (!selectedPrimitiveDescriptor) + IE_THROW() << "CPU binary convolution with name '" << getName() << "' doesn't have primitive descriptors."; + auto implType = selectedPrimitiveDescriptor->getImplementationType(); if (implType != impl_desc_type::ref) { executeOptimized(src, weights, dst, srcStride, weightsStride, dstStride); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.cpp index 3d9815d48c18a1..ef9c14ad0d4eef 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.cpp @@ -10,7 +10,7 @@ #include "utils/bfloat16.hpp" #include #include "mkldnn_broadcast_node.h" -#include +#include #include #include "common/cpu_memcpy.h" @@ -60,18 +60,20 @@ void MKLDNNBroadcastNode::initSupportedPrimitiveDescriptors() { Precision prec = getOriginalInputPrecisionAtPort(BROADCAST_INPUT); - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, prec}, - 
{TensorDescCreatorTypes::ncsp, Precision::I32}}, - {{TensorDescCreatorTypes::ncsp, prec}}, + addSupportedPrimDesc({{LayoutType::ncsp, prec}, + {LayoutType::ncsp, Precision::I32}}, + {{LayoutType::ncsp, prec}}, impl_desc_type::ref_any); } void MKLDNNBroadcastNode::execute(mkldnn::stream strm) { - size_t shape_size = (getParentEdgeAt(BROADCAST_SHAPE)->getDesc().getDims())[0]; - SizeVector dst_dims = getChildEdgeAt(0)->getDesc().getDims(); - SizeVector src_dims = getParentEdgeAt(BROADCAST_INPUT)->getDesc().getDims(); - SizeVector srcStrides = getParentEdgeAt(BROADCAST_INPUT)->getDesc().getBlockingDesc().getStrides(); - size_t data_size = getParentEdgeAt(BROADCAST_INPUT)->getDesc().getPrecision().size(); + size_t shape_size = (getParentEdgeAt(BROADCAST_SHAPE)->getMemory().GetDesc().getShape().getStaticDims())[0]; + SizeVector dst_dims = getChildEdgeAt(0)->getMemory().GetDesc().getShape().getStaticDims(); + SizeVector src_dims = getParentEdgeAt(BROADCAST_INPUT)->getMemory().GetDesc().getShape().getStaticDims(); + + auto srcDesc = getParentEdgeAt(BROADCAST_INPUT)->getMemory().GetDescWithType(); + SizeVector srcStrides = srcDesc.getStrides(); + size_t data_size = srcDesc.getPrecision().size(); if (!src_dims.size()) src_dims = SizeVector(1, 1); @@ -86,7 +88,8 @@ void MKLDNNBroadcastNode::execute(mkldnn::stream strm) { IE_THROW() << "Output tensor dimension is smaller then input tensor dimension"; } - InferenceEngine::SizeVector dstStrides = getChildEdgeAt(0)->getDesc().getBlockingDesc().getStrides(); + auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + InferenceEngine::SizeVector dstStrides = dstDesc.getStrides(); InferenceEngine::SizeVector src_aligned(dst_dims.size()); InferenceEngine::SizeVector srcStrides_aligned(dst_dims.size()); size_t prefix_size = dst_dims.size() - src_dims.size(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bucketize_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bucketize_node.cpp index c6c327a1993f3d..602f4954c3ba91 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bucketize_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bucketize_node.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" - #include #include #include @@ -82,9 +80,9 @@ void MKLDNNBucketizeNode::initSupportedPrimitiveDescriptors() { output_precision = Precision::I32; } - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, input_precision}, - {TensorDescCreatorTypes::ncsp, boundaries_precision}}, - {{TensorDescCreatorTypes::ncsp, output_precision}}, + addSupportedPrimDesc({{LayoutType::ncsp, input_precision}, + {LayoutType::ncsp, boundaries_precision}}, + {{LayoutType::ncsp, output_precision}}, impl_desc_type::ref_any); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp index 4990a658d61f1c..2907a035788a1f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp @@ -21,7 +21,8 @@ #include "mkldnn_eltwise_node.h" #include #include "common/cpu_memcpy.h" -#include "common/tensor_desc_creator.h" +#include "common/blocked_desc_creator.h" +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -61,19 +62,19 @@ MKLDNNConcatNode::MKLDNNConcatNode(const std::shared_ptr& op, cons } void MKLDNNConcatNode::getSupportedDescriptors() { - auto& firstParentDims = getParentEdgeAt(0)->getDims(); + auto& firstParentDims = 
getParentEdgeAt(0)->getShape().getStaticDims(); for (size_t i = 1; i < getParentEdges().size(); i++) { - auto& dims = getParentEdgeAt(i)->getDims(); + auto& dims = getParentEdgeAt(i)->getShape().getStaticDims(); bool incorrectDims = false; - for (size_t j = 0; j < firstParentDims.ndims(); j++) { + for (size_t j = 0; j < firstParentDims.size(); j++) { if (j == axis) continue; - if (dims.ndims() != firstParentDims.ndims() || firstParentDims[j] != dims[j]) { + if (dims.size() != firstParentDims.size() || firstParentDims[j] != dims[j]) { incorrectDims = true; break; } } - if (incorrectDims || firstParentDims.ndims() == 0) { + if (incorrectDims || firstParentDims.size() == 0) { IE_THROW() << "Incorrect input dimensions for concat node " << getName(); } } @@ -100,19 +101,19 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() { // Concat supports only equal precisions for inputs and output outputPrecision = inputPrecision; - auto& dstDims = getChildEdgeAt(0)->getDims(); - std::vector tdCreatorTypes = {TensorDescCreatorTypes::ncsp, TensorDescCreatorTypes::nspc}; + auto& dstDims = getChildEdgeAt(0)->getShape().getStaticDims(); + std::vector tdCreatorTypes = {LayoutType::ncsp, LayoutType::nspc}; // check if blocked layouts are available the channels size should be evenly divided by the block size to avoid slow oneDNN ref implementation - if (dstDims.ndims() > channelAxis) { - for (auto item : { std::make_pair(8lu, TensorDescCreatorTypes::nCsp8c), std::make_pair(16lu, TensorDescCreatorTypes::nCsp16c)}) { - SizeVector blkDims = dstDims.ToSizeVector(); + if (dstDims.size() > channelAxis) { + for (auto item : { std::make_pair(8lu, LayoutType::nCsp8c), std::make_pair(16lu, LayoutType::nCsp16c)}) { + SizeVector blkDims = dstDims; if (blkDims[channelAxis] % item.first) continue; bool blocked = true; for (size_t i = 0; i < getParentEdges().size(); i++) { - auto& srcDims = getParentEdgeAt(i)->getDims(); + auto& srcDims = getParentEdgeAt(i)->getShape().getStaticDims(); if (srcDims[channelAxis] % item.first) { blocked = false; break; @@ -126,28 +127,27 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() { std::vector pdIndexesToReuse; - auto& creatorsMap = TensorDescCreator::getCommonCreators(); - auto itrRange = TensorDescCreator::makeFilteredRange(creatorsMap, static_cast(dstDims.ndims()), tdCreatorTypes); + auto& creatorsMap = BlockedDescCreator::getCommonCreators(); + auto itrRange = BlockedDescCreator::makeFilteredRange(creatorsMap, static_cast(dstDims.size()), tdCreatorTypes); for (auto itr = itrRange.first; itr != itrRange.second; ++itr) { - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; config.outConfs.resize(1); config.outConfs[0].inPlace = -1; config.outConfs[0].constant = false; - config.outConfs[0].desc = itr->second->createDesc(outputPrecision, dstDims.ToSizeVector()); - memory::format_tag outFmt = MKLDNNMemoryDesc(config.outConfs[0].desc).getFormat(); + config.outConfs[0].desc = itr->second->createUniqueDesc(outputPrecision, dstDims); config.inConfs.resize(getParentEdges().size()); for (size_t i = 0; i < getParentEdges().size(); ++i) { config.inConfs[i].inPlace = -1; config.inConfs[i].constant = false; - config.inConfs[i].desc = MKLDNNExtensionUtils::getUninitTensorDesc( - itr->second->createDesc(inputPrecision, getParentEdgeAt(i)->getDims().ToSizeVector())); + config.inConfs[i].desc = MemoryDescUtils::applyUndefinedOffset( + itr->second->createDesc(inputPrecision, getParentEdgeAt(i)->getShape().getStaticDims())); } - 
supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, outFmt); - if (itr->first != TensorDescCreatorTypes::nspc) { + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref); + if (itr->first != LayoutType::nspc) { pdIndexesToReuse.push_back(supportedPrimitiveDescriptors.size() - 1); } } @@ -161,8 +161,8 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() { const auto& refConfig = supportedPrimitiveDescriptors[refPdIndex].getConfig(); auto config = refConfig; - const auto& order = refConfig.outConfs[0].desc.getBlockingDesc().getOrder(); - const auto& blkDims = refConfig.outConfs[0].desc.getBlockingDesc().getBlockDims(); + const auto &order = refConfig.outConfs[0].desc->as()->getOrder(); + const auto &blkDims = refConfig.outConfs[0].desc->as()->getBlockDims(); auto numOfDim = blkDims.size(); SizeVector offsets(numOfDim, 0lu); @@ -178,17 +178,16 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() { } } - config.outConfs[0].desc = TensorDesc(outputPrecision, dstDims.ToSizeVector(), {blkDims, order, offset, offsets, strides}); - memory::format_tag outFmt = MKLDNNMemoryDesc(config.outConfs[0].desc).getFormat(); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(outputPrecision, dstDims, blkDims, order, offset, offsets, strides); for (size_t i = 0; i < getParentEdges().size(); i++) { - const auto& srcBlkDims = refConfig.inConfs[i].desc.getBlockingDesc().getBlockDims(); - const auto& dims = refConfig.inConfs[i].desc.getDims(); + const auto& srcBlkDims = refConfig.inConfs[i].desc->as()->getBlockDims(); + const auto& dims = refConfig.inConfs[i].desc->getShape().getStaticDims(); config.inConfs[i].inPlace = 0; - config.inConfs[i].desc = TensorDesc(inputPrecision, dims, {srcBlkDims, order, offset, offsets, strides}); + config.inConfs[i].desc = MKLDNNPlugin::make_unique(inputPrecision, dims, srcBlkDims, order, offset, offsets, strides); } - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, outFmt); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } } @@ -210,7 +209,9 @@ void MKLDNNConcatNode::selectOptimalPrimitiveDescriptor() { canOptimize = false; } - std::map formatFrequency; + std::map formatFrequency; + std::vector supportedLayouts = {LayoutType::ncsp, LayoutType::nspc, LayoutType::nCsp8c, LayoutType::nCsp16c}; + for (size_t i = 0; i < getParentEdges().size(); i++) { auto parentEdge = getParentEdgeAt(i); auto parent = parentEdge->getParent(); @@ -224,10 +225,11 @@ void MKLDNNConcatNode::selectOptimalPrimitiveDescriptor() { if (outputIndex < 0 || outputIndex >= parent_config.outConfs.size()) IE_THROW() << "Cannot find index of output node"; const auto &port_desc = parent_config.outConfs[outputIndex].desc; - if (port_desc.getLayout() == Layout::ANY) - continue; - auto partial_format_desc = PartialBlkDesc::extractFrom(port_desc); - formatFrequency[partial_format_desc] += 1; + for (auto& item : supportedLayouts) { + if (port_desc->hasLayoutType(item)) { + formatFrequency[item] += 1; + } + } } for (size_t i = 0; i < getChildEdges().size(); i++) { auto childEdge = getChildEdgeAt(i); @@ -241,37 +243,47 @@ void MKLDNNConcatNode::selectOptimalPrimitiveDescriptor() { if (inputIndex < 0 || inputIndex >= config.inConfs.size()) IE_THROW() << "Cannot find index of output node"; const auto &port_desc = config.inConfs[inputIndex].desc; - if (port_desc.getLayout() == Layout::ANY) - continue; - auto partial_format_desc = PartialBlkDesc::extractFrom(port_desc); - formatFrequency[partial_format_desc] += 1; 
+ for (auto& item : supportedLayouts) { + if (port_desc->hasLayoutType(item)) { + formatFrequency[item] += 1; + } + } } size_t maxCount = 0; - auto outDims = getChildEdgeAt(0)->getDims().ToSizeVector(); - auto convertTo = PartialBlkDesc::makePlain(outDims); + auto outDims = getChildEdgeAt(0)->getShape().getStaticDims(); + LayoutType convertTo = LayoutType::ncsp; for (auto &it : formatFrequency) { if (it.second > maxCount) { maxCount = it.second; convertTo = it.first; } else if (it.second == maxCount) { - if (isInQuantizedGraph && it.first == PartialBlkDesc::makeTailC(outDims)) { + if (isInQuantizedGraph && it.first == LayoutType::nspc) { convertTo = it.first; - } else if (it.first == PartialBlkDesc::makeCBlocked(outDims, 8) || it.first == PartialBlkDesc::makeCBlocked(outDims, 16)) { + } else if (it.first == LayoutType::nCsp8c || it.first == LayoutType::nCsp16c) { convertTo = it.first; } } } - if (convertTo.isAutoExtendedWith(outDims)) - convertTo = PartialBlkDesc::makePlain(outDims); - for (size_t i = 0; i < getParentEdges().size(); i++) { - if (convertTo.isAutoExtendedWith(getParentEdgeAt(i)->getDims().ToSizeVector())) - convertTo = PartialBlkDesc::makePlain(outDims); + for (auto& item : { std::make_pair(8lu, LayoutType::nCsp8c), std::make_pair(16lu, LayoutType::nCsp16c) }) { + if (convertTo == item.second) { + if (outDims[1] % item.first != 0) { + convertTo = LayoutType::ncsp; + break; + } + for (size_t i = 0; i < getParentEdges().size(); i++) { + auto& inpDims = getParentEdgeAt(i)->getShape().getStaticDims(); + if (inpDims[1] % item.first != 0) { + convertTo = LayoutType::ncsp; + break; + } + } + } } for (size_t i = 0; i < supportedPrimitiveDescriptors.size(); ++i) { - if (PartialBlkDesc::extractFrom(supportedPrimitiveDescriptors[i].getConfig().outConfs[0].desc) == convertTo) { + if (supportedPrimitiveDescriptors[i].getConfig().outConfs[0].desc->hasLayoutType(convertTo)) { if (IMPLICATION(supportedPrimitiveDescriptors[i].getImplementationType() == impl_desc_type::unknown, canOptimize)) { canSelectPrimitive.push_back(i); } @@ -283,7 +295,7 @@ void MKLDNNConcatNode::selectOptimalPrimitiveDescriptor() { return; } - // if there are more then one PD with similar data layouts - select the optimized one + // if there are more than one PD with similar data layouts - select the optimized one for (auto indx : canSelectPrimitive) { if (supportedPrimitiveDescriptors[indx].getImplementationType() == impl_desc_type::unknown) { selectPrimitiveDescriptorByIndex(static_cast(indx)); @@ -321,7 +333,7 @@ void MKLDNNConcatNode::createPrimitive() { IE_THROW() << "Preferable primitive descriptor is not set."; //check if selected Tensor descriptor has nspc layout and concat axis is C - if (axis == channelAxis && getChildEdgeAt(0)->getMemory().GetDesc().isTailCFormat()) { + if (axis == channelAxis && getChildEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc)) { canOptimizeNspc = true; return; } @@ -337,8 +349,8 @@ void MKLDNNConcatNode::createPrimitive() { } auto desc = srcMemPtr->GetDescriptor(); - auto dims = getParentEdgeAt(i)->getDims(); - for (size_t j = 0; j < dims.ndims(); j++) { + auto& dims = getParentEdgeAt(i)->getShape().getStaticDims(); + for (size_t j = 0; j < dims.size(); j++) { desc.data.dims[j] = dims[j]; } @@ -346,8 +358,8 @@ void MKLDNNConcatNode::createPrimitive() { } auto desc = getChildEdgeAt(0)->getMemory().GetDescriptor(); - auto dims = getChildEdgeAt(0)->getDims(); - for (size_t i = 0; i < dims.ndims(); i++) { + auto& dims = getChildEdgeAt(0)->getShape().getStaticDims(); + 
for (size_t i = 0; i < dims.size(); i++) { desc.data.dims[i] = dims[i]; desc.data.padded_dims[i] = dims[i]; } @@ -370,79 +382,77 @@ void MKLDNNConcatNode::initOptimalPrimitiveDescriptor() { if (selected_pd == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; - if (!isOptimized()) { + if (!isOptimized()) { + MKLDNNNode::initOptimalPrimitiveDescriptor(); auto config = selected_pd->getConfig(); - if (!isInitConfig(config)) { + if (!isConfigDefined(config)) { for (size_t i = 0; i < config.inConfs.size(); i++) { - config.inConfs[i].desc = getConfiguredInputDesc(config, i); + config.inConfs[i].desc = getDefinedInputDesc(config, i); // Concat doesn't support different precision on inputs - config.inConfs[i].desc.setPrecision(inputPrecision); + config.inConfs[i].desc->setPrecision(inputPrecision); } for (size_t i = 0; i < config.outConfs.size(); i++) { - config.outConfs[i].desc = getConfiguredOutputDesc(config, i); - config.outConfs[i].desc.setPrecision(outputPrecision); + config.outConfs[i].desc = getDefinedOutputDesc(config, i); + config.outConfs[i].desc->setPrecision(outputPrecision); } initDescriptor(config); } - - return; } auto config = selected_pd->getConfig(); - if (isInitConfig(config)) + if (isConfigDefined(config)) return; for (size_t i = 0; i < config.outConfs.size(); i++) { - if (!isUninitTensorDesc(config.outConfs[i].desc)) + if (config.outConfs[i].desc->isDefined()) continue; int num = getChildEdgeAt(i)->getOutputNum(); if (num >= 0) { auto childConf = getChildEdgeAt(i)->getChild()->getSelectedPrimitiveDescriptor()->getConfig().inConfs[num]; - childConf.desc.setPrecision(config.outConfs[i].desc.getPrecision()); + childConf.desc->setPrecision(config.outConfs[i].desc->getPrecision()); if (getChildEdgeAt(i)->getChild()->getSelectedPrimitiveDescriptor()) { - if (isUninitTensorDesc(childConf.desc) && childConf.inPlace >= 0) + if (!childConf.desc->isDefined() && childConf.inPlace >= 0) getChildEdgeAt(i)->getChild()->initOptimalPrimitiveDescriptor(); - if (!isUninitTensorDesc(childConf.desc) && - MKLDNNExtensionUtils::initTensorsAreEqual(childConf.desc, config.outConfs[i].desc)) { - config.outConfs[i].desc = childConf.desc; + if (childConf.desc->isDefined() && childConf.desc->isCompatible(*config.outConfs[i].desc)) { + config.outConfs[i].desc = childConf.desc->clone(); continue; } } } - config.outConfs[i].desc = InferenceEngine::TensorDesc(config.outConfs[i].desc.getPrecision(), - config.outConfs[i].desc.getDims(), { - config.outConfs[i].desc.getBlockingDesc().getBlockDims(), - config.outConfs[i].desc.getBlockingDesc().getOrder() - }); + + // reset undefined offsets + config.outConfs[i].desc = MemoryDescUtils::resetOffset(config.outConfs[i].desc.get()); } + auto firstOutBlockingDesc = MemoryDescUtils::convertToBlockedDescriptor(*config.outConfs[0].desc); size_t offset = 0; for (size_t i = 0; i < config.inConfs.size(); i++) { - config.inConfs[i].desc = InferenceEngine::TensorDesc(config.inConfs[i].desc.getPrecision(), - config.inConfs[i].desc.getDims(), { - config.inConfs[i].desc.getBlockingDesc().getBlockDims(), - config.inConfs[i].desc.getBlockingDesc().getOrder(), - config.outConfs[0].desc.getBlockingDesc().getOffsetPadding() + offset, - config.outConfs[0].desc.getBlockingDesc().getOffsetPaddingToData(), - config.outConfs[0].desc.getBlockingDesc().getStrides() - }); + auto inpBlockingDesc = MemoryDescUtils::convertToBlockedDescriptor(*config.inConfs[i].desc); + config.inConfs[i].desc = MKLDNNPlugin::make_unique(inpBlockingDesc.getPrecision(), + 
inpBlockingDesc.getShape().getStaticDims(), + inpBlockingDesc.getBlockDims(), + inpBlockingDesc.getOrder(), + firstOutBlockingDesc.getOffsetPadding() + offset, + firstOutBlockingDesc.getOffsetPaddingToData(), + firstOutBlockingDesc.getStrides()); size_t axisSize = 1; - if (config.inConfs[0].desc.getLayout() == Layout::NHWC) { - // This is more general and works for any "direct" Layout (such as nchw or nhwc), but it doesn't work for nchw8c - size_t realAxis = inverseOrder(config.inConfs[0].desc.getBlockingDesc().getOrder(), axis); - for (size_t j = realAxis; j < config.inConfs[i].desc.getBlockingDesc().getBlockDims().size(); j++) { - size_t jj = config.inConfs[0].desc.getBlockingDesc().getOrder()[j]; - axisSize *= config.inConfs[i].desc.getBlockingDesc().getBlockDims()[jj]; + auto firstInpBlockingDesc = MemoryDescUtils::convertToBlockedDescriptor(*config.inConfs[0].desc); + if (firstInpBlockingDesc.hasLayoutType(LayoutType::nspc)) { + // This is more general and works for any "direct" Layout (such as nchw or nhwc), but it doesn't work for blocked + size_t realAxis = inverseOrder(firstInpBlockingDesc.getOrder(), axis); + for (size_t j = realAxis; j < inpBlockingDesc.getBlockDims().size(); j++) { + size_t jj = firstInpBlockingDesc.getOrder()[j]; + axisSize *= inpBlockingDesc.getBlockDims()[jj]; } } else { // This works for nchw and nchw8c/nchw16c - for (size_t j = axis; j < config.inConfs[i].desc.getBlockingDesc().getBlockDims().size(); j++) { - axisSize *= config.inConfs[i].desc.getBlockingDesc().getBlockDims()[j]; + for (size_t j = axis; j < inpBlockingDesc.getBlockDims().size(); j++) { + axisSize *= inpBlockingDesc.getBlockDims()[j]; } } offset += axisSize; @@ -470,7 +480,7 @@ void MKLDNNConcatNode::execute(mkldnn::stream strm) { } InferenceEngine::Precision MKLDNNConcatNode::getRuntimePrecision() const { - return MKLDNNExtensionUtils::getMaxPrecision(getInputPrecisions()); + return getMaxPrecision(getInputPrecisions()); } void MKLDNNConcatNode::execNspcSpecCase() { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp index 36de12e94d938c..4bff8260c7900a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp @@ -18,6 +18,7 @@ #include #include #include "common/cpu_convert.h" +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -68,7 +69,7 @@ MKLDNNConvolutionNode::MKLDNNConvolutionNode(const std::shared_ptr biasesDims = { groupOC }; for (int i = 0; i < convolutionOp->get_strides().size(); i++) { - stride.push_back(static_cast(convolutionOp->get_strides()[i])); + stride.push_back(convolutionOp->get_strides()[i]); } for (int i = 0; i < convolutionOp->get_dilations().size(); i++) { dilation.push_back(static_cast(convolutionOp->get_dilations()[i]) - 1); @@ -90,7 +91,7 @@ MKLDNNConvolutionNode::MKLDNNConvolutionNode(const std::shared_ptr biasesDims = {groupOC * groupNum}; for (int i = 0; i < groupConvolutionOp->get_strides().size(); i++) { - stride.push_back(static_cast(groupConvolutionOp->get_strides()[i])); + stride.push_back(groupConvolutionOp->get_strides()[i]); } for (int i = 0; i < groupConvolutionOp->get_dilations().size(); i++) { dilation.push_back(static_cast(groupConvolutionOp->get_dilations()[i]) - 1); @@ -142,17 +143,6 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { (withBiases ? 
(getParentEdgeAt(2)->getParent()->isConstant() && getParentEdgeAt(2)->getParent()->getType() == Input) : true); } - if (isWinograd()) { - internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc { - return MKLDNNMemoryDesc(primitive_desc_it.weights_desc(0)); - }); - internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc { - if (!withBiases) - return MKLDNNMemoryDesc(); - return MKLDNNMemoryDesc(primitive_desc_it.weights_desc(1)); - }); - } - withSum = false; int expectedInputEdgesNum = static_cast(getOriginalInputsNumber()); for (int i = 0; i < fusedWith.size(); i++) { @@ -169,36 +159,6 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { } } - // we can't convert winograd memory descriptor to TensorDesc, so we removed weight and bias edges and put data into internalBlobs - if (isWinograd()) { - std::vector edgesToRemove; - internalBlobs.push_back(createInternalBlob(weightDims, 1, isGrouped)); - edgesToRemove.push_back(getParentEdgeAt(1)); - - if (withBiases) { - internalBlobs.push_back(createInternalBlob(biasesDims, 2)); - edgesToRemove.push_back(getParentEdgeAt(2)); - } - - if (expectedInputEdgesNum - getOriginalInputsNumber() > 0) { - size_t reconnectPort = 1; - for (size_t startPort = 2 + (withBiases ? 1 : 0); startPort < expectedInputEdgesNum; startPort++) { - getParentEdgeAt(startPort)->setChildPort(reconnectPort); - reconnectPort++; - } - } - - for (size_t i = 0; i < edgesToRemove.size(); i++) { - removeEdge(edgesToRemove[i]); - } - - expectedInputEdgesNum -= getOriginalInputsNumber() - 1; - if (withBiases) { - inDims.erase(inDims.begin() + 2); - } - inDims.erase(inDims.begin() + 1); - } - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(0)); if (!inputZeroPoints.empty()) inputDataType = memory::data_type::u8; @@ -229,11 +189,12 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { } if (getParentEdges().size() != expectedInputEdgesNum) - IE_THROW() << "Incorrect number of input edges for layer " << getName(); + IE_THROW() << "Incorrect number of input edges for layer " << getName() << ", expected: " << expectedInputEdgesNum + << " actual: " << getParentEdges().size(); if (getChildEdges().empty()) IE_THROW() << "Incorrect number of output edges for layer " << getName(); - int ndims = getParentEdgesAtPort(0)[0]->getDims().ndims(); + int ndims = getParentEdgesAtPort(0)[0]->getShape().getRank(); MKLDNNDims weightsDims = MKLDNNDims(weightDims); withDWConv = isFusedWith(Convolution); @@ -241,10 +202,14 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { for (int i = 0; i < fusedWith.size(); i++) { auto *convolutionNode = dynamic_cast(fusedWith[i].get()); if (convolutionNode) { - dw_conv_ih = convolutionNode->inDims[0][convolutionNode->inDims[0].ndims() - 2]; - dw_conv_iw = convolutionNode->inDims[0][convolutionNode->inDims[0].ndims() - 1]; - dw_conv_oc = convolutionNode->outDims[0][1]; - const auto &dwWeightsDims = convolutionNode->inDims[1].ToSizeVector(); + auto& inActivationDims = convolutionNode->inputShapes[0].getStaticDims(); + dw_conv_ih = inActivationDims[convolutionNode->inputShapes[0].getRank() - 2]; + dw_conv_iw = inActivationDims[convolutionNode->inputShapes[0].getRank() - 1]; + + auto& outDims = convolutionNode->outputShapes[0].getStaticDims(); + dw_conv_oc = outDims[1]; + + const auto &dwWeightsDims = convolutionNode->inputShapes[1].getStaticDims(); 
dw_conv_kernel.push_back(dwWeightsDims[dwWeightsDims.size() - 1]); dw_conv_kernel.push_back(dwWeightsDims[dwWeightsDims.size() - 2]); dw_conv_strides = convolutionNode->getStride(); @@ -262,8 +227,8 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { for (int j = 0; j < paddingR.size(); j++) { int with_group = isGrouped ? 1 : 0; int krn = weightsDims[with_group + 2 + j]; - int src = getParentEdgeAt(0)->getDims()[2 + j]; - int dst = getChildEdgeAt(0)->getDims()[2 + j]; + int src = getParentEdgeAt(0)->getShape().getStaticDims()[2 + j]; + int dst = getChildEdgeAt(0)->getShape().getStaticDims()[2 + j]; krn = (krn - 1)*(dilation[j] + 1) + 1; int calc_dst = (src - krn + paddingL[j]) / stride[j] + 1; @@ -272,18 +237,18 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { } } - MKLDNNMemoryDesc in_candidate, out_candidate; + MemoryDescPtr in_candidate, out_candidate; if (canBeExecutedInInt8()) { // We have to extend convolution_x8s8s32x from oneDNN to support BF16 output data type if (outputDataType == memory::data_type::bf16) outputDataType = memory::data_type::f32; if (eltwisePrecision == Precision::BF16) eltwisePrecision = Precision::FP32; - in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, ndims == 5 ? memory::format_tag::ndhwc - : memory::format_tag::nhwc); - out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, ndims == 5 ? memory::format_tag::ndhwc - : memory::format_tag::nhwc); - createDescriptor({in_candidate}, {out_candidate}); + in_candidate = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), + inputDataType, ndims == 5 ? memory::format_tag::ndhwc : memory::format_tag::nhwc); + out_candidate = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), + outputDataType, ndims == 5 ? memory::format_tag::ndhwc : memory::format_tag::nhwc); + createDescriptor({ in_candidate.get() }, { out_candidate.get() }); } else { inputDataType = (getOriginalInputPrecisionAtPort(0) == Precision::BF16 && !(isDepthWise() && ndims == 5)) ? memory::data_type::bf16 : memory::data_type::f32; @@ -320,33 +285,36 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { memory::format_tag nCsp16c = ndims == 4 ? memory::format_tag::nChw16c : memory::format_tag::nCdhw16c; memory::format_tag nCsp8c = ndims == 4 ? 
memory::format_tag::nChw8c : memory::format_tag::nCdhw8c; + auto inputDims = getParentEdgeAt(0)->getShape().getStaticDims(); + auto outputDims = getChildEdgeAt(0)->getShape().getStaticDims(); + if (IC == 1 && groupOC == 1) { - in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, ncsp); - out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, ncsp); - createDescriptor({in_candidate}, {out_candidate}); + in_candidate = MKLDNNPlugin::make_unique(inputDims, inputDataType, ncsp); + out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, ncsp); + createDescriptor({ in_candidate.get() }, { out_candidate.get() }); } else if (IC < 4) { - in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, ncsp); - out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, nCsp16c); - createDescriptor({in_candidate}, {out_candidate}); - out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, nCsp8c); - createDescriptor({in_candidate}, {out_candidate}); + in_candidate = MKLDNNPlugin::make_unique(inputDims, inputDataType, ncsp); + out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, nCsp16c); + createDescriptor({ in_candidate.get() }, { out_candidate.get() }); + out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, nCsp8c); + createDescriptor({ in_candidate.get() }, { out_candidate.get() }); } else { - in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, nCsp16c); - out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, nCsp16c); - createDescriptor({in_candidate}, {out_candidate}); - in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, nCsp8c); - out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, nCsp8c); - createDescriptor({in_candidate}, {out_candidate}); + in_candidate = MKLDNNPlugin::make_unique(inputDims, inputDataType, nCsp16c); + out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, nCsp16c); + createDescriptor({ in_candidate.get() }, { out_candidate.get() }); + in_candidate = MKLDNNPlugin::make_unique(inputDims, inputDataType, nCsp8c); + out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, nCsp8c); + createDescriptor({ in_candidate.get() }, { out_candidate.get() }); } - in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, ncsp); - out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, ncsp); - createDescriptor({in_candidate}, {out_candidate}); + in_candidate = MKLDNNPlugin::make_unique(inputDims, inputDataType, ncsp); + out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, ncsp); + createDescriptor({ in_candidate.get() }, { out_candidate.get() }); if (inputDataType != memory::data_type::bf16 && isNspcAvailable()) { - in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, nspc); - out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, nspc); - createDescriptor({in_candidate}, {out_candidate}); + in_candidate = MKLDNNPlugin::make_unique(inputDims, inputDataType, nspc); + out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, nspc); + createDescriptor({ in_candidate.get() }, { out_candidate.get() }); } } } @@ -421,15 +389,18 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() { continue; auto itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr); while 
(static_cast(itpd)) { - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; for (size_t i = 0; i < descInputNumbers(desc); i++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = getSrcMemDesc(itpd, i); - if (!isGrouped) - dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc); + auto srcDesc = getSrcMemDesc(itpd, i); + if (isGrouped || srcDesc->getFormatKind() != dnnl_format_kind_t::dnnl_blocked) + dataConfig.desc = std::move(srcDesc); + else + dataConfig.desc = MemoryDescUtils::applyUndefinedOffset(*srcDesc); + config.inConfs.push_back(dataConfig); } @@ -437,34 +408,38 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() { auto weightsPrc = MKLDNNExtensionUtils::IEPrecisionToDataType(dw_conv_in_dt == mkldnn_u8 ? Precision::I8 : Precision::FP32); auto biasPrc = memory::data_type::f32; - MKLDNNDims dwWeightsDims({dw_conv_oc, (ptrdiff_t)1, (ptrdiff_t)1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]}); - MKLDNNDims dwBiasesDims({dw_conv_oc}); + std::vector dwWeightsDims({dw_conv_oc, 1, 1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]}); + std::vector dwBiasesDims({dw_conv_oc}); - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = MKLDNNMemoryDesc(dwWeightsDims, weightsPrc, memory::format_tag::Goihw8g); + dataConfig.desc = MKLDNNPlugin::make_unique(dwWeightsDims, weightsPrc, memory::format_tag::Goihw8g); config.inConfs.push_back(dataConfig); - dataConfig.desc = MKLDNNMemoryDesc(dwBiasesDims, biasPrc, memory::format_tag::x); + dataConfig.desc = MKLDNNPlugin::make_unique(dwBiasesDims, biasPrc, memory::format_tag::x); config.inConfs.push_back(dataConfig); } for (size_t i = 0; i < descOutputNumbers(desc); i++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; if (withSum) { dataConfig.inPlace = getParentEdges().size() - 1; } dataConfig.constant = false; - dataConfig.desc = getDstMemDesc(itpd, i); - if (!isGrouped) - dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc); + + auto dstDesc = getDstMemDesc(itpd, i); + if (isGrouped || dstDesc->getFormatKind() != dnnl_format_kind_t::dnnl_blocked) + dataConfig.desc = std::move(dstDesc); + else + dataConfig.desc = MemoryDescUtils::applyUndefinedOffset(*dstDesc); + config.outConfs.push_back(dataConfig); if (withSum) { dataConfig.inPlace = -1; - dataConfig.desc.setPrecision(eltwisePrecision); + dataConfig.desc->setPrecision(eltwisePrecision); config.inConfs.push_back(dataConfig); } } @@ -505,9 +480,10 @@ bool MKLDNNConvolutionNode::created() const { return getType() == Convolution; } -void MKLDNNConvolutionNode::createDescriptor(const std::vector &inputDesc, - const std::vector &outputDesc) { - TensorDesc inDesc = inputDesc[0], outDesc = outputDesc[0]; +void MKLDNNConvolutionNode::createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) { + auto inDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(*inputDesc[0]); + auto outDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(*outputDesc[0]); memory::data_type wdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision()); memory::data_type bdt = memory::data_type::f32; @@ -516,12 +492,9 @@ void MKLDNNConvolutionNode::createDescriptor(const std::vector algorithms; @@ -533,17 +506,17 @@ void MKLDNNConvolutionNode::createDescriptor(const std::vector conv_desc; if (withBiases) { - 
MKLDNNMemoryDesc bias_candidate{blocked_biasesDims, bdt, memory::format_tag::any}; + mkldnn::memory::desc bias_candidate(blocked_biasesDims, bdt, memory::format_tag::any); conv_desc.reset(new convolution_forward::desc(prop_kind::forward_scoring, alg, - in_candidate, wgh_candidate, bias_candidate, out_candidate, + inDesc, wgh_candidate, bias_candidate, outDesc, mkldnn::memory::dims(stride.begin(), stride.end()), mkldnn::memory::dims(dilation.begin(), dilation.end()), mkldnn::memory::dims(paddingL.begin(), paddingL.end()), mkldnn::memory::dims(paddingR.begin(), paddingR.end()))); } else { conv_desc.reset(new convolution_forward::desc(prop_kind::forward_scoring, alg, - in_candidate, wgh_candidate, out_candidate, + inDesc, wgh_candidate, outDesc, mkldnn::memory::dims(stride.begin(), stride.end()), mkldnn::memory::dims(dilation.begin(), dilation.end()), mkldnn::memory::dims(paddingL.begin(), paddingL.end()), @@ -569,7 +542,7 @@ void MKLDNNConvolutionNode::addZeroPoints(mkldnn::primitive_attr& attr) const { } } -void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& config) { +void MKLDNNConvolutionNode::initDescriptor(const NodeConfig& config) { auto* selectedPD = getSelectedPrimitiveDescriptor(); if (!selectedPD) { return; @@ -589,14 +562,14 @@ void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& c // } if (isStridedBlobsSupported) { - createDescriptor({config.inConfs[0].desc}, {config.outConfs[0].desc}); + createDescriptor({config.inConfs[0].desc.get()}, {config.outConfs[0].desc.get()}); } mkldnn::primitive_attr attr; addZeroPoints(attr); setPostOps(attr); - InferenceEngine::LayerConfig rightConfig = selectedPD->getConfig(); + auto rightConfig = selectedPD->getConfig(); size_t selected_count = 0; bool containJitImpl = false; @@ -607,10 +580,10 @@ void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& c continue; auto itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr); while (static_cast(itpd)) { - InferenceEngine::LayerConfig cfg; + NodeConfig cfg; cfg.dynBatchSupport = true; for (size_t j = 0; j < descInputNumbers(desc); j++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; dataConfig.desc = getSrcMemDesc(itpd, j); @@ -621,27 +594,27 @@ void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& c auto weightsPrc = MKLDNNExtensionUtils::IEPrecisionToDataType(dw_conv_in_dt == mkldnn_u8 ? 
Precision::I8 : Precision::FP32); auto biasPrc = memory::data_type::f32; - MKLDNNDims dwWeightsDims({dw_conv_oc, (ptrdiff_t)1, (ptrdiff_t)1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]}); - MKLDNNDims dwBiasesDims({dw_conv_oc}); + std::vector dwWeightsDims({dw_conv_oc, 1, 1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]}); + std::vector dwBiasesDims({dw_conv_oc}); - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = MKLDNNMemoryDesc(dwWeightsDims, weightsPrc, memory::format_tag::Goihw8g); + dataConfig.desc = MKLDNNPlugin::make_unique(dwWeightsDims, weightsPrc, memory::format_tag::Goihw8g); cfg.inConfs.push_back(dataConfig); - dataConfig.desc = MKLDNNMemoryDesc(dwBiasesDims, biasPrc, memory::format_tag::x); + dataConfig.desc = MKLDNNPlugin::make_unique(dwBiasesDims, biasPrc, memory::format_tag::x); cfg.inConfs.push_back(dataConfig); } for (size_t j = 0; j < descOutputNumbers(desc); j++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; dataConfig.desc = getDstMemDesc(itpd, j); if (withSum) { auto eltwiseConfig = dataConfig; - eltwiseConfig.desc.setPrecision(eltwisePrecision); + eltwiseConfig.desc->setPrecision(eltwisePrecision); cfg.inConfs.push_back(eltwiseConfig); dataConfig.inPlace = getParentEdges().size() - 1; } @@ -668,7 +641,7 @@ void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& c break; } } - selectedPD->getConfig() = rightConfig; + selectedPD->setConfig(rightConfig); } void MKLDNNConvolutionNode::filterSupportedPrimitiveDescriptors() { @@ -729,44 +702,17 @@ bool MKLDNNConvolutionNode::isPossibleToSkipInitConfig(MKLDNNDescriptor &desc) c auto dstMemDesc = MKLDNNMemoryDesc {convDesc->data.dst_desc}; auto srcDataType = convDesc->data.src_desc.data_type; auto dstDataType = convDesc->data.dst_desc.data_type; - bool isPlanarFloatConv = srcMemDesc.isPlainFormat() - && dstMemDesc.isPlainFormat() + bool isPlanarFloatConv = srcMemDesc.hasLayoutType(LayoutType::ncsp) + && dstMemDesc.hasLayoutType(LayoutType::ncsp) && srcDataType == memory::data_type::f32 && dstDataType == memory::data_type::f32; return !isPossibleJitPlanar && isPlanarFloatConv; } -MKLDNNMemoryDesc MKLDNNConvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { - InferenceEngine::TensorDesc desc = idx > 0 ? 
MKLDNNMemoryDesc(primitive_desc_it.weights_desc(idx - 1)) - : MKLDNNMemoryDesc(primitive_desc_it.src_desc(idx)); - - if (desc.getLayout() == InferenceEngine::Layout::ANY) { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getParentEdgeAt(idx)->getDims().ToSizeVector(), - desc.getLayout())); - } else { - if (getParentEdgeAt(idx)->getDims().ToSizeVector().size() != *std::max_element(desc.getBlockingDesc().getOrder().begin(), - desc.getBlockingDesc().getOrder().end()) + 1) { - auto old_dims = getParentEdgeAt(idx)->getDims().ToSizeVector(); - auto new_dims = InferenceEngine::SizeVector({groupNum, div_up(old_dims[0], groupNum)}); - for (int i = 1; i < old_dims.size(); i++) { - new_dims.push_back(old_dims[i]); - } - - auto td = InferenceEngine::TensorDesc(desc.getPrecision(), - new_dims, - desc.getBlockingDesc()); - if (new_dims.size() == desc.getBlockingDesc().getBlockDims().size()) { - td.setLayout(BLOCKED); - } - return MKLDNNMemoryDesc(td); - } else { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getParentEdgeAt(idx)->getDims().ToSizeVector(), - desc.getBlockingDesc())); - } - } +std::unique_ptr MKLDNNConvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + auto desc = idx > 0 ? MKLDNNMemoryDesc(primitive_desc_it.weights_desc(idx - 1)) : MKLDNNMemoryDesc(primitive_desc_it.src_desc(idx)); + return MKLDNNPlugin::make_unique(std::move(desc)); } bool MKLDNNConvolutionNode::canFuse(const MKLDNNNodePtr& node) const { @@ -774,11 +720,11 @@ bool MKLDNNConvolutionNode::canFuse(const MKLDNNNodePtr& node) const { } const mkldnn::memory& MKLDNNConvolutionNode::getWeights() const { - return isWinograd() ? internalBlobMemory[0]->GetPrimitive() : getParentEdgeAt(1)->getMemory().GetPrimitive(); + return getParentEdgeAt(1)->getMemory().GetPrimitive(); } const mkldnn::memory& MKLDNNConvolutionNode::getBias() const { - return isWinograd() ? 
internalBlobMemory[1]->GetPrimitive() : getParentEdgeAt(2)->getMemory().GetPrimitive(); + return getParentEdgeAt(2)->getMemory().GetPrimitive(); } InferenceEngine::Precision MKLDNNConvolutionNode::getRuntimePrecision() const { @@ -792,7 +738,7 @@ InferenceEngine::Precision MKLDNNConvolutionNode::getRuntimePrecision() const { } } - return MKLDNNExtensionUtils::getMaxPrecision(inputPrecisions); + return getMaxPrecision(inputPrecisions); } bool MKLDNNConvolutionNode::isNspcAvailable() const { @@ -809,8 +755,8 @@ bool MKLDNNConvolutionNode::isNspcAvailable() const { } // A bunch of heuristics are designed to cut off not optimal nspc convolution applications - auto inpDims = getParentEdgeAt(0)->getDims().ToSizeVector(); - auto outDims = getChildEdgeAt(0)->getDims().ToSizeVector(); + auto inpDims = getParentEdgeAt(0)->getShape().getStaticDims(); + auto outDims = getChildEdgeAt(0)->getShape().getStaticDims(); auto ndims = inpDims.size(); if (isDepthWise()) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h index 79b4aef029e3f0..7fa5ed80bb8040 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h @@ -20,9 +20,9 @@ class MKLDNNConvolutionNode : public MKLDNNNode { static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; - void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) override; - void initDescriptor(const InferenceEngine::LayerConfig& config) override; + void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) override; + void initDescriptor(const NodeConfig& config) override; void createPrimitive() override; void selectOptimalPrimitiveDescriptor() override; void initSupportedPrimitiveDescriptors() override; @@ -32,13 +32,13 @@ class MKLDNNConvolutionNode : public MKLDNNNode { return false; } InferenceEngine::Precision getRuntimePrecision() const override; - MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; + std::unique_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; const mkldnn::memory& getWeights() const; const mkldnn::memory& getBias() const; size_t descInputNumbers(MKLDNNDescriptor desc) override { - return static_cast(isWinograd() ? 
1 : getOriginalInputsNumber()); + return getOriginalInputsNumber(); } bool canBeExecutedInInt8() const; @@ -49,7 +49,7 @@ class MKLDNNConvolutionNode : public MKLDNNNode { std::vector outputCompensation; const InferenceEngine::SizeVector &getWeightDims() { return weightDims; } - const std::vector &getStride() { return stride; } + const std::vector &getStride() { return stride; } const std::vector &getDilation() { return dilation; } const std::vector &getPaddingL() { return paddingL; } const std::vector &getPaddingR() { return paddingR; } @@ -77,18 +77,18 @@ class MKLDNNConvolutionNode : public MKLDNNNode { bool withDWConv; bool isGrouped; bool isPrimitivesPriorityDefined = false; - std::vector stride; + std::vector stride; std::vector dilation; std::vector paddingL; std::vector paddingR; InferenceEngine::SizeVector weightDims; InferenceEngine::SizeVector biasesDims; - ptrdiff_t dw_conv_oc; - ptrdiff_t dw_conv_ih; - ptrdiff_t dw_conv_iw; - std::vector dw_conv_kernel; - std::vector dw_conv_strides; + size_t dw_conv_oc; + size_t dw_conv_ih; + size_t dw_conv_iw; + std::vector dw_conv_kernel; + std::vector dw_conv_strides; mkldnn::memory::data_type dw_conv_in_dt; size_t groupNum; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp index 678922f3a4b5b1..00a403c8bb6782 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp @@ -5,7 +5,7 @@ #include #include "mkldnn_convert_node.h" #include "common/cpu_convert.h" -#include "common/tensor_desc_creator.h" +#include "common/blocked_desc_creator.h" #include using namespace mkldnn; @@ -38,9 +38,9 @@ MKLDNNConvertNode::MKLDNNConvertNode(const std::shared_ptr& op, co MKLDNNConvertNode::MKLDNNConvertNode(const InferenceEngine::SizeVector &dims, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc, const std::string &nodeName, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode("Convert", nodeName, eng, cache) { - inDims.emplace_back(dims); + inputShapes.emplace_back(dims); addOriginalInputPrecision(inPrc); - outDims.emplace_back(dims); + outputShapes.emplace_back(dims); addOriginalOutputPrecision(outPrc); errorPrefix = "Convert node with name '" + getName() + "'"; @@ -49,10 +49,10 @@ MKLDNNConvertNode::MKLDNNConvertNode(const InferenceEngine::SizeVector &dims, co void MKLDNNConvertNode::getSupportedDescriptors() { // if tensor descriptors are set via setDescs method we need to update the inDims/outDims data // from correspond tensor descriptors. 
- if (outDims.empty() && output && output->getLayout() != InferenceEngine::Layout::ANY) - outDims.push_back(MKLDNNDims(output->getDims())); - if (inDims.empty() && input && input->getLayout() != InferenceEngine::Layout::ANY) - inDims.push_back(MKLDNNDims(input->getDims())); + if (outputShapes.empty()) + outputShapes.push_back(output->getShape()); + if (inputShapes.empty()) + inputShapes.push_back(input->getShape()); if (getParentEdges().size() != 1) IE_THROW() << errorPrefix << " has incorrect number of input edges"; if (getChildEdges().empty()) @@ -63,39 +63,40 @@ void MKLDNNConvertNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - LayerConfig config; - DataConfig dataIn; - DataConfig dataConfigOut; + NodeConfig config; + PortConfig dataIn; + PortConfig dataConfigOut; config.dynBatchSupport = false; // if input and output pointers are not null, then the inp/output tensor descriptors were set using setDescs method, so // they should be used as the actual descriptors. - if (input && input->getLayout() != InferenceEngine::Layout::ANY && output && output->getLayout() != InferenceEngine::Layout::ANY) { - dataIn.desc = *input; + if (input && output) { + dataIn.desc = input->clone(); config.inConfs.push_back(dataIn); - const auto& blockingDesc = config.inConfs[0].desc.getBlockingDesc(); // inp/out layouts must be the same - dataConfigOut.desc = TensorDesc(output->getPrecision(), input->getDims(), blockingDesc); + // inp/out layouts must be the same + dataConfigOut.desc = config.inConfs[0].desc->clone(); + dataConfigOut.desc->setPrecision(output->getPrecision()); config.outConfs.push_back(dataConfigOut); - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, MKLDNNMemoryDesc(config.outConfs.front().desc).getFormat()); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } else if (getOriginalInputsNumber() == 1 && getOriginalOutputsNumber() == 1) { - const SizeVector& insDims = getParentEdgeAt(0)->getDims().ToSizeVector(); + const Shape& insShape = getParentEdgeAt(0)->getShape(); auto insPrecision = getOriginalInputPrecisionAtPort(0); - const SizeVector& outputDims = getChildEdgeAt(0)->getDims().ToSizeVector(); + const Shape& outputShape = getChildEdgeAt(0)->getShape(); auto outPrecision = getOriginalOutputPrecisionAtPort(0); config.inConfs.push_back(dataIn); config.outConfs.push_back(dataConfigOut); - auto creators = TensorDescCreator::getCommonCreators(); - auto range = TensorDescCreator::makeFilteredRange(creators, insDims.size()); + auto creators = BlockedDescCreator::getCommonCreators(); + auto range = BlockedDescCreator::makeFilteredRange(creators, insShape.getRank()); for (auto itr = range.first; itr != range.second; ++itr) { - config.inConfs[0].desc = itr->second->createDesc(insPrecision, insDims); - config.outConfs[0].desc = itr->second->createDesc(outPrecision, outputDims); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(itr->second->createDesc(insPrecision, insShape.getDims())); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(itr->second->createDesc(outPrecision, outputShape.getDims())); - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, MKLDNNMemoryDesc(config.outConfs.front().desc).getFormat()); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } } else { IE_THROW() << errorPrefix << " has incorrect number of input/output edges"; @@ -121,7 +122,7 @@ void MKLDNNConvertNode::execute(mkldnn::stream strm) { void* srcPtr = 
parentMem.GetPtr(); void* dstPtr = childMem.GetPtr(); - cpu_convert(srcPtr, dstPtr, getParentEdgeAt(0)->getDesc().getPrecision(), getChildEdgeAt(0)->getDesc().getPrecision(), parentMem.GetElementsCount()); + cpu_convert(srcPtr, dstPtr, parentMem.GetDesc().getPrecision(), childMem.GetDesc().getPrecision(), parentMem.GetElementsCount()); } bool MKLDNNConvertNode::created() const { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h index ca43bb3db5c554..38707385f7a8ba 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h @@ -30,19 +30,19 @@ class MKLDNNConvertNode : public MKLDNNNode { // In that case the Convert node is instantiated with default CNNLayer and inp/out tensor descriptors are set via this method. // This is useful if the Convert node is added to the graph as an auxiliary operation at the MKLDNNGraph // initialization stage. - void setDescs(const InferenceEngine::TensorDesc& input, const InferenceEngine::TensorDesc& output) { - this->input.reset(new InferenceEngine::TensorDesc(input)); - this->output.reset(new InferenceEngine::TensorDesc(output)); + void setDescs(const MemoryDesc& input, const MemoryDesc& output) { + this->input = input.clone(); + this->output = output.clone(); } - std::shared_ptr getInput() const { return input; } - std::shared_ptr getOutput() const { return output; } + const MemoryDesc& getInput() const { return *input; } + const MemoryDesc& getOutput() const { return *output; } static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: - std::shared_ptr input; - std::shared_ptr output; + std::unique_ptr input; + std::unique_ptr output; std::string errorPrefix; }; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_node.cpp index 34c9aaf191e697..2bf514fffda72e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_node.cpp @@ -1,7 +1,6 @@ // Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" #include #include @@ -59,9 +58,9 @@ void MKLDNNCTCGreedyDecoderNode::initSupportedPrimitiveDescriptors() { if (seqLenPrecision != Precision::FP32 && seqLenPrecision != Precision::BF16) IE_THROW() << errorPrefix << "has unsupported 'sequence_length' input precision: " << seqLenPrecision; - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } @@ -70,9 +69,9 @@ void MKLDNNCTCGreedyDecoderNode::execute(mkldnn::stream strm) { const float* sequenceMask = reinterpret_cast(getParentEdgeAt(SEQUENCE_LENGTH_INDEX)->getMemoryPtr()->GetPtr()); float* outputSequences = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); - const size_t T = getParentEdgeAt(DATA_INDEX)->getDims()[0]; - const size_t B = getParentEdgeAt(DATA_INDEX)->getDims()[1]; - const int C = getParentEdgeAt(DATA_INDEX)->getDims()[2]; + const size_t T = getParentEdgeAt(DATA_INDEX)->getShape().getStaticDims()[0]; + const size_t B = 
getParentEdgeAt(DATA_INDEX)->getShape().getStaticDims()[1]; + const int C = getParentEdgeAt(DATA_INDEX)->getShape().getStaticDims()[2]; const size_t BC = B * C; const size_t CB1 = C * (B - 1); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_seq_len_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_seq_len_node.cpp index 0eccdbfa1b5b07..acd273a9ad9b82 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_seq_len_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_seq_len_node.cpp @@ -1,7 +1,6 @@ // Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" #include #include @@ -58,15 +57,15 @@ void MKLDNNCTCGreedyDecoderSeqLenNode::initSupportedPrimitiveDescriptors() { if (seqLenPrecision != Precision::I32 && seqLenPrecision != Precision::I64) IE_THROW() << errorPrefix << "has unsupported 'sequence_length' input precision: " << seqLenPrecision; - std::vector inDataConf; + std::vector inDataConf; inDataConf.reserve(getOriginalInputsNumber()); - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::FP32); + inDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); for (int i = 1; i < getOriginalInputsNumber(); ++i) - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::I32); + inDataConf.emplace_back(LayoutType::ncsp, Precision::I32); addSupportedPrimDesc(inDataConf, - {{TensorDescCreatorTypes::ncsp, Precision::I32}, - {TensorDescCreatorTypes::ncsp, Precision::I32}}, + {{LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}, impl_desc_type::ref_any); } @@ -76,13 +75,13 @@ void MKLDNNCTCGreedyDecoderSeqLenNode::execute(mkldnn::stream strm) { int* decodedClasses = reinterpret_cast(getChildEdgesAtPort(DECODED_CLASSES_INDEX)[0]->getMemoryPtr()->GetPtr()); int* decodedClassesLength = reinterpret_cast(getChildEdgesAtPort(DECODED_CLASSES_LENGTH_INDEX)[0]->getMemoryPtr()->GetPtr()); - const size_t B = getParentEdgeAt(DATA_INDEX)->getDims()[0];; - const size_t T = getParentEdgeAt(DATA_INDEX)->getDims()[1];; - const int C = getParentEdgeAt(DATA_INDEX)->getDims()[2];; + const size_t B = getParentEdgeAt(DATA_INDEX)->getShape().getStaticDims()[0];; + const size_t T = getParentEdgeAt(DATA_INDEX)->getShape().getStaticDims()[1];; + const int C = getParentEdgeAt(DATA_INDEX)->getShape().getStaticDims()[2];; const size_t TC = T * C; int blankIndex = C - 1; - if (inDims.size() > BLANK_INDEX) + if (inputShapes.size() > BLANK_INDEX) blankIndex = (reinterpret_cast(getParentEdgeAt(BLANK_INDEX)->getMemoryPtr()->GetPtr()))[0]; size_t workAmount = 0; @@ -91,7 +90,7 @@ void MKLDNNCTCGreedyDecoderSeqLenNode::execute(mkldnn::stream strm) { std::string errorMsg = errorPrefix + ". 
Sequence length " + std::to_string(sequenceLengths[b]) + " cannot be greater than according decoded classes dimension size " - + std::to_string(getChildEdgesAtPort(DECODED_CLASSES_INDEX)[0]->getDims()[1]); + + std::to_string(getChildEdgesAtPort(DECODED_CLASSES_INDEX)[0]->getShape().getStaticDims()[1]); IE_THROW() << errorMsg; } workAmount += sequenceLengths[b]; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_loss_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_loss_node.cpp index b355dcaefcd4b0..47da05014863bf 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_loss_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_loss_node.cpp @@ -46,14 +46,14 @@ void MKLDNNCTCLossNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - std::vector inDataConf; + std::vector inDataConf; inDataConf.reserve(getOriginalInputsNumber()); - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::FP32); + inDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); for (int i = 1; i < getOriginalInputsNumber(); ++i) - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::I32); + inDataConf.emplace_back(LayoutType::ncsp, Precision::I32); addSupportedPrimDesc(inDataConf, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } @@ -66,12 +66,12 @@ void MKLDNNCTCLossNode::execute(mkldnn::stream strm) { const int* labelsLength = reinterpret_cast(getParentEdgeAt(3)->getMemoryPtr()->GetPtr()); float* dstData = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); - const size_t batchNum = getParentEdgeAt(0)->getDims()[0]; - const size_t maxTime = getParentEdgeAt(0)->getDims()[1]; - const size_t classesNum = getParentEdgeAt(0)->getDims()[2]; + const size_t batchNum = getParentEdgeAt(0)->getShape().getStaticDims()[0]; + const size_t maxTime = getParentEdgeAt(0)->getShape().getStaticDims()[1]; + const size_t classesNum = getParentEdgeAt(0)->getShape().getStaticDims()[2]; int blankIndex = classesNum - 1; - if (inDims.size() > 4) { + if (inputShapes.size() > 4) { blankIndex = reinterpret_cast(getParentEdgeAt(4)->getMemoryPtr()->GetPtr())[0]; } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.cpp index 3f6c8f903482ce..5124409cf8b9d8 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // #include "list.hpp" -#include "base.hpp" #include #include @@ -78,20 +77,20 @@ void MKLDNNCumSumNode::initSupportedPrimitiveDescriptors() { IE_THROW() << errorPrefix << " has unsupported 'axis' input precision: " << axisTensorPrec.name(); } - std::vector inDataConf; + std::vector inDataConf; inDataConf.reserve(getOriginalInputsNumber()); - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, dataPrecision); + inDataConf.emplace_back(LayoutType::ncsp, dataPrecision); for (int i = 1; i < getOriginalInputsNumber(); ++i) - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::I32); + inDataConf.emplace_back(LayoutType::ncsp, Precision::I32); addSupportedPrimDesc(inDataConf, - {{TensorDescCreatorTypes::ncsp, dataPrecision}}, + {{LayoutType::ncsp, dataPrecision}}, impl_desc_type::ref_any); } void MKLDNNCumSumNode::execute(mkldnn::stream strm) { - if (inDims.size() == numOfInputs) - axis = 
getAxis(getParentEdgeAt(AXIS)->getBlob(), getParentEdgeAt(CUM_SUM_DATA)->getBlob()); + if (inputShapes.size() == numOfInputs) + axis = getAxis(getParentEdgeAt(AXIS)->getMemory(), getParentEdgeAt(CUM_SUM_DATA)->getMemory()); switch (dataPrecision) { case Precision::I8 : { @@ -134,7 +133,7 @@ template void MKLDNNCumSumNode::exec() { const auto *input = reinterpret_cast(getParentEdgeAt(CUM_SUM_DATA)->getMemoryPtr()->GetPtr()); auto *output = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); - const std::vector strides = getParentEdgeAt(CUM_SUM_DATA)->getDesc().getBlockingDesc().getStrides(); + const std::vector strides = getParentEdgeAt(CUM_SUM_DATA)->getMemory().GetDescWithType().getStrides(); if (reverse) { if (exclusive) { @@ -248,18 +247,18 @@ inline size_t MKLDNNCumSumNode::getStartOffset(const std::vector &forSta return startOffset; } -size_t MKLDNNCumSumNode::getAxis(const Blob::CPtr& _axis, const Blob::CPtr& _data) const { - const auto& axisPrecision = _axis->getTensorDesc().getPrecision(); - const int64_t dataShapeSize = static_cast(_data->getTensorDesc().getDims().size()); +size_t MKLDNNCumSumNode::getAxis(const MKLDNNMemory& _axis, const MKLDNNMemory& _data) const { + const auto& axisPrecision = _axis.GetDesc().getPrecision(); + const int64_t dataShapeSize = static_cast(_data.GetDesc().getShape().getRank()); int64_t axisValueFromBlob; switch (axisPrecision) { case Precision::I32 : { - const auto *axisPtr = _axis->cbuffer().as(); + const auto *axisPtr = reinterpret_cast(_axis.GetPtr()); axisValueFromBlob = static_cast(axisPtr[0]); break; } case Precision::I64 : { - const auto *axisPtr = _axis->cbuffer().as(); + const auto *axisPtr = reinterpret_cast(_axis.GetPtr()); axisValueFromBlob = axisPtr[0]; break; } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.h index 794d6bc73f1722..bbe180f5544910 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.h @@ -34,7 +34,7 @@ class MKLDNNCumSumNode : public MKLDNNNode { inline size_t getStartOffset(const std::vector &forStartOffset, const std::vector& strides) const; - size_t getAxis(const InferenceEngine::Blob::CPtr& _axis, const InferenceEngine::Blob::CPtr& _data) const; + size_t getAxis(const MKLDNNMemory& _axis, const MKLDNNMemory& _data) const; enum { CUM_SUM_DATA, AXIS, numOfInputs }; bool exclusive; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp index ba44392e66fda1..62c173c72f5a29 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp @@ -16,6 +16,7 @@ #include #include #include +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -108,10 +109,10 @@ InferenceEngine::Blob::Ptr MKLDNNDeconvolutionNode::createWeiBlobAsIO(InferenceE // So we disconnect constant node containing weights from the graph and then don't use it. 
if (getParentEdges().size() == 3) { removeEdge(getParentEdgeAt(2)); - inDims.erase(inDims.begin() + 2); + inputShapes.erase(inputShapes.begin() + 2); } removeEdge(getParentEdgeAt(1)); - inDims.erase(inDims.begin() + 1); + inputShapes.erase(inputShapes.begin() + 1); InferenceEngine::SizeVector dimsForBlockedDesc{dims}; std::swap(dimsForBlockedDesc[withGroups + 0], dimsForBlockedDesc[withGroups + 1]); @@ -151,7 +152,7 @@ bool MKLDNNDeconvolutionNode::canBeExecutedInInt8() const { if (!withGroups && stride.back() > 3) return false; if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_common)) { - auto inDims = getChildEdgeAt(0)->getDims().ToSizeVector(); + auto inDims = getChildEdgeAt(0)->getShape().getStaticDims(); // heuristicConst = 2^26 // heuristicParam = IC^2 * SP auto heuristicConst = 67108864; @@ -230,8 +231,8 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() { for (int i = 0; i < paddingR.size(); i++) { int with_group = getAlgorithm() == DeconvolutionGrouped ? 1 : 0; int krn = weightDims[with_group + 2 + i]; - int src = getChildEdgeAt(0)->getDims()[2 + i]; - int dst = getParentEdgeAt(0)->getDims()[2 + i]; + int src = getChildEdgeAt(0)->getShape().getStaticDims()[2 + i]; + int dst = getParentEdgeAt(0)->getShape().getStaticDims()[2 + i]; krn = (krn - 1)*(dilation[i] + 1) + 1; int calc_dst = (src - krn + paddingL[i]) / stride[i] + 1; @@ -242,15 +243,15 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() { // WA: if int8 deconvolution is supported, we create internal weights blob in IO format std::swap(weightDims[withGroups + 0], weightDims[withGroups + 1]); internalBlobs.push_back(createWeiBlobAsIO(weightDims)); - auto format = getParentEdgeAt(0)->getDims().ndims() == 5 ? dnnl::memory::format_tag::ndhwc : dnnl::memory::format_tag::nhwc; - MKLDNNMemoryDesc in_candidate(getParentEdgeAt(0)->getDims(), inputDataType, format); - MKLDNNMemoryDesc out_candidate(getChildEdgeAt(0)->getDims(), outputDataType, format); - createDescriptor({in_candidate}, {out_candidate}); + auto format = getParentEdgeAt(0)->getShape().getRank() == 5 ? 
dnnl::memory::format_tag::ndhwc : dnnl::memory::format_tag::nhwc; + MKLDNNMemoryDesc in_candidate(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, format); + MKLDNNMemoryDesc out_candidate(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, format); + createDescriptor({&in_candidate}, {&out_candidate}); } else { - for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getDims())) { - MKLDNNMemoryDesc in_candidate(getParentEdgeAt(0)->getDims(), inputDataType, format); - MKLDNNMemoryDesc out_candidate(getChildEdgeAt(0)->getDims(), outputDataType, format); - createDescriptor({in_candidate}, {out_candidate}); + for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getShape())) { + MKLDNNMemoryDesc in_candidate(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, format); + MKLDNNMemoryDesc out_candidate(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, format); + createDescriptor({&in_candidate}, {&out_candidate}); } } setPostOps(attr); @@ -346,10 +347,10 @@ void MKLDNNDeconvolutionNode::createPrimitive() { } } -void MKLDNNDeconvolutionNode::createDescriptor(const std::vector &inputDesc, - const std::vector &outputDesc) { - MKLDNNMemoryDesc in_candidate(inputDesc[0]); - MKLDNNMemoryDesc out_candidate(outputDesc[0]); +void MKLDNNDeconvolutionNode::createDescriptor(const std::vector &inputDesc, + const std::vector &outputDesc) { + const MKLDNNMemoryDesc in_candidate = MemoryDescUtils::convertToMKLDNNMemoryDesc(*inputDesc[0]); + const MKLDNNMemoryDesc out_candidate = MemoryDescUtils::convertToMKLDNNMemoryDesc(*outputDesc[0]); // grouping and autoblicking is not compatible if ((withGroups && !isDW) && (in_candidate.blocksExtended() || out_candidate.blocksExtended())) @@ -361,7 +362,7 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector deconv_desc; deconv_desc.reset(new deconvolution_forward::desc(prop_kind::forward_inference, mkldnn::algorithm::deconvolution_direct, in_candidate, wgh_candidate, out_candidate, @@ -370,7 +371,7 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector conv_desc; conv_desc.reset(new convolution_forward::desc(prop_kind::forward_inference, alg, @@ -399,52 +400,21 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector MKLDNNDeconvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { if (idx == 2) { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(getOriginalInputPrecisionAtPort(2), - getParentEdgeAt(2)->getDims().ToSizeVector(), - TensorDesc::getLayoutByDims(getParentEdgeAt(2)->getDims().ToSizeVector()))); + auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(2)); + return MKLDNNPlugin::make_unique(getParentEdgeAt(2)->getShape().getStaticDims(), dataType, + MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(2)->getShape().getRank())); } - InferenceEngine::TensorDesc desc = idx > 0 ? MKLDNNMemoryDesc(primitive_desc_it.weights_desc(idx - 1)) + MKLDNNMemoryDesc desc = idx > 0 ? MKLDNNMemoryDesc(primitive_desc_it.weights_desc(idx - 1)) : isInt8 ? 
MKLDNNMemoryDesc(primitive_desc_it.src_desc(idx)) : MKLDNNMemoryDesc(primitive_desc_it.diff_dst_desc(idx)); - - if (desc.getLayout() == InferenceEngine::Layout::ANY) { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getParentEdgeAt(idx)->getDims().ToSizeVector(), - desc.getLayout())); - } else { - if (getParentEdgeAt(idx)->getDims().ToSizeVector().size() != *std::max_element(desc.getBlockingDesc().getOrder().begin(), - desc.getBlockingDesc().getOrder().end()) + 1) { - auto old_dims = getParentEdgeAt(idx)->getDims().ToSizeVector(); - auto new_dims = weightDims; - - auto td = InferenceEngine::TensorDesc(desc.getPrecision(), - new_dims, - desc.getBlockingDesc()); - if (new_dims.size() == desc.getBlockingDesc().getBlockDims().size()) { - td.setLayout(BLOCKED); - } - return MKLDNNMemoryDesc(td); - } else { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getParentEdgeAt(idx)->getDims().ToSizeVector(), - desc.getBlockingDesc())); - } - } + return MKLDNNPlugin::make_unique(std::move(desc)); } -MKLDNNMemoryDesc MKLDNNDeconvolutionNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { - InferenceEngine::TensorDesc desc = isInt8 ? MKLDNNMemoryDesc(primitive_desc_it.dst_desc(idx)) - : MKLDNNMemoryDesc(primitive_desc_it.diff_src_desc(idx)); - if (desc.getLayout() == InferenceEngine::Layout::ANY) - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getChildEdgeAt(idx)->getDims().ToSizeVector(), - desc.getLayout())); - else - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getChildEdgeAt(idx)->getDims().ToSizeVector(), - desc.getBlockingDesc())); +std::unique_ptr MKLDNNDeconvolutionNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + return isInt8 ? 
MKLDNNPlugin::make_unique(primitive_desc_it.dst_desc(idx)) : + MKLDNNPlugin::make_unique(primitive_desc_it.diff_src_desc(idx)); } InferenceEngine::Precision MKLDNNDeconvolutionNode::getRuntimePrecision() const { @@ -458,7 +428,7 @@ InferenceEngine::Precision MKLDNNDeconvolutionNode::getRuntimePrecision() const } } - return MKLDNNExtensionUtils::getMaxPrecision(inputPrecisions); + return getMaxPrecision(inputPrecisions); } REG_MKLDNN_PRIM_FOR(MKLDNNDeconvolutionNode, Deconvolution); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h index f3f47c83a9f761..15ee71d6af74ac 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h @@ -17,8 +17,8 @@ class MKLDNNDeconvolutionNode : public MKLDNNNode { MKLDNNDeconvolutionNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); void getSupportedDescriptors() override; - void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) override; + void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) override; void createPrimitive() override; void filterSupportedPrimitiveDescriptors() override; void filterSupportedDescriptors(); @@ -31,8 +31,8 @@ class MKLDNNDeconvolutionNode : public MKLDNNNode { return static_cast(getParentEdges().size()); } - MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; - MKLDNNMemoryDesc getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; + std::unique_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; + std::unique_ptr getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; InferenceEngine::Precision getRuntimePrecision() const override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp index a2fae182a52f70..4151e03673eba7 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp @@ -45,6 +45,8 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ mov(reg_kernel, ptr[this->param1 + GET_OFF(filt)]); if (jcp_.with_bias) mov(reg_bias, ptr[this->param1 + GET_OFF(bias)]); + if (jcp_.with_modulation) + mov(reg_modulation, ptr[this->param1 + GET_OFF(modulation)]); mov(reg_output, ptr[this->param1 + GET_OFF(dst)]); mov(reg_input_buffer, ptr[this->param1 + GET_OFF(buf)]); mov(reg_oh_pos, ptr[param1 + GET_OFF(oh_pos)]); @@ -71,24 +73,26 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ reg64_t reg_def_off = r9; reg64_t reg_kernel = r10; reg64_t reg_bias = r11; + reg64_t reg_modulation = rcx; reg64_t reg_output = r12; reg64_t reg_oh_pos = r13; reg64_t aux_reg_bias = rsi; reg64_t reg_ow_pos = rdx; reg64_t aux_reg_output = reg_ow_pos; reg64_t reg_dg_iter = reg_output; + reg64_t reg_gr_iter = rsp; reg64_t aux_reg_input = rax; reg64_t aux2_reg_input = reg_kernel; reg64_t reg_ic_iter = rbx; reg64_t reg_oc_work = reg_ic_iter; reg64_t aux_reg_def_off = reg_bias; - reg64_t reg_input_buffer = abi_not_param1; reg64_t aux_reg_input_buffer = r14; reg32_t reg_tmp_32 = r15d; reg64_t reg_tmp_64 = r15; reg64_t reg_table = rbp; + reg64_t reg_input_buffer = aux_reg_input; reg64_t 
aux_reg_kernel = reg_table; - reg64_t aux2_reg_kernel = r15; + reg64_t aux2_reg_kernel = reg_tmp_64; reg64_t aux2_reg_input_buffer = aux_reg_bias; reg64_t aux3_reg_input_buffer = reg_input; @@ -119,6 +123,9 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ add(reg_input, jcp_.ur_w * jcp_.stride_w * jcp_.ic * jcp_.typesize_in); add(reg_def_off, jcp_.ur_w * jcp_.typesize_off); + if (jcp_.with_modulation) { + add(reg_modulation, jcp_.ur_w * jcp_.typesize_modulation); + } add(reg_output, jcp_.ur_w * jcp_.oc * jcp_.typesize_out); add(reg_ow_pos, jcp_.ur_w); @@ -217,7 +224,8 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ Label exit; push(reg_oc_work); - push(aux_reg_bias); + if (jcp_.with_bias) + push(aux_reg_bias); mov(aux2_reg_kernel, aux_reg_kernel); mov(aux2_reg_input_buffer, reg_input_buffer); @@ -243,8 +251,8 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ apply_filter(ow_step, oc_blocks_step, oc_step, jcp_.ic % jcp_.ic_block); } } - - pop(aux_reg_bias); + if (jcp_.with_bias) + pop(aux_reg_bias); pop(reg_oc_work); } @@ -256,6 +264,9 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ mov(aux_reg_def_off, reg_def_off); mov(aux_reg_input, reg_input); mov(aux2_reg_input_buffer, aux_reg_input_buffer); + if (jcp_.with_modulation) { + push(reg_modulation); + } xor_(reg_dg_iter, reg_dg_iter); const int ic_per_def_group = jcp_.ic / jcp_.dg; @@ -271,10 +282,14 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ Label ic_loop_tail; Label ic_loop_zeros; Label loop_end; - Label h_sec_opt; - Label h_sec_opt_exit; - Label w_sec_opt; - Label w_sec_opt_exit; + Label v1_condition_end_main; + Label v2_condition_end_main; + Label v3_condition_end_main; + Label v4_condition_end_main; + Label v1_condition_end_tail; + Label v2_condition_end_tail; + Label v3_condition_end_tail; + Label v4_condition_end_tail; mov(aux2_reg_input, aux_reg_input); add(aux2_reg_input, (ow * jcp_.stride_w * jcp_.ic) * jcp_.typesize_in); @@ -287,45 +302,48 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ Xmm xmm_map_h = Xmm(2); Xmm xmm_ih_in = Xmm(4); Xmm xmm_ih_im = Xmm(1); - Xmm xmm_cur_height = xmm_ih_im; Xmm xmm_h_low = xmm_ih_in; - Xmm xmm_h_high = xmm_cur_height; + Xmm xmm_h_high = xmm_ih_im; Xmm xmm_lh = xmm_map_h; Xmm xmm_hh = Xmm(3); Xmm xmm_map_w = Xmm(6); Xmm xmm_iw_in = Xmm(8); Xmm xmm_iw_im = Xmm(5); - Xmm xmm_cur_width = xmm_iw_im; Xmm xmm_w_low = xmm_iw_in; - Xmm xmm_w_high = xmm_cur_width; + Xmm xmm_w_high = xmm_iw_im; Xmm xmm_lw = xmm_map_w; Xmm xmm_hw = Xmm(7); - Xmm xmm_v1_off = Xmm(9); - Xmm xmm_v2_off = Xmm(10); - Xmm xmm_v3_off = Xmm(11); - Xmm xmm_v4_off = Xmm(12); + Xmm xmm_v1_off = xmm_lh; + Xmm xmm_v2_off = xmm_hh; + Xmm xmm_v3_off = xmm_lw; + Xmm xmm_v4_off = xmm_hw; + + Xmm xmm_cur_height = Xmm(13); + Xmm xmm_cur_width = Xmm(14); - Xmm xmm_w1 = xmm_h_low; - Xmm xmm_w2 = xmm_h_high; - Xmm xmm_w3 = xmm_w_low; - Xmm xmm_w4 = xmm_w_high; + Xmm xmm_w1 = Xmm(9); + Xmm xmm_w2 = Xmm(10); + Xmm xmm_w3 = Xmm(11); + Xmm xmm_w4 = Xmm(12); - Xmm xmm_v1 = xmm_lh; - Xmm xmm_v2 = xmm_hh; - Xmm xmm_v3 = xmm_lw; - Xmm xmm_v4 = xmm_hw; + Xmm xmm_v1 = xmm_v1_off; + Xmm xmm_v2 = xmm_v2_off; + Xmm xmm_v3 = xmm_v3_off; + Xmm xmm_v4 = xmm_v4_off; - Vmm vmm_w1 = Vmm(xmm_h_low.getIdx()); - Vmm vmm_w2 = Vmm(xmm_h_high.getIdx()); - Vmm vmm_w3 = Vmm(xmm_w_low.getIdx()); - Vmm vmm_w4 = Vmm(xmm_w_high.getIdx()); + Vmm vmm_w1 = 
Vmm(xmm_w1.getIdx()); + Vmm vmm_w2 = Vmm(xmm_w2.getIdx()); + Vmm vmm_w3 = Vmm(xmm_w3.getIdx()); + Vmm vmm_w4 = Vmm(xmm_w4.getIdx()); - Vmm vmm_v1 = Vmm(xmm_lh.getIdx()); - Vmm vmm_v2 = Vmm(xmm_hh.getIdx()); - Vmm vmm_v3 = Vmm(xmm_lw.getIdx()); - Vmm vmm_v4 = Vmm(xmm_hw.getIdx()); + Vmm vmm_v1 = Vmm(xmm_v1_off.getIdx()); + Vmm vmm_v2 = Vmm(xmm_v2_off.getIdx()); + Vmm vmm_v3 = Vmm(xmm_v3_off.getIdx()); + Vmm vmm_v4 = Vmm(xmm_v4_off.getIdx()); + + // condition check size_t def_off_h = ((2 * (kh * jcp_.kw + kw) + 0) * jcp_.oh * jcp_.ow) + ow; mov(reg_tmp_32, ptr[aux_reg_def_off + def_off_h * jcp_.typesize_off]); @@ -356,6 +374,7 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ size_t def_off_w = ((2 * (kh * jcp_.kw + kw) + 1) * jcp_.oh * jcp_.ow) + ow; mov(reg_tmp_32, ptr[aux_reg_def_off + def_off_w * jcp_.typesize_off]); + movq(xmm_tmp, reg_tmp_64); mov(reg_tmp_32, float2int(static_cast((kw * (jcp_.dilate_w + 1))))); movq(xmm_map_w, reg_tmp_64); @@ -380,83 +399,53 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ cmp(reg_tmp_32, 0); je(init_with_zeros, T_NEAR); + // interpolation calculation movd(xmm_cur_height, table_val(3)); psubd(xmm_cur_height, xmm_ih_in); roundps(xmm_h_low, xmm_map_h, 1); cvtps2dq(xmm_h_low, xmm_h_low); + maxss(xmm_h_low, table_val(0)); - movups(xmm_tmp, xmm_cur_height); - pcmpgtd(xmm_tmp, xmm_h_low); - - movq(reg_tmp_64, xmm_tmp); - cmp(reg_tmp_32, 0); - jne(h_sec_opt, T_NEAR); - - movups(xmm_h_low, xmm_cur_height); - movups(xmm_h_high, xmm_h_low); - jmp(h_sec_opt_exit); - - L(h_sec_opt); - - movups(xmm_h_high, xmm_h_low); - paddd(xmm_h_high, table_val(5)); - - L(h_sec_opt_exit); - - cvtdq2ps(xmm_tmp, xmm_h_low); - subss(xmm_lh, xmm_tmp); - movss(xmm_hh, table_val(5)); - cvtdq2ps(xmm_hh, xmm_hh); - subss(xmm_hh, xmm_lh); - + if (jcp_.with_bi_pad) { + movdqu(xmm_h_high, xmm_h_low); + paddd(xmm_h_high, table_val(5)); + } else { + roundps(xmm_h_high, xmm_map_h, 2); + cvtps2dq(xmm_h_high, xmm_h_high); + minss(xmm_h_high, xmm_cur_height); + } movd(xmm_cur_width, table_val(4)); psubd(xmm_cur_width, xmm_iw_in); roundps(xmm_w_low, xmm_map_w, 1); cvtps2dq(xmm_w_low, xmm_w_low); + maxss(xmm_w_low, table_val(0)); - movups(xmm_tmp, xmm_cur_width); - pcmpgtd(xmm_tmp, xmm_w_low); - - movq(reg_tmp_64, xmm_tmp); - cmp(reg_tmp_32, 0); - jne(w_sec_opt, T_NEAR); - - movups(xmm_w_low, xmm_cur_width); - movups(xmm_w_high, xmm_w_low); - jmp(w_sec_opt_exit); - - L(w_sec_opt); - - movups(xmm_w_high, xmm_w_low); - paddd(xmm_w_high, table_val(5)); - - L(w_sec_opt_exit); + if (jcp_.with_bi_pad) { + movdqu(xmm_w_high, xmm_w_low); + paddd(xmm_w_high, table_val(5)); + } else { + roundps(xmm_w_high, xmm_map_w, 2); + cvtps2dq(xmm_w_high, xmm_w_high); + minss(xmm_w_high, xmm_cur_width); + } cvtdq2ps(xmm_tmp, xmm_w_low); subss(xmm_lw, xmm_tmp); + movss(xmm_hw, table_val(5)); cvtdq2ps(xmm_hw, xmm_hw); subss(xmm_hw, xmm_lw); + cvtdq2ps(xmm_tmp, xmm_h_low); + subss(xmm_lh, xmm_tmp); - movups(xmm_v1_off, table_val(2)); - cvtps2dq(xmm_v1_off, xmm_v1_off); - movups(xmm_v3_off, xmm_v1_off); - - pmulld(xmm_v1_off, xmm_h_low); - movups(xmm_v2_off, xmm_v1_off); - paddd(xmm_v1_off, xmm_w_low); - paddd(xmm_v2_off, xmm_w_high); - - pmulld(xmm_v3_off, xmm_h_high); - movups(xmm_v4_off, xmm_v3_off); - paddd(xmm_v3_off, xmm_w_low); - paddd(xmm_v4_off, xmm_w_high); - + movss(xmm_hh, table_val(5)); + cvtdq2ps(xmm_hh, xmm_hh); + subss(xmm_hh, xmm_lh); movss(xmm_w1, xmm_hh); mulss(xmm_w1, xmm_hw); @@ -487,29 +476,97 @@ struct jit_uni_def_conv_kernel_f32 : 
public jit_uni_def_conv_kernel, public jit_ movq(reg_tmp_64, xmm_v1_off); imul(reg_tmp_64, reg_tmp_64, jcp_.ic * jcp_.typesize_in); add(reg_tmp_64, aux2_reg_input); + // w_low >= 0 + movups(xmm_tmp, xmm_w_low); + pcmpgtd(xmm_tmp, table_val(0)); + movq(reg_tmp_64, xmm_tmp); + cmp(reg_tmp_32, 0); + // jne(v1_condition_end_main, T_NEAR); + + // h_low >= 0 + movups(xmm_tmp, xmm_h_low); + pcmpgtd(xmm_tmp, table_val(0)); + movq(reg_tmp_64, xmm_tmp); + cmp(reg_tmp_32, 0); + // jne(v1_condition_end_main, T_NEAR); + uni_vmovups(vmm_v1, ptr[reg_tmp_64]); uni_vmulps(vmm_v1, vmm_v1, vmm_w1); + L(v1_condition_end_main); + pmovsxdq(xmm_v2_off, xmm_v2_off); movq(reg_tmp_64, xmm_v2_off); imul(reg_tmp_64, reg_tmp_64, jcp_.ic * jcp_.typesize_in); add(reg_tmp_64, aux2_reg_input); + + // w_high <= cur_width - 1 + movups(xmm_tmp, xmm_w_high); + psubd(xmm_tmp, table_val(0)); + pcmpgtd(xmm_tmp, table_val(4)); + movq(reg_tmp_64, xmm_tmp); + cmp(reg_tmp_32, 0); + // je(v2_condition_end_main, T_NEAR); + + // h_low >= 0 + movups(xmm_tmp, xmm_h_low); + pcmpgtd(xmm_tmp, table_val(0)); + movq(reg_tmp_64, xmm_tmp); + cmp(reg_tmp_32, 0); + // jne(v2_condition_end_main, T_NEAR); + uni_vmovups(vmm_v2, ptr[reg_tmp_64]); uni_vmulps(vmm_v2, vmm_v2, vmm_w2); + L(v2_condition_end_main); pmovsxdq(xmm_v3_off, xmm_v3_off); movq(reg_tmp_64, xmm_v3_off); imul(reg_tmp_64, reg_tmp_64, jcp_.ic * jcp_.typesize_in); add(reg_tmp_64, aux2_reg_input); + + // w_low >= 0 + movups(xmm_tmp, xmm_w_low); + pcmpgtd(xmm_tmp, table_val(0)); + movq(reg_tmp_64, xmm_tmp); + cmp(reg_tmp_32, 0); + // jne(v3_condition_end_main, T_NEAR); + + // h_high <= cur_height + movups(xmm_tmp, xmm_h_high); + psubd(xmm_tmp, table_val(0)); + pcmpgtd(xmm_tmp, table_val(3)); + movq(reg_tmp_64, xmm_tmp); + cmp(reg_tmp_32, 0); + // je(v3_condition_end_main, T_NEAR); + uni_vmovups(vmm_v3, ptr[reg_tmp_64]); uni_vmulps(vmm_v3, vmm_v3, vmm_w3); + L(v3_condition_end_main); pmovsxdq(xmm_v4_off, xmm_v4_off); movq(reg_tmp_64, xmm_v4_off); imul(reg_tmp_64, reg_tmp_64, jcp_.ic * jcp_.typesize_in); add(reg_tmp_64, aux2_reg_input); + + // w_high <= cur_width + movups(xmm_tmp, xmm_w_high); + psubd(xmm_tmp, table_val(0)); + pcmpgtd(xmm_tmp, table_val(3)); + movq(reg_tmp_64, xmm_tmp); + cmp(reg_tmp_32, 0); + // je(v4_condition_end_main, T_NEAR); + + // h_high <= cur_height + movups(xmm_tmp, xmm_h_high); + psubd(xmm_tmp, table_val(0)); + pcmpgtd(xmm_tmp, table_val(4)); + movq(reg_tmp_64, xmm_tmp); + cmp(reg_tmp_32, 0); + // je(v4_condition_end_main, T_NEAR); + uni_vmovups(vmm_v4, ptr[reg_tmp_64]); uni_vmulps(vmm_v4, vmm_v4, vmm_w4); + L(v4_condition_end_main); uni_vaddps(vmm_v1, vmm_v1, vmm_v2); uni_vaddps(vmm_v1, vmm_v1, vmm_v3); @@ -529,37 +586,123 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ size_t input_buffer_off = (size_t) kh * jcp_.kw * jcp_.ic + kw * jcp_.ic; + movss(xmm_v1, table_val(0)); + // w_low >= 0 + movq(reg_tmp_64, xmm_w_low); + cmp(reg_tmp_32, 0); + jl(v1_condition_end_tail, T_NEAR); + + // h_low >= 0 + movq(reg_tmp_64, xmm_h_low); + cmp(reg_tmp_32, 0); + jl(v1_condition_end_tail, T_NEAR); + + movups(xmm_v1_off, table_val(2)); + cvtps2dq(xmm_v1_off, xmm_v1_off); + pmulld(xmm_v1_off, xmm_h_low); + paddd(xmm_v1_off, xmm_w_low); pmovsxdq(xmm_v1_off, xmm_v1_off); + movq(reg_tmp_64, xmm_v1_off); imul(reg_tmp_64, reg_tmp_64, jcp_.ic * jcp_.typesize_in); add(reg_tmp_64, aux2_reg_input); movss(xmm_v1, ptr[reg_tmp_64]); mulss(xmm_v1, xmm_w1); - + L(v1_condition_end_tail); + + movss(xmm_v2, table_val(0)); + // w_high <= cur_width - 1 + 
movq(xmm_tmp, xmm_w_high); + pcmpgtd(xmm_tmp, xmm_cur_width); + movq(reg_tmp_64, xmm_tmp); + cmp(reg_tmp_32, 0); + jne(v2_condition_end_tail, T_NEAR); + + // h_low >= 0 + movq(reg_tmp_64, xmm_h_low); + cmp(reg_tmp_32, 0); + jl(v2_condition_end_tail, T_NEAR); + + + movups(xmm_v2_off, table_val(2)); + cvtps2dq(xmm_v2_off, xmm_v2_off); + pmulld(xmm_v2_off, xmm_h_low); + paddd(xmm_v2_off, xmm_w_high); pmovsxdq(xmm_v2_off, xmm_v2_off); + movq(reg_tmp_64, xmm_v2_off); imul(reg_tmp_64, reg_tmp_64, jcp_.ic * jcp_.typesize_in); add(reg_tmp_64, aux2_reg_input); movss(xmm_v2, ptr[reg_tmp_64]); mulss(xmm_v2, xmm_w2); - + L(v2_condition_end_tail); + + movss(xmm_v3, table_val(0)); + // w_low >= 0 + movq(reg_tmp_64, xmm_w_low); + cmp(reg_tmp_32, 0); + jl(v3_condition_end_tail, T_NEAR); + + // h_high <= cur_height - 1 + movq(xmm_tmp, xmm_h_high); + pcmpgtd(xmm_tmp, xmm_cur_height); + movq(reg_tmp_64, xmm_tmp); + cmp(reg_tmp_32, 0); + jne(v3_condition_end_tail, T_NEAR); + + movups(xmm_v3_off, table_val(2)); + cvtps2dq(xmm_v3_off, xmm_v3_off); + pmulld(xmm_v3_off, xmm_h_high); + paddd(xmm_v3_off, xmm_w_low); pmovsxdq(xmm_v3_off, xmm_v3_off); + movq(reg_tmp_64, xmm_v3_off); imul(reg_tmp_64, reg_tmp_64, jcp_.ic * jcp_.typesize_in); add(reg_tmp_64, aux2_reg_input); movss(xmm_v3, ptr[reg_tmp_64]); mulss(xmm_v3, xmm_w3); + L(v3_condition_end_tail); + + movss(xmm_v4, table_val(0)); + // w_high <= cur_width - 1 + movq(xmm_tmp, xmm_w_high); + pcmpgtd(xmm_tmp, xmm_cur_width); + movq(reg_tmp_64, xmm_tmp); + cmp(reg_tmp_32, 0); + jne(v4_condition_end_tail, T_NEAR); + + // h_high <= cur_height - 1 + movq(xmm_tmp, xmm_h_high); + pcmpgtd(xmm_tmp, xmm_cur_height); + movq(reg_tmp_64, xmm_tmp); + cmp(reg_tmp_32, 0); + jne(v4_condition_end_tail, T_NEAR); + + + movups(xmm_v4_off, table_val(2)); + cvtps2dq(xmm_v4_off, xmm_v4_off); + pmulld(xmm_v4_off, xmm_h_high); + paddd(xmm_v4_off, xmm_w_high); pmovsxdq(xmm_v4_off, xmm_v4_off); + movq(reg_tmp_64, xmm_v4_off); imul(reg_tmp_64, reg_tmp_64, jcp_.ic * jcp_.typesize_in); add(reg_tmp_64, aux2_reg_input); + movss(xmm_v4, ptr[reg_tmp_64]); mulss(xmm_v4, xmm_w4); + L(v4_condition_end_tail); addss(xmm_v1, xmm_v2); addss(xmm_v1, xmm_v3); addss(xmm_v1, xmm_v4); + + if (jcp_.with_modulation) { + size_t modulation_offset = ((kh * jcp_.kw + kw) * jcp_.oh * jcp_.ow) + ow; + mulss(xmm_v1, ptr[reg_modulation + modulation_offset * jcp_.typesize_modulation]); + } + movss(ptr[aux3_reg_input_buffer + input_buffer_off * jcp_.typesize_in], xmm_v1); add(aux2_reg_input, jcp_.typesize_in); @@ -593,13 +736,18 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ } add(aux_reg_def_off, 2 * jcp_.kh * jcp_.kw * jcp_.oh * jcp_.ow * jcp_.typesize_off); + if (jcp_.with_modulation) { + add(reg_modulation, jcp_.kh * jcp_.kw * jcp_.oh * jcp_.ow * jcp_.typesize_modulation); + } add(aux_reg_input, ic_per_def_group * jcp_.typesize_in); add(aux2_reg_input_buffer, ic_per_def_group * jcp_.typesize_in); inc(reg_dg_iter); jmp(dg_loop, T_NEAR); } - L(dg_loop_end); + if (jcp_.with_modulation) { + pop(reg_modulation); + } } void store_output(int ow_step, int oc_blocks_step, int oc_step) { @@ -679,35 +827,40 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ mov(aux_reg_input_buffer, reg_input_buffer); push(reg_output); - push(reg_bias); + if (jcp_.with_bias) + push(reg_bias); push(reg_input); push(reg_kernel); + push(reg_input_buffer); interpolate_input(ow_step); + pop(reg_input_buffer); pop(reg_kernel); pop(reg_input); - pop(reg_bias); + if (jcp_.with_bias) + 
pop(reg_bias); pop(reg_output); push(reg_ow_pos); mov(aux_reg_kernel, reg_kernel); mov(aux_reg_output, reg_output); - mov(aux_reg_bias, reg_bias); + if (jcp_.with_bias) + mov(aux_reg_bias, reg_bias); mov(reg_oc_work, jcp_.oc); L(oc_unrolled_loop); { cmp(reg_oc_work, jcp_.nb_oc_blocking * jcp_.oc_block); jl(oc_main_loop, T_NEAR); - ic_loop(ow_step, jcp_.nb_oc_blocking, jcp_.oc_block); store_output(ow_step, jcp_.nb_oc_blocking, jcp_.oc_block); add(aux_reg_kernel, jcp_.nb_oc_blocking * jcp_.nb_ic * jcp_.kh * jcp_.kw * jcp_.ic_block * jcp_.oc_block * jcp_.typesize_in); add(aux_reg_output, jcp_.nb_oc_blocking * jcp_.oc_block * jcp_.typesize_out); - add(aux_reg_bias, jcp_.nb_oc_blocking * jcp_.oc_block * jcp_.typesize_bia); + if (jcp_.with_bias) + add(aux_reg_bias, jcp_.nb_oc_blocking * jcp_.oc_block * jcp_.typesize_bia); sub(reg_oc_work, jcp_.nb_oc_blocking * jcp_.oc_block); jmp(oc_unrolled_loop, T_NEAR); @@ -716,13 +869,13 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ L(oc_main_loop); { cmp(reg_oc_work, jcp_.oc_block); jl(oc_tail, T_NEAR); - ic_loop(ow_step, 1, jcp_.oc_block); store_output(ow_step, 1, jcp_.oc_block); add(aux_reg_kernel, jcp_.nb_ic * jcp_.kh * jcp_.kw * jcp_.ic_block * jcp_.oc_block * jcp_.typesize_in); add(aux_reg_output, jcp_.oc_block * jcp_.typesize_out); - add(aux_reg_bias, jcp_.oc_block * jcp_.typesize_bia); + if (jcp_.with_bias) + add(aux_reg_bias, jcp_.oc_block * jcp_.typesize_bia); sub(reg_oc_work, jcp_.oc_block); jmp(oc_main_loop, T_NEAR); @@ -741,9 +894,10 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ bool MKLDNNDeformableConvolutionNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - const auto defConvNode = ngraph::as_type_ptr(op); - if (!defConvNode) { - errorMessage = "Node is not an instance of DeformableConvolution form the operation set v1."; + if (!one_of(op->get_type_info(), + ngraph::op::v1::DeformableConvolution::type_info, + ngraph::op::v8::DeformableConvolution::type_info)) { + errorMessage = "Node is not an instance of DeformableConvolution form the operation set v1 or v8."; return false; } } catch (...) 
{ @@ -759,46 +913,52 @@ MKLDNNDeformableConvolutionNode::MKLDNNDeformableConvolutionNode(const std::shar if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; } - auto defConvNode = ngraph::as_type_ptr(op); + auto defConvNodeBase = std::dynamic_pointer_cast(op); - group = defConvNode->get_group(); - deformable_group = defConvNode->get_deformable_group(); - - auto& strides = defConvNode->get_strides(); + group = defConvNodeBase->get_group(); + deformable_group = defConvNodeBase->get_deformable_group(); + auto& strides = defConvNodeBase->get_strides(); for (int i = 0; i < strides.size(); i++) { stride.push_back(strides[i]); } - auto& dilations = defConvNode->get_dilations(); + auto& dilations = defConvNodeBase->get_dilations(); for (int i = 1; i <= dilations.size(); i++) { dilation.push_back(dilations[dilations.size() - i] - 1); } - paddingL = defConvNode->get_pads_begin(); + paddingL = defConvNodeBase->get_pads_begin(); + + if (op->get_type_info() == ngraph::op::v8::DeformableConvolution::type_info) { + auto defConvNode = std::dynamic_pointer_cast(op); + with_bilinear_pad = defConvNode->get_bilinear_interpolation_pad(); + } else { + with_bilinear_pad = false; + } } void MKLDNNDeformableConvolutionNode::getSupportedDescriptors() { std::string errorPrefix = "DeformableConvolution layer with name '" + getName() + "' "; - if (getParentEdges().size() != 3) + if (getParentEdges().size() != 3 && getParentEdges().size() != 4) IE_THROW() << errorPrefix << "has incorrect number of input edges"; if (getChildEdges().empty()) IE_THROW() << errorPrefix << "has incorrect number of output edges"; - if (getParentEdgeAt(0)->getDims().ndims() != 4) { + if (getParentEdgeAt(0)->getShape().getRank() != 4) { IE_THROW() << "Deformable convolution layer. Unsupported mode. Only 4D blobs are supported as input."; } - if (getParentEdgeAt(1)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getDims().ndims(); + if (getParentEdgeAt(1)->getShape().getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getShape().getRank(); } - if (getParentEdgeAt(2)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support 2nd input with rank: " << getParentEdgeAt(2)->getDims().ndims(); + if (getParentEdgeAt(2)->getShape().getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support 2nd input with rank: " << getParentEdgeAt(2)->getShape().getRank(); } - if (getChildEdgeAt(0)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getDims().ndims(); + if (getChildEdgeAt(0)->getShape().getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getShape().getRank(); } } @@ -806,22 +966,35 @@ void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - InferenceEngine::LayerConfig config; + const int simd_w = mayiuse(cpu::x64::avx512_common) ? 
16 : 8; + if (group != 1 && (((getParentEdgeAt(0)->getShape().getStaticDims()[0] / group) % simd_w != 0) + || ((getChildEdgeAt(0)->getShape().getStaticDims()[1] / group) % simd_w != 0))) { + enforceRef = true; + } + + size_t inputsNumber = getOriginalInputsNumber(); + NodeConfig config; config.dynBatchSupport = false; - config.inConfs.resize(3); + config.inConfs.resize(inputsNumber); config.inConfs[0].constant = false; config.inConfs[0].inPlace = -1; config.inConfs[1].constant = false; config.inConfs[1].inPlace = -1; - config.inConfs[1].constant = false; - config.inConfs[1].inPlace = -1; + config.inConfs[2].constant = false; + config.inConfs[2].inPlace = -1; + if (inputsNumber > 3) { + config.inConfs[3].constant = false; + config.inConfs[3].inPlace = -1; + } config.outConfs.resize(1); config.outConfs[0].constant = false; config.outConfs[0].inPlace = -1; impl_desc_type impl_type; - if (mayiuse(cpu::x64::avx512_common)) { + if (enforceRef) { + impl_type = impl_desc_type::ref; + } else if (mayiuse(cpu::x64::avx512_common)) { impl_type = impl_desc_type::jit_avx512; } else if (mayiuse(cpu::x64::avx2)) { impl_type = impl_desc_type::jit_avx2; @@ -831,27 +1004,54 @@ void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() { impl_type = impl_desc_type::ref; } - if (mayiuse(cpu::x64::sse41)) { - // optimzed implementation + if (!enforceRef && mayiuse(cpu::x64::sse41)) { + // optimized implementation auto dataFormat = memory::format_tag::nhwc; auto offFormat = memory::format_tag::nchw; auto weiFormat = group > 1 ? mayiuse(avx512_common) ? memory::format_tag::gOIhw16i16o : memory::format_tag::gOIhw8i8o : mayiuse(avx512_common) ? memory::format_tag::OIhw16i16o : memory::format_tag::OIhw8i8o; - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), memory::data_type::f32, dataFormat); - config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(1)->getDims(), memory::data_type::f32, offFormat); - config.inConfs[2].desc = MKLDNNMemoryDesc(getParentEdgeAt(2)->getDims(), memory::data_type::f32, weiFormat); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), memory::data_type::f32, dataFormat); - supportedPrimitiveDescriptors.push_back({config, impl_type, dataFormat}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), + memory::data_type::f32, dataFormat); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), + memory::data_type::f32, offFormat); + + auto& wDims = getParentEdgeAt(2)->getShape().getStaticDims(); + if (group > 1 && wDims.size() != 5) { + auto new_dims = InferenceEngine::SizeVector({group, div_up(wDims[0], group)}); + for (int i = 1; i < wDims.size(); i++) { + new_dims.push_back(wDims[i]); + } + config.inConfs[2].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(2)->getShape().getStaticDims(), + memory::data_type::f32, weiFormat); + } else { + config.inConfs[2].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(2)->getShape().getStaticDims(), + memory::data_type::f32, weiFormat); + } + + + if (inputsNumber > 3) { + config.inConfs[3].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(3)->getShape().getStaticDims(), + memory::data_type::f32, memory::format_tag::nchw); + } + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), + memory::data_type::f32, dataFormat); + supportedPrimitiveDescriptors.push_back({config, impl_type}); } else { // reference implementation - auto weiFormat = group > 1 ? 
memory::format_tag::goihw : memory::format_tag::oihw; - - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), memory::data_type::f32, memory::format_tag::nchw); - config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(1)->getDims(), memory::data_type::f32, memory::format_tag::nchw); - config.inConfs[2].desc = MKLDNNMemoryDesc(getParentEdgeAt(2)->getDims(), memory::data_type::f32, memory::format_tag::oihw); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), memory::data_type::f32, memory::format_tag::nchw); - supportedPrimitiveDescriptors.push_back({config, impl_type, weiFormat}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), memory::data_type::f32, + memory::format_tag::nchw); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), memory::data_type::f32, + memory::format_tag::nchw); + config.inConfs[2].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(2)->getShape().getStaticDims(), memory::data_type::f32, + memory::format_tag::oihw); + if (inputsNumber > 3) { + config.inConfs[3].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(3)->getShape().getStaticDims(), memory::data_type::f32, + memory::format_tag::nchw); + } + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), memory::data_type::f32, + memory::format_tag::nchw); + supportedPrimitiveDescriptors.push_back({config, impl_type}); } } @@ -861,13 +1061,14 @@ void MKLDNNDeformableConvolutionNode::createPrimitive() { IE_THROW() << "CPU deformable convolution with name '" << getName() << "' doesn't have primitive descriptors."; auto config = selectedPrimitiveDescriptor->getConfig(); - auto srcDims = config.inConfs[0].desc.getDims(); - auto weiDims = config.inConfs[2].desc.getDims(); - auto dstDims = config.outConfs[0].desc.getDims(); + auto srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); + auto weiDims = getParentEdgeAt(2)->getShape().getStaticDims(); + auto dstDims = getChildEdgeAt(0)->getShape().getStaticDims(); jcp.dg = deformable_group; jcp.ngroups = group; + jcp.mb = srcDims[0]; jcp.oc = dstDims[1] / jcp.ngroups; @@ -878,9 +1079,9 @@ void MKLDNNDeformableConvolutionNode::createPrimitive() { jcp.oh = dstDims[2]; jcp.ow = dstDims[3]; - bool with_groups = group > 1; - jcp.kh = weiDims[with_groups + 2]; - jcp.kw = weiDims[with_groups + 3]; +// bool with_groups = group > 1; + jcp.kh = weiDims[2]; + jcp.kw = weiDims[3]; jcp.t_pad = paddingL[0]; jcp.l_pad = paddingL[1]; @@ -892,6 +1093,8 @@ void MKLDNNDeformableConvolutionNode::createPrimitive() { jcp.dilate_w = dilation[1]; jcp.with_bias = false; + jcp.with_bi_pad = with_bilinear_pad; + jcp.with_modulation = getParentEdges().size() > 3; const int simd_w = mayiuse(cpu::x64::avx512_common) ? 16 : 8; jcp.ic_block = simd_w; @@ -904,13 +1107,16 @@ void MKLDNNDeformableConvolutionNode::createPrimitive() { jcp.typesize_in = sizeof(float); jcp.typesize_off = sizeof(float); jcp.typesize_out = sizeof(float); + jcp.typesize_modulation = sizeof(float); jcp.ur_w = mayiuse(cpu::x64::avx512_common) ? 6 : 3; jcp.nb_oc_blocking = !mayiuse(cpu::x64::avx2) ? 
2 : 4; jcp.nthr = dnnl_get_max_threads(); - if (mayiuse(cpu::x64::avx512_common)) { + if (enforceRef) { + return; + } else if (mayiuse(cpu::x64::avx512_common)) { def_conv_kernel.reset(new jit_uni_def_conv_kernel_f32(jcp)); } else if (mayiuse(cpu::x64::avx2)) { def_conv_kernel.reset(new jit_uni_def_conv_kernel_f32(jcp)); @@ -924,9 +1130,9 @@ void MKLDNNDeformableConvolutionNode::createPrimitive() { void MKLDNNDeformableConvolutionNode::executeReference(const float* src, const float* offsets, const float* weights, float* dst, const std::vector& src_strides, const std::vector& off_strides, - const std::vector& wei_strides, const std::vector& dst_strides) { + const std::vector& wei_strides, const std::vector& dst_strides, + const float* modulation, const std::vector& modulation_strides) { const bool with_groups = jcp.ngroups > 1; - const int G = jcp.ngroups; const int MB = jcp.mb; const int OH = jcp.oh; @@ -950,8 +1156,9 @@ void MKLDNNDeformableConvolutionNode::executeReference(const float* src, const f const int DG = jcp.dg; - const int channel_per_deformable_group = IC * G / DG; + const int channel_per_deformable_group = (IC * G) / DG; + const bool with_bi_pad = jcp.with_bi_pad; auto ker = [=](int g, int mb, int oc, int oh, int ow) { float d = 0; const int h_in = oh * KSH - padT; @@ -959,56 +1166,72 @@ void MKLDNNDeformableConvolutionNode::executeReference(const float* src, const f for (int ic = 0; ic < IC; ic++) { const float *data_im_ptr = src + mb * src_strides[0] + (g * IC + ic) * src_strides[1] + h_in * src_strides[2] + w_in * src_strides[3]; - const int deformable_group_index = ic / channel_per_deformable_group; + const int deformable_group_index = (IC * g + ic) / channel_per_deformable_group; const float *data_offset_ptr = offsets + mb * off_strides[0] + (deformable_group_index * 2 * KH * KW) * off_strides[1]; + const float *modulation_offset_ptr = nullptr; + if (modulation != nullptr) { + modulation_offset_ptr = modulation + mb * modulation_strides[0] + (deformable_group_index * KH * KW) * modulation_strides[1]; + } + for (int kh = 0; kh < KH; kh++) { for (int kw = 0; kw < KW; kw++) { const size_t data_offset_h_index = 2 * (kh * KW + kw) * off_strides[1] + oh * off_strides[2] + ow * off_strides[3]; const size_t data_offset_w_index = (2 * (kh * KW + kw) + 1) * off_strides[1] + oh * off_strides[2] + ow * off_strides[3]; const float offset_h = data_offset_ptr[data_offset_h_index]; const float offset_w = data_offset_ptr[data_offset_w_index]; - float val = 0.0f; - const float h_im = h_in + kh * (KDH + 1) + offset_h; - const float w_im = w_in + kw * (KDW + 1) + offset_w; - - if (h_im >= 0 && w_im >= 0 && h_im < IH && w_im < IW) { - float map_h = kh * (KDH + 1) + offset_h; - float map_w = kw * (KDW + 1) + offset_w; - const int cur_height = IH - h_in; - const int cur_width = IW - w_in; - int h_low = static_cast(floorf(map_h)); - int w_low = static_cast(floorf(map_w)); - int h_high; - int w_high; - if (h_low >= cur_height - 1) { - h_high = h_low = cur_height - 1; - map_h = static_cast(h_low); - } else { - h_high = h_low + 1; - } - - if (w_low >= cur_width - 1) { - w_high = w_low = cur_width - 1; - map_w = static_cast(w_low); - } else { - w_high = w_low + 1; - } + float map_h = kh * (KDH + 1) + offset_h; // kernel index with offset + float map_w = kw * (KDW + 1) + offset_w; // kernel index with offset + + const float h_im = h_in + map_h; // absolute pixel index with offset + const float w_im = w_in + map_w; // absolute pixel index with offset + bool skip_compute; + if (with_bilinear_pad) { + 
skip_compute = !(static_cast(w_im) > -1 && + static_cast(w_im) < IW && + static_cast(h_im) > -1 && + static_cast(h_im) < IH); + } else { + skip_compute = !(w_im >= 0 && + w_im < IW && + h_im >= 0 && + h_im < IH); + } + if (!skip_compute) { + const int cur_h_end = IH - h_in; + const int cur_w_end = IW - w_in; + int h_low = with_bi_pad ? static_cast(floorf(map_h)) : + std::max(static_cast(floorf(map_h)), 0); + int w_low = with_bi_pad ? static_cast(floorf(map_w)) : + std::max(static_cast(floorf(map_w)), 0); + const int cur_h_start = h_low + h_in; + const int cur_w_start = w_low + w_in; + int h_high = with_bi_pad ? h_low + 1 : std::min(static_cast(ceilf(map_h)), cur_h_end - 1); + int w_high = with_bi_pad ? w_low + 1 : std::min(static_cast(ceilf(map_w)), cur_w_end - 1); float lh = map_h - h_low; float lw = map_w - w_low; float hh = 1 - lh, hw = 1 - lw; - float v1 = data_im_ptr[h_low * src_strides[2] + w_low * src_strides[3]]; - float v2 = data_im_ptr[h_low * src_strides[2] + w_high * src_strides[3]]; - float v3 = data_im_ptr[h_high * src_strides[2] + w_low * src_strides[3]]; - float v4 = data_im_ptr[h_high * src_strides[2] + w_high * src_strides[3]]; + float v1 = (cur_w_start >= 0 && cur_h_start >= 0) ? data_im_ptr[h_low * src_strides[2] + w_low * src_strides[3]] : 0.0f; + float v2 = (w_high < cur_w_end && cur_h_start >= 0) ? data_im_ptr[h_low * src_strides[2] + w_high * src_strides[3]] : 0.0f; + float v3 = (cur_w_start >= 0 && h_high < cur_h_end) ? data_im_ptr[h_high * src_strides[2] + w_low * src_strides[3]] : 0.0f; + float v4 = (w_high < cur_w_end && h_high < cur_h_end) ? data_im_ptr[h_high * src_strides[2] + w_high * src_strides[3]] : 0.0f; float w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; - val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + float val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + + float modulation_scalar = 1.0f; + + if (modulation_offset_ptr != nullptr) { + size_t modulation_index = (kh * KW + kw) * modulation_strides[1] + oh * modulation_strides[2] + ow * modulation_strides[3]; + modulation_scalar = modulation_offset_ptr[modulation_index]; + } + + const float weight = with_groups ? weights[(g + oc / G) * wei_strides[0] + ic * wei_strides[1] + kh * wei_strides[2] + + kw * wei_strides[3]] + : weights[oc * wei_strides[0] + ic * wei_strides[1] + kh * wei_strides[2] + kw * wei_strides[3]]; + d += val * weight * modulation_scalar; } - d += val * (with_groups ? 
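(Editorial note: the reference path above boils down to standard bilinear sampling with optional zero padding and modulation. Below is a minimal standalone C++ sketch, assuming a single-channel IH x IW map; the helper name and signature are illustrative only and are not part of the plugin code.)

#include <cmath>

// Illustrative sketch of the sampling the reference kernel performs per (kh, kw):
// bilinear interpolation of the four neighbouring pixels, with out-of-bounds corners
// contributing zero (the behaviour enabled by bilinear_interpolation_pad in
// DeformableConvolution-8), optionally scaled by a per-position modulation value.
static float bilinear_sample(const float* data, int IH, int IW,
                             float h, float w, float modulation = 1.0f) {
    const int h_low = static_cast<int>(std::floor(h));
    const int w_low = static_cast<int>(std::floor(w));
    const int h_high = h_low + 1;
    const int w_high = w_low + 1;

    const float lh = h - h_low, lw = w - w_low;
    const float hh = 1.0f - lh, hw = 1.0f - lw;

    auto at = [&](int y, int x) -> float {
        // corners outside the feature map contribute zero
        return (y >= 0 && y < IH && x >= 0 && x < IW) ? data[y * IW + x] : 0.0f;
    };

    const float v1 = at(h_low, w_low),  v2 = at(h_low, w_high);
    const float v3 = at(h_high, w_low), v4 = at(h_high, w_high);

    const float val = hh * hw * v1 + hh * lw * v2 + lh * hw * v3 + lh * lw * v4;
    return val * modulation;  // modulation comes from the optional fourth input
}

The JIT path above computes the same four corner values v1..v4, but with explicit per-corner validity checks instead of a branch-free helper.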
weights[g * wei_strides[0] + oc * wei_strides[1] + ic * wei_strides[2] + kh * wei_strides[3] + - kw * wei_strides[4]] - : weights[oc * wei_strides[0] + ic * wei_strides[1] + kh * wei_strides[2] + kw * wei_strides[3]]); } } } @@ -1017,14 +1240,15 @@ void MKLDNNDeformableConvolutionNode::executeReference(const float* src, const f }; parallel_nd(G, MB, OC, OH, OW, - [&](int g, int mb, int oc, int oh, int ow) { + [&](int g, int mb, int oc, int oh, int ow) { dst[mb * dst_strides[0] + (g * OC + oc) * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3]] = ker(g, mb, oc, oh, ow); }); } void MKLDNNDeformableConvolutionNode::executeOptimized(const float* src, const float* offsets, const float* weights, float* dst, const std::vector& src_strides, const std::vector& off_strides, - const std::vector& dst_strides) { + const std::vector& dst_strides, const float* modulation, + const std::vector& modulation_strides) { size_t buffer_size = (size_t)jcp.nthr * jcp.ur_w * jcp.kh * jcp.kw * jcp.ic * jcp.typesize_in; std::vector input_buffer(buffer_size, 0); float* input_buffer_ptr = &input_buffer[0]; @@ -1040,6 +1264,11 @@ void MKLDNNDeformableConvolutionNode::executeOptimized(const float* src, const f par_conv.src = &src[n * src_strides[0] + _ic*jcp.ic_block * src_strides[1] + (oh * jcp.stride_h - jcp.t_pad) * src_strides[2] - jcp.l_pad * src_strides[3]]; par_conv.off = &offsets[n * off_strides[0] + oh * off_strides[2]]; + if (modulation != nullptr) { + par_conv.modulation = &modulation[n * modulation_strides[0] + oh * modulation_strides[2]]; + } else { + par_conv.modulation = nullptr; + } par_conv.filt = weights; par_conv.dst = &dst[n * dst_strides[0] + _oc*jcp.oc_block * dst_strides[1] + oh * dst_strides[2]]; @@ -1052,6 +1281,8 @@ void MKLDNNDeformableConvolutionNode::executeOptimized(const float* src, const f } void MKLDNNDeformableConvolutionNode::execute(mkldnn::stream strm) { + const size_t inputsNumber = getOriginalInputsNumber(); + auto &srcMemory0 = getParentEdgeAt(0)->getMemory(); auto &srcMemory1 = getParentEdgeAt(1)->getMemory(); auto &srcMemory2 = getParentEdgeAt(2)->getMemory(); @@ -1060,6 +1291,11 @@ void MKLDNNDeformableConvolutionNode::execute(mkldnn::stream strm) { const auto *src = reinterpret_cast(srcMemory0.GetPtr()); const auto *offsets = reinterpret_cast(srcMemory1.GetPtr()); const auto *weights = reinterpret_cast(srcMemory2.GetPtr()); + float* modulation = nullptr; + if (inputsNumber > 3) { + modulation = reinterpret_cast(getParentEdgeAt(3)->getMemory().GetPtr()); + } + float *dst = reinterpret_cast(dstMemory.GetPtr()); auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); @@ -1067,25 +1303,31 @@ void MKLDNNDeformableConvolutionNode::execute(mkldnn::stream strm) { IE_THROW() << "CPU deformable convolution with name '" << getName() << "' doesn't have primitive descriptors."; auto config = selectedPrimitiveDescriptor->getConfig(); - auto src_block_desc = config.inConfs[0].desc.getBlockingDesc(); + auto src_block_desc = getParentEdgeAt(0)->getMemory().GetDescWithType(); std::vector src_strides(src_block_desc.getStrides().size()); for (int i = 0; i < src_strides.size(); i++) { src_strides[src_block_desc.getOrder()[i]] = src_block_desc.getStrides()[i]; } - auto dst_block_desc = config.outConfs[0].desc.getBlockingDesc(); + auto dst_block_desc = getChildEdgeAt(0)->getMemory().GetDescWithType(); std::vector dst_strides(dst_block_desc.getStrides().size()); for (int i = 0; i < dst_strides.size(); i++) { dst_strides[dst_block_desc.getOrder()[i]] = 
dst_block_desc.getStrides()[i]; } - auto off_strides = config.inConfs[1].desc.getBlockingDesc().getStrides(); - auto wei_strides = config.inConfs[2].desc.getBlockingDesc().getStrides(); + + auto off_strides = getParentEdgeAt(1)->getMemory().GetDescWithType().getStrides(); + auto wei_strides = getParentEdgeAt(2)->getMemory().GetDescWithType().getStrides(); + InferenceEngine::SizeVector modulation_strides; + if (inputsNumber > 3) { + modulation_strides = getParentEdgeAt(3)->getMemory().GetDescWithType().getStrides(); + } + if (def_conv_kernel) { - executeOptimized(src, offsets, weights, dst, src_strides, off_strides, dst_strides); + executeOptimized(src, offsets, weights, dst, src_strides, off_strides, dst_strides, modulation, modulation_strides); } else { - executeReference(src, offsets, weights, dst, src_strides, off_strides, wei_strides, dst_strides); + executeReference(src, offsets, weights, dst, src_strides, off_strides, wei_strides, dst_strides, modulation, modulation_strides); } } @@ -1094,7 +1336,7 @@ bool MKLDNNDeformableConvolutionNode::created() const { } InferenceEngine::Precision MKLDNNDeformableConvolutionNode::getRuntimePrecision() const { - return MKLDNNExtensionUtils::getMaxPrecision(getInputPrecisions()); + return getMaxPrecision(getInputPrecisions()); } REG_MKLDNN_PRIM_FOR(MKLDNNDeformableConvolutionNode, DeformableConvolution); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.h index e74e49788ccda6..d71946f77a3b51 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.h @@ -22,8 +22,6 @@ struct jit_def_conv_params { int kd, kh, kw; int stride_d, stride_h, stride_w; int dilate_d, dilate_h, dilate_w; - bool with_bias; - bool with_sum; int nthr; int nb_ic, ic_block; int nb_oc, oc_block; @@ -32,13 +30,19 @@ struct jit_def_conv_params { int ur_w_tail; int typesize_in; int typesize_off; + int typesize_modulation; int typesize_bia; int typesize_out; + bool with_bias; + bool with_sum; + bool with_modulation; + bool with_bi_pad; }; struct jit_def_conv_call_args { const void *src; const void *off; + const void *modulation; const void *filt; const void *bias; const void *dst; @@ -75,11 +79,13 @@ class MKLDNNDeformableConvolutionNode : public MKLDNNNode { bool canBeInPlace() const override { return false; } + bool enforceRef = false; InferenceEngine::Precision getRuntimePrecision() const override; private: size_t group = 1; + bool with_bilinear_pad = false; std::vector stride = {}; std::vector dilation = {}; std::vector paddingL = {}; @@ -92,10 +98,11 @@ class MKLDNNDeformableConvolutionNode : public MKLDNNNode { void executeReference(const float* src, const float* offsets, const float* weights, float* dst, const std::vector& src_strides, const std::vector& off_strides, - const std::vector& wei_strides, const std::vector& dst_strides); + const std::vector& wei_strides, const std::vector& dst_strides, + const float* modulation = nullptr, const std::vector& modulation_strides = {}); void executeOptimized(const float* src, const float* offsets, const float* weights, float* dst, - const std::vector& src_strides, const std::vector& off_strides, - const std::vector& dst_strides); + const std::vector& src_strides, const std::vector& off_strides, const std::vector& dst_strides, + const float* modulation = nullptr, const std::vector& modulation_strides = {}); }; } // namespace MKLDNNPlugin diff --git 
a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp index 38bebcd5271072..a117d3acbdcd4d 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp @@ -6,7 +6,7 @@ #include #include -#include "common/tensor_desc_creator.h" +#include "common/blocked_desc_creator.h" #include #include @@ -58,7 +58,7 @@ MKLDNNDepthToSpaceNode::MKLDNNDepthToSpaceNode(const std::shared_ptr(std::pow(blockSize, nSpatialDims)); } else { IE_THROW(NotImplemented) << errorMessage; @@ -66,13 +66,13 @@ MKLDNNDepthToSpaceNode::MKLDNNDepthToSpaceNode(const std::shared_ptr 5) THROW_ERROR << "doesn't support dimensions with rank greater than 5"; - SizeVector dstDims = outDims[0].ToSizeVector(); + SizeVector dstDims = outputShapes[0].getStaticDims(); if (srcDims.size() != dstDims.size()) THROW_ERROR << "has incorrect number of input/output dimensions"; @@ -99,8 +99,8 @@ void MKLDNNDepthToSpaceNode::initSupportedPrimitiveDescriptors() { return; InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(0); - auto srcDims = getParentEdgeAt(0)->getDims(); - const size_t nDims = srcDims.ndims(); + auto srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); + const size_t nDims = srcDims.size(); impl_desc_type impl_type; if (mayiuse(impl::cpu::x64::avx512_common)) { @@ -113,7 +113,7 @@ void MKLDNNDepthToSpaceNode::initSupportedPrimitiveDescriptors() { impl_type = impl_desc_type::ref; } - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; config.inConfs.resize(1); config.outConfs.resize(1); @@ -122,27 +122,27 @@ void MKLDNNDepthToSpaceNode::initSupportedPrimitiveDescriptors() { config.outConfs[0].inPlace = -1; config.outConfs[0].constant = false; - std::vector supportedTypes; + std::vector supportedTypes; if (nDims > 2) { auto canUseBlocked = [=](const size_t block) { return srcDims[1] % block == 0 && (srcDims[1] / block) % blockStep == 0 && (mode == Mode::DEPTH_FIRST ? 
block % blockStep == 0 : true); }; - supportedTypes.push_back(TensorDescCreatorTypes::nspc); + supportedTypes.push_back(LayoutType::nspc); if (canUseBlocked(8lu)) - supportedTypes.push_back(TensorDescCreatorTypes::nCsp8c); + supportedTypes.push_back(LayoutType::nCsp8c); if (canUseBlocked(16lu)) - supportedTypes.push_back(TensorDescCreatorTypes::nCsp16c); + supportedTypes.push_back(LayoutType::nCsp16c); } - supportedTypes.push_back(TensorDescCreatorTypes::ncsp); - auto creators = TensorDescCreator::getCommonCreators(); - auto range = TensorDescCreator::makeFilteredRange(creators, nDims, supportedTypes); + supportedTypes.push_back(LayoutType::ncsp); + auto creators = BlockedDescCreator::getCommonCreators(); + auto range = BlockedDescCreator::makeFilteredRange(creators, nDims, supportedTypes); for (auto itr = range.first; itr != range.second; ++itr) { - config.inConfs[0].desc = itr->second->createDesc(precision, getParentEdgeAt(0)->getDims().ToSizeVector()); - config.outConfs[0].desc = itr->second->createDesc(precision, getChildEdgeAt(0)->getDims().ToSizeVector()); - supportedPrimitiveDescriptors.emplace_back(config, impl_type, MKLDNNMemoryDesc(config.outConfs.front().desc).getFormat()); + config.inConfs[0].desc = itr->second->createUniqueDesc(precision, getParentEdgeAt(0)->getShape().getStaticDims()); + config.outConfs[0].desc = itr->second->createUniqueDesc(precision, getChildEdgeAt(0)->getShape().getStaticDims()); + supportedPrimitiveDescriptors.emplace_back(config, impl_type); } } @@ -156,18 +156,19 @@ void MKLDNNDepthToSpaceNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR << "has unidentified preferable primitive descriptor"; - SizeVector srcDims = getParentEdgeAt(0)->getBlob()->getTensorDesc().getDims(); - SizeVector dstDims = getChildEdgeAt(0)->getBlob()->getTensorDesc().getDims(); + SizeVector srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); + SizeVector dstDims = getChildEdgeAt(0)->getShape().getStaticDims(); size_t nDims = srcDims.size(); const size_t nSpatialDims = nDims - 2; - const bool isBlocked = getParentEdgeAt(0)->getMemory().GetDesc().isBlockedCFormat(); + const bool isBlocked = getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c) || + getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c); const size_t reshapedRank = nDims + nSpatialDims + static_cast(isBlocked) + static_cast(isBlocked && mode == Mode::DEPTH_FIRST); const size_t lastIdx = reshapedRank - 1; size_t firstSpatialOrder = 2; PermuteParams params; - params.data_size = getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc.getPrecision().size(); + params.data_size = getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc->getPrecision().size(); params.order.resize(reshapedRank, 0); params.src_block_order.resize(reshapedRank); params.dst_block_order.resize(reshapedRank); @@ -193,8 +194,8 @@ void MKLDNNDepthToSpaceNode::createPrimitive() { }; if (isBlocked) { - SizeVector srcBlockedDims = getParentEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims(); - SizeVector dstBlockedDims = getChildEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims(); + SizeVector srcBlockedDims = getParentEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); + SizeVector dstBlockedDims = getChildEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); size_t orderShiftForBlocks, orderShiftForDims; if (mode == Mode::BLOCKS_FIRST) { @@ -223,7 +224,7 @@ void MKLDNNDepthToSpaceNode::createPrimitive() { } 
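(Editorial note: the PermuteParams being assembled in this hunk encode the usual reshape-transpose-reshape decomposition of DepthToSpace. Below is a plain NCHW sketch of the BLOCKS_FIRST index remapping, assuming a square block size; the function is illustrative only and is not the plugin's permute kernel.)

#include <cstddef>

// dst[n][c][h*bs + bh][w*bs + bw] = src[n][(bh*bs + bw)*Co + c][h][w],  Co = C / (bs*bs)
// (DEPTH_FIRST differs only in the channel decomposition: src_c = c*bs*bs + bh*bs + bw.)
static void depth_to_space_blocks_first(const float* src, float* dst,
                                        int N, int C, int H, int W, int bs) {
    const int Co = C / (bs * bs);
    for (int n = 0; n < N; ++n)
        for (int c = 0; c < Co; ++c)
            for (int h = 0; h < H; ++h)
                for (int w = 0; w < W; ++w)
                    for (int bh = 0; bh < bs; ++bh)
                        for (int bw = 0; bw < bs; ++bw) {
                            const int src_c = (bh * bs + bw) * Co + c;
                            const std::size_t s = ((std::size_t(n) * C + src_c) * H + h) * W + w;
                            const std::size_t d = ((std::size_t(n) * Co + c) * (H * bs) + (h * bs + bh))
                                                  * (W * bs) + (w * bs + bw);
                            dst[d] = src[s];
                        }
}

For blocked (nCsp8c/nCsp16c) layouts the same mapping is expressed through the extended order/block-dims arrays built in this function, which is why the reshaped rank grows when the input is blocked.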
reshapeAndSetPermOrder(orderShiftForDims, orderShiftForBlocks, firstSpatialOrder, srcBlockedDims); - } else if (getParentEdgeAt(0)->getMemory().GetDesc().isTailCFormat()) { + } else if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc)) { srcDims.push_back(srcDims[1]); dstDims.push_back(dstDims[1]); srcDims.erase(srcDims.begin() + 1); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_detection_output_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_detection_output_node.cpp index 0a2f4fc814021d..1d44dd3f7475e5 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_detection_output_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_detection_output_node.cpp @@ -1,7 +1,6 @@ // Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" #include #include @@ -116,13 +115,13 @@ void MKLDNNDetectionOutputNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - std::vector inDataConf; + std::vector inDataConf; inDataConf.reserve(getOriginalInputsNumber()); for (int i = 0; i < getOriginalInputsNumber(); ++i) - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::FP32); + inDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); addSupportedPrimDesc(inDataConf, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } @@ -132,12 +131,12 @@ void MKLDNNDetectionOutputNode::execute(mkldnn::stream strm) { const float *loc_data = reinterpret_cast(getParentEdgeAt(idx_location)->getMemoryPtr()->GetPtr()); const float *conf_data = reinterpret_cast(getParentEdgeAt(idx_confidence)->getMemoryPtr()->GetPtr()); const float *prior_data = reinterpret_cast(getParentEdgeAt(idx_priors)->getMemoryPtr()->GetPtr()); - const float *arm_conf_data = inDims.size() > 3 ? + const float *arm_conf_data = inputShapes.size() > 3 ? reinterpret_cast(getParentEdgeAt(idx_arm_confidence)->getMemoryPtr()->GetPtr()) : nullptr; - const float *arm_loc_data = inDims.size() > 4 ? + const float *arm_loc_data = inputShapes.size() > 4 ? 
reinterpret_cast(getParentEdgeAt(idx_arm_location)->getMemoryPtr()->GetPtr()) : nullptr; - const int N = getParentEdgeAt(idx_confidence)->getDims()[0]; + const int N = getParentEdgeAt(idx_confidence)->getShape().getStaticDims()[0]; float *decoded_bboxes_data = _decoded_bboxes.data(); float *reordered_conf_data = _reordered_conf.data(); @@ -286,8 +285,8 @@ void MKLDNNDetectionOutputNode::execute(mkldnn::stream strm) { } } - const int num_results = getChildEdgesAtPort(0)[0]->getDims()[2]; - const int DETECTION_SIZE = getChildEdgesAtPort(0)[0]->getDims()[3]; + const int num_results = getChildEdgesAtPort(0)[0]->getShape().getStaticDims()[2]; + const int DETECTION_SIZE = getChildEdgesAtPort(0)[0]->getShape().getStaticDims()[3]; if (DETECTION_SIZE != 7) { IE_THROW() << NOT_IMPLEMENTED; } @@ -300,7 +299,7 @@ void MKLDNNDetectionOutputNode::execute(mkldnn::stream strm) { else dst_data_size = N * _num_classes * _num_priors * DETECTION_SIZE * sizeof(float); - if (dst_data_size > getChildEdgesAtPort(0)[0]->getBlob()->byteSize()) { + if (dst_data_size > getChildEdgesAtPort(0)[0]->getMemory().GetSize()) { IE_THROW() << OUT_OF_BOUNDS; } memset(dst_data, 0, dst_data_size); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.cpp index b9ef511d010fce..1796d49989e9eb 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.cpp @@ -48,20 +48,20 @@ MKLDNNDFTNode::MKLDNNDFTNode(const std::shared_ptr& op, const mkld } /* Data */ - inputShape = inDims[DATA_INDEX].ToSizeVector(); + inputShape = inputShapes[DATA_INDEX].getStaticDims(); if (inputShape.size() < 2) { IE_THROW() << layerErrorPrefix << " has invalid 'data' input tensor with rank: " << inputShape.size(); } /* Axes */ - const auto axesRank = inDims[AXES_INDEX].ndims(); + const auto axesRank = inputShapes[AXES_INDEX].getRank(); if (axesRank != 1) { IE_THROW() << layerErrorPrefix << " has invalid 'axes' input tensor with rank: " << axesRank; } /* Signal size */ if (inputsNumber > SIGNAL_SIZE_INDEX) { - const auto signalSizeRank = inDims[SIGNAL_SIZE_INDEX].ndims(); + const auto signalSizeRank = inputShapes[SIGNAL_SIZE_INDEX].getRank(); if (signalSizeRank != 1) { IE_THROW() << layerErrorPrefix << " has invalid 'signal_size' input tensor with rank: " << signalSizeRank; } @@ -93,12 +93,12 @@ void MKLDNNDFTNode::initSupportedPrimitiveDescriptors() { } } - std::vector inDataConfigurators({{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::I32}}); + std::vector inDataConfigurators({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::I32}}); if (getOriginalInputsNumber() > SIGNAL_SIZE_INDEX) - inDataConfigurators.push_back({TensorDescCreatorTypes::ncsp, Precision::I32}); + inDataConfigurators.push_back({LayoutType::ncsp, Precision::I32}); - addSupportedPrimDesc(inDataConfigurators, {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, impl_desc_type::ref_any); + addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } namespace { @@ -225,7 +225,7 @@ void copyDataToOutputWithSignalSize(const float* input, const std::vector(axesEdge->getMemoryPtr()->GetPtr()); - axes = std::vector(axesStartPtr, axesStartPtr + axesEdge->getDims()[0]); + axes = std::vector(axesStartPtr, axesStartPtr + axesEdge->getShape().getStaticDims()[0]); for (auto& axis : axes) { if (axis < 0) { axis += inputShape.size() - 1; @@ 
-233,7 +233,7 @@ void MKLDNNDFTNode::execute(mkldnn::stream strm) { } std::sort(axes.begin(), axes.end()); - outputShape = getChildEdgeAt(0)->getDims().ToSizeVector(); + outputShape = getChildEdgeAt(0)->getShape().getStaticDims(); for (size_t axis : axes) { size_t nComplex = outputShape[axis]; // FFT uses different twiddle factors @@ -247,8 +247,8 @@ void MKLDNNDFTNode::execute(mkldnn::stream strm) { const auto *input = reinterpret_cast(inputDataEdge->getMemoryPtr()->GetPtr()); auto *output = reinterpret_cast(outputDataEdge->getMemoryPtr()->GetPtr()); - auto inputStrides = inputDataEdge->getDesc().getBlockingDesc().getStrides(); - auto outputStrides = outputDataEdge->getDesc().getBlockingDesc().getStrides(); + auto inputStrides = inputDataEdge->getMemory().GetDescWithType().getStrides(); + auto outputStrides = outputDataEdge->getMemory().GetDescWithType().getStrides(); if (inputShape != outputShape) { copyDataToOutputWithSignalSize(input, inputShape, inputStrides, output, outputShape, outputStrides); } else { @@ -257,7 +257,7 @@ void MKLDNNDFTNode::execute(mkldnn::stream strm) { } // 1d case - if (inputDataEdge->getDesc().getDims().size() == 2) { + if (inputDataEdge->getShape().getRank() == 2) { size_t nComplex = outputShape[0]; if (IsPowerOfTwo(nComplex)) { fft(output, nComplex * 2, true); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp index 9dd250d7b96151..d777e22210f324 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp @@ -125,11 +125,11 @@ struct jit_uni_eltwise_generic : public MKLDNNPlugin::jit_uni_eltwise_kernel, pu if (eltwiseNode.getFusedWith()[i].get()->getType() == Eltwise) { post_op_emitters.push_back(create_eltwise_emitter(*eltwiseNode.getFusedWith()[i].get(), exec_prc)); } else if (eltwiseNode.getFusedWith()[i].get()->getType() == FakeQuantize) { - auto fakeQuantizeNode = dynamic_cast(eltwiseNode.getFusedWith()[i].get()); - fakeQuantizeNode->appendPostOps(post_ops); + auto fakeQuantizeNode = dynamic_cast(eltwiseNode.getFusedWith()[i].get()); + fakeQuantizeNode->appendPostOps(post_ops); - quantization_injectors.push_back(std::make_shared>( - this, post_ops.get()->entry_[post_ops.len() - 1], vmm_d_weights, vmm_d_bias, reg_d_weights, reg_d_bias)); + quantization_injectors.push_back(std::make_shared>( + this, post_ops.get()->entry_[post_ops.len() - 1], vmm_d_weights, vmm_d_bias, reg_d_weights, reg_d_bias)); } } @@ -965,9 +965,9 @@ size_t MKLDNNEltwiseNode::getOpInputsNum() const { } bool MKLDNNEltwiseNode::isWithBroadcast() { - auto oDims = outDims[0].ToSizeVector(); - for (size_t i = 0; i < inDims.size(); i++) { - auto iDims = inDims[i].ToSizeVector(); + auto oDims = outputShapes[0].getStaticDims(); + for (size_t i = 0; i < inputShapes.size(); i++) { + auto iDims = inputShapes[i].getStaticDims(); if (iDims != oDims) return true; } @@ -1080,10 +1080,10 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { Blocked }; - auto initDesc = [&] (LayoutType lt) -> PrimitiveDescInfo { - auto createMemoryDesc = [lt](MKLDNNEdgePtr edge, Precision prc, size_t offset) -> TensorDesc { - if (lt == ChannelsFirst && edge->getDims().ndims() != 1) { - auto dims = edge->getDims().ToSizeVector(); + auto initDesc = [&] (LayoutType lt) -> NodeDesc { + auto createMemoryDesc = [lt](MKLDNNEdgePtr edge, Precision prc, size_t offset) -> std::unique_ptr { + if (lt == ChannelsFirst && 
edge->getShape().getRank() != 1) { + auto dims = edge->getShape().getStaticDims(); auto ndims = dims.size(); std::vector order(ndims); std::iota(order.begin(), order.end(), 0); @@ -1097,11 +1097,11 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { blocks[i] = dims[order[i]]; } - return TensorDesc(prc, edge->getDims().ToSizeVector(), {blocks, order, offset}); - } else if (lt == Blocked && edge->getDims().ndims() != 1 && edge->getDims()[1] != 1) { + return MKLDNNPlugin::make_unique(prc, edge->getShape().getStaticDims(), blocks, order, offset); + } else if (lt == Blocked && edge->getShape().getRank() != 1 && edge->getShape().getStaticDims()[1] != 1) { size_t blockSize = mayiuse(x64::avx512_common) ? 16 : 8; - std::vector blocks = edge->getDims().ToSizeVector(); + std::vector blocks = edge->getShape().getStaticDims(); std::vector order(blocks.size()); std::iota(order.begin(), order.end(), 0); @@ -1109,37 +1109,38 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { blocks.push_back(blockSize); order.push_back(1); - return TensorDesc(prc, edge->getDims().ToSizeVector(), {blocks, order, offset}); + return MKLDNNPlugin::make_unique(prc, edge->getShape().getStaticDims(), blocks, order, offset); } else { - std::vector blocks = edge->getDims().ToSizeVector(); + std::vector blocks = edge->getShape().getStaticDims(); std::vector order(blocks.size()); std::iota(order.begin(), order.end(), 0); - return TensorDesc(prc, edge->getDims().ToSizeVector(), {blocks, order, offset}); + return MKLDNNPlugin::make_unique(prc, edge->getShape().getStaticDims(), blocks, order, offset); } }; size_t offset = std::numeric_limits::max(); - InferenceEngine::LayerConfig config; - config.dynBatchSupport = getChildEdgeAt(0)->getDims().ndims() > 1 && getChildEdgeAt(0)->getDims() == getParentEdgeAt(0)->getDims(); + NodeConfig config; + config.dynBatchSupport = getChildEdgeAt(0)->getShape().getRank() > 1 && getChildEdgeAt(0)->getShape() == + getParentEdgeAt(0)->getShape(); for (size_t i = 0; i < getParentEdges().size(); i++) { - InferenceEngine::DataConfig dataConfig; - dataConfig.inPlace = (!i && canBeInPlace() && inputPrecisions[i] == outputPrecision) ? 0 : -1; - dataConfig.constant = false; + PortConfig portConfig; + portConfig.inPlace = (!i && canBeInPlace() && inputPrecisions[i] == outputPrecision) ? 
0 : -1; + portConfig.constant = false; - dataConfig.desc = createMemoryDesc(getParentEdgeAt(i), inputPrecisions[i], offset); + portConfig.desc = createMemoryDesc(getParentEdgeAt(i), inputPrecisions[i], offset); - config.inConfs.push_back(dataConfig); + config.inConfs.push_back(portConfig); } - InferenceEngine::DataConfig dataConfig; - dataConfig.inPlace = -1; - dataConfig.constant = false; + PortConfig portConfig; + portConfig.inPlace = -1; + portConfig.constant = false; - dataConfig.desc = createMemoryDesc(getChildEdgeAt(0), outputPrecision, offset); + portConfig.desc = createMemoryDesc(getChildEdgeAt(0), outputPrecision, offset); - config.outConfs.push_back(dataConfig); + config.outConfs.push_back(portConfig); impl_desc_type impl_type; if (mayiuse(x64::avx512_common)) { @@ -1155,18 +1156,20 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { return {config, impl_type}; }; - bool isChannelsFirstApplicable = one_of(getChildEdgeAt(0)->getDims().ndims(), 1, 2, 4, 5); + bool isChannelsFirstApplicable = one_of(getChildEdgeAt(0)->getShape().getRank(), 1, 2, 4, 5); for (size_t i = 0; i < getParentEdges().size(); i++) { - isChannelsFirstApplicable = isChannelsFirstApplicable && one_of(getParentEdgeAt(i)->getDims().ndims(), 1, 2, 4, 5); - isChannelsFirstApplicable = isChannelsFirstApplicable && implication(getParentEdgeAt(i)->getDims().ndims() != 1, - getChildEdgeAt(0)->getDims().ndims() == getParentEdgeAt(i)->getDims().ndims()); + isChannelsFirstApplicable = isChannelsFirstApplicable && one_of(getParentEdgeAt(i)->getShape().getRank(), 1, 2, 4, 5); + isChannelsFirstApplicable = isChannelsFirstApplicable && implication(getParentEdgeAt(i)->getShape().getRank() != 1, + getChildEdgeAt(0)->getShape().getRank() == + getParentEdgeAt(i)->getShape().getRank()); } - bool isBlockedApplicable = one_of(getChildEdgeAt(0)->getDims().ndims(), 1, 4, 5); + bool isBlockedApplicable = one_of(getChildEdgeAt(0)->getShape().getRank(), 1, 4, 5); for (size_t i = 0; i < getParentEdges().size(); i++) { - isBlockedApplicable = isBlockedApplicable && one_of(getParentEdgeAt(i)->getDims().ndims(), 1, 4, 5); - isBlockedApplicable = isBlockedApplicable && implication(getParentEdgeAt(i)->getDims().ndims() != 1, - getChildEdgeAt(0)->getDims().ndims() == getParentEdgeAt(i)->getDims().ndims()); + isBlockedApplicable = isBlockedApplicable && one_of(getParentEdgeAt(i)->getShape().getRank(), 1, 4, 5); + isBlockedApplicable = isBlockedApplicable && implication(getParentEdgeAt(i)->getShape().getRank() != 1, + getChildEdgeAt(0)->getShape().getRank() == + getParentEdgeAt(i)->getShape().getRank()); } if (isChannelsFirstApplicable) @@ -1177,9 +1180,7 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { } void MKLDNNEltwiseNode::createPrimitive() { - auto config = getSelectedPrimitiveDescriptor()->getConfig(); - - auto initDims = [this, config](size_t maxInputSize) { + auto initDims = [this](size_t maxInputSize) { size_t inputNum = getParentEdges().size(); dims_in.resize(inputNum); @@ -1189,8 +1190,9 @@ void MKLDNNEltwiseNode::createPrimitive() { dims_out.resize(maxInputSize, 1); + auto outBlockingDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); std::vector order(maxInputSize); - auto outOrder = config.outConfs[0].desc.getBlockingDesc().getOrder(); + auto outOrder = outBlockingDesc.getOrder(); for (size_t i = 0; i < order.size(); i++) { if (i < order.size() - outOrder.size()) order[i] = i; @@ -1198,17 +1200,18 @@ void MKLDNNEltwiseNode::createPrimitive() { order[i] = outOrder[i - (order.size() - 
outOrder.size())] + (order.size() - outOrder.size()); } - size_t outRank = config.outConfs[0].desc.getBlockingDesc().getBlockDims().size(); + size_t outRank = outBlockingDesc.getBlockDims().size(); for (int i = 0; i < outRank; i++) { - dims_out[dims_out.size() - 1 - i] = config.outConfs[0].desc.getBlockingDesc().getBlockDims()[outRank - 1 - i]; + dims_out[dims_out.size() - 1 - i] = outBlockingDesc.getBlockDims()[outRank - 1 - i]; } for (int i = 0; i < inputNum; i++) { - size_t inRank = config.inConfs[i].desc.getBlockingDesc().getBlockDims().size(); + auto inBlockingDesc = getParentEdgeAt(i)->getMemory().GetDescWithType(); + size_t inRank = inBlockingDesc.getBlockDims().size(); // WA to normalize blocked and planar layouts - auto inOrder = config.inConfs[i].desc.getBlockingDesc().getOrder(); - size_t startOff = outOrder.size() != config.outConfs[0].desc.getDims().size() && + auto inOrder = inBlockingDesc.getOrder(); + size_t startOff = outOrder.size() != outBlockingDesc.getShape().getRank() && outOrder[outOrder.size() - 1] != inOrder[inOrder.size() - 1] ? 1 : 0; // WA to handle nspc layout with 1D tensors @@ -1217,7 +1220,7 @@ void MKLDNNEltwiseNode::createPrimitive() { } for (int j = 0; j < inRank; j++) { - dims_in[i][dims_in[i].size() - 1 - j - startOff] = config.inConfs[i].desc.getBlockingDesc().getBlockDims()[inRank - 1 - j]; + dims_in[i][dims_in[i].size() - 1 - j - startOff] = inBlockingDesc.getBlockDims()[inRank - 1 - j]; } } @@ -1229,13 +1232,13 @@ void MKLDNNEltwiseNode::createPrimitive() { } }; - auto initOffsets = [this, config](size_t maxInputSize) { + auto initOffsets = [this](size_t maxInputSize) { size_t inputNum = getParentEdges().size(); offsets_out.resize(maxInputSize, 1); offset_out_calc(offsets_out, dims_out); for (int j = 0; j < maxInputSize; j++) { - offsets_out[j] *= config.outConfs[0].desc.getPrecision().size(); + offsets_out[j] *= getChildEdgeAt(0)->getMemory().GetDesc().getPrecision().size(); } offsets_in.resize(inputNum); @@ -1243,7 +1246,7 @@ void MKLDNNEltwiseNode::createPrimitive() { offsets_in[i].resize(maxInputSize, 1); offset_in_calc(offsets_in[i], dims_in[i], dims_out); for (int j = 0; j < maxInputSize; j++) { - offsets_in[i][j] *= config.inConfs[i].desc.getPrecision().size(); + offsets_in[i][j] *= getParentEdgeAt(i)->getMemory().GetDesc().getPrecision().size(); } } @@ -1287,10 +1290,11 @@ void MKLDNNEltwiseNode::createPrimitive() { } }; - tensorRank = std::max(static_cast(optimalTensorRank), config.outConfs[0].desc.getBlockingDesc().getBlockDims().size()); + auto outBlockingDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + tensorRank = std::max(static_cast(optimalTensorRank), outBlockingDesc.getBlockDims().size()); initDims(tensorRank); - auto outOrder = config.outConfs[0].desc.getBlockingDesc().getOrder(); + auto outOrder = outBlockingDesc.getOrder(); size_t oc_size = 0; offsets_oc.resize(tensorRank, 0); if (isFusedWith(FakeQuantize)) { @@ -1310,7 +1314,7 @@ void MKLDNNEltwiseNode::createPrimitive() { fullWorkAmount *= dims_out[i]; } - isDynBatchEnabled = config.dynBatchSupport; + isDynBatchEnabled = getSelectedPrimitiveDescriptor()->getConfig().dynBatchSupport; size_t minimalConcurrency = parallel_get_max_threads(); size_t minimalJitWorkAmount = 256; @@ -1320,7 +1324,7 @@ void MKLDNNEltwiseNode::createPrimitive() { bool hasDifferentDims = false; while (currentJitWorkAmount < minimalJitWorkAmount && currentJitWorkAmount < fullWorkAmount && // we shouldn't collapse batch dimension in case dynamic batch is enabled - (!isDynBatchEnabled || 
(config.outConfs[0].desc.getBlockingDesc().getBlockDims().size() - collapsedDims > 2))) { + (!isDynBatchEnabled || (outBlockingDesc.getBlockDims().size() - collapsedDims > 2))) { if (dims_out.size() - collapsedDims - 2 < 0) break; @@ -1372,22 +1376,24 @@ void MKLDNNEltwiseNode::createPrimitive() { } } - batchDimIdx = tensorRank - config.outConfs[0].desc.getBlockingDesc().getBlockDims().size() + collapsedDims; + batchDimIdx = tensorRank - outBlockingDesc.getBlockDims().size() + collapsedDims; schedulerWorkAmount = fullWorkAmount / dims_out[dims_out.size() - 1]; initOffsets(tensorRank); - jep.inputs_number = config.inConfs.size(); + const size_t inpuPortsCount = getSelectedPrimitiveDescriptor()->getConfig().inConfs.size(); + + jep.inputs_number = inpuPortsCount; jep.input_size = tensorRank; - for (int i = 0; i < config.inConfs.size(); i++) { + for (int i = 0; i < inpuPortsCount; i++) { jep.src_size[i] = dims_in[i][dims_in[i].size() - 1]; - jep.src_prc[i] = config.inConfs[i].desc.getPrecision(); + jep.src_prc[i] = getParentEdgesAtPort(i).front()->getMemory().GetDesc().getPrecision(); } jep.dst_size = dims_out[dims_out.size() - 1]; - jep.dst_prc = config.outConfs[0].desc.getPrecision(); + jep.dst_prc = getChildEdgesAtPort(0).front()->getMemory().GetDesc().getPrecision(); - for (int i = 0; i < config.inConfs.size(); i++) { + for (int i = 0; i < inpuPortsCount; i++) { jep.src_offsets[i] = offsets_in[i]; } jep.dst_offsets = offsets_out; @@ -1415,13 +1421,13 @@ void MKLDNNEltwiseNode::initOptimalPrimitiveDescriptor() { if (selected_pd == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; auto config = selected_pd->getConfig(); - if (!isInitConfig(config)) { + if (!isConfigDefined(config)) { for (size_t i = 0; i < config.inConfs.size(); i++) { - config.inConfs[i].desc = getConfiguredInputDesc(config, i); + config.inConfs[i].desc = std::move(getDefinedInputDesc(config, i)); } for (size_t i = 0; i < config.outConfs.size(); i++) { - config.outConfs[i].desc = getConfiguredOutputDesc(config, i); + config.outConfs[i].desc = std::move(getDefinedOutputDesc(config, i)); } initDescriptor(config); @@ -1641,13 +1647,13 @@ bool MKLDNNEltwiseNode::canBeInPlace() const { } } - return getParentEdgesAtPort(0)[0].get()->getDims() == getChildEdgesAtPort(0)[0].get()->getDims(); + return getParentEdgesAtPort(0)[0].get()->getShape() == getChildEdgesAtPort(0)[0].get()->getShape(); } void MKLDNNEltwiseNode::fuseInto(MKLDNNNodePtr& parentNode) { // Handling Convolution custom Add node fusing case which is processed via dnnl append_sum() API. 
specialConvolutionAddFusing = (parentNode->getType() == Convolution || parentNode->getType() == BinaryConvolution) && getAlgorithm() == EltwiseAdd && - getParentEdgesAtPort(0)[0]->getDims().ToSizeVector() == getParentEdgesAtPort(1)[0]->getDims().ToSizeVector(); + getParentEdgesAtPort(0)[0]->getShape() == getParentEdgesAtPort(1)[0]->getShape(); if (!specialConvolutionAddFusing && canBePerformedAsScaleShift(parentNode.get())) { fillScalesAndShifts(parentNode.get(), scales, shifts, 16); } @@ -1770,7 +1776,7 @@ InferenceEngine::Precision MKLDNNEltwiseNode::getRuntimePrecision() const { } } - return MKLDNNExtensionUtils::getMaxPrecision(inputPrecisions); + return getMaxPrecision(inputPrecisions); } REG_MKLDNN_PRIM_FOR(MKLDNNEltwiseNode, Eltwise); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.cpp index f59b69b023d99c..4499e91dacb9bd 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.cpp @@ -62,15 +62,15 @@ void MKLDNNEmbeddingBagOffsetSumNode::initSupportedPrimitiveDescriptors() { IE_THROW() << logPrefix << "has unsupported precision: " << inDataPrecision.name(); } - std::vector inDataConfigurators({{TensorDescCreatorTypes::ncsp, inDataPrecision}, - {TensorDescCreatorTypes::ncsp, Precision::I32}, - {TensorDescCreatorTypes::ncsp, Precision::I32}}); + std::vector inDataConfigurators({{LayoutType::ncsp, inDataPrecision}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}); if (getOriginalInputsNumber() > DEFAULT_INDEX_IDX) - inDataConfigurators.push_back({TensorDescCreatorTypes::ncsp, Precision::I32}); + inDataConfigurators.push_back({LayoutType::ncsp, Precision::I32}); if (getOriginalInputsNumber() > PER_SAMPLE_WEIGHTS_IDX) - inDataConfigurators.push_back({TensorDescCreatorTypes::ncsp, inDataPrecision}); + inDataConfigurators.push_back({LayoutType::ncsp, inDataPrecision}); - addSupportedPrimDesc(inDataConfigurators, {{TensorDescCreatorTypes::ncsp, inDataPrecision}}, impl_desc_type::ref_any); + addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any); } void MKLDNNEmbeddingBagOffsetSumNode::initFromInputs() { @@ -122,7 +122,8 @@ void MKLDNNEmbeddingBagOffsetSumNode::execute(mkldnn::stream strm) { if (_withWeights) weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->GetPtr()); - MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, getParentEdgeAt(0)->getDesc(), getChildEdgeAt(0)->getDesc()); + MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, getParentEdgeAt(0)->getMemory().GetDesc().getPrecision(), + getParentEdgeAt(0)->getShape().getStaticDims(), getChildEdgeAt(0)->getShape().getStaticDims()); } bool MKLDNNEmbeddingBagOffsetSumNode::created() const { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.cpp index 3318e1089faeed..f185d08588157d 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.cpp @@ -58,12 +58,12 @@ void MKLDNNEmbeddingBagPackedSumNode::initSupportedPrimitiveDescriptors() { IE_THROW() << logPrefix << "has unsupported precision: " << 
inDataPrecision.name(); } - std::vector inDataConfigurators({{TensorDescCreatorTypes::ncsp, inDataPrecision}, - {TensorDescCreatorTypes::ncsp, Precision::I32}}); + std::vector inDataConfigurators({{LayoutType::ncsp, inDataPrecision}, + {LayoutType::ncsp, Precision::I32}}); if (getOriginalInputsNumber() > PER_SAMPLE_WEIGHTS_IDX) - inDataConfigurators.push_back({TensorDescCreatorTypes::ncsp, inDataPrecision}); + inDataConfigurators.push_back({LayoutType::ncsp, inDataPrecision}); - addSupportedPrimDesc(inDataConfigurators, {{TensorDescCreatorTypes::ncsp, inDataPrecision}}, impl_desc_type::ref_any); + addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any); } void MKLDNNEmbeddingBagPackedSumNode::initFromInputs() { @@ -89,7 +89,8 @@ void MKLDNNEmbeddingBagPackedSumNode::execute(mkldnn::stream strm) { if (_withWeights) weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->GetPtr()); - MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, getParentEdgeAt(0)->getDesc(), getChildEdgeAt(0)->getDesc()); + MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, getParentEdgeAt(0)->getMemory().GetDesc().getPrecision(), + getParentEdgeAt(0)->getShape().getStaticDims(), getChildEdgeAt(0)->getShape().getStaticDims()); } bool MKLDNNEmbeddingBagPackedSumNode::created() const { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.cpp index 8abeee76d769ba..853da79accf576 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.cpp @@ -44,13 +44,12 @@ MKLDNNEmbeddingBagSumNode::MKLDNNEmbeddingBagSumNode( template void MKLDNNEmbeddingBagSumNode::processData(const T* srcData, const T* weightsData, T* dstData, - const InferenceEngine::TensorDesc& srcDesc, const InferenceEngine::TensorDesc& dstDesc) { + const InferenceEngine::SizeVector& inDataDims, const InferenceEngine::SizeVector& outDataDims) { std::string msgPrefix = std::string("Node EmbeddingBagSum with name '") + _layerName + "' "; initFromInputs(); - const auto& inDataDims = srcDesc.getDims(); - const size_t outputBagsNum = dstDesc.getDims()[0]; + const size_t outputBagsNum = outDataDims[0]; auto threadBody = [&](const int ithr, const int nthr) { size_t start(0lu), end(0lu); @@ -115,27 +114,27 @@ void MKLDNNEmbeddingBagSumNode::processData(const T* srcData, const T* weightsDa parallel_nt(0, threadBody); } -void MKLDNNEmbeddingBagSumNode::execute(const uint8_t* srcData, const uint8_t* weightsData, uint8_t* dstData, - const InferenceEngine::TensorDesc& srcDesc, const InferenceEngine::TensorDesc& dstDesc) { - switch (srcDesc.getPrecision()) { +void MKLDNNEmbeddingBagSumNode::execute(const uint8_t* srcData, const uint8_t* weightsData, uint8_t* dstData, const InferenceEngine::Precision &srcPrc, + const InferenceEngine::SizeVector& inDims, const InferenceEngine::SizeVector& outDims) { + switch (srcPrc) { case Precision::FP32: { return processData::value_type>(reinterpret_cast(srcData), - reinterpret_cast(weightsData), reinterpret_cast(dstData), srcDesc, dstDesc); + reinterpret_cast(weightsData), reinterpret_cast(dstData), inDims, outDims); } case Precision::I8: { return processData::value_type>(reinterpret_cast(srcData), - reinterpret_cast(weightsData), reinterpret_cast(dstData), srcDesc, dstDesc); + reinterpret_cast(weightsData), 
reinterpret_cast(dstData), inDims, outDims); } case Precision::U8: { - return processData::value_type>(srcData, weightsData, dstData, srcDesc, dstDesc); + return processData::value_type>(srcData, weightsData, dstData, inDims, outDims); } case Precision::I32: { return processData::value_type>(reinterpret_cast(srcData), - reinterpret_cast(weightsData), reinterpret_cast(dstData), srcDesc, dstDesc); + reinterpret_cast(weightsData), reinterpret_cast(dstData), inDims, outDims); } default: { IE_THROW() << "EmbeddingBagSum layer does not support precision '" - + std::string(srcDesc.getPrecision().name()) + "'"; + + std::string(srcPrc.name()) + "'"; } } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.h index f3513501b5c74a..ef5e7ed9a2f917 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.h @@ -21,8 +21,8 @@ class MKLDNNEmbeddingBagSumNode { size_t perSampleWeightsIdx, size_t defaultIndexIdx); - void execute(const uint8_t* srcData, const uint8_t* weightsData, uint8_t* dstData, - const InferenceEngine::TensorDesc& srcDesc, const InferenceEngine::TensorDesc& dstDesc); + void execute(const uint8_t* srcData, const uint8_t* weightsData, uint8_t* dstData, const InferenceEngine::Precision &srcPrc, + const InferenceEngine::SizeVector& inDims, const InferenceEngine::SizeVector& outDims); ~MKLDNNEmbeddingBagSumNode() = default; @@ -37,7 +37,7 @@ class MKLDNNEmbeddingBagSumNode { template void processData(const T* srcData, const T* weightsData, T* dstData, - const InferenceEngine::TensorDesc& srcDesc, const InferenceEngine::TensorDesc& dstDesc); + const InferenceEngine::SizeVector& inDataDims, const InferenceEngine::SizeVector& outDataDims); const size_t EMB_TABLE_IDX = 0lu; const size_t INDICES_IDX; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.cpp index 82eae04dcc2193..1cea74dc5fb886 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.cpp @@ -62,21 +62,21 @@ void MKLDNNEmbeddingSegmentsSumNode::initSupportedPrimitiveDescriptors() { IE_THROW() << logPrefix << "has unsupported precision: " << inDataPrecision.name(); } - std::vector inDataConfigurators({{TensorDescCreatorTypes::ncsp, inDataPrecision}, - {TensorDescCreatorTypes::ncsp, Precision::I32}, - {TensorDescCreatorTypes::ncsp, Precision::I32}, - {TensorDescCreatorTypes::ncsp, Precision::I32}}); + std::vector inDataConfigurators({{LayoutType::ncsp, inDataPrecision}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}); if (getOriginalInputsNumber() > DEFAULT_INDEX_IDX) - inDataConfigurators.push_back({TensorDescCreatorTypes::ncsp, Precision::I32}); + inDataConfigurators.push_back({LayoutType::ncsp, Precision::I32}); if (getOriginalInputsNumber() > PER_SAMPLE_WEIGHTS_IDX) - inDataConfigurators.push_back({TensorDescCreatorTypes::ncsp, inDataPrecision}); + inDataConfigurators.push_back({LayoutType::ncsp, inDataPrecision}); - addSupportedPrimDesc(inDataConfigurators, {{TensorDescCreatorTypes::ncsp, inDataPrecision}}, impl_desc_type::ref_any); + addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, inDataPrecision}}, 
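// The embedding-bag hunks above migrate the port declarations from TensorDescCreatorTypes to
// LayoutType and change MKLDNNEmbeddingBagSumNode::execute() to take a source precision plus
// static input/output dims instead of TensorDesc objects. A rough sketch of the new call
// pattern, reusing in-context variables (srcData, weightsData, dstData, inDataPrecision);
// PortConfigurator as the vector element type is an assumption, since the template
// arguments are dropped in the text above:
std::vector<PortConfigurator> inConfs{{LayoutType::ncsp, inDataPrecision},   // embedding table
                                      {LayoutType::ncsp, Precision::I32}};   // indices
addSupportedPrimDesc(inConfs, {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any);
MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData,
                                   getParentEdgeAt(0)->getMemory().GetDesc().getPrecision(),
                                   getParentEdgeAt(0)->getShape().getStaticDims(),
                                   getChildEdgeAt(0)->getShape().getStaticDims());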
impl_desc_type::ref_any); } void MKLDNNEmbeddingSegmentsSumNode::initFromInputs() { indices_ = reinterpret_cast(getParentEdgeAt(INDICES_IDX)->getMemoryPtr()->GetPtr()); - indicesSize_ = getParentEdgeAt(INDICES_IDX)->getBlob()->size(); + indicesSize_ = getParentEdgeAt(INDICES_IDX)->getShape().getElementsCount(); segmentIds_ = reinterpret_cast(getParentEdgeAt(SEGMENT_ID_IDX)->getMemoryPtr()->GetPtr()); @@ -124,7 +124,8 @@ void MKLDNNEmbeddingSegmentsSumNode::execute(mkldnn::stream strm) { if (_withWeights) weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->GetPtr()); - MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, getParentEdgeAt(0)->getDesc(), getChildEdgeAt(0)->getDesc()); + MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, getParentEdgeAt(0)->getMemory().GetDesc().getPrecision(), + getParentEdgeAt(0)->getShape().getStaticDims(), getChildEdgeAt(0)->getShape().getStaticDims()); } bool MKLDNNEmbeddingSegmentsSumNode::created() const { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_detection_output_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_detection_output_node.cpp index fe2362003f377a..d04b80b0086280 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_detection_output_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_detection_output_node.cpp @@ -1,7 +1,6 @@ // Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" #include #include @@ -253,22 +252,22 @@ void MKLDNNExperimentalDetectronDetectionOutputNode::initSupportedPrimitiveDescr if (!supportedPrimitiveDescriptors.empty()) return; - std::vector inDataConf; + std::vector inDataConf; inDataConf.reserve(getOriginalInputsNumber()); for (int i = 0; i < getOriginalInputsNumber(); ++i) - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::FP32); + inDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); addSupportedPrimDesc(inDataConf, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::I32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } void MKLDNNExperimentalDetectronDetectionOutputNode::execute(mkldnn::stream strm) { - const int rois_num = getParentEdgeAt(INPUT_ROIS)->getDims()[0]; - assert(classes_num_ == static_cast(getParentEdgeAt(INPUT_SCORES)->getDims()[1])); - assert(4 * classes_num_ == static_cast(getParentEdgeAt(INPUT_DELTAS)->getDims()[1])); + const int rois_num = getParentEdgeAt(INPUT_ROIS)->getShape().getStaticDims()[0]; + assert(classes_num_ == static_cast(getParentEdgeAt(INPUT_SCORES)->getShape().getStaticDims()[1])); + assert(4 * classes_num_ == static_cast(getParentEdgeAt(INPUT_DELTAS)->getShape().getStaticDims()[1])); const auto* boxes = reinterpret_cast(getParentEdgeAt(INPUT_ROIS)->getMemoryPtr()->GetPtr()); const auto* deltas = reinterpret_cast(getParentEdgeAt(INPUT_DELTAS)->getMemoryPtr()->GetPtr()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_generate_proposals_single_image_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_generate_proposals_single_image_node.cpp index 255f8443765660..8bd70dd2a6ebde 100644 --- 
a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_generate_proposals_single_image_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_generate_proposals_single_image_node.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" - #include #include #include @@ -313,36 +311,36 @@ void MKLDNNExperimentalDetectronGenerateProposalsSingleImageNode::initSupportedP if (!supportedPrimitiveDescriptors.empty()) return; - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } void MKLDNNExperimentalDetectronGenerateProposalsSingleImageNode::execute(mkldnn::stream strm) { try { - if (inDims.size() != 4 || outDims.size() != 2) { + if (inputShapes.size() != 4 || outputShapes.size() != 2) { IE_THROW() << "Incorrect number of input or output edges!"; } size_t anchor_dims_size = 1; - for (size_t i = 0; i < getParentEdgeAt(INPUT_ANCHORS)->getDims().ToSizeVector().size(); i++) { - anchor_dims_size *= getParentEdgeAt(INPUT_ANCHORS)->getDims().ToSizeVector()[i]; + for (size_t i = 0; i < getParentEdgeAt(INPUT_ANCHORS)->getShape().getRank(); i++) { + anchor_dims_size *= getParentEdgeAt(INPUT_ANCHORS)->getShape().getStaticDims()[i]; } size_t deltas_dims_size = 1; - for (size_t i = 0; i < getParentEdgeAt(INPUT_DELTAS)->getDims().ToSizeVector().size(); i++) { - deltas_dims_size *= getParentEdgeAt(INPUT_DELTAS)->getDims().ToSizeVector()[i]; + for (size_t i = 0; i < getParentEdgeAt(INPUT_DELTAS)->getShape().getRank(); i++) { + deltas_dims_size *= getParentEdgeAt(INPUT_DELTAS)->getShape().getStaticDims()[i]; } if (anchor_dims_size != deltas_dims_size) IE_THROW() << "'Anchors' blob size for ONNXProposal is incompatible with 'deltas' blob size!"; size_t score_dims_size = 1; - for (size_t i = 0; i < getParentEdgeAt(INPUT_SCORES)->getDims().ToSizeVector().size(); i++) { - score_dims_size *= getParentEdgeAt(INPUT_SCORES)->getDims().ToSizeVector()[i]; + for (size_t i = 0; i < getParentEdgeAt(INPUT_SCORES)->getShape().getRank(); i++) { + score_dims_size *= getParentEdgeAt(INPUT_SCORES)->getShape().getStaticDims()[i]; } if (deltas_dims_size != (4 * score_dims_size)) IE_THROW() << "'Deltas' blob size for ONNXProposal is incompatible with 'scores' blob size!"; @@ -356,11 +354,11 @@ void MKLDNNExperimentalDetectronGenerateProposalsSingleImageNode::execute(mkldnn float *p_roi_item = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROIS)[0]->getMemoryPtr()->GetPtr()); float *p_roi_score_item = reinterpret_cast(getChildEdgesAtPort(OUTPUT_SCORES)[0]->getMemoryPtr()->GetPtr()); - const int anchors_num = getParentEdgeAt(INPUT_SCORES)->getDims()[0]; + const int anchors_num = getParentEdgeAt(INPUT_SCORES)->getShape().getStaticDims()[0]; // bottom shape: (num_anchors) x H x W - const int bottom_H = getParentEdgeAt(INPUT_DELTAS)->getDims()[1]; - const int bottom_W = getParentEdgeAt(INPUT_DELTAS)->getDims()[2]; + const int bottom_H = getParentEdgeAt(INPUT_DELTAS)->getShape().getStaticDims()[1]; + const int 
bottom_W = getParentEdgeAt(INPUT_DELTAS)->getShape().getStaticDims()[2]; // input image height & width const float img_H = p_img_info_cpu[0]; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_priorgridgenerator_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_priorgridgenerator_node.cpp index b5d073a0b3552e..001257c443d419 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_priorgridgenerator_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_priorgridgenerator_node.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" - #include #include @@ -55,22 +53,22 @@ void MKLDNNExperimentalDetectronPriorGridGeneratorNode::initSupportedPrimitiveDe if (!supportedPrimitiveDescriptors.empty()) return; - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } void MKLDNNExperimentalDetectronPriorGridGeneratorNode::execute(mkldnn::stream strm) { - const int num_priors_ = getParentEdgeAt(INPUT_PRIORS)->getDims()[0]; - assert(getParentEdgeAt(INPUT_PRIORS)->getDims()[1] == 4); + const int num_priors_ = getParentEdgeAt(INPUT_PRIORS)->getShape().getStaticDims()[0]; + assert(getParentEdgeAt(INPUT_PRIORS)->getShape().getStaticDims()[1] == 4); // Execute - const int layer_width = grid_w_ ? grid_w_ : getParentEdgeAt(INPUT_FEATUREMAP)->getDims()[3]; - const int layer_height = grid_h_ ? grid_h_ : getParentEdgeAt(INPUT_FEATUREMAP)->getDims()[2]; - const float step_w = stride_w_ ? stride_w_ : static_cast(getParentEdgeAt(INPUT_IMAGE)->getDims()[3]) / layer_width; - const float step_h = stride_h_ ? stride_h_ : static_cast(getParentEdgeAt(INPUT_IMAGE)->getDims()[2]) / layer_height; + const int layer_width = grid_w_ ? grid_w_ : getParentEdgeAt(INPUT_FEATUREMAP)->getShape().getStaticDims()[3]; + const int layer_height = grid_h_ ? grid_h_ : getParentEdgeAt(INPUT_FEATUREMAP)->getShape().getStaticDims()[2]; + const float step_w = stride_w_ ? stride_w_ : static_cast(getParentEdgeAt(INPUT_IMAGE)->getShape().getStaticDims()[3]) / layer_width; + const float step_h = stride_h_ ? 
stride_h_ : static_cast(getParentEdgeAt(INPUT_IMAGE)->getShape().getStaticDims()[2]) / layer_height; const auto *bottom_data_0 = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto *top_data_0 = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROIS)[0]->getMemoryPtr()->GetPtr()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_roifeatureextractor_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_roifeatureextractor_node.cpp index 94e7f033a95548..09313e30bd64aa 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_roifeatureextractor_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_roifeatureextractor_node.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" - #include #include #include @@ -341,27 +339,27 @@ void MKLDNNExperimentalDetectronROIFeatureExtractorNode::initSupportedPrimitiveD if (!supportedPrimitiveDescriptors.empty()) return; - std::vector inDataConf; + std::vector inDataConf; inDataConf.reserve(getOriginalInputsNumber()); for (int i = 0; i < getOriginalInputsNumber(); ++i) - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::FP32); + inDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); addSupportedPrimDesc(inDataConf, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } void MKLDNNExperimentalDetectronROIFeatureExtractorNode::execute(mkldnn::stream strm) { - const int levels_num = inDims.size() - INPUT_FEATURES_START; - const int num_rois = getParentEdgeAt(INPUT_ROIS)->getDims()[0]; - const int channels_num = getParentEdgeAt(INPUT_FEATURES_START)->getDims()[1]; + const int levels_num = inputShapes.size() - INPUT_FEATURES_START; + const int num_rois = getParentEdgeAt(INPUT_ROIS)->getShape().getStaticDims()[0]; + const int channels_num = getParentEdgeAt(INPUT_FEATURES_START)->getShape().getStaticDims()[1]; const int feaxels_per_roi = pooled_height_ * pooled_width_ * channels_num; auto *input_rois = reinterpret_cast(getParentEdgeAt(INPUT_ROIS)->getMemoryPtr()->GetPtr()); auto *output_rois_features = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROI_FEATURES)[0]->getMemoryPtr()->GetPtr()); float *output_rois = nullptr; - if (OUTPUT_ROIS < outDims.size()) { + if (OUTPUT_ROIS < outputShapes.size()) { output_rois = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROIS)[0]->getMemoryPtr()->GetPtr()); } @@ -381,8 +379,8 @@ void MKLDNNExperimentalDetectronROIFeatureExtractorNode::execute(mkldnn::stream const int level_rois_num = rois_per_level[i + 1] - level_rois_offset; if (level_rois_num > 0) { auto *featuremap = reinterpret_cast(getParentEdgeAt(INPUT_FEATURES_START + i)->getMemoryPtr()->GetPtr()); - const int featuremap_height = getParentEdgeAt(INPUT_FEATURES_START + i)->getDims()[2]; - const int featuremap_width = getParentEdgeAt(INPUT_FEATURES_START + i)->getDims()[3]; + const int featuremap_height = getParentEdgeAt(INPUT_FEATURES_START + i)->getShape().getStaticDims()[2]; + const int featuremap_width = getParentEdgeAt(INPUT_FEATURES_START + i)->getShape().getStaticDims()[3]; ROIAlignForward_cpu_kernel(feaxels_per_roi * level_rois_num, featuremap, 1.0f / pyramid_scales_[i], diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_topkrois_node.cpp 
b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_topkrois_node.cpp index d543658f78e724..f77c3fcb2b08b6 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_topkrois_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_topkrois_node.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" - #include #include #include @@ -51,14 +49,14 @@ void MKLDNNExperimentalDetectronTopKROIsNode::initSupportedPrimitiveDescriptors( if (!supportedPrimitiveDescriptors.empty()) return; - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } void MKLDNNExperimentalDetectronTopKROIsNode::execute(mkldnn::stream strm) { - const int input_rois_num = getParentEdgeAt(INPUT_ROIS)->getDims()[0]; + const int input_rois_num = getParentEdgeAt(INPUT_ROIS)->getShape().getStaticDims()[0]; const int top_rois_num = (std::min)(max_rois_num_, input_rois_num); auto *input_rois = reinterpret_cast(getParentEdgeAt(INPUT_ROIS)->getMemoryPtr()->GetPtr()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_extract_image_patches_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_extract_image_patches_node.cpp index d4c5d3037962b0..13ada3cf81dfa5 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_extract_image_patches_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_extract_image_patches_node.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" - #include #include #include @@ -411,8 +409,8 @@ void MKLDNNExtractImagePatchesNode::initSupportedPrimitiveDescriptors() { if (_supported_precisions_sizes.find(precision.size()) == _supported_precisions_sizes.end()) IE_THROW() << errorPrefix << "has unsupported precision: " << precision.name(); - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, precision}}, - {{TensorDescCreatorTypes::ncsp, precision}}, + addSupportedPrimDesc({{LayoutType::ncsp, precision}}, + {{LayoutType::ncsp, precision}}, impl_desc_type::ref_any); } @@ -421,12 +419,12 @@ void MKLDNNExtractImagePatchesNode::execute(mkldnn::stream strm) { char *dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); const size_t dtype_size = getOriginalInputPrecisionAtPort(0).size(); - const auto& inDims = getParentEdgeAt(0)->getDims().ToSizeVector(); + const auto& inDims = getParentEdgeAt(0)->getShape().getStaticDims(); const size_t IC = inDims[1]; const size_t IH = inDims[2]; const size_t IW = inDims[3]; - const auto& outDims = getChildEdgesAtPort(0)[0]->getDims().ToSizeVector(); + const auto& outDims = getChildEdgesAtPort(0)[0]->getShape().getStaticDims(); const size_t OB = outDims[0]; const size_t OH = outDims[2]; const size_t OW = outDims[3]; @@ -436,8 +434,8 @@ void MKLDNNExtractImagePatchesNode::execute(mkldnn::stream strm) { const size_t RH = _rates[0], RW = _rates[1]; const size_t PT = _pad_top, PL = _pad_left; - const std::vector istrides = getParentEdgeAt(0)->getDesc().getBlockingDesc().getStrides(); - const std::vector ostrides = getChildEdgesAtPort(0)[0]->getDesc().getBlockingDesc().getStrides(); + const std::vector istrides = getParentEdgeAt(0)->getMemory().GetDescWithType().getStrides(); + const std::vector ostrides = 
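// Recurring pattern in these node files: static shapes are now read through
// edge->getShape().getStaticDims() instead of edge->getDims().ToSizeVector(), and blocked
// strides come from the memory descriptor rather than the cached TensorDesc. A condensed
// sketch; BlockedMemoryDesc as the GetDescWithType template argument is an assumption,
// because template parameters are dropped in the text above:
const auto& dims    = getParentEdgeAt(0)->getShape().getStaticDims();
const auto  strides = getParentEdgeAt(0)->getMemory()
                          .GetDescWithType<BlockedMemoryDesc>().getStrides();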
getChildEdgesAtPort(0)[0]->getMemory().GetDescWithType().getStrides(); const std::vector ostrides_partial = {ostrides[0], KW * IC * ostrides[1], IC * ostrides[1], ostrides[1]}; if (extract_image_patches_kernel) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp index b12bed6a47672b..b08ebae30f4c41 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp @@ -19,6 +19,7 @@ #include "ie_parallel.hpp" #include +#include // Quantization ranges validation is switched off by default in order to avoid regressions on user side // #define VALIDATE_QUANTIZATION_RANGES @@ -219,7 +220,7 @@ struct jit_uni_quantization_kernel : public jit_uni_quantize_kernel, public jit_ this->preamble(); - if (jqp_.src_layout == Layout::CHW || jqp_.src_layout == Layout::NCHW || jqp_.src_layout == Layout::NCDHW) + if (jqp_.is_planar) compute_planar(); else compute_generic(); @@ -1090,31 +1091,23 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr MKLDNNFakeQuantizeNode::getDataFormats() const { +std::vector MKLDNNFakeQuantizeNode::getDataFormats() const { // Special case for first FQ in the network - if (getParentEdgesAtPort(0)[0]->getDims()[getAxis()] == 3) { - return { MKLDNNMemory::GetPlainFormat(getParentEdgesAtPort(0)[0]->getDims()) }; + if (getParentEdgesAtPort(0)[0]->getShape().getStaticDims()[getAxis()] == 3) { + return { LayoutType::ncsp }; } else { if (isBinarization()) { - return {memory::format_tag::nhwc}; + return { LayoutType::nspc }; } else { - switch (getParentEdgesAtPort(0)[0]->getDims().ndims()) { - case 4: - if (getAxis() == 1) { - auto blkFormat = mayiuse(cpu::x64::avx512_common) ? memory::format_tag::nChw16c : memory::format_tag::nChw8c; - return {blkFormat, memory::format_tag::nhwc, memory::format_tag::nchw}; - } else { - return {memory::format_tag::nchw}; - } - case 5: - if (getAxis() == 1) { - auto blkFormat = mayiuse(cpu::x64::avx512_common) ? memory::format_tag::nCdhw16c : memory::format_tag::nCdhw8c; - return {blkFormat, memory::format_tag::ndhwc, memory::format_tag::ncdhw}; - } else { - return {memory::format_tag::ncdhw}; - } - default: - return {MKLDNNMemory::GetPlainFormat(getParentEdgesAtPort(0)[0]->getDims())}; + if (one_of(getParentEdgesAtPort(0)[0]->getShape().getRank(), 4, 5)) { + if (getAxis() == 1) { + auto blkFormat = mayiuse(cpu::x64::avx512_common) ? 
LayoutType::nCsp16c : LayoutType::nCsp8c; + return { blkFormat, LayoutType::nspc, LayoutType::ncsp }; + } else { + return { LayoutType::ncsp }; + } + } else { + return { LayoutType::ncsp }; } } } @@ -1147,12 +1140,12 @@ void MKLDNNFakeQuantizeNode::getSupportedDescriptors() { IE_THROW() << errorPrefix << "has unsupported number of parent edges at port " << i; } - if (getParentEdgesAtPort(0)[0]->getDims().ndims() != getChildEdgesAtPort(0)[0]->getDims().ndims()) { + if (getParentEdgesAtPort(0)[0]->getShape().getRank() != getChildEdgesAtPort(0)[0]->getShape().getRank()) { IE_THROW() << errorPrefix << "has different ranks for input and output tensors"; } if (isBinarization()) { - if (getParentEdgesAtPort(0)[0]->getDims().ndims() != 4ul) { + if (getParentEdgesAtPort(0)[0]->getShape().getRank() != 4ul) { IE_THROW() << errorPrefix << "doesn't support input/output rank != 4"; } } @@ -1189,47 +1182,52 @@ void MKLDNNFakeQuantizeNode::initSupportedPrimitiveDescriptors() { } } - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getInputPrecision()); - auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOutputPrecision()); - for (auto& fmt : getDataFormats()) { - LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; for (size_t i = 0; i < getParentEdges().size(); i++) { - DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; if (i == 0) { - dataConfig.desc = MKLDNNMemoryDesc(getParentEdgeAt(i)->getDims(), inputDataType, fmt); + auto descCreator = BlockedDescCreator::getCommonCreators().at(fmt); + dataConfig.desc = descCreator->createUniqueDesc(getInputPrecision(), getParentEdgeAt(i)->getShape().getStaticDims()); } else { - dataConfig.desc = MKLDNNMemoryDesc(getParentEdgeAt(i)->getDims(), memory::data_type::f32, - MKLDNNMemory::GetPlainFormat(getParentEdgeAt(i)->getDims())); + auto descCreator = BlockedDescCreator::getCommonCreators().at(LayoutType::ncsp); + dataConfig.desc = descCreator->createUniqueDesc(Precision::FP32, getParentEdgeAt(i)->getShape().getStaticDims()); } config.inConfs.push_back(dataConfig); } - DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, fmt); + auto descCreator = BlockedDescCreator::getCommonCreators().at(fmt); + dataConfig.desc = descCreator->createUniqueDesc(getOutputPrecision(), getChildEdgeAt(0)->getShape().getStaticDims()); config.outConfs.push_back(dataConfig); - supportedPrimitiveDescriptors.push_back({config, impl_type, fmt}); + supportedPrimitiveDescriptors.push_back({config, impl_type}); } } void MKLDNNFakeQuantizeNode::createPrimitive() { auto config = getSelectedPrimitiveDescriptor()->getConfig(); - auto inDims = config.inConfs[0].desc.getDims(); + auto inDims = config.inConfs[0].desc->getShape().getStaticDims(); jqp.c = inDims.size() > 1 ? 
inDims[1] : 1; - jqp.src_prc = config.inConfs[0].desc.getPrecision(); + jqp.src_prc = config.inConfs[0].desc->getPrecision(); jqp.wei_prc = Precision::FP32; - jqp.dst_prc = config.outConfs[0].desc.getPrecision(); + jqp.dst_prc = config.outConfs[0].desc->getPrecision(); - jqp.src_layout = config.inConfs[0].desc.getLayout(); + auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + jqp.s_str = srcDesc.getStrides(); + + auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + jqp.d_str = dstDesc.getStrides(); + + jqp.is_planar = srcDesc.hasLayoutType(LayoutType::ncsp) && one_of(srcDesc.getShape().getRank(), 3, 4, 5); jqp.op_type = getAlgorithm(); @@ -1258,7 +1256,7 @@ void MKLDNNFakeQuantizeNode::createPrimitive() { if (quantize_kernel) quantize_kernel->create_ker(); - size_t axisSize = getParentEdgeAt(0)->getDims()[getAxis()]; + size_t axisSize = getParentEdgeAt(0)->getShape().getStaticDims()[getAxis()]; size_t axisPaddedSize = rnd_up(axisSize, 16); MKLDNNMemoryDesc weightsDataDesc = {{(uint32_t)axisPaddedSize}, memory::data_type::f32, memory::format_tag::x}; @@ -1297,12 +1295,11 @@ void MKLDNNFakeQuantizeNode::executeReference() { auto src = reinterpret_cast(srcMemory->GetPtr()); - auto config = getSelectedPrimitiveDescriptor()->getConfig(); - auto srcDims = config.inConfs[0].desc.getDims(); - auto dstDims = config.outConfs[0].desc.getDims(); + auto srcDims = srcMemory->GetDesc().getShape().getStaticDims(); + auto dstDims = dstMemory->GetDesc().getShape().getStaticDims(); - auto s_str = config.inConfs[0].desc.getBlockingDesc().getStrides(); - auto d_str = config.outConfs[0].desc.getBlockingDesc().getStrides(); + auto s_str = jqp.s_str; + auto d_str = jqp.d_str; const int N = srcDims[0]; const int C = srcDims.size() > 1 ? srcDims[1] : 1; @@ -1419,10 +1416,9 @@ void MKLDNNFakeQuantizeNode::executeBinarization() { auto thresholds = reinterpret_cast(internalBlobMemory[0]->GetData()); auto output_mask = reinterpret_cast(internalBlobMemory[1]->GetData()); - auto config = getSelectedPrimitiveDescriptor()->getConfig(); - auto src_dims = config.inConfs[0].desc.getDims(); + auto src_dims = srcMemory->GetDesc().getShape().getStaticDims(); - std::vector s_str = config.inConfs[0].desc.getBlockingDesc().getStrides(); + std::vector s_str = jqp.s_str; size_t tmp = s_str[s_str.size() - 1]; for (int i = s_str.size() - 1; i > 1; i--) { s_str[i] = s_str[i - 1]; @@ -1463,24 +1459,23 @@ void MKLDNNFakeQuantizeNode::executeQuantization() { auto output_scale = reinterpret_cast(internalBlobMemory[4]->GetData()); auto output_shift = reinterpret_cast(internalBlobMemory[5]->GetData()); - auto config = getSelectedPrimitiveDescriptor()->getConfig(); - auto srcDims = config.inConfs[0].desc.getDims(); + auto& srcDesc = srcMemory->GetDesc(); + auto srcDims = srcDesc.getShape().getStaticDims(); - bool is_blk_format = jqp.src_layout != Layout::NHWC && jqp.src_layout != Layout::NDHWC; - int blk_size = (jqp.src_layout == Layout::CHW || - jqp.src_layout == Layout::NCHW || - jqp.src_layout == Layout::NCDHW) ? 1 : mayiuse(cpu::x64::avx512_common) ? 16 : 8; + bool is_blk_format = !srcDesc.hasLayoutType(LayoutType::nspc) && one_of(srcDesc.getShape().getRank(), 4, 5); + int blk_size = (srcDesc.hasLayoutType(LayoutType::ncsp) && one_of(srcDesc.getShape().getRank(), 3, 4, 5)) + ? 1 : mayiuse(cpu::x64::avx512_common) ? 
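// With this change jit_quantize_params no longer stores an InferenceEngine::Layout;
// createPrimitive() precomputes what the JIT kernel needs: is_planar selects the planar
// code path, and s_str/d_str keep the source and destination strides. In essence
// (srcDesc and dstDesc stand for the blocked descriptors queried from the first parent
// and child edges, as in the hunk above):
jqp.is_planar = srcDesc.hasLayoutType(LayoutType::ncsp) && one_of(srcDesc.getShape().getRank(), 3, 4, 5);
jqp.s_str = srcDesc.getStrides();   // std::vector<size_t> fields added to the struct
jqp.d_str = dstDesc.getStrides();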
16 : 8; auto src_type_size = jqp.src_prc.size(); auto dst_type_size = jqp.dst_prc.size(); - std::vector s_str = config.inConfs[0].desc.getBlockingDesc().getStrides(); + auto s_str = jqp.s_str; - if (jqp.src_layout == BLOCKED) { + if (is_blk_format) { s_str[1] /= blk_size; } - if (jqp.src_layout == Layout::NHWC || jqp.src_layout == Layout::NDHWC) { + if (srcDesc.hasLayoutType(LayoutType::nspc) && one_of(srcDesc.getShape().getRank(), 4, 5)) { size_t tmp = s_str[s_str.size() - 1]; for (int i = s_str.size() - 1; i > 1; i--) { s_str[i] = s_str[i - 1]; @@ -1495,7 +1490,7 @@ void MKLDNNFakeQuantizeNode::executeQuantization() { const int H = srcDims.size() == 3 ? srcDims[2] : srcDims.size() > 3 ? srcDims[srcDims.size() - 2] : 1; const int W = srcDims.size() > 3 ? srcDims[srcDims.size() - 1] : 1; - if (jqp.src_layout == Layout::CHW) { + if (srcDesc.hasLayoutType(LayoutType::ncsp) && srcDesc.getShape().getRank() == 3) { parallel_nd(N, CB, D, [&](int n, int cb, int d) { auto arg = jit_quantize_call_args(); @@ -1542,7 +1537,7 @@ void MKLDNNFakeQuantizeNode::executeQuantization() { arg.src_step = is_blk_format ? (size_t) blk_size * src_type_size : (size_t) C * src_type_size; arg.dst_step = is_blk_format ? (size_t) blk_size * dst_type_size : (size_t) C * dst_type_size; - arg.block_size = (is_blk_format && jqp.src_layout != Layout::NC) ? (size_t) blk_size : nstl::min(blk_size, C - c); + arg.block_size = (is_blk_format && srcDims.size() != 2) ? (size_t) blk_size : nstl::min(blk_size, C - c); arg.work_amount = (size_t) W; (*quantize_kernel)(&arg); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.h index 4430acac9ba87f..eb6a49b12105bc 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.h @@ -17,12 +17,14 @@ namespace MKLDNNPlugin { struct jit_quantize_params { int c; + bool is_planar; InferenceEngine::Precision src_prc; InferenceEngine::Precision wei_prc; InferenceEngine::Precision dst_prc; - InferenceEngine::Layout src_layout; + std::vector s_str; + std::vector d_str; Algorithm op_type; }; @@ -109,7 +111,7 @@ class MKLDNNFakeQuantizeNode : public MKLDNNNode { private: void init() override; - std::vector getDataFormats() const; + std::vector getDataFormats() const; void executeReference(); void executeBinarization(); void executeQuantization(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp index e5b9ade856754b..ee8dc1b730b911 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp @@ -12,6 +12,7 @@ #include #include #include "utils/general_utils.h" +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -50,18 +51,18 @@ MKLDNNFullyConnectedNode::MKLDNNFullyConnectedNode(const std::shared_ptr MKLDNNFullyConnectedNode::getAvailableFormatsForDims(const MKLDNNDims &dims) const { - if (dims.ndims() == 0) +std::vector MKLDNNFullyConnectedNode::getAvailableFormatsForDims(const Shape &dims) const { + if (dims.getRank() == 0) return {memory::format_tag::x}; - else if (dims.ndims() == 1) + else if (dims.getRank() == 1) return {memory::format_tag::x}; - else if (dims.ndims() == 2) + else if (dims.getRank() == 2) return {memory::format_tag::nc}; - else if (dims.ndims() == 3) + else if 
(dims.getRank() == 3) return {memory::format_tag::tnc}; - else if (dims.ndims() == 4) + else if (dims.getRank() == 4) return {memory::format_tag::nChw8c, memory::format_tag::nChw16c, memory::format_tag::nhwc, memory::format_tag::nchw}; - else if (dims.ndims() == 5) + else if (dims.getRank() == 5) return {memory::format_tag::nCdhw8c, memory::format_tag::nCdhw16c, memory::format_tag::ndhwc, memory::format_tag::ncdhw}; return {memory::format_tag::any}; } @@ -100,23 +101,23 @@ void MKLDNNFullyConnectedNode::getSupportedDescriptors() { outputDataType = memory::data_type::bf16; } - MKLDNNDims inDims = getParentEdgeAt(0)->getDims(); - MKLDNNDims outDims = getChildEdgeAt(0)->getDims(); + const auto inDims = getParentEdgeAt(0)->getShape().getStaticDims(); + const auto outDims = getChildEdgeAt(0)->getShape().getStaticDims(); - if (inDims.ndims() == 3) { + if (inDims.size() == 3) { weightsDims = InferenceEngine::SizeVector({static_cast(outDims[2]), static_cast(inDims[2])}); } else { weightsDims.push_back(outDims[1]); - for (int i = 1; i < inDims.ndims(); i++) + for (int i = 1; i < inDims.size(); i++) weightsDims.push_back(inDims[i]); } biasesDims.push_back(weightsDims[0]); - for (auto format : getAvailableFormatsForDims(inDims)) { - MKLDNNMemoryDesc in_candidate(inDims, inputDataType, format); - MKLDNNMemoryDesc out_candidate(outDims, outputDataType, memory::format_tag::any); + for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getShape())) { + auto in_candidate = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(inDims), inputDataType, format); + auto out_candidate = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(outDims), outputDataType, mkldnn::memory::format_tag::any); - createDescriptor({in_candidate}, {out_candidate}); + createDescriptorInternal(in_candidate, out_candidate); } } @@ -236,35 +237,40 @@ std::shared_ptr MKLDNNFullyConnectedNode::initPrimitiveA return attr; } -void MKLDNNFullyConnectedNode::createDescriptor(const std::vector &inputDesc, - const std::vector &outputDesc) { - TensorDesc inDesc = inputDesc[0], outDesc = outputDesc[0]; - - mkldnn::memory::data_type wdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision()); - mkldnn::memory::data_type bdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision()); - if (inDesc.getPrecision() == Precision::BF16) { +// WA: creation MKLDNNMemoryDesc with format == any is prohibited +// so we create mkldnn::memory::desc directly +// we need specific method and can't remove createDescriptor from base class because its used into initDescriptor +void MKLDNNFullyConnectedNode::createDescriptorInternal(const mkldnn::memory::desc &inputDesc, + const mkldnn::memory::desc &outputDesc) { + auto in_candidate = inputDesc; + auto out_candidate = outputDesc; + + mkldnn::memory::data_type wdt = in_candidate.data_type(); + mkldnn::memory::data_type bdt = out_candidate.data_type(); + if (in_candidate.data_type() == mkldnn::memory::data_type::bf16) { bdt = mkldnn::memory::data_type::f32; - } else if (inDesc.getPrecision() == Precision::U8 || inDesc.getPrecision() == Precision::I8) { + } else if (in_candidate.data_type() == mkldnn::memory::data_type::u8 || in_candidate.data_type() == mkldnn::memory::data_type::s8) { wdt = memory::data_type::s8; if (withBiases) bdt = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(BIAS_ID)); } - if (inDesc.getDims().size() == 3) { - auto inDims = inDesc.getDims(); - auto outDims = outDesc.getDims(); - InferenceEngine::SizeVector 
normalizedInDims = {inDims[0] * inDims[1], inDims[2]}; - InferenceEngine::SizeVector normalizedOutDims = {outDims[0] * outDims[1], outDims[2]}; - inDesc = InferenceEngine::TensorDesc(inDesc.getPrecision(), normalizedInDims, TensorDesc::getLayoutByDims(normalizedInDims)); - outDesc = InferenceEngine::TensorDesc(outDesc.getPrecision(), normalizedOutDims, TensorDesc::getLayoutByDims(normalizedOutDims)); + if (in_candidate.dims().size() == 3) { + auto inDims = in_candidate.dims(); + auto outDims = out_candidate.dims(); + auto normalizedInDims = {inDims[0] * inDims[1], inDims[2]}; + auto normalizedOutDims = {outDims[0] * outDims[1], outDims[2]}; + in_candidate = mkldnn::memory::desc(normalizedInDims, in_candidate.data_type(), + MKLDNNMemory::GetPlainFormatByRank(normalizedInDims.size())); + out_candidate = mkldnn::memory::desc(normalizedOutDims, out_candidate.data_type(), + MKLDNNMemory::GetPlainFormatByRank(normalizedOutDims.size())); } - MKLDNNMemoryDesc in_candidate(inDesc); - MKLDNNMemoryDesc out_candidate(outDesc); - MKLDNNMemoryDesc wgh_candidate(MKLDNNDims(weightsDims), wdt, mkldnn::memory::format_tag::any); + mkldnn::memory::desc wgh_candidate(MKLDNNDims(weightsDims), wdt, mkldnn::memory::format_tag::any); if (withBiases) { - MKLDNNMemoryDesc bias_candidate(MKLDNNDims(inDims[BIAS_ID]), bdt, memory::format_tag::any); + mkldnn::memory::desc bias_candidate(MKLDNNExtensionUtils::convertToDnnlDims(inputShapes[BIAS_ID].getStaticDims()), bdt, + mkldnn::memory::format_tag::any); MKLDNNDescriptor desc(std::shared_ptr( new inner_product_forward::desc(prop_kind::forward_scoring, in_candidate, wgh_candidate, bias_candidate, out_candidate))); @@ -277,40 +283,28 @@ void MKLDNNFullyConnectedNode::createDescriptor(const std::vector 0 ? MKLDNNMemoryDesc(primitive_desc_it.weights_desc(idx - 1)) - : MKLDNNMemoryDesc(primitive_desc_it.src_desc(idx)); - - if (desc.getLayout() == InferenceEngine::Layout::ANY) { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getParentEdgeAt(idx)->getDims().ToSizeVector(), - desc.getLayout())); - } else if (getParentEdgeAt(idx)->getDims().ndims() == 3) { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getParentEdgeAt(idx)->getDims().ToSizeVector(), - TensorDesc::getLayoutByDims(getParentEdgeAt(idx)->getDims().ToSizeVector()))); - } else { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getParentEdgeAt(idx)->getDims().ToSizeVector(), - desc.getBlockingDesc())); +void MKLDNNFullyConnectedNode::createDescriptor(const std::vector &inputDesc, + const std::vector &outputDesc) { + createDescriptorInternal(MemoryDescUtils::convertToMKLDNNMemoryDesc(*inputDesc[0]), MemoryDescUtils::convertToMKLDNNMemoryDesc(*outputDesc[0])); +} + +std::unique_ptr MKLDNNFullyConnectedNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + auto desc = idx > 0 ? 
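// FullyConnected now builds oneDNN memory descriptors directly, because creating an
// MKLDNNMemoryDesc with format_tag::any is prohibited after the refactor. The essence of
// the new flow in getSupportedDescriptors(), with inDims/outDims and the data types taken
// from the surrounding code:
auto in_candidate  = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(inDims),  inputDataType, format);
auto out_candidate = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(outDims), outputDataType,
                                          mkldnn::memory::format_tag::any);
createDescriptorInternal(in_candidate, out_candidate);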
MKLDNNMemoryDesc(primitive_desc_it.weights_desc(idx - 1)) : MKLDNNMemoryDesc(primitive_desc_it.src_desc(idx)); + + if (getParentEdgeAt(idx)->getShape().getRank() == 3) { + desc = MKLDNNMemoryDesc(getParentEdgeAt(idx)->getShape().getStaticDims(), MKLDNNExtensionUtils::IEPrecisionToDataType(desc.getPrecision()), + MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(idx)->getShape().getRank())); } + return MKLDNNPlugin::make_unique(std::move(desc)); } -MKLDNNMemoryDesc MKLDNNFullyConnectedNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { - InferenceEngine::TensorDesc desc = MKLDNNMemoryDesc(primitive_desc_it.dst_desc(idx)); - if (desc.getLayout() == InferenceEngine::Layout::ANY) { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getChildEdgeAt(idx)->getDims().ToSizeVector(), - desc.getLayout())); - } else if (getChildEdgeAt(idx)->getDims().ndims() == 3) { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getChildEdgeAt(idx)->getDims().ToSizeVector(), - TensorDesc::getLayoutByDims(getChildEdgeAt(idx)->getDims().ToSizeVector()))); - } else { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getChildEdgeAt(idx)->getDims().ToSizeVector(), - desc.getBlockingDesc())); +std::unique_ptr MKLDNNFullyConnectedNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + auto desc = MKLDNNMemoryDesc(primitive_desc_it.dst_desc(idx)); + if (getChildEdgeAt(idx)->getShape().getRank() == 3) { + desc = MKLDNNMemoryDesc(getChildEdgeAt(idx)->getShape().getStaticDims(), MKLDNNExtensionUtils::IEPrecisionToDataType(desc.getPrecision()), + MKLDNNMemory::GetPlainFormatByRank(getChildEdgeAt(idx)->getShape().getRank())); } + return MKLDNNPlugin::make_unique(std::move(desc)); } InferenceEngine::Precision MKLDNNFullyConnectedNode::getRuntimePrecision() const { @@ -324,7 +318,7 @@ InferenceEngine::Precision MKLDNNFullyConnectedNode::getRuntimePrecision() const } } - return MKLDNNExtensionUtils::getMaxPrecision(inputPrecisions); + return getMaxPrecision(inputPrecisions); } REG_MKLDNN_PRIM_FOR(MKLDNNFullyConnectedNode, FullyConnected); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h index 63b1e88ae6f9e9..01820fdfcc39ea 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h @@ -16,7 +16,7 @@ class MKLDNNFullyConnectedNode : public MKLDNNNode { public: MKLDNNFullyConnectedNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); - std::vector getAvailableFormatsForDims(const MKLDNNDims &dims) const override; + std::vector getAvailableFormatsForDims(const Shape &dims) const override; void getSupportedDescriptors() override; void createPrimitive() override; void execute(mkldnn::stream strm) override; @@ -27,15 +27,15 @@ class MKLDNNFullyConnectedNode : public MKLDNNNode { } const std::vector& getPrimitivesPriority() override; - void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) override; + void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) override; size_t descInputNumbers(MKLDNNDescriptor desc) override { return static_cast(getOriginalInputsNumber()); } - MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; - MKLDNNMemoryDesc 
getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; + std::unique_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; + std::unique_ptr getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; InferenceEngine::Precision getRuntimePrecision() const override; @@ -47,6 +47,9 @@ class MKLDNNFullyConnectedNode : public MKLDNNNode { std::shared_ptr initPrimitiveAttr(); private: + void createDescriptorInternal(const mkldnn::memory::desc &inputDesc, + const mkldnn::memory::desc &outputDesc); + InferenceEngine::SizeVector weightsDims; InferenceEngine::SizeVector biasesDims; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.cpp index e3e14e356912db..e4da50abe8af9a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.cpp @@ -86,9 +86,9 @@ void MKLDNNGatherElementsNode::initSupportedPrimitiveDescriptors() { dataTypeSize_ = inDataPrecision.size(); - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, inDataPrecision}, - {TensorDescCreatorTypes::ncsp, Precision::I32}}, - {{TensorDescCreatorTypes::ncsp, inDataPrecision}}, + addSupportedPrimDesc({{LayoutType::ncsp, inDataPrecision}, + {LayoutType::ncsp, Precision::I32}}, + {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any); } @@ -98,7 +98,7 @@ void MKLDNNGatherElementsNode::directExecution() { const auto *indices = reinterpret_cast(getParentEdgeAt(indicesIndex_)->getMemoryPtr()->GetPtr()); auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - const int outSize = getChildEdgeAt(0)->getBlob()->size(); + const int outSize = getChildEdgeAt(0)->getShape().getElementsCount(); auto threadBody = [&](const int ithr, const int nthr) { int start(0lu), end(0lu); splitter(outSize, nthr, ithr, start, end); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.cpp index ee7623f9b4810b..75ee34dbda5be4 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.cpp @@ -89,9 +89,9 @@ void MKLDNNGatherNDNode::initSupportedPrimitiveDescriptors() { _dataTypeSize = inDataPrecision.size(); - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, inDataPrecision}, - {TensorDescCreatorTypes::ncsp, Precision::I32}}, - {{TensorDescCreatorTypes::ncsp, inDataPrecision}}, + addSupportedPrimDesc({{LayoutType::ncsp, inDataPrecision}, + {LayoutType::ncsp, Precision::I32}}, + {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any); } @@ -101,10 +101,11 @@ void MKLDNNGatherNDNode::gatherElementwise() { const auto *indices = reinterpret_cast(getParentEdgeAt(_indicesIndex)->getMemoryPtr()->GetPtr()); auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - auto strides = getParentEdgeAt(_dataIndex)->getDesc().getBlockingDesc().getStrides(); + auto strides = getParentEdgeAt(_dataIndex)->getMemory().GetDescWithType().getStrides(); const size_t* srcMultipliers = strides.data() + _batchDims; - const size_t cycles = getChildEdgeAt(0)->getBlob()->byteSize() / (sizeof(dataType) * _batchNum); + const size_t cycles = getChildEdgeAt(0)->getShape().getElementsCount() * + getChildEdgeAt(0)->getMemory().GetDesc().getPrecision().size() / 
(sizeof(dataType) * _batchNum); const size_t CS = cycles * _sliceRank; const size_t CB = cycles * _blockSize; const size_t workAmount = _batchNum * cycles; @@ -149,11 +150,11 @@ void MKLDNNGatherNDNode::gatherBlocks() { std::vector srcMultipliers(_sliceRank); for (size_t i = 0; i < _sliceRank ; i++) - srcMultipliers[i] = _dataTypeSize * getParentEdgeAt(_dataIndex)->getDesc().getBlockingDesc().getStrides()[i + _batchDims]; + srcMultipliers[i] = _dataTypeSize * getParentEdgeAt(_dataIndex)->getMemory().GetDescWithType().getStrides()[i + _batchDims]; const size_t batchStep = _batchStep * _dataTypeSize; const size_t dataStep = _blockSize * _dataTypeSize; - const size_t cycles = getChildEdgeAt(0)->getBlob()->byteSize() / (dataStep * _batchNum); + const size_t cycles = getChildEdgeAt(0)->getMemory().GetSize() / (dataStep * _batchNum); const size_t CS = cycles * _sliceRank; const size_t CB = cycles * dataStep; const size_t workAmount = _batchNum * cycles; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp index ade92f6a4a0060..f41a57730a57cc 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp @@ -75,10 +75,10 @@ void MKLDNNGatherNode::initSupportedPrimitiveDescriptors() { return; Precision dataPrecision = getOriginalInputPrecisionAtPort(GATHER_DATA); - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, dataPrecision}, - {TensorDescCreatorTypes::ncsp, Precision::I32}, - {TensorDescCreatorTypes::ncsp, Precision::I32}}, - {{TensorDescCreatorTypes::ncsp, dataPrecision}}, + addSupportedPrimDesc({{LayoutType::ncsp, dataPrecision}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}, + {{LayoutType::ncsp, dataPrecision}}, impl_desc_type::ref_any); } @@ -92,10 +92,10 @@ void MKLDNNGatherNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << errorPrefix_ << " has unidentified preferable primitive descriptor."; - const SizeVector srcDims = getParentEdgeAt(GATHER_DATA)->getDims().ToSizeVector(); - const SizeVector idxDims = getParentEdgeAt(GATHER_INDEXES)->getDims().ToSizeVector(); - const SizeVector dstDims = getChildEdgeAt(0)->getDims().ToSizeVector(); - dataSize = getParentEdgeAt(GATHER_DATA)->getDesc().getPrecision().size(); + const SizeVector srcDims = getParentEdgeAt(GATHER_DATA)->getShape().getStaticDims(); + const SizeVector idxDims = getParentEdgeAt(GATHER_INDEXES)->getShape().getStaticDims(); + const SizeVector dstDims = getChildEdgeAt(0)->getShape().getStaticDims(); + dataSize = getParentEdgeAt(GATHER_DATA)->getMemory().GetDesc().getPrecision().size(); indexRange = srcDims[axis]; batchSize = std::accumulate(srcDims.begin(), srcDims.begin() + batchDims, 1, std::multiplies()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_tree_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_tree_node.cpp index ce396446df2418..89fb6c08167f68 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_tree_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_tree_node.cpp @@ -1,7 +1,6 @@ // Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" #include #include @@ -66,11 +65,11 @@ void MKLDNNGatherTreeNode::initSupportedPrimitiveDescriptors() { IE_THROW() << errorPrefix << " has incorrect input/output data precision. 
Must be the same."; } - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, precision}, - {TensorDescCreatorTypes::ncsp, precision}, - {TensorDescCreatorTypes::ncsp, precision}, - {TensorDescCreatorTypes::ncsp, precision}}, - {{TensorDescCreatorTypes::ncsp, precision}}, + addSupportedPrimDesc({{LayoutType::ncsp, precision}, + {LayoutType::ncsp, precision}, + {LayoutType::ncsp, precision}, + {LayoutType::ncsp, precision}}, + {{LayoutType::ncsp, precision}}, impl_desc_type::ref_any); } @@ -85,16 +84,16 @@ template void MKLDNNGatherTreeNode::gatherTreeKernel() noexcept { const auto *step_idx = reinterpret_cast(getParentEdgeAt(GATHER_TREE_STEP_IDX)->getMemoryPtr()->GetPtr()); const auto * const parent_idx = reinterpret_cast(getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getMemoryPtr()->GetPtr()); - const size_t parent_idx_size = getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getDims().size() - - getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getDesc().getBlockingDesc().getOffsetPadding(); + const size_t parent_idx_size = getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getShape().getElementsCount() + - getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getMemory().GetDescWithType().getOffsetPadding(); const auto *max_seq_len = reinterpret_cast(getParentEdgeAt(GATHER_TREE_MAX_SEQ_LEN)->getMemoryPtr()->GetPtr()); auto end_token = (reinterpret_cast(getParentEdgeAt(GATHER_TREE_END_TOKEN)->getMemoryPtr()->GetPtr()))[0]; auto * final_idx = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); - SizeVector step_idx_dims = getParentEdgeAt(GATHER_TREE_STEP_IDX)->getDims().ToSizeVector(); - SizeVector parent_idx_dims = getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getDims().ToSizeVector(); - SizeVector max_seq_len_dims = getParentEdgeAt(GATHER_TREE_MAX_SEQ_LEN)->getDims().ToSizeVector(); - SizeVector final_idx_dims = getChildEdgesAtPort(0)[0]->getDims().ToSizeVector(); + SizeVector step_idx_dims = getParentEdgeAt(GATHER_TREE_STEP_IDX)->getShape().getStaticDims(); + SizeVector parent_idx_dims = getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getShape().getStaticDims(); + SizeVector max_seq_len_dims = getParentEdgeAt(GATHER_TREE_MAX_SEQ_LEN)->getShape().getStaticDims(); + SizeVector final_idx_dims = getChildEdgesAtPort(0)[0]->getShape().getStaticDims(); int32_t max_time = step_idx_dims[0]; const size_t batch_size = step_idx_dims[1]; const size_t beam_width = step_idx_dims[2]; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp index 86f89ccea7c480..ef87345daae9a1 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp @@ -8,6 +8,7 @@ #include #include #include +#include "cpu_memory_desc_utils.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -22,6 +23,42 @@ void MKLDNNGenericNode::getSupportedDescriptors() { } } +NodeConfig MKLDNNGenericNode::convertLayerToNodeConfig(const InferenceEngine::LayerConfig &layerConfig) { + NodeConfig config; + config.dynBatchSupport = layerConfig.dynBatchSupport; + config.inConfs.resize(layerConfig.inConfs.size()); + for (size_t i = 0; i < layerConfig.inConfs.size(); i++) { + config.inConfs[i].inPlace = layerConfig.inConfs[i].inPlace; + config.inConfs[i].constant = layerConfig.inConfs[i].constant; + config.inConfs[i].desc = MemoryDescUtils::convertToMKLDNNMemoryDesc(layerConfig.inConfs[i].desc).clone(); + } + config.outConfs.resize(layerConfig.outConfs.size()); + for (size_t i = 0; i < 
layerConfig.outConfs.size(); i++) { + config.outConfs[i].inPlace = layerConfig.outConfs[i].inPlace; + config.outConfs[i].constant = layerConfig.outConfs[i].constant; + config.outConfs[i].desc = MemoryDescUtils::convertToMKLDNNMemoryDesc(layerConfig.outConfs[i].desc).clone(); + } + return config; +} + +InferenceEngine::LayerConfig MKLDNNGenericNode::convertNodeToLayerConfig(const NodeConfig &nodeConfig) { + InferenceEngine::LayerConfig config; + config.dynBatchSupport = nodeConfig.dynBatchSupport; + config.inConfs.resize(nodeConfig.inConfs.size()); + for (size_t i = 0; i < nodeConfig.inConfs.size(); i++) { + config.inConfs[i].inPlace = nodeConfig.inConfs[i].inPlace; + config.inConfs[i].constant = nodeConfig.inConfs[i].constant; + config.inConfs[i].desc = MemoryDescUtils::convertToTensorDesc(*nodeConfig.inConfs[i].desc); + } + config.outConfs.resize(nodeConfig.outConfs.size()); + for (size_t i = 0; i < nodeConfig.outConfs.size(); i++) { + config.outConfs[i].inPlace = nodeConfig.outConfs[i].inPlace; + config.outConfs[i].constant = nodeConfig.outConfs[i].constant; + config.outConfs[i].desc = MemoryDescUtils::convertToTensorDesc(*nodeConfig.outConfs[i].desc); + } + return config; +} + void MKLDNNGenericNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -35,7 +72,7 @@ void MKLDNNGenericNode::initSupportedPrimitiveDescriptors() { } for (auto& config : configs) { - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); + supportedPrimitiveDescriptors.emplace_back(convertLayerToNodeConfig(config), impl_desc_type::unknown); } } if (impls.empty()) { @@ -109,9 +146,9 @@ void MKLDNNGenericNode::execLayer() { std::vector inputs; std::vector constInputs; std::vector inputDescs; - std::vector outputShapes; + std::vector execOutputShapes; for (size_t i = 0; i < getParentEdges().size(); i++) { - auto inputBlob = getParentEdgeAt(i)->getBlob(); + auto inputBlob = MemoryDescUtils::interpretAsBlob(getParentEdgeAt(i)->getMemory()); inputs.push_back(inputBlob); constInputs.push_back(inputBlob); if (isDynBatch && dynBatchLim >= inputs[inputs.size() - 1]->getTensorDesc().getDims()[0]) { @@ -137,14 +174,14 @@ void MKLDNNGenericNode::execLayer() { } } std::vector outputs; - for (size_t i = 0; i < outDims.size(); i++) { + for (size_t i = 0; i < outputShapes.size(); i++) { if (isDynBatch) { auto out_edge = getChildEdgesAtPort(i)[0]; - auto td = out_edge->getBlob()->getTensorDesc(); - td.setDims(outputShapes[i]); + auto td = MemoryDescUtils::convertToTensorDesc(out_edge->getMemory().GetDesc()); + td.setDims(execOutputShapes[i]); outputs.push_back(make_blob_with_precision(td, out_edge->getMemory().GetData())); } else { - outputs.push_back(getChildEdgesAtPort(i)[0]->getBlob()); + outputs.push_back(MemoryDescUtils::interpretAsBlob(getChildEdgesAtPort(i)[0]->getMemory())); } } InferenceEngine::ResponseDesc resp; @@ -154,8 +191,8 @@ void MKLDNNGenericNode::execLayer() { } } -void MKLDNNGenericNode::initDescriptor(const InferenceEngine::LayerConfig &config) { - InferenceEngine::LayerConfig rightConfig = config; +void MKLDNNGenericNode::initDescriptor(const NodeConfig &config) { + NodeConfig rightConfig = config; InferenceEngine::StatusCode rc; InferenceEngine::ResponseDesc resp; @@ -190,14 +227,15 @@ void MKLDNNGenericNode::initDescriptor(const InferenceEngine::LayerConfig &confi impls.clear(); impls.emplace_back(selectedImpl); - rc = impls[0]->init(rightConfig, &resp); + auto ieConfig = convertNodeToLayerConfig(rightConfig); + rc = impls[0]->init(ieConfig, 
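// Extension (generic) layer implementations still consume InferenceEngine::LayerConfig, so
// MKLDNNGenericNode converts between the plugin-side NodeConfig and LayerConfig at that
// boundary. The round trip inside initDescriptor() boils down to the following, with
// rightConfig and resp as in the surrounding hunk:
auto ieConfig = convertNodeToLayerConfig(rightConfig);   // NodeConfig -> LayerConfig
rc = impls[0]->init(ieConfig, &resp);                    // the extension impl may adjust the config
rightConfig = convertLayerToNodeConfig(ieConfig);        // bring the result back as NodeConfig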
&resp); if (rc != InferenceEngine::OK) { IE_THROW() << resp.msg; } - + rightConfig = convertLayerToNodeConfig(ieConfig); auto descriptor = getSelectedPrimitiveDescriptor(); if (descriptor != nullptr) { - descriptor->getConfig() = rightConfig; + descriptor->setConfig(rightConfig); } bool isConst = !rightConfig.inConfs.empty() || !rightConfig.outConfs.empty(); for (const auto &inConf : rightConfig.inConfs) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.h index f93b79c785266f..63d0d5e20f059a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.h @@ -29,12 +29,15 @@ class MKLDNNGenericNode : public MKLDNNNode { return false; } - void initDescriptor(const InferenceEngine::LayerConfig& config) override; + void initDescriptor(const NodeConfig& config) override; void execLayer(); void cleanup() override; protected: + NodeConfig convertLayerToNodeConfig(const InferenceEngine::LayerConfig &layerConfig); + InferenceEngine::LayerConfig convertNodeToLayerConfig(const NodeConfig &nodeConfig); + InferenceEngine::ILayerImplFactory::Ptr extFactory; std::vector impls; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_grn_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_grn_node.cpp index 0dbe8dee59ea51..605aa2d6af5283 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_grn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_grn_node.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" - #include #include @@ -46,8 +44,8 @@ void MKLDNNGRNNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32, false, 0}}, - {{TensorDescCreatorTypes::ncsp, Precision::FP32, false, 0}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32, false, 0}}, + {{LayoutType::ncsp, Precision::FP32, false, 0}}, impl_desc_type::ref_any); } @@ -55,7 +53,7 @@ void MKLDNNGRNNode::execute(mkldnn::stream strm) { const float* src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); float* dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); - SizeVector dims = getParentEdgeAt(0)->getDims().ToSizeVector(); + SizeVector dims = getParentEdgeAt(0)->getShape().getStaticDims(); int N = static_cast((dims.size() > 0) ? dims[0] : 1); int C = static_cast((dims.size() > 1) ? dims[1] : 1); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp index 1926914f07431a..33b6fdab4f4984 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp @@ -149,7 +149,7 @@ struct jit_has_subnormals_base::reg { template struct jit_has_subnormals : public jit_has_subnormals_base { - void generate() final { + void generate() override final { // NOLINT size_t const vlen = reg::length; const int sh_bits = std::ilogb(vlen); @@ -246,7 +246,7 @@ MKLDNNInputNode::MKLDNNInputNode(const std::shared_ptr& op, const } void MKLDNNInputNode::cloneBlobIfRequired() { - MKLDNNDims dims(constOp->get_shape().empty() ? ngraph::Shape(1, 1) : constOp->get_shape()); + std::vector dims(constOp->get_shape().empty() ? 
ngraph::Shape(1, 1) : constOp->get_shape()); const auto prec = convertPrecision(constOp->get_element_type()); const size_t size = dims.size(); MKLDNNMemoryDesc memDesc(dims, MKLDNNExtensionUtils::IEPrecisionToDataType(prec)); @@ -349,15 +349,15 @@ void MKLDNNInputNode::cloneBlobIfRequired() { } } -MKLDNNInputNode::MKLDNNInputNode(const InferenceEngine::SizeVector &dims, const InferenceEngine::Precision &prc, const std::string &name, +MKLDNNInputNode::MKLDNNInputNode(const Shape& shape, const InferenceEngine::Precision &prc, const std::string &name, const std::string &type, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(type, name, eng, cache) { constant = ConstantType::NoConst; if (getType() == Input) { - outDims.emplace_back(dims); + outputShapes.emplace_back(shape); addOriginalOutputPrecision(prc); } else if (getType() == Output) { - inDims.emplace_back(dims); + inputShapes.emplace_back(shape); addOriginalInputPrecision(prc); } } @@ -388,42 +388,29 @@ void MKLDNNInputNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - LayerConfig config; - config.dynBatchSupport = true; + std::vector inPortConfs; + std::vector outPortConfs; + if (getType() == Input || getType() == MemoryInput) { precision = getOriginalOutputPrecisionAtPort(0); if (precision == Precision::U16 || isMeanImage) { precision = Precision::FP32; } - DataConfig dataConfig; - dataConfig.inPlace = -1; - dataConfig.constant = false; - - auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - auto mem_tdesc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType); - dataConfig.desc = mem_tdesc; - config.outConfs.push_back(dataConfig); - // ReadValue operation expects constant input + + outPortConfs.push_back({LayoutType::ncsp, precision}); if (!getParentEdges().empty()) { - DataConfig inConfig; - inConfig.inPlace = -1; - inConfig.constant = true; - inConfig.desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType); - config.inConfs.push_back(inConfig); + inPortConfs.push_back({LayoutType::ncsp, precision, true}); } } else if (getType() == Output) { precision = getOriginalInputPrecisionAtPort(0); if (precision == Precision::U16) precision = Precision::FP32; - DataConfig dataConfig; - dataConfig.inPlace = -1; - dataConfig.constant = false; - - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - auto mem_tdesc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType); - dataConfig.desc = mem_tdesc; - config.inConfs.push_back(dataConfig); + + inPortConfs.push_back({LayoutType::ncsp, precision}); } - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); + + addSupportedPrimDesc(inPortConfs, + outPortConfs, + impl_desc_type::unknown); } void MKLDNNInputNode::createPrimitive() { @@ -440,7 +427,7 @@ void MKLDNNInputNode::createPrimitive() { << " from node " << getParentEdgeAt(i)->getParent()->getName() << "."; } - const PrimitiveDescInfo *selected_pd = getSelectedPrimitiveDescriptor(); + const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) IE_THROW() << "Preferable primitive descriptor is not set for node " << getName() << "."; } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h index 872f8e14f8e295..8c57ac8873007e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h +++ 
b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h @@ -14,7 +14,7 @@ namespace MKLDNNPlugin { class MKLDNNInputNode : public MKLDNNNode { public: MKLDNNInputNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); - MKLDNNInputNode(const InferenceEngine::SizeVector &dims, const InferenceEngine::Precision &prc, const std::string &name, + MKLDNNInputNode(const Shape& shape, const InferenceEngine::Precision &prc, const std::string &name, const std::string &type, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); void getSupportedDescriptors() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp index caedec83ee8300..df6e4930b54c73 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp @@ -1829,7 +1829,7 @@ void MKLDNNInterpolateNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << " has incorrect number of output edges"; - srcDim = getParentEdgeAt(DATA_ID)->getDims().ToSizeVector(); + srcDim = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); int dataRank = srcDim.size(); // get pad @@ -1868,7 +1868,7 @@ void MKLDNNInterpolateNode::getSupportedDescriptors() { } else { srcDimPad = srcDim; } - dstDim = getChildEdgeAt(0)->getDims().ToSizeVector(); + dstDim = getChildEdgeAt(0)->getShape().getStaticDims(); } void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { @@ -1902,7 +1902,7 @@ void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { inputPrec = inputPrecision; outputPrec = outputPrecision; - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; if (isAxesSpecified) { config.inConfs.resize(4); @@ -1916,22 +1916,26 @@ void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { auto axesType = MKLDNNExtensionUtils::IEPrecisionToDataType(Precision::I32); auto pushDesc = [&](memory::format_tag dataFormat, impl_desc_type implDetail) { - config.inConfs[DATA_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(DATA_ID)->getDims(), inputDataType, dataFormat); - config.inConfs[TARGET_SHAPE_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(TARGET_SHAPE_ID)->getDims(), targetShapeType, memory::format_tag::x); - config.inConfs[SCALES_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(SCALES_ID)->getDims(), scalesType, memory::format_tag::x); + config.inConfs[DATA_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(DATA_ID)->getShape().getStaticDims(), + inputDataType, dataFormat); + config.inConfs[TARGET_SHAPE_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(TARGET_SHAPE_ID)->getShape().getStaticDims(), + targetShapeType, memory::format_tag::x); + config.inConfs[SCALES_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(SCALES_ID)->getShape().getStaticDims(), scalesType, + memory::format_tag::x); if (isAxesSpecified) - config.inConfs[AXES_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(AXES_ID)->getDims(), axesType, memory::format_tag::x); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, dataFormat); - supportedPrimitiveDescriptors.push_back({config, implDetail, dataFormat}); + config.inConfs[AXES_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(AXES_ID)->getShape().getStaticDims(), axesType, + memory::format_tag::x); + config.outConfs[0].desc = 
MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, dataFormat); + supportedPrimitiveDescriptors.push_back({config, implDetail}); }; - auto channels = getParentEdgeAt(DATA_ID)->getDims().ndims() > 1 ? getParentEdgeAt(DATA_ID)->getDims()[1] : 1; + auto channels = getParentEdgeAt(DATA_ID)->getShape().getRank() > 1 ? getParentEdgeAt(DATA_ID)->getShape().getStaticDims()[1] : 1; if (!mayiuse(cpu::x64::sse41) || mode == InterpolateMode::linear) { - pushDesc(MKLDNNMemory::GetPlainFormat(getParentEdgeAt(DATA_ID)->getDims()), ref); + pushDesc(MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(DATA_ID)->getShape().getRank()), ref); } else { // blk and by_channel JIT kernel on sse41 or above machine - if (getParentEdgeAt(DATA_ID)->getDims().ndims() == 4) { + if (getParentEdgeAt(DATA_ID)->getShape().getRank() == 4) { if (mayiuse(cpu::x64::avx512_common)) { pushDesc(memory::format_tag::nhwc, jit_avx512); if (channels != 1) @@ -1945,7 +1949,7 @@ void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { if (channels != 1) pushDesc(memory::format_tag::nChw8c, jit_sse42); } - } else if (getParentEdgeAt(DATA_ID)->getDims().ndims() == 5 && mode != InterpolateMode::cubic) { + } else if (getParentEdgeAt(DATA_ID)->getShape().getRank() == 5 && mode != InterpolateMode::cubic) { if (mayiuse(cpu::x64::avx512_common)) { pushDesc(memory::format_tag::ndhwc, jit_avx512); if (channels != 1) @@ -1963,7 +1967,7 @@ void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { // planar for 1.ref on machine without sse41(if no sse41, canFuse() is false). 2.JIT kernel for f32 && avx2(gather).(with fuse) if (mayiuse(cpu::x64::avx2) && inputPrec == Precision::FP32) { - pushDesc(MKLDNNMemory::GetPlainFormat(getParentEdgeAt(DATA_ID)->getDims()), jit_avx2); + pushDesc(MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(DATA_ID)->getShape().getRank()), jit_avx2); } } } @@ -1989,11 +1993,10 @@ void MKLDNNInterpolateNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << errorPrefix << " did not set preferable primitive descriptor"; - auto selectedPD = getSelectedPrimitiveDescriptor(); auto jcp = jit_interpolate_config_params(); jcp.mode = mode; - jcp.src_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().inConfs[0].desc.getPrecision()); - jcp.dst_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().outConfs[0].desc.getPrecision()); + jcp.src_dt = getParentEdgeAt(0)->getMemory().GetDataType(); + jcp.dst_dt = getChildEdgeAt(0)->getMemory().GetDataType(); jcp.src_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.src_dt); jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.dst_dt); jcp.indices_size = sizeof(int); @@ -2008,9 +2011,10 @@ void MKLDNNInterpolateNode::createPrimitive() { jcp.ID = srcDimPad5d[2]; jcp.spatial_dim_size = spatialDimSize; - if (getChildEdgeAt(0)->getMemory().GetDesc().isPlainFormat()) { + if (getChildEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp)) { jcp.layout = InterpolateLayoutType::planar; - } else if (getChildEdgeAt(0)->getMemory().GetDesc().isBlockedCFormat()) { + } else if (getChildEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c) || + getChildEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c)) { jcp.layout = InterpolateLayoutType::block; } else { jcp.layout = InterpolateLayoutType::by_channel; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_log_softmax_node.cpp 
b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_log_softmax_node.cpp index 5750f8517b0096..702d5b42f0cff1 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_log_softmax_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_log_softmax_node.cpp @@ -64,8 +64,8 @@ void MKLDNNLogSoftmaxNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}}, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp index a3460091ecf697..b107fca78343b2 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp @@ -6,6 +6,7 @@ #include #include #include +#include using namespace MKLDNNPlugin; using namespace InferenceEngine; @@ -88,19 +89,20 @@ void MKLDNNLrnNode::getSupportedDescriptors() { precision = InferenceEngine::Precision::FP32; auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - auto parentDims = getParentEdgeAt(0)->getDims(); + const auto &parentShape = getParentEdgeAt(0)->getShape(); + const auto parentStaticDims = parentShape.getStaticDims(); - for (auto format : getAvailableFormatsForDims(parentDims)) { - MKLDNNMemoryDesc in_candidate(parentDims, inputDataType, format); - createDescriptor({in_candidate}, {}); + for (auto format : getAvailableFormatsForDims(parentShape)) { + auto in_candidate = MKLDNNPlugin::make_unique(parentStaticDims, inputDataType, format); + createDescriptor({in_candidate.get()}, {}); } } -MKLDNNMemoryDesc MKLDNNLrnNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { +std::unique_ptr MKLDNNLrnNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { if (idx > 0) { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(getOriginalInputPrecisions()[idx], - getParentEdgeAt(idx)->getDims().ToSizeVector(), - TensorDesc::getLayoutByDims(getParentEdgeAt(idx)->getDims().ToSizeVector()))); + return MKLDNNPlugin::make_unique(getParentEdgeAt(idx)->getShape().getStaticDims(), + MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisions()[idx]), + MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(idx)->getShape().getRank())); } else { return MKLDNNNode::getSrcMemDesc(primitive_desc_it, idx); } @@ -123,12 +125,12 @@ bool MKLDNNLrnNode::created() const { return getType() == Lrn; } -void MKLDNNLrnNode::createDescriptor(const std::vector &inputDesc, - const std::vector &outputDesc) { +void MKLDNNLrnNode::createDescriptor(const std::vector &inputDesc, + const std::vector &outputDesc) { mkldnn::algorithm alg = isAcrossMaps ? 
mkldnn::algorithm::lrn_across_channels : mkldnn::algorithm::lrn_within_channel; - MKLDNNMemoryDesc in_candidate(inputDesc[0]); MKLDNNDescriptor desc(std::shared_ptr( - new mkldnn::lrn_forward::desc(mkldnn::prop_kind::forward_scoring, alg, in_candidate, size, alpha, beta, k))); + new mkldnn::lrn_forward::desc(mkldnn::prop_kind::forward_scoring, alg, MemoryDescUtils::convertToMKLDNNMemoryDesc(*inputDesc[0]), + size, alpha, beta, k))); descs.push_back(desc); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h index 53cfaa79682d99..295d16b369c191 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h @@ -17,12 +17,12 @@ class MKLDNNLrnNode : public MKLDNNNode { MKLDNNLrnNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); void getSupportedDescriptors() override; - void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) override; + void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) override; size_t descInputNumbers(MKLDNNDescriptor desc) override { return static_cast(getOriginalInputsNumber()); } - MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; + std::unique_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; void createPrimitive() override; bool created() const override; bool canBeInPlace() const override { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_math_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_math_node.cpp index 908686bf6df1eb..fed1158f97eaf2 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_math_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_math_node.cpp @@ -49,18 +49,18 @@ void MKLDNNMathNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - std::vector inDataConf; + std::vector inDataConf; inDataConf.reserve(getOriginalInputsNumber()); for (int i = 0; i < getOriginalInputsNumber(); ++i) - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::FP32); + inDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); addSupportedPrimDesc(inDataConf, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } void MKLDNNMathNode::execute(mkldnn::stream strm) { - size_t dataSize = getChildEdgeAt(0)->getBlob()->size(); + size_t dataSize = getChildEdgeAt(0)->getShape().getElementsCount(); const float *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); float* dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp index b7f2c0a4277634..a0a7f7eafa419f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp @@ -55,8 +55,8 @@ MKLDNNMatMulNode::MKLDNNMatMulNode(const std::shared_ptr& op, cons errorPrefix = "Gemm node with name '" + getName() + "'"; const auto matMul = std::dynamic_pointer_cast(op); - alpha = 1; - beta = 1; + alpha = 1.f; + beta = 0.f; transposeA = matMul->get_transpose_a(); transposeB = matMul->get_transpose_b(); } else { @@ -70,14 +70,14 @@ void 
MKLDNNMatMulNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << " has incorrect number of output edges for layer " << getName(); - auto inDims0 = getParentEdgeAt(0)->getDims(); - auto inDims1 = getParentEdgeAt(1)->getDims(); - auto outDims = getChildEdgeAt(0)->getDims(); + auto inDims0 = getParentEdgeAt(0)->getShape().getStaticDims(); + auto inDims1 = getParentEdgeAt(1)->getShape().getStaticDims(); + auto outDims = getChildEdgeAt(0)->getShape().getStaticDims(); - if (inDims0.ndims() != inDims1.ndims() || inDims0.ndims() != outDims.ndims()) + if (inDims0.size() != inDims1.size() || inDims0.size() != outDims.size()) IE_THROW() << errorPrefix << " has invalid dims count"; - int nDims = inDims0.ndims(); + int nDims = inDims0.size(); xAxis = nDims - 1; yAxis = nDims - 2; auto xAxis0 = transposeA ? yAxis : xAxis; @@ -135,22 +135,22 @@ void MKLDNNMatMulNode::initSupportedPrimitiveDescriptors() { auto inputDataType1 = MKLDNNExtensionUtils::IEPrecisionToDataType(inPrec1); auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::FP32); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; - auto createDataConfig = [](const MKLDNNDims& dims, memory::data_type dataType) -> InferenceEngine::DataConfig { - InferenceEngine::DataConfig dataConfig; + auto createDataConfig = [](const std::vector& dims, memory::data_type dataType) -> PortConfig { + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = MKLDNNMemoryDesc(dims, dataType, MKLDNNMemory::GetPlainFormat(dims)); + dataConfig.desc = MKLDNNPlugin::make_unique(dims, dataType, MKLDNNMemory::GetPlainFormatByRank(dims.size())); return dataConfig; }; - config.inConfs.push_back(createDataConfig(getParentEdgeAt(0)->getDims(), inputDataType0)); - config.inConfs.push_back(createDataConfig(getParentEdgeAt(1)->getDims(), inputDataType1)); - config.outConfs.push_back(createDataConfig(getChildEdgeAt(0)->getDims(), outputDataType)); + config.inConfs.push_back(createDataConfig(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType0)); + config.inConfs.push_back(createDataConfig(getParentEdgeAt(1)->getShape().getStaticDims(), inputDataType1)); + config.outConfs.push_back(createDataConfig(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType)); - supportedPrimitiveDescriptors.push_back(PrimitiveDescInfo(config, impl_desc_type::gemm_any, MKLDNNMemory::GetPlainFormat(getChildEdgeAt(0)->getDims()))); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::gemm_any); } void MKLDNNMatMulNode::initOptimalPrimitiveDescriptor() { @@ -158,8 +158,9 @@ void MKLDNNMatMulNode::initOptimalPrimitiveDescriptor() { if (selected_pd == nullptr) IE_THROW() << errorPrefix << " did not set preferable primitive descriptor"; auto config = selected_pd->getConfig(); - if (isInitConfig(config)) - return; + + if (isConfigDefined(config)) + return; MKLDNNNode::initOptimalPrimitiveDescriptor(); @@ -179,6 +180,34 @@ void MKLDNNMatMulNode::createPrimitive() { IE_THROW() << errorPrefix << " did not allocate input memory"; if (getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << errorPrefix << " did not set preferable primitive descriptor"; + + auto inDims0 = src0MemPtr->GetDims(); + auto outDims = dstMemPtr->GetDims(); + + params.src0_mem_ptr = src0MemPtr; + params.src1_mem_ptr = src1MemPtr; + params.dst_mem_ptr = dstMemPtr; + + params.ndims = outDims.size(); + + params.MB1 = 1; + params.MB2 = outDims.size() > 3 ? 
outDims[params.ndims - 3] : 1; + + params.M = outDims[yAxis]; + params.N = outDims[xAxis]; + params.K = transposeA ? inDims0[yAxis] : inDims0[xAxis]; + + params.transa = transposeA ? 'T' : 'N'; + params.transb = transposeB ? 'T' : 'N'; + + params.lda = transposeA ? params.M : params.K; + params.ldb = transposeB ? params.K : params.N; + params.ldc = params.N; + + params.shift1 = params.M * params.N * params.MB2; + params.shift2 = params.M * params.N; + + runtimePrecision = getParentEdgeAt(0)->getMemory().GetDesc().getPrecision(); } inline void process_gemm(char transa, char transb, int M, int N, int K, float alpha, const float *A, int lda, @@ -212,67 +241,57 @@ inline void process_gemm(char transa, char transb, int M, int N, int K, float al } template -void MKLDNNMatMulNode::process_data() { - auto inDims0 = getParentEdgeAt(0)->getDims(); - auto inDims1 = getParentEdgeAt(1)->getDims(); - auto outDims = getChildEdgeAt(0)->getDims(); - - auto& srcMemory0 = getParentEdgeAt(0)->getMemory(); - auto& srcMemory1 = getParentEdgeAt(1)->getMemory(); - auto& dstMemory0 = getChildEdgeAt(0)->getMemory(); - - const T0 *src0_ptr = reinterpret_cast(srcMemory0.GetPtr()); - const T1 *src1_ptr = reinterpret_cast(srcMemory1.GetData()); - float *dst_ptr = reinterpret_cast(dstMemory0.GetData()); - - int MB1 = outDims.ndims() == 4 ? batchToProcess() : 1; - int MB2 = outDims.ndims() == 3 ? batchToProcess() : outDims.ndims() > 3 ? outDims[outDims.ndims() - 3] : 1; - int M = outDims[yAxis]; - int N = outDims[xAxis]; - int K = transposeA ? inDims0[yAxis] : inDims0[xAxis]; - - const char transa = transposeA ? 'T' : 'N'; - const char transb = transposeB ? 'T' : 'N'; - - int lda = transposeA ? M : K; - int ldb = transposeB ? K : N; - int ldc = N; - - beta = 0.f; +inline void MKLDNNMatMulNode::process_data() { + const T0* src0_ptr = reinterpret_cast(params.src0_mem_ptr->GetPtr()); + const T1* src1_ptr = reinterpret_cast(params.src1_mem_ptr->GetPtr()); + float* dst_ptr = reinterpret_cast(params.dst_mem_ptr->GetPtr()); + + const int MB = batchToProcess(); + if (params.ndims == 4) { + params.MB1 = MB; + } else if (params.ndims == 3) { + params.shift1 = params.shift1 * MB / params.MB2; + params.MB2 = MB; + } - for (int b1 = 0; b1 < MB1; b1++) { + for (int b1 = 0; b1 < params.MB1; ++b1) { const T0 *a_ptr = src0_ptr; const T1 *b_ptr = src1_ptr; float *d_ptr = dst_ptr; - for (int b2 = 0; b2 < MB2; b2++) { - process_gemm(transa, transb, M, N, K, alpha, a_ptr, lda, b_ptr, ldb, beta, d_ptr, ldc); + for (int b2 = 0; b2 < params.MB2; ++b2) { + process_gemm(params.transa, params.transb, params.M, params.N, params.K, + alpha, a_ptr, params.lda, b_ptr, params.ldb, beta, d_ptr, params.ldc); a_ptr += aOffsets[0]; b_ptr += bOffsets[0]; - d_ptr += M * N; + d_ptr += params.shift2; } src0_ptr += aOffsets[1]; src1_ptr += bOffsets[1]; - dst_ptr += MB2 * M * N; + dst_ptr += params.shift1; } } void MKLDNNMatMulNode::execute(mkldnn::stream strm) { - switch (getParentEdgeAt(0)->getDesc().getPrecision()) { - case Precision::FP32: + switch (runtimePrecision) { + case Precision::FP32: { process_data(); break; - case Precision::BF16: + } + case Precision::BF16: { process_data(); break; - case Precision::I8: + } + case Precision::I8: { process_data(); break; - case Precision::U8: + } + case Precision::U8: { process_data(); break; + } default: IE_THROW() << errorPrefix << " has incorrect precision on first input"; } @@ -283,13 +302,13 @@ bool MKLDNNMatMulNode::created() const { } int MKLDNNMatMulNode::getMaxBatch() { - if (!outDims.empty()) - return 
outDims[0][0]; + if (!outputShapes.empty()) + return outputShapes[0].getStaticDims()[0]; return 0; } InferenceEngine::Precision MKLDNNMatMulNode::getRuntimePrecision() const { - return MKLDNNExtensionUtils::getMaxPrecision(getInputPrecisions()); + return getMaxPrecision(getInputPrecisions()); } REG_MKLDNN_PRIM_FOR(MKLDNNMatMulNode, MatMul); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.h index 6196665aabcad7..3f056cc99533d9 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.h @@ -28,8 +28,8 @@ class MKLDNNMatMulNode : public MKLDNNNode { static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: - float alpha = 1.0f; - float beta = 1.0f; + float alpha = 1.f; + float beta = 0.f; bool transposeA = false; bool transposeB = false; @@ -40,9 +40,36 @@ class MKLDNNMatMulNode : public MKLDNNNode { std::vector bOffsets; std::vector cOffsets; - template void process_data(); + InferenceEngine::Precision runtimePrecision; + + template inline void process_data(); std::string errorPrefix; + + struct { + MKLDNNMemoryPtr src0_mem_ptr = nullptr; + MKLDNNMemoryPtr src1_mem_ptr = nullptr; + MKLDNNMemoryPtr dst_mem_ptr = nullptr; + + char transa = 'N'; + char transb = 'N'; + + int MB1 = 1; + int MB2 = 1; + + int M = 0; + int N = 0; + int K = 0; + + int lda = 0; + int ldb = 0; + int ldc = 0; + + int shift1 = 0; + int shift2 = 0; + + size_t ndims = 0; + } params; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.cpp new file mode 100644 index 00000000000000..ade776e8ce3b04 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.cpp @@ -0,0 +1,383 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "mkldnn_matrix_nms_node.h" + +#include +#include +#include +#include +#include + +#include "ie_parallel.hpp" +#include "ngraph/opsets/opset8.hpp" +#include "ngraph_ops/nms_static_shape_ie.hpp" +#include "utils/general_utils.h" + +using namespace MKLDNNPlugin; +using namespace InferenceEngine; +using MatrixNmsIEInternal = ngraph::op::internal::NmsStaticShapeIE; + +using ngNmsSortResultType = ngraph::op::util::NmsBase::SortResultType; +using ngNmseDcayFunction = ngraph::op::v8::MatrixNms::DecayFunction; + +bool MKLDNNMatrixNmsNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto nms = std::dynamic_pointer_cast(op); + if (!nms) { + errorMessage = "Only internal MatrixNms operation is supported"; + return false; + } + const auto& attrs = nms->get_attrs(); + const auto& sortType = attrs.sort_result_type; + if (!one_of(sortType, ngNmsSortResultType::NONE, ngNmsSortResultType::SCORE, ngNmsSortResultType::CLASSID)) { + errorMessage = "Does not support SortResultType mode: " + ngraph::as_string(sortType); + return false; + } + const auto& decayType = attrs.decay_function; + if (!one_of(decayType, ngNmseDcayFunction::LINEAR, ngNmseDcayFunction::GAUSSIAN)) { + errorMessage = "Does not support DcayFunction " + ngraph::as_string(decayType); + return false; + } + } catch (...) 
{
+        return false;
+    }
+    return true;
+}
+
+MKLDNNMatrixNmsNode::MKLDNNMatrixNmsNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr& cache)
+    : MKLDNNNode(op, eng, cache) {
+    std::string errorMessage;
+    if (!isSupportedOperation(op, errorMessage)) {
+        IE_THROW(NotImplemented) << errorMessage;
+    }
+
+    errorPrefix = "MatrixNMS layer with name '" + getName() + "' ";
+    const auto matrix_nms = std::dynamic_pointer_cast(op);
+
+    if (getOriginalInputsNumber() != 2)
+        IE_THROW() << errorPrefix << "has incorrect number of input edges: " << getOriginalInputsNumber();
+
+    if (getOriginalOutputsNumber() != 3)
+        IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getOriginalOutputsNumber();
+
+    const SizeVector& boxes_dims = inputShapes[NMS_BOXES].getStaticDims();
+    const SizeVector& scores_dims = inputShapes[NMS_SCORES].getStaticDims();
+    if (!(boxes_dims[0] == scores_dims[0] && boxes_dims[1] == scores_dims[2])) {
+        IE_THROW() << errorPrefix << "has incompatible 'boxes' and 'scores' input dimensions";
+    }
+
+    m_numBatches = boxes_dims[0];
+    m_numBoxes = boxes_dims[1];
+    if (boxes_dims.size() != 3)
+        IE_THROW() << errorPrefix << "has unsupported 'boxes' input rank: " << boxes_dims.size();
+    if (boxes_dims[2] != 4)
+        IE_THROW() << errorPrefix << "has unsupported 'boxes' input 3rd dimension size: " << boxes_dims[2];
+
+    m_numClasses = scores_dims[1];
+    if (scores_dims.size() != 3)
+        IE_THROW() << errorPrefix << "has unsupported 'scores' input rank: " << scores_dims.size();
+
+    if (m_numBatches != scores_dims[0])
+        IE_THROW() << errorPrefix << " num_batches is different in 'boxes' and 'scores' inputs";
+    if (m_numBoxes != scores_dims[2])
+        IE_THROW() << errorPrefix << " num_boxes is different in 'boxes' and 'scores' inputs";
+    auto& attrs = matrix_nms->get_attrs();
+    if (attrs.sort_result_type == ngraph::op::util::NmsBase::SortResultType::CLASSID)
+        m_sortResultType = MatrixNmsSortResultType::CLASSID;
+    else if (attrs.sort_result_type == ngraph::op::util::NmsBase::SortResultType::SCORE)
+        m_sortResultType = MatrixNmsSortResultType::SCORE;
+    else if (attrs.sort_result_type == ngraph::op::util::NmsBase::SortResultType::NONE)
+        m_sortResultType = MatrixNmsSortResultType::NONE;
+
+    if (attrs.decay_function == ngraph::op::v8::MatrixNms::DecayFunction::GAUSSIAN)
+        m_decayFunction = GAUSSIAN;
+    else if (attrs.decay_function == ngraph::op::v8::MatrixNms::DecayFunction::LINEAR)
+        m_decayFunction = LINEAR;
+
+    m_sortResultAcrossBatch = attrs.sort_result_across_batch;
+    m_scoreThreshold = attrs.score_threshold;
+    m_nmsTopk = attrs.nms_top_k;
+    m_keepTopk = attrs.keep_top_k;
+    m_backgroundClass = attrs.background_class;
+
+    m_gaussianSigma = attrs.gaussian_sigma;
+    m_postThreshold = attrs.post_threshold;
+    m_normalized = attrs.normalized;
+    int64_t max_output_boxes_per_class = 0;
+    size_t real_num_classes = m_backgroundClass == -1 ? m_numClasses : m_numClasses - 1;
+    if (m_nmsTopk >= 0)
+        max_output_boxes_per_class = std::min(m_numBoxes, static_cast(m_nmsTopk));
+    else
+        max_output_boxes_per_class = m_numBoxes;
+
+    m_maxBoxesPerBatch = max_output_boxes_per_class * real_num_classes;
+    if (m_keepTopk >= 0)
+        m_maxBoxesPerBatch = std::min(m_maxBoxesPerBatch, static_cast(m_keepTopk));
+}
+
+void MKLDNNMatrixNmsNode::initSupportedPrimitiveDescriptors() {
+    if (!supportedPrimitiveDescriptors.empty())
+        return;
+
+    m_realNumClasses = m_backgroundClass == -1 ? m_numClasses : m_numClasses - 1;
+    m_realNumBoxes = m_nmsTopk == -1 ?
m_numBoxes : std::min(m_nmsTopk, static_cast(m_numBoxes)); + m_numPerBatch.resize(m_numBatches); + m_filteredBoxes.resize(m_numBatches * m_realNumClasses * m_realNumBoxes); + m_numPerBatchClass.resize(m_numBatches, std::vector(m_numClasses, 0)); + m_classOffset.resize(m_numClasses, 0); + + for (size_t i = 0, count = 0; i < m_numClasses; i++) { + if (i == m_backgroundClass) + continue; + m_classOffset[i] = (count++) * m_realNumBoxes; + } + + if (m_decayFunction == MatrixNmsDecayFunction::LINEAR) { + m_decay_fn = [](float iou, float max_iou, float sigma) -> float { + return (1. - iou) / (1. - max_iou + 1e-10f); + }; + } else { + m_decay_fn = [](float iou, float max_iou, float sigma) -> float { + return std::exp((max_iou * max_iou - iou * iou) * sigma); + }; + } + + const std::vector supportedFloatPrecision = {Precision::FP32}; + const std::vector supportedIntOutputPrecision = {Precision::I32, Precision::I64}; + + checkPrecision(getOriginalInputPrecisionAtPort(NMS_BOXES), supportedFloatPrecision, "boxes", inType); + + checkPrecision(getOriginalInputPrecisionAtPort(NMS_SCORES), supportedFloatPrecision, "scores", inType); + + checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTED_INDICES), supportedIntOutputPrecision, "selected_indices", outType); + checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTED_OUTPUTS), supportedFloatPrecision, "selected_outputs", outType); + checkPrecision(getOriginalOutputPrecisionAtPort(NMS_VALID_OUTPUTS), supportedIntOutputPrecision, "valid_outputs", outType); + + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}, + impl_desc_type::ref_any); +} + +bool MKLDNNMatrixNmsNode::created() const { + return getType() == MatrixNms; +} + +namespace { + +static inline float boxArea(const float* bbox, const bool normalized) { + if (bbox[2] < bbox[0] || bbox[3] < bbox[1]) { + return static_cast(0.); + } else { + const float width = bbox[2] - bbox[0]; + const float height = bbox[3] - bbox[1]; + if (normalized) { + return width * height; + } else { + return (width + 1) * (height + 1); + } + } +} + +static inline float intersectionOverUnion(const float* bbox1, const float* bbox2, const bool normalized) { + if (bbox2[0] > bbox1[2] || bbox2[2] < bbox1[0] || bbox2[1] > bbox1[3] || bbox2[3] < bbox1[1]) { + return static_cast(0.); + } else { + const float xMin = std::max(bbox1[0], bbox2[0]); + const float yMin = std::max(bbox1[1], bbox2[1]); + const float xMax = std::min(bbox1[2], bbox2[2]); + const float yMax = std::min(bbox1[3], bbox2[3]); + float norm = normalized ? static_cast(0.) 
: static_cast(1.); + float width = xMax - xMin + norm; + float height = yMax - yMin + norm; + const float interArea = width * height; + const float bbox1Area = boxArea(bbox1, normalized); + const float bbox2Area = boxArea(bbox2, normalized); + return interArea / (bbox1Area + bbox2Area - interArea); + } +} +} // namespace + +size_t MKLDNNMatrixNmsNode::nmsMatrix(const float* boxesData, const float* scoresData, BoxInfo* filterBoxes, const int64_t batchIdx, const int64_t classIdx) { + std::vector candidateIndex(m_numBoxes); + std::iota(candidateIndex.begin(), candidateIndex.end(), 0); + auto end = std::remove_if(candidateIndex.begin(), candidateIndex.end(), [&scoresData, this](int32_t idx) { + return scoresData[idx] <= m_scoreThreshold; + }); + int64_t numDet = 0; + int64_t originalSize = std::distance(candidateIndex.begin(), end); + if (originalSize <= 0) { + return 0; + } + if (m_nmsTopk > -1 && originalSize > m_nmsTopk) { + originalSize = m_nmsTopk; + } + + std::partial_sort(candidateIndex.begin(), candidateIndex.begin() + originalSize, end, [&scoresData](int32_t a, int32_t b) { + return scoresData[a] > scoresData[b]; + }); + + std::vector iouMatrix((originalSize * (originalSize - 1)) >> 1); + std::vector iouMax(originalSize); + + iouMax[0] = 0.; + InferenceEngine::parallel_for(originalSize - 1, [&](size_t i) { + float max_iou = 0.; + size_t actual_index = i + 1; + auto idx_a = candidateIndex[actual_index]; + for (int64_t j = 0; j < actual_index; j++) { + auto idx_b = candidateIndex[j]; + auto iou = intersectionOverUnion(boxesData + idx_a * 4, boxesData + idx_b * 4, m_normalized); + max_iou = std::max(max_iou, iou); + iouMatrix[actual_index * (actual_index - 1) / 2 + j] = iou; + } + iouMax[actual_index] = max_iou; + }); + + if (scoresData[candidateIndex[0]] > m_postThreshold) { + auto box_index = candidateIndex[0]; + auto box = boxesData + box_index * 4; + filterBoxes[0].box.x1 = box[0]; + filterBoxes[0].box.y1 = box[1]; + filterBoxes[0].box.x2 = box[2]; + filterBoxes[0].box.y2 = box[3]; + filterBoxes[0].index = batchIdx * m_numBoxes + box_index; + filterBoxes[0].score = scoresData[candidateIndex[0]]; + filterBoxes[0].batchIndex = batchIdx; + filterBoxes[0].classIndex = classIdx; + numDet++; + } + + for (int64_t i = 1; i < originalSize; i++) { + float minDecay = 1.; + for (int64_t j = 0; j < i; j++) { + auto maxIou = iouMax[j]; + auto iou = iouMatrix[i * (i - 1) / 2 + j]; + auto decay = m_decay_fn(iou, maxIou, m_gaussianSigma); + minDecay = std::min(minDecay, decay); + } + auto ds = minDecay * scoresData[candidateIndex[i]]; + if (ds <= m_postThreshold) + continue; + auto boxIndex = candidateIndex[i]; + auto box = boxesData + boxIndex * 4; + filterBoxes[numDet].box.x1 = box[0]; + filterBoxes[numDet].box.y1 = box[1]; + filterBoxes[numDet].box.x2 = box[2]; + filterBoxes[numDet].box.y2 = box[3]; + filterBoxes[numDet].index = batchIdx * m_numBoxes + boxIndex; + filterBoxes[numDet].score = ds; + filterBoxes[numDet].batchIndex = batchIdx; + filterBoxes[numDet].classIndex = classIdx; + numDet++; + } + return numDet; +} + +void MKLDNNMatrixNmsNode::execute(mkldnn::stream strm) { + const float* boxes = reinterpret_cast(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->GetPtr()); + const float* scores = reinterpret_cast(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->GetPtr()); + + InferenceEngine::parallel_for2d(m_numBatches, m_numClasses, [&](size_t batchIdx, size_t classIdx) { + if (classIdx == m_backgroundClass) { + m_numPerBatchClass[batchIdx][classIdx] = 0; + return; + } + const float* boxesPtr = boxes 
+ batchIdx * m_numBoxes * 4; + const float* scoresPtr = scores + batchIdx * (m_numClasses * m_numBoxes) + classIdx * m_numBoxes; + size_t classNumDet = 0; + size_t batchOffset = batchIdx * m_realNumClasses * m_realNumBoxes; + classNumDet = nmsMatrix(boxesPtr, scoresPtr, m_filteredBoxes.data() + batchOffset + m_classOffset[classIdx], batchIdx, classIdx); + m_numPerBatchClass[batchIdx][classIdx] = classNumDet; + }); + + InferenceEngine::parallel_for(m_numBatches, [&](size_t batchIdx) { + size_t batchOffset = batchIdx * m_realNumClasses * m_realNumBoxes; + BoxInfo* batchFilteredBox = m_filteredBoxes.data() + batchOffset; + auto& numPerClass = m_numPerBatchClass[batchIdx]; + auto numDet = std::accumulate(numPerClass.begin(), numPerClass.end(), 0); + auto start_offset = numPerClass[0]; + + for (size_t i = 1; i < numPerClass.size(); i++) { + auto offset_class = m_classOffset[i]; + for (size_t j = 0; j < numPerClass[i]; j++) { + batchFilteredBox[start_offset + j] = batchFilteredBox[offset_class + j]; + } + start_offset += numPerClass[i]; + } + auto keepNum = numDet; + if (m_keepTopk > -1) { + auto k = static_cast(m_keepTopk); + if (keepNum > k) + keepNum = k; + } + + std::partial_sort(batchFilteredBox, batchFilteredBox + keepNum, batchFilteredBox + numDet, [](const BoxInfo& lhs, const BoxInfo rhs) { + return lhs.score > rhs.score || (lhs.score == rhs.score && lhs.classIndex < rhs.classIndex) || + (lhs.score == rhs.score && lhs.classIndex == rhs.classIndex && lhs.index < rhs.index); + }); + m_numPerBatch[batchIdx] = keepNum; + }); + + auto startOffset = m_numPerBatch[0]; + for (size_t i = 1; i < m_numPerBatch.size(); i++) { + auto offset_batch = i * m_realNumClasses * m_realNumBoxes; + for (size_t j = 0; j < m_numPerBatch[i]; j++) { + m_filteredBoxes[startOffset + j] = m_filteredBoxes[offset_batch + j]; + } + startOffset += m_numPerBatch[i]; + } + + if (m_sortResultAcrossBatch) { /* sort across batch */ + if (m_sortResultType == MatrixNmsSortResultType::SCORE) { + parallel_sort(m_filteredBoxes.begin(), m_filteredBoxes.begin() + startOffset, [](const BoxInfo& l, const BoxInfo& r) { + return (l.score > r.score) || (l.score == r.score && l.batchIndex < r.batchIndex) || + (l.score == r.score && l.batchIndex == r.batchIndex && l.classIndex < r.classIndex) || + (l.score == r.score && l.batchIndex == r.batchIndex && l.classIndex == r.classIndex && l.index < r.index); + }); + } else if (m_sortResultType == MatrixNmsSortResultType::CLASSID) { + parallel_sort(m_filteredBoxes.begin(), m_filteredBoxes.begin() + startOffset, [](const BoxInfo& l, const BoxInfo& r) { + return (l.classIndex < r.classIndex) || (l.classIndex == r.classIndex && l.batchIndex < r.batchIndex) || + (l.classIndex == r.classIndex && l.batchIndex == r.batchIndex && l.score > r.score) || + (l.classIndex == r.classIndex && l.batchIndex == r.batchIndex && l.score == r.score && l.index < r.index); + }); + } + } + + float* selectedOutputs = reinterpret_cast(getChildEdgesAtPort(NMS_SELECTED_OUTPUTS)[0]->getMemoryPtr()->GetPtr()); + int* selectedIndices = reinterpret_cast(getChildEdgesAtPort(NMS_SELECTED_INDICES)[0]->getMemoryPtr()->GetPtr()); + int* validOutputs = reinterpret_cast(getChildEdgesAtPort(NMS_VALID_OUTPUTS)[0]->getMemoryPtr()->GetPtr()); + std::copy(m_numPerBatch.begin(), m_numPerBatch.end(), validOutputs); + + int64_t outputOffset = 0; + int64_t originalOffset = 0; + for (size_t i = 0; i < m_numBatches; i++) { + auto real_boxes = m_numPerBatch[i]; + for (size_t j = 0; j < real_boxes; j++) { + auto originalIndex = originalOffset + j; 
+ selectedIndices[j + outputOffset] = static_cast(m_filteredBoxes[originalIndex].index); + auto selectedBase = selectedOutputs + (outputOffset + j) * 6; + selectedBase[0] = m_filteredBoxes[originalIndex].classIndex; + selectedBase[1] = m_filteredBoxes[originalIndex].score; + selectedBase[2] = m_filteredBoxes[originalIndex].box.x1; + selectedBase[3] = m_filteredBoxes[originalIndex].box.y1; + selectedBase[4] = m_filteredBoxes[originalIndex].box.x2; + selectedBase[5] = m_filteredBoxes[originalIndex].box.y2; + } + std::fill_n(selectedOutputs + (outputOffset + real_boxes) * 6, (m_maxBoxesPerBatch - real_boxes) * 6, -1); + std::fill_n(selectedIndices + (outputOffset + real_boxes), m_maxBoxesPerBatch - real_boxes, -1); + outputOffset += m_maxBoxesPerBatch; + originalOffset += real_boxes; + } +} + +void MKLDNNMatrixNmsNode::checkPrecision(const Precision prec, const std::vector precList, const std::string name, const std::string type) { + if (std::find(precList.begin(), precList.end(), prec) == precList.end()) + IE_THROW() << errorPrefix << "has unsupported '" << name << "' " << type << " precision: " << prec; +} + +REG_MKLDNN_PRIM_FOR(MKLDNNMatrixNmsNode, MatrixNms); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.h new file mode 100644 index 00000000000000..5d85a3669529d3 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.h @@ -0,0 +1,100 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include +#include +#include + +namespace MKLDNNPlugin { + +enum MatrixNmsSortResultType { + CLASSID, // sort selected boxes by class id (ascending) in each batch element + SCORE, // sort selected boxes by score (descending) in each batch element + NONE // do not guarantee the order in each batch element +}; + +enum MatrixNmsDecayFunction { GAUSSIAN, LINEAR }; + +class MKLDNNMatrixNmsNode : public MKLDNNNode { +public: + MKLDNNMatrixNmsNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr& cache); + + void getSupportedDescriptors() override {}; + void initSupportedPrimitiveDescriptors() override; + void createPrimitive() override {}; + void execute(mkldnn::stream strm) override; + bool created() const override; + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + +private: + // input + static const size_t NMS_BOXES = 0; + static const size_t NMS_SCORES = 1; + + // output + static const size_t NMS_SELECTED_OUTPUTS = 0; + static const size_t NMS_SELECTED_INDICES = 1; + static const size_t NMS_VALID_OUTPUTS = 2; + + size_t m_numBatches; + size_t m_numBoxes; + size_t m_numClasses; + size_t m_maxBoxesPerBatch; + + MatrixNmsSortResultType m_sortResultType; + bool m_sortResultAcrossBatch; + float m_scoreThreshold; + int m_nmsTopk; + int m_keepTopk; + int m_backgroundClass; + MatrixNmsDecayFunction m_decayFunction; + float m_gaussianSigma; + float m_postThreshold; + bool m_normalized; + + struct Rectangle { + Rectangle(float x_left, float y_left, float x_right, float y_right) : x1 {x_left}, y1 {y_left}, x2 {x_right}, y2 {y_right} {} + + Rectangle() = default; + + float x1 = 0.0f; + float y1 = 0.0f; + float x2 = 0.0f; + float y2 = 0.0f; + }; + + struct BoxInfo { + BoxInfo(const Rectangle& r, int64_t idx, float sc, int64_t batch_idx, int64_t class_idx) + : box {r}, index {idx}, batchIndex {batch_idx}, classIndex {class_idx}, score {sc} 
{} + + BoxInfo() = default; + + Rectangle box; + int64_t index = -1; + int64_t batchIndex = -1; + int64_t classIndex = -1; + float score = 0.0f; + }; + std::string errorPrefix; + const std::string inType = "input", outType = "output"; + std::vector m_numPerBatch; + std::vector> m_numPerBatchClass; + std::vector m_filteredBoxes; + std::vector m_classOffset; + size_t m_realNumClasses; + size_t m_realNumBoxes; + float (*m_decay_fn)(float, float, float); + void checkPrecision(const InferenceEngine::Precision prec, const std::vector precList, const std::string name, + const std::string type); + + size_t nmsMatrix(const float* boxesData, const float* scoresData, BoxInfo* filterBoxes, const int64_t batchIdx, const int64_t classIdx); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp index 4aa0281a114086..3218bc54eb0300 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp @@ -60,13 +60,14 @@ void MKLDNNMemoryOutputNode::initSupportedPrimitiveDescriptors() { InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(0); auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; config.inConfs.resize(1); config.inConfs[0].inPlace = -1; config.inConfs[0].constant = false; - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, MKLDNNMemory::GetPlainFormat(getParentEdgeAt(0)->getDims())); - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, memory::format_tag::any); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(0)->getShape().getRank())); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } void MKLDNNMemoryOutputNode::execute(mkldnn::stream strm) { @@ -105,8 +106,7 @@ MKLDNNMemoryInputNode::MKLDNNMemoryInputNode(const std::shared_ptr void MKLDNNMemoryInputNode::createPrimitive() { MKLDNNInputNode::createPrimitive(); - auto mem_desc = getChildEdgeAt(0)->getMemoryPtr()->GetDescriptor(); - dataStore->Create(mem_desc); + dataStore->Create(getChildEdgeAt(0)->getMemory().GetDesc()); // default memory state is zero filled dataStore->FillZero(); @@ -119,7 +119,7 @@ void MKLDNNMemoryInputNode::createPrimitive() { * @param src source memory object */ inline -static void simple_copy(MKLDNNMemory& dst, const MKLDNNMemory& src) { +static void simple_copy(const MKLDNNMemory& dst, const MKLDNNMemory& src) { auto srcPtr = static_cast(src.GetPtr()); auto dstPtr = static_cast(dst.GetPtr()); auto srcSizeInByte = src.GetSize(); @@ -146,11 +146,10 @@ void MKLDNNMemoryInputNode::storeState(const MKLDNNMemory &new_state) { } void MKLDNNMemoryInputNode::execute(mkldnn::stream strm) { - auto dst_mem = getChildEdgeAt(0)->getMemory(); // TODO: Should be simple call of: // dst_mem.SetData(dataStore, false); // But because of performance reason we use simple manual copy - simple_copy(dst_mem, *dataStore); + simple_copy(getChildEdgeAt(0)->getMemory(), *dataStore); } MKLDNNMemoryNodeVirtualEdge::Holder* MKLDNNMemoryNodeVirtualEdge::registerInput(MKLDNNMemoryInputNode * node) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.cpp 
b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.cpp new file mode 100644 index 00000000000000..64dccbdaeab47e --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.cpp @@ -0,0 +1,413 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "mkldnn_multiclass_nms.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ie_parallel.hpp" +#include "utils/general_utils.h" + +using namespace MKLDNNPlugin; +using namespace InferenceEngine; + +using ngNmsSortResultType = ngraph::op::util::NmsBase::SortResultType; +using MulticlassNmsIEInternal = ngraph::op::internal::NmsStaticShapeIE; + +bool MKLDNNMultiClassNmsNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto nms = std::dynamic_pointer_cast(op); + if (!nms) { + errorMessage = "Only internal MulitClassNonMaxSuppression operation is supported"; + return false; + } + const auto& atrri = nms->get_attrs(); + const auto& sortType = atrri.sort_result_type; + if (!one_of(sortType, ngNmsSortResultType::NONE, ngNmsSortResultType::SCORE, ngNmsSortResultType::CLASSID)) { + errorMessage = "Does not support SortResultType mode: " + ngraph::as_string(sortType); + return false; + } + } catch (...) { + return false; + } + return true; +} + +MKLDNNMultiClassNmsNode::MKLDNNMultiClassNmsNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr& cache) + : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + errorPrefix = "MultiClassNms layer with name '" + getName() + "' "; + const auto nms = std::dynamic_pointer_cast(op); + + if (getOriginalInputsNumber() != 2) + IE_THROW() << errorPrefix << "has incorrect number of input edges: " << getOriginalInputsNumber(); + + if (getOriginalOutputsNumber() != 3) + IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getOriginalOutputsNumber(); + + auto& atrri = nms->get_attrs(); + sort_result_across_batch = atrri.sort_result_across_batch; + max_output_boxes_per_class = atrri.nms_top_k; + iou_threshold = atrri.iou_threshold; + score_threshold = atrri.score_threshold; + background_class = atrri.background_class; + keep_top_k = atrri.keep_top_k; + if (atrri.sort_result_type == ngNmsSortResultType::CLASSID) + sort_result_type = MulticlassNmsSortResultType::CLASSID; + else if (atrri.sort_result_type == ngNmsSortResultType::SCORE) + sort_result_type = MulticlassNmsSortResultType::SCORE; + else if (atrri.sort_result_type == ngNmsSortResultType::NONE) + sort_result_type = MulticlassNmsSortResultType::NONE; + nms_eta = atrri.nms_eta; + normalized = atrri.normalized; + + const SizeVector& boxes_dims = inputShapes[NMS_BOXES].getStaticDims(); + if (boxes_dims.size() != 3) + IE_THROW() << errorPrefix << "has unsupported 'boxes' input rank: " << boxes_dims.size(); + if (boxes_dims[2] != 4) + IE_THROW() << errorPrefix << "has unsupported 'boxes' input 3rd dimension size: " << boxes_dims[2]; + + const SizeVector& scores_dims = inputShapes[NMS_SCORES].getStaticDims(); + if (scores_dims.size() != 3) + IE_THROW() << errorPrefix << "has unsupported 'scores' input rank: " << scores_dims.size(); + + if (boxes_dims[0] != scores_dims[0]) + IE_THROW() << errorPrefix << " num_batches is different in 'boxes' and 'scores' inputs"; + if (boxes_dims[1] != scores_dims[2]) + IE_THROW() << 
errorPrefix << " num_boxes is different in 'boxes' and 'scores' inputs"; + + const SizeVector& valid_outputs_dims = outputShapes[NMS_SELECTEDNUM].getStaticDims(); + if (valid_outputs_dims.size() != 1) + IE_THROW() << errorPrefix << "has unsupported 'valid_outputs' output rank: " << valid_outputs_dims.size(); + if (valid_outputs_dims[0] != boxes_dims[0]) // valid_outputs_dims[0] != num_batches + IE_THROW() << errorPrefix << "has unsupported 'valid_outputs' output 1st dimension size: " << valid_outputs_dims[0]; +} + +void MKLDNNMultiClassNmsNode::initSupportedPrimitiveDescriptors() { + if (!supportedPrimitiveDescriptors.empty()) + return; + const SizeVector& boxes_dims = inputShapes[NMS_BOXES].getStaticDims(); + num_batches = boxes_dims[0]; + num_boxes = boxes_dims[1]; + const SizeVector& scores_dims = inputShapes[NMS_SCORES].getStaticDims(); + num_classes = scores_dims[1]; + numFiltBox.resize(num_batches, std::vector(num_classes)); // batches + numBoxOffset.resize(num_batches); + + if (max_output_boxes_per_class) { + max_output_boxes_per_class = (max_output_boxes_per_class == -1) ? num_boxes : max_output_boxes_per_class; + filtBoxes.resize(max_output_boxes_per_class * num_batches * num_classes); + } + + const std::vector supportedFloatPrecision = {Precision::FP32, Precision::BF16}; + const std::vector supportedIntOutputPrecision = {Precision::I32, Precision::I64}; + + checkPrecision(getOriginalInputPrecisionAtPort(NMS_BOXES), supportedFloatPrecision, "boxes", inType); + + checkPrecision(getOriginalInputPrecisionAtPort(NMS_SCORES), supportedFloatPrecision, "scores", inType); + + checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTEDINDICES), supportedIntOutputPrecision, "selected_indices", outType); + checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTEDOUTPUTS), supportedFloatPrecision, "selected_outputs", outType); + checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTEDNUM), supportedIntOutputPrecision, "selected_num", outType); + + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}, + impl_desc_type::ref_any); +} + +void MKLDNNMultiClassNmsNode::execute(mkldnn::stream strm) { + const float* boxes = reinterpret_cast(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->GetPtr()); + const float* scores = reinterpret_cast(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->GetPtr()); + + auto dims_boxes = getParentEdgeAt(NMS_BOXES)->getMemory().GetDesc().getShape().getStaticDims(); + + if (max_output_boxes_per_class == 0) + return; + + int* selected_indices = reinterpret_cast(getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getMemoryPtr()->GetPtr()); + + float* selected_outputs = selected_outputs = reinterpret_cast(getChildEdgesAtPort(NMS_SELECTEDOUTPUTS)[0]->getMemoryPtr()->GetPtr()); + + int* selected_num = reinterpret_cast(getChildEdgesAtPort(NMS_SELECTEDNUM)[0]->getMemoryPtr()->GetPtr()); + + auto boxesStrides = getParentEdgeAt(NMS_BOXES)->getMemory().GetDescWithType().getStrides(); + auto scoresStrides = getParentEdgeAt(NMS_SCORES)->getMemory().GetDescWithType().getStrides(); + + if ((nms_eta >= 0) && (nms_eta < 1)) { + nmsWithEta(boxes, scores, boxesStrides, scoresStrides); + } else { + nmsWithoutEta(boxes, scores, boxesStrides, scoresStrides); + } + + size_t startOffset = numFiltBox[0][0]; + numBoxOffset[0] = 0; + for (size_t b = 0; b < numFiltBox.size(); b++) { + size_t batchOffsetNew = 0; + size_t batchOffset = b * 
num_classes * max_output_boxes_per_class; + for (size_t c = (b == 0 ? 1 : 0); c < numFiltBox[b].size(); c++) { + size_t offset = batchOffset + c * max_output_boxes_per_class; + for (size_t i = 0; i < numFiltBox[b][c]; i++) { + filtBoxes[startOffset + i] = filtBoxes[offset + i]; + } + startOffset += numFiltBox[b][c]; + batchOffsetNew += numFiltBox[b][c]; + } + numBoxOffset[b] = batchOffsetNew; + if (b == 0) + numBoxOffset[b] += numFiltBox[0][0]; + } + // sort element before go through keep_top_k + parallel_sort(filtBoxes.begin(), filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) { + return ((l.batch_index < r.batch_index) || + ((l.batch_index == r.batch_index) && ((l.score > r.score) || ((std::fabs(l.score - r.score) < 1e-6) && l.class_index < r.class_index) || + ((std::fabs(l.score - r.score) < 1e-6) && l.class_index == r.class_index && l.box_index < r.box_index)))); + }); + + if (keep_top_k > -1) { + startOffset = 0; + size_t offset = 0; + for (size_t b = 0; b < numFiltBox.size(); b++) { + if (numBoxOffset[b] > keep_top_k) { + if (startOffset == offset) { + startOffset += keep_top_k; + offset += numBoxOffset[b]; + } else { + for (size_t i = 0; i < keep_top_k; i++) { + filtBoxes[startOffset + i] = filtBoxes[offset + i]; + } + startOffset += keep_top_k; + offset += numBoxOffset[b]; + } + } else { + if (startOffset == offset) { + startOffset += numBoxOffset[b]; + offset += numBoxOffset[b]; + } else { + for (size_t i = 0; i < numBoxOffset[b]; i++) { + filtBoxes[startOffset + i] = filtBoxes[offset + i]; + } + startOffset += numBoxOffset[b]; + offset += numBoxOffset[b]; + } + } + } + } + + if (sort_result_across_batch) { + if (sort_result_type == SCORE) { + parallel_sort(filtBoxes.begin(), filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) { + return (l.score > r.score) || (l.score == r.score && l.batch_index < r.batch_index) || + (l.score == r.score && l.batch_index == r.batch_index && l.class_index < r.class_index) || + (l.score == r.score && l.batch_index == r.batch_index && l.class_index == r.class_index && l.box_index < r.box_index); + }); + } else if (sort_result_type == CLASSID) { + parallel_sort(filtBoxes.begin(), filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) { + return (l.class_index < r.class_index) || (l.class_index == r.class_index && l.batch_index < r.batch_index) || + (l.class_index == r.class_index && l.batch_index == r.batch_index && l.score > r.score) || + (l.class_index == r.class_index && l.batch_index == r.batch_index && l.score == r.score && l.box_index < r.box_index); + }); + } + } else if (sort_result_type == CLASSID) { + parallel_sort(filtBoxes.begin(), filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) { + return ((l.batch_index < r.batch_index) || + ((l.batch_index == r.batch_index) && + ((l.class_index < r.class_index) || ((l.class_index == r.class_index) && l.score > r.score) || + ((std::fabs(l.score - r.score) <= 1e-6) && l.class_index == r.class_index && l.box_index < r.box_index)))); + }); + } + + const size_t selectedBoxesNum = getChildEdgeAt(NMS_SELECTEDINDICES)->getMemory().GetDesc().getShape().getStaticDims()[0]; + const size_t validOutputs = std::min(startOffset, selectedBoxesNum); + + std::vector m_selected_num; + m_selected_num.resize(dims_boxes[0]); + + const size_t selectedBoxesNum_perBatch = selectedBoxesNum / dims_boxes[0]; + + for (size_t idx = 0lu; idx < validOutputs; idx++) { + 
m_selected_num[filtBoxes[idx].batch_index]++; + } + + int64_t output_offset = 0; + int64_t original_offset = 0; + for (size_t i = 0; i < dims_boxes[0]; i++) { + auto real_boxes = m_selected_num[i]; + selected_num[i] = static_cast<int>(real_boxes); + + for (size_t j = 0; j < real_boxes; j++) { + auto original_index = original_offset + j; + selected_indices[j + output_offset] = filtBoxes[original_index].batch_index * dims_boxes[1] + filtBoxes[original_index].box_index; + auto selected_base = selected_outputs + (output_offset + j) * 6; + selected_base[0] = filtBoxes[original_index].class_index; + selected_base[1] = filtBoxes[original_index].score; + selected_base[2] = boxes[selected_indices[j + output_offset] * 4]; + selected_base[3] = boxes[selected_indices[j + output_offset] * 4 + 1]; + selected_base[4] = boxes[selected_indices[j + output_offset] * 4 + 2]; + selected_base[5] = boxes[selected_indices[j + output_offset] * 4 + 3]; + } + std::fill_n(selected_outputs + (output_offset + real_boxes) * 6, (selectedBoxesNum_perBatch - real_boxes) * 6, -1); + std::fill_n(selected_indices + (output_offset + real_boxes), selectedBoxesNum_perBatch - real_boxes, -1); + output_offset += selectedBoxesNum_perBatch; + original_offset += real_boxes; + } +} + +bool MKLDNNMultiClassNmsNode::created() const { + return getType() == MulticlassNms; +} + +float MKLDNNMultiClassNmsNode::intersectionOverUnion(const float* boxesI, const float* boxesJ, const bool normalized) { + float yminI, xminI, ymaxI, xmaxI, yminJ, xminJ, ymaxJ, xmaxJ; + const float norm = static_cast<float>(normalized == false); + + // to align with reference + yminI = boxesI[0]; + xminI = boxesI[1]; + ymaxI = boxesI[2]; + xmaxI = boxesI[3]; + yminJ = boxesJ[0]; + xminJ = boxesJ[1]; + ymaxJ = boxesJ[2]; + xmaxJ = boxesJ[3]; + + float areaI = (ymaxI - yminI + norm) * (xmaxI - xminI + norm); + float areaJ = (ymaxJ - yminJ + norm) * (xmaxJ - xminJ + norm); + if (areaI <= 0.f || areaJ <= 0.f) + return 0.f; + + float intersection_area = (std::max)((std::min)(ymaxI, ymaxJ) - (std::max)(yminI, yminJ) + norm, 0.f) * + (std::max)((std::min)(xmaxI, xmaxJ) - (std::max)(xminI, xminJ) + norm, 0.f); + return intersection_area / (areaI + areaJ - intersection_area); +} + +void MKLDNNMultiClassNmsNode::nmsWithEta(const float* boxes, const float* scores, const SizeVector& boxesStrides, const SizeVector& scoresStrides) { + auto less = [](const boxInfo& l, const boxInfo& r) { + return l.score < r.score || ((l.score == r.score) && (l.idx > r.idx)); + }; + + auto func = [](float iou, float adaptive_threshold) { + return iou <= adaptive_threshold ? 1.0f : 0.0f; + }; + + parallel_for2d(num_batches, num_classes, [&](int batch_idx, int class_idx) { + if (class_idx != background_class) { + std::vector<filteredBoxes> fb; + const float* boxesPtr = boxes + batch_idx * boxesStrides[0]; + const float* scoresPtr = scores + batch_idx * scoresStrides[0] + class_idx * scoresStrides[1]; + + std::priority_queue<boxInfo, std::vector<boxInfo>, decltype(less)> sorted_boxes(less); + for (int box_idx = 0; box_idx < num_boxes; box_idx++) { + if (scoresPtr[box_idx] >= score_threshold) // align with ref + sorted_boxes.emplace(boxInfo({scoresPtr[box_idx], box_idx, 0})); + } + fb.reserve(sorted_boxes.size()); + if (sorted_boxes.size() > 0) { + auto adaptive_threshold = iou_threshold; + int max_out_box = (max_output_boxes_per_class > sorted_boxes.size()) ? 
sorted_boxes.size() : max_output_boxes_per_class; + while (max_out_box && !sorted_boxes.empty()) { + boxInfo currBox = sorted_boxes.top(); + float origScore = currBox.score; + sorted_boxes.pop(); + max_out_box--; + + bool box_is_selected = true; + for (int idx = static_cast<int>(fb.size()) - 1; idx >= currBox.suppress_begin_index; idx--) { + float iou = intersectionOverUnion(&boxesPtr[currBox.idx * 4], &boxesPtr[fb[idx].box_index * 4], normalized); + currBox.score *= func(iou, adaptive_threshold); + if (iou >= adaptive_threshold) { + box_is_selected = false; + break; + } + if (currBox.score <= score_threshold) + break; + } + + currBox.suppress_begin_index = fb.size(); + if (box_is_selected) { + if (nms_eta < 1 && adaptive_threshold > 0.5) { + adaptive_threshold *= nms_eta; + } + if (currBox.score == origScore) { + fb.push_back({currBox.score, batch_idx, class_idx, currBox.idx}); + continue; + } + if (currBox.score > score_threshold) { + sorted_boxes.push(currBox); + } + } + } + } + numFiltBox[batch_idx][class_idx] = fb.size(); + size_t offset = batch_idx * num_classes * max_output_boxes_per_class + class_idx * max_output_boxes_per_class; + for (size_t i = 0; i < fb.size(); i++) { + filtBoxes[offset + i] = fb[i]; + } + } + }); +} + +void MKLDNNMultiClassNmsNode::nmsWithoutEta(const float* boxes, const float* scores, const SizeVector& boxesStrides, const SizeVector& scoresStrides) { + parallel_for2d(num_batches, num_classes, [&](int batch_idx, int class_idx) { + if (class_idx != background_class) { + const float* boxesPtr = boxes + batch_idx * boxesStrides[0]; + const float* scoresPtr = scores + batch_idx * scoresStrides[0] + class_idx * scoresStrides[1]; + + std::vector<std::pair<float, int>> sorted_boxes; + for (int box_idx = 0; box_idx < num_boxes; box_idx++) { + if (scoresPtr[box_idx] >= score_threshold) // align with ref + sorted_boxes.emplace_back(std::make_pair(scoresPtr[box_idx], box_idx)); + } + + int io_selection_size = 0; + if (sorted_boxes.size() > 0) { + parallel_sort(sorted_boxes.begin(), sorted_boxes.end(), [](const std::pair<float, int>& l, const std::pair<float, int>& r) { + return (l.first > r.first || ((l.first == r.first) && (l.second < r.second))); + }); + int offset = batch_idx * num_classes * max_output_boxes_per_class + class_idx * max_output_boxes_per_class; + filtBoxes[offset + 0] = filteredBoxes(sorted_boxes[0].first, batch_idx, class_idx, sorted_boxes[0].second); + io_selection_size++; + int max_out_box = (max_output_boxes_per_class > sorted_boxes.size()) ? 
sorted_boxes.size() : max_output_boxes_per_class; + for (size_t box_idx = 1; box_idx < max_out_box; box_idx++) { + bool box_is_selected = true; + for (int idx = io_selection_size - 1; idx >= 0; idx--) { + float iou = + intersectionOverUnion(&boxesPtr[sorted_boxes[box_idx].second * 4], &boxesPtr[filtBoxes[offset + idx].box_index * 4], normalized); + if (iou >= iou_threshold) { + box_is_selected = false; + break; + } + } + + if (box_is_selected) { + filtBoxes[offset + io_selection_size] = filteredBoxes(sorted_boxes[box_idx].first, batch_idx, class_idx, sorted_boxes[box_idx].second); + io_selection_size++; + } + } + } + numFiltBox[batch_idx][class_idx] = io_selection_size; + } + }); +} + +void MKLDNNMultiClassNmsNode::checkPrecision(const Precision prec, const std::vector precList, const std::string name, const std::string type) { + if (std::find(precList.begin(), precList.end(), prec) == precList.end()) + IE_THROW() << errorPrefix << "has unsupported '" << name << "' " << type << " precision: " << prec; +} + +REG_MKLDNN_PRIM_FOR(MKLDNNMultiClassNmsNode, MulticlassNms) \ No newline at end of file diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.hpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.hpp new file mode 100644 index 00000000000000..0627f72cea0df8 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.hpp @@ -0,0 +1,93 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include + +namespace MKLDNNPlugin { + +enum MulticlassNmsSortResultType { + CLASSID, // sort selected boxes by class id (ascending) in each batch element + SCORE, // sort selected boxes by score (descending) in each batch element + NONE // do not guarantee the order in each batch element +}; + +class MKLDNNMultiClassNmsNode : public MKLDNNNode { +public: + MKLDNNMultiClassNmsNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr& cache); + + void getSupportedDescriptors() override {}; + void initSupportedPrimitiveDescriptors() override; + void createPrimitive() override {}; + void execute(mkldnn::stream strm) override; + bool created() const override; + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + +private: + // input (port Num) + const size_t NMS_BOXES = 0; + const size_t NMS_SCORES = 1; + + // output (port Num) + const size_t NMS_SELECTEDOUTPUTS = 0; + const size_t NMS_SELECTEDINDICES = 1; + const size_t NMS_SELECTEDNUM = 2; + + bool sort_result_across_batch = false; + MulticlassNmsSortResultType sort_result_type = NONE; + + size_t num_batches; + size_t num_boxes; + size_t num_classes; + + int max_output_boxes_per_class = 0; + float iou_threshold = 0.0f; + float score_threshold = 0.0f; + + int32_t background_class = 0; + int32_t keep_top_k = 0; + float nms_eta = 0.0f; + bool normalized = true; + + std::string errorPrefix; + + std::vector> numFiltBox; + std::vector numBoxOffset; + const std::string inType = "input", outType = "output"; + + struct filteredBoxes { + float score; + int batch_index; + int class_index; + int box_index; + filteredBoxes() = default; + filteredBoxes(float _score, int _batch_index, int _class_index, int _box_index) + : score(_score), batch_index(_batch_index), class_index(_class_index), box_index(_box_index) {} + }; + + struct boxInfo { + float score; + int idx; + int suppress_begin_index; + }; + + std::vector filtBoxes; + + void checkPrecision(const 
InferenceEngine::Precision prec, const std::vector precList, const std::string name, + const std::string type); + + float intersectionOverUnion(const float* boxesI, const float* boxesJ, const bool normalized); + + void nmsWithEta(const float* boxes, const float* scores, const InferenceEngine::SizeVector& boxesStrides, const InferenceEngine::SizeVector& scoresStrides); + + void nmsWithoutEta(const float* boxes, const float* scores, const InferenceEngine::SizeVector& boxesStrides, + const InferenceEngine::SizeVector& scoresStrides); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp index baff79e5d75317..f476aa8dec5231 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp @@ -733,7 +733,7 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { !getParentEdgeAt(0)->getParent()->isConstant(); const size_t inputsNum = getParentEdges().size(); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; config.inConfs.resize(inputsNum); config.outConfs.resize(1); @@ -742,17 +742,15 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { config.inConfs[0].inPlace = -1; config.outConfs[0].inPlace = canBeInplace ? 0 : -1; if (inputsNum == 2) { - const auto dims = getParentEdgeAt(1)->getDims().ToSizeVector(); - config.inConfs[1].desc = TensorDesc(Precision::I32, - dims, - TensorDesc::getLayoutByDims(dims)); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), memory::data_type::s32, + MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(1)->getShape().getRank())); config.inConfs[1].constant = true; } auto pushDesc = [&](memory::format_tag format, impl_desc_type impl_type) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, format); - config.outConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), outputDataType, format); - supportedPrimitiveDescriptors.push_back({config, impl_type, format}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, format); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), outputDataType, format); + supportedPrimitiveDescriptors.push_back({config, impl_type}); }; impl_desc_type impl_type; @@ -768,22 +766,22 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { if (mayiuse(cpu::x64::sse41)) { // nspc - if (getParentEdgeAt(0)->getDims().ndims() == 4) { + if (getParentEdgeAt(0)->getShape().getRank() == 4) { pushDesc(memory::format_tag::nhwc, impl_type); - } else if (getParentEdgeAt(0)->getDims().ndims() == 5) { + } else if (getParentEdgeAt(0)->getShape().getRank() == 5) { pushDesc(memory::format_tag::ndhwc, impl_type); } // blk if (impl_desc_type::jit_avx512 == impl_type) { - if (getParentEdgeAt(0)->getDims().ndims() == 4) { + if (getParentEdgeAt(0)->getShape().getRank() == 4) { pushDesc(memory::format_tag::nChw16c, impl_type); - } else if (getParentEdgeAt(0)->getDims().ndims() == 5) { + } else if (getParentEdgeAt(0)->getShape().getRank() == 5) { pushDesc(memory::format_tag::nCdhw16c, impl_type); } } else if (impl_desc_type::jit_avx2 == impl_type || impl_desc_type::jit_sse42 == impl_type) { - if (getParentEdgeAt(0)->getDims().ndims() == 4) { + if (getParentEdgeAt(0)->getShape().getRank() == 4) { 
pushDesc(memory::format_tag::nChw8c, impl_type); - } else if (getParentEdgeAt(0)->getDims().ndims() == 5) { + } else if (getParentEdgeAt(0)->getShape().getRank() == 5) { pushDesc(memory::format_tag::nCdhw8c, impl_type); } } @@ -792,7 +790,7 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { // planar if (canBeInplace) config.inConfs[0].inPlace = 0; - pushDesc(MKLDNNMemory::GetPlainFormat(getChildEdgeAt(0)->getDims()), impl_type); + pushDesc(MKLDNNMemory::GetPlainFormatByRank(getChildEdgeAt(0)->getShape().getRank()), impl_type); } void MKLDNNMVNNode::createPrimitive() { @@ -805,15 +803,15 @@ void MKLDNNMVNNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; - const SizeVector in_dims = getParentEdgeAt(0)->getDims().ToSizeVector(); + const SizeVector in_dims = getParentEdgeAt(0)->getShape().getStaticDims(); transformTo5DCase(in_dims); auto selectedPD = getSelectedPrimitiveDescriptor(); auto jcp = jit_mvn_config_params(); - jcp.src_prc = selectedPD->getConfig().inConfs[0].desc.getPrecision(); - jcp.dst_prc = selectedPD->getConfig().outConfs[0].desc.getPrecision(); + jcp.src_prc = selectedPD->getConfig().inConfs[0].desc->getPrecision(); + jcp.dst_prc = selectedPD->getConfig().outConfs[0].desc->getPrecision(); jcp.src_data_size = MKLDNNExtensionUtils::sizeOfDataType(MKLDNNExtensionUtils::IEPrecisionToDataType(jcp.src_prc)); jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(MKLDNNExtensionUtils::IEPrecisionToDataType(jcp.dst_prc)); - jcp.planar_layout = MKLDNNMemory::GetPlainLayout(getChildEdgeAt(0)->getDims()) == selectedPD->getConfig().inConfs[0].desc.getLayout(); + jcp.planar_layout = selectedPD->getConfig().inConfs[0].desc->hasLayoutType(LayoutType::ncsp); jcp.normalize_variance = normalizeVariance_; jcp.across_channels = acrossChannels_; int N = 0; @@ -913,13 +911,12 @@ void MKLDNNMVNNode::execute(mkldnn::stream strm) { uint8_t *dst_data = reinterpret_cast(dstMemPtr->GetPtr()); uint8_t *src_data = reinterpret_cast(srcMemPtr->GetPtr()); - auto dim = getParentEdgeAt(0)->getDesc().getDims(); + auto dim = getParentEdgeAt(0)->getShape().getStaticDims(); if (mayiuse(cpu::x64::sse41)) { if (!mvn_mean_kernel || (normalizeVariance_ && !mvn_variance_kernel) || !mvn_kernel) { IE_THROW() << "MVN layer with name '" << getName() << "' doesn't create kernel to execute on sse41 above platform."; } - Layout layout = getParentEdgeAt(0)->getDesc().getLayout(); - if (layout == C || layout == NC || layout == CHW || layout == NCHW || layout == NCDHW) { + if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp)) { mvn_pln(src_data, dst_data, dim); } else { mvn_blk(src_data, dst_data, dim); @@ -1173,10 +1170,7 @@ void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const Si size_t N = 1; size_t C = 1; size_t D = 1; size_t H = 1; size_t W = 1; std::tie(N, C, D, H, W) = shape5D; - bool is_nhwc = false; - Layout layout = getParentEdgeAt(0)->getDesc().getLayout(); - if (layout == NHWC || layout == NDHWC) - is_nhwc = true; + bool is_nhwc = getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc); size_t CB = div_up(C, blk_size); @@ -1407,7 +1401,7 @@ bool MKLDNNMVNNode::canFuse(const MKLDNNNodePtr& node) const { } // limit post ops to unary when shape transformed on channel // 1D only fused with unary - int inputRank = getParentEdgeAt(0)->getDims().ndims(); + int inputRank = getParentEdgeAt(0)->getShape().getRank(); bool unaryEltwise = one_of(node->getAlgorithm(), 
EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseTanh, EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven, EltwiseRoundHalfAwayFromZero, EltwiseAbs, EltwiseSqrt, EltwiseSoftRelu); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_non_max_suppression_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_non_max_suppression_node.cpp index 093127eada5f9a..a6c0bc07b2840e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_non_max_suppression_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_non_max_suppression_node.cpp @@ -114,18 +114,18 @@ void MKLDNNNonMaxSuppressionNode::initSupportedPrimitiveDescriptors() { checkOutput(outputShape_SELECTEDINDICES, supportedIntOutputPrecision, "selected_indices", NMS_SELECTEDINDICES); checkOutput(outputShape_SELECTEDSCORES, supportedFloatPrecision, "selected_scores", NMS_SELECTEDSCORES); - std::vector inDataConf; + std::vector inDataConf; inDataConf.reserve(getOriginalInputsNumber()); for (int i = 0; i < getOriginalInputsNumber(); ++i) { Precision inPrecision = i == NMS_MAXOUTPUTBOXESPERCLASS ? Precision::I32 : Precision::FP32; - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, inPrecision); + inDataConf.emplace_back(LayoutType::ncsp, inPrecision); } - std::vector outDataConf; + std::vector outDataConf; outDataConf.reserve(getOriginalOutputsNumber()); for (int i = 0; i < getOriginalOutputsNumber(); ++i) { Precision outPrecision = i == NMS_SELECTEDSCORES ? Precision::FP32 : Precision::I32; - outDataConf.emplace_back(TensorDescCreatorTypes::ncsp, outPrecision); + outDataConf.emplace_back(LayoutType::ncsp, outPrecision); } addSupportedPrimDesc(inDataConf, outDataConf, impl_desc_type::ref_any); @@ -135,24 +135,24 @@ void MKLDNNNonMaxSuppressionNode::execute(mkldnn::stream strm) { const float *boxes = reinterpret_cast(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->GetPtr()); const float *scores = reinterpret_cast(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->GetPtr()); - max_output_boxes_per_class = outDims.size() > NMS_SELECTEDSCORES ? 0 : num_boxes; - if (inDims.size() > NMS_MAXOUTPUTBOXESPERCLASS) { + max_output_boxes_per_class = outputShapes.size() > NMS_SELECTEDSCORES ? 0 : num_boxes; + if (inputShapes.size() > NMS_MAXOUTPUTBOXESPERCLASS) { max_output_boxes_per_class = reinterpret_cast(getParentEdgeAt(NMS_MAXOUTPUTBOXESPERCLASS)->getMemoryPtr()->GetPtr())[0]; } if (max_output_boxes_per_class == 0) return; - iou_threshold = outDims.size() > NMS_SELECTEDSCORES ? 0.0f : 1.0f; - if (inDims.size() > NMS_IOUTHRESHOLD) + iou_threshold = outputShapes.size() > NMS_SELECTEDSCORES ? 
0.0f : 1.0f; + if (inputShapes.size() > NMS_IOUTHRESHOLD) iou_threshold = reinterpret_cast(getParentEdgeAt(NMS_IOUTHRESHOLD)->getMemoryPtr()->GetPtr())[0]; score_threshold = 0.0f; - if (inDims.size() > NMS_SCORETHRESHOLD) + if (inputShapes.size() > NMS_SCORETHRESHOLD) score_threshold = reinterpret_cast(getParentEdgeAt(NMS_SCORETHRESHOLD)->getMemoryPtr()->GetPtr())[0]; soft_nms_sigma = 0.0f; - if (inDims.size() > NMS_SOFTNMSSIGMA) + if (inputShapes.size() > NMS_SOFTNMSSIGMA) soft_nms_sigma = reinterpret_cast(getParentEdgeAt(NMS_SOFTNMSSIGMA)->getMemoryPtr()->GetPtr())[0]; scale = 0.0f; if (soft_nms_sigma > 0.0) { @@ -162,15 +162,15 @@ void MKLDNNNonMaxSuppressionNode::execute(mkldnn::stream strm) { int *selected_indices = reinterpret_cast(getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getMemoryPtr()->GetPtr()); float *selected_scores = nullptr; - if (outDims.size() > NMS_SELECTEDSCORES) + if (outputShapes.size() > NMS_SELECTEDSCORES) selected_scores = reinterpret_cast(getChildEdgesAtPort(NMS_SELECTEDSCORES)[0]->getMemoryPtr()->GetPtr()); int *valid_outputs = nullptr; - if (outDims.size() > NMS_VALIDOUTPUTS) + if (outputShapes.size() > NMS_VALIDOUTPUTS) valid_outputs = reinterpret_cast(getChildEdgesAtPort(NMS_VALIDOUTPUTS)[0]->getMemoryPtr()->GetPtr()); - auto boxesStrides = getParentEdgeAt(NMS_BOXES)->getDesc().getBlockingDesc().getStrides(); - auto scoresStrides = getParentEdgeAt(NMS_SCORES)->getDesc().getBlockingDesc().getStrides(); + auto boxesStrides = getParentEdgeAt(NMS_BOXES)->getMemory().GetDescWithType().getStrides(); + auto scoresStrides = getParentEdgeAt(NMS_SCORES)->getMemory().GetDescWithType().getStrides(); std::vector filtBoxes(max_output_boxes_per_class * num_batches * num_classes); @@ -205,10 +205,10 @@ void MKLDNNNonMaxSuppressionNode::execute(mkldnn::stream strm) { }); } - const size_t selectedBoxesNum = getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getDims()[0]; + const size_t selectedBoxesNum = getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getShape().getStaticDims()[0]; const size_t validOutputs = std::min(filtBoxes.size(), selectedBoxesNum); - int selectedIndicesStride = getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getDesc().getBlockingDesc().getStrides()[0]; + int selectedIndicesStride = getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getMemory().GetDescWithType().getStrides()[0]; int *selectedIndicesPtr = selected_indices; float *selectedScoresPtr = selected_scores; @@ -218,7 +218,7 @@ void MKLDNNNonMaxSuppressionNode::execute(mkldnn::stream strm) { selectedIndicesPtr[1] = filtBoxes[idx].class_index; selectedIndicesPtr[2] = filtBoxes[idx].box_index; selectedIndicesPtr += selectedIndicesStride; - if (outDims.size() > NMS_SELECTEDSCORES) { + if (outputShapes.size() > NMS_SELECTEDSCORES) { selectedScoresPtr[0] = static_cast(filtBoxes[idx].batch_index); selectedScoresPtr[1] = static_cast(filtBoxes[idx].class_index); selectedScoresPtr[2] = static_cast(filtBoxes[idx].score); @@ -226,10 +226,10 @@ void MKLDNNNonMaxSuppressionNode::execute(mkldnn::stream strm) { } } std::fill(selectedIndicesPtr, selectedIndicesPtr + (selectedBoxesNum - idx) * selectedIndicesStride, -1); - if (outDims.size() > NMS_SELECTEDSCORES) { + if (outputShapes.size() > NMS_SELECTEDSCORES) { std::fill(selectedScoresPtr, selectedScoresPtr + (selectedBoxesNum - idx) * selectedIndicesStride, -1.f); } - if (outDims.size() > NMS_VALIDOUTPUTS) + if (outputShapes.size() > NMS_VALIDOUTPUTS) *valid_outputs = static_cast(validOutputs); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp 
b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp index ff95f416573a25..10b1be0dac532d 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp @@ -660,7 +660,7 @@ MKLDNNNormalizeL2Node::MKLDNNNormalizeL2Node(const std::shared_ptr } } -bool MKLDNNNormalizeL2Node::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNNormalizeL2Node::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto norm = std::dynamic_pointer_cast(op); if (!norm) { @@ -682,8 +682,10 @@ bool MKLDNNNormalizeL2Node::isSupportedOperation(const std::shared_ptrgetDims().ndims() > 4 || getParentEdgeAt(0)->getDims().ndims() < 2) { + if (getParentEdgeAt(0)->getShape().getRank() > 4 || getParentEdgeAt(0)->getShape().getRank() < 2) { IE_THROW() << errorPrefix << "has invalid input shape. Normalize supports from 2D to 4D blobs."; } } @@ -757,21 +759,22 @@ void MKLDNNNormalizeL2Node::initSupportedPrimitiveDescriptors() { bool canBeInplace = src_data_size == dst_data_size && getParentEdgeAt(DATA)->getParent()->getChildEdges().size() == 1; - LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; config.inConfs.resize(2); config.outConfs.resize(1); config.outConfs[0].inPlace = canBeInplace ? 0 : -1; auto pushDesc = [&](memory::format_tag format) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(DATA)->getDims(), inputDataType, format); - config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(AXES)->getDims(), memory::data_type::s32, memory::format_tag::x); - config.outConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(DATA)->getDims(), outputDataType, format); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, format}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(DATA)->getShape().getStaticDims(), inputDataType, format); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(AXES)->getShape().getStaticDims(), memory::data_type::s32, + memory::format_tag::x); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(DATA)->getShape().getStaticDims(), outputDataType, format); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); }; // only plain layout support when w/o sse42 - if (getParentEdgeAt(DATA)->getDims().ndims() == 4 && !cornerCase) { + if (getParentEdgeAt(DATA)->getShape().getRank() == 4 && !cornerCase) { if (mayiuse(cpu::x64::sse41)) { pushDesc(memory::format_tag::nhwc); if (mayiuse(cpu::x64::avx512_common)) { @@ -783,7 +786,7 @@ void MKLDNNNormalizeL2Node::initSupportedPrimitiveDescriptors() { } if (canBeInplace) config.inConfs[0].inPlace = 0; - pushDesc(MKLDNNMemory::GetPlainFormat(getChildEdgeAt(DATA)->getDims())); + pushDesc(MKLDNNMemory::GetPlainFormatByRank(getChildEdgeAt(DATA)->getShape().getRank())); } bool MKLDNNNormalizeL2Node::canFuse(const MKLDNNNodePtr& node) const { @@ -824,22 +827,23 @@ void MKLDNNNormalizeL2Node::createPrimitive() { if (!cornerCase) { auto selectedPD = getSelectedPrimitiveDescriptor(); - jcp.src_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().inConfs[0].desc.getPrecision()); - jcp.dst_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().outConfs[0].desc.getPrecision()); + jcp.src_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().inConfs[0].desc->getPrecision()); + jcp.dst_dt = 
MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().outConfs[0].desc->getPrecision()); jcp.src_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.src_dt); jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.dst_dt); jcp.is_nchw = jcp.is_nhwc = jcp.is_blk = false; - if (getParentEdgeAt(0)->getMemory().GetDesc().isPlainFormat()) { + if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp)) { jcp.is_nchw = true; - } else if (getParentEdgeAt(0)->getMemory().GetDesc().isBlockedCFormat()) { + } else if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c) || + getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c)) { jcp.is_blk = true; } else { jcp.is_nhwc = true; } jcp.across_spatial = across_spatial; - auto dims = getParentEdgeAt(0)->getDesc().getDims(); + auto dims = getParentEdgeAt(0)->getShape().getStaticDims(); size_t dims_size = dims.size(); jcp.n = (dims_size > 0) ? dims[0] : 1lu; jcp.c = (dims_size > 1) ? dims[1] : 1lu; @@ -905,7 +909,7 @@ void MKLDNNNormalizeL2Node::execute(mkldnn::stream strm) { const uint8_t *src_ptr = reinterpret_cast(srcMemPtr->GetPtr()); uint8_t *dst_ptr = reinterpret_cast(dstMemPtr->GetPtr()); - auto dims = getParentEdgeAt(DATA)->getDesc().getDims(); + auto dims = getParentEdgeAt(DATA)->getShape().getStaticDims(); NormalizeContext ctx = { *this, diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.h index bcb7b0d8d491f2..6b6a62bf42c418 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.h @@ -84,7 +84,7 @@ class MKLDNNNormalizeL2Node : public MKLDNNNode { return false; } - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; bool canFuse(const MKLDNNNodePtr& node) const override; private: diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.cpp index 8f164c33c1876c..350e86e556e85d 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.cpp @@ -10,7 +10,7 @@ #include "utils/bfloat16.hpp" #include #include "mkldnn_one_hot_node.h" -#include +#include #include #include "common/cpu_memcpy.h" @@ -89,11 +89,11 @@ void MKLDNNOneHotNode::initSupportedPrimitiveDescriptors() { } output_precision = getOriginalOutputPrecisionAtPort(0); - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, input_precision}, - {TensorDescCreatorTypes::ncsp, input_precision}, - {TensorDescCreatorTypes::ncsp, output_precision}, - {TensorDescCreatorTypes::ncsp, output_precision}}, - {{TensorDescCreatorTypes::ncsp, output_precision}}, + addSupportedPrimDesc({{LayoutType::ncsp, input_precision}, + {LayoutType::ncsp, input_precision}, + {LayoutType::ncsp, output_precision}, + {LayoutType::ncsp, output_precision}}, + {{LayoutType::ncsp, output_precision}}, impl_desc_type::ref_any); } @@ -125,13 +125,13 @@ void MKLDNNOneHotNode::one_hot(size_t prefix_size, size_t suffix_size) { void MKLDNNOneHotNode::execute(mkldnn::stream strm) { std::size_t prefix_size = 1; - auto input_dims = getParentEdgeAt(0)->getDesc().getDims(); + auto input_dims = getParentEdgeAt(0)->getShape().getStaticDims(); std::size_t actual_axis = 
(axis == -1) ? src_dims.size() : axis; for (size_t i = 0; i < actual_axis; ++i) prefix_size *= input_dims[i]; - std::size_t suffix_size = getParentEdgeAt(0)->getBlob()->size() / prefix_size; + std::size_t suffix_size = getParentEdgeAt(0)->getShape().getElementsCount() / prefix_size; OneHotContext ctx = {this, prefix_size, suffix_size}; OV_SWITCH(MKLDNNPlugin, OneHotExecute, ctx, output_precision.size(), diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp index b4ef82481ca20d..584eb4bce79051 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp @@ -92,8 +92,8 @@ void MKLDNNPadNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << "Incorrect number of output edges"; - const SizeVector srcDims = getParentEdgeAt(DATA_ID)->getDims().ToSizeVector(); - const SizeVector dstDims = getChildEdgeAt(DATA_ID)->getDims().ToSizeVector(); + const SizeVector srcDims = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); + const SizeVector dstDims = getChildEdgeAt(DATA_ID)->getShape().getStaticDims(); if (srcDims.size() != dstDims.size() || padsBegin.size() != srcDims.size() || padsEnd.size() != srcDims.size()) IE_THROW() << errorPrefix << " has incorrect number of input/output dimensions!"; @@ -122,22 +122,26 @@ void MKLDNNPadNode::initSupportedPrimitiveDescriptors() { precision = precision.is_float() ? InferenceEngine::Precision::FP32 : InferenceEngine::Precision::I32; auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - auto srcDims = getParentEdgeAt(DATA_ID)->getDims(); - int numOfDims = srcDims.ToSizeVector().size(); + auto srcDims = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); + int numOfDims = srcDims.size(); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; config.inConfs.resize(isPadValueSpecified ? 
4 : 3); config.outConfs.resize(1); auto pushSupportedPrimitiveDescriptor = [&](memory::format_tag memoryFormat) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(DATA_ID)->getDims(), dataType, memoryFormat); - config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(PADS_BEGIN_ID)->getDims(), memory::data_type::s32, memory::format_tag::x); - config.inConfs[2].desc = MKLDNNMemoryDesc(getParentEdgeAt(PADS_END_ID)->getDims(), memory::data_type::s32, memory::format_tag::x); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(DATA_ID)->getShape().getStaticDims(), dataType, + memoryFormat); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(PADS_BEGIN_ID)->getShape().getStaticDims(), + memory::data_type::s32, memory::format_tag::x); + config.inConfs[2].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(PADS_END_ID)->getShape().getStaticDims(), + memory::data_type::s32, memory::format_tag::x); if (isPadValueSpecified) - config.inConfs[3].desc = MKLDNNMemoryDesc(getParentEdgeAt(PAD_VALUE_ID)->getDims(), memory::data_type::f32, memory::format_tag::x); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(DATA_ID)->getDims(), dataType, memoryFormat); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::ref, memoryFormat}); + config.inConfs[3].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(PAD_VALUE_ID)->getShape().getStaticDims(), + memory::data_type::f32, memory::format_tag::x); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(DATA_ID)->getShape().getStaticDims(), dataType, memoryFormat); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::ref}); }; if (numOfDims == 4) @@ -145,7 +149,7 @@ void MKLDNNPadNode::initSupportedPrimitiveDescriptors() { else if (numOfDims == 5) pushSupportedPrimitiveDescriptor(mkldnn::memory::format_tag::ndhwc); - pushSupportedPrimitiveDescriptor(MKLDNNMemory::GetPlainFormat(getParentEdgeAt(0)->getDims())); + pushSupportedPrimitiveDescriptor(MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(0)->getShape().getRank())); auto canUseBlocked = [=](const size_t blockSize) { return (padMode == CONSTANT && padsBegin[1] % blockSize == 0 && padsEnd[1] % blockSize == 0) || @@ -175,10 +179,11 @@ void MKLDNNPadNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << "Preferable primitive descriptor for Pad " << getName() << " is not set."; - params.sizeData = this->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc.getPrecision().size(); + params.sizeData = this->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc->getPrecision().size(); - params.srcDims = getParentEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims(); - params.dstDims = getChildEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims(); + const auto inBlkDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + params.srcDims = inBlkDesc.getBlockDims(); + params.dstDims = getChildEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); size_t nDims = params.srcDims.size(); params.srcStrides.resize(nDims, 1); @@ -188,13 +193,14 @@ void MKLDNNPadNode::createPrimitive() { params.dstStrides[i] = params.dstStrides[i + 1] * params.dstDims[i + 1]; } - if (getParentEdgeAt(0)->getMemory().GetDesc().isBlockedCFormat()) { + if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c) || + getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c)) { padsBegin[1] /= params.srcDims[params.srcDims.size() - 1]; padsEnd[1] /= 
params.srcDims[params.srcDims.size() - 1]; padsBegin.push_back(0); padsEnd.push_back(0); } else { - auto order = getParentEdgeAt(0)->getDesc().getBlockingDesc().getOrder(); + auto order = inBlkDesc.getOrder(); std::vector newPadsBegin(padsBegin.size(), 0), newPadsEnd(padsEnd.size(), 0); for (size_t i = 0; i < padsBegin.size(); ++i) { newPadsBegin[i] = padsBegin[order[i]]; @@ -304,7 +310,7 @@ void MKLDNNPadNode::padConstant() { auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); if (!selectedPrimitiveDescriptor) IE_THROW() << "CPU Pad node with name '" << getName() << "' doesn't have primitive descriptors."; - InferenceEngine::Precision precision = selectedPrimitiveDescriptor->getConfig().inConfs[0].desc.getPrecision(); + InferenceEngine::Precision precision = selectedPrimitiveDescriptor->getConfig().inConfs[0].desc->getPrecision(); OV_SWITCH(MKLDNNPlugin, PadConstantEmitter, this, precision, OV_CASE(InferenceEngine::Precision::FP32, float), OV_CASE(InferenceEngine::Precision::I32, int32_t), diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp index c7a007d0c6f40f..5d6e900d75d9fd 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp @@ -13,6 +13,7 @@ #include #include #include +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -60,18 +61,18 @@ MKLDNNPoolingNode::MKLDNNPoolingNode(const std::shared_ptr& op, co } } -std::vector MKLDNNPoolingNode::getAvailableFormatsForDims(const MKLDNNDims &dims) const { - if (dims.ndims() == 0) +std::vector MKLDNNPoolingNode::getAvailableFormatsForDims(const Shape &dims) const { + if (dims.getRank() == 0) return {memory::format_tag::x}; - else if (dims.ndims() == 1) + else if (dims.getRank() == 1) return {memory::format_tag::x}; - else if (dims.ndims() == 2) + else if (dims.getRank() == 2) return {memory::format_tag::nc}; - else if (dims.ndims() == 3) + else if (dims.getRank() == 3) return {memory::format_tag::tnc, memory::format_tag::ntc}; - else if (dims.ndims() == 4) + else if (dims.getRank() == 4) return {memory::format_tag::nChw8c, memory::format_tag::nChw16c, memory::format_tag::nhwc, memory::format_tag::nchw}; - else if (dims.ndims() == 5) + else if (dims.getRank() == 5) return {memory::format_tag::nCdhw8c, memory::format_tag::nCdhw16c, memory::format_tag::ndhwc, memory::format_tag::ncdhw}; return {memory::format_tag::any}; } @@ -112,15 +113,17 @@ void MKLDNNPoolingNode::getSupportedDescriptors() { effective_pad_begin = data_pad_begin; effective_pad_end.resize(data_pad_end.size()); - auto parentDims = getParentEdgeAt(0)->getDims(); - auto childDims = getChildEdgeAt(0)->getDims(); - if ((parentDims.ndims() < 4) || (parentDims.ndims() > 5)) + auto parentDims = getParentEdgeAt(0)->getShape().getStaticDims(); + auto childDims = getChildEdgeAt(0)->getShape().getStaticDims(); + const size_t inputRank = getParentEdgeAt(0)->getShape().getRank(); + + if ((inputRank < 4) || (inputRank > 5)) IE_THROW() << "Pooling layer. Unsupported mode. 
Only 4D and 5D blobs are supported as input."; for (int i = 0; i < effective_pad_end.size(); i++) { int krn = kernel[i]; - int src = getParentEdgeAt(0)->getDims()[2 + i]; - int dst = getChildEdgeAt(0)->getDims()[2 + i]; + int src = getParentEdgeAt(0)->getShape().getStaticDims()[2 + i]; + int dst = getChildEdgeAt(0)->getShape().getStaticDims()[2 + i]; int calc_dst = (src - krn + data_pad_begin[i]) / stride[i] + 1; effective_pad_end[i] = (dst - calc_dst) * stride[i]; @@ -130,24 +133,28 @@ void MKLDNNPoolingNode::getSupportedDescriptors() { if (outputDataType == memory::data_type::bf16) outputDataType = memory::data_type::f32; // i8 layers supports only ndhwc and nhwc layouts - MKLDNNMemoryDesc in_candidate{parentDims, inputDataType, parentDims.ndims() == 5 ? memory::format_tag::ndhwc : memory::format_tag::nhwc}; - MKLDNNMemoryDesc out_candidate{childDims, outputDataType, parentDims.ndims() == 5 ? memory::format_tag::ndhwc : memory::format_tag::nhwc}; - createDescriptor({ in_candidate }, { out_candidate }); - } else if ((parentDims.ndims() == 4 || parentDims.ndims() == 5) && parentDims[1] == 1) { + const auto in_candidate = MKLDNNPlugin::make_unique(parentDims, inputDataType, inputRank == 5 ? + memory::format_tag::ndhwc : memory::format_tag::nhwc); + const auto out_candidate = MKLDNNPlugin::make_unique(childDims, outputDataType, inputRank == 5 ? + memory::format_tag::ndhwc : memory::format_tag::nhwc); + createDescriptor({ in_candidate.get() }, { out_candidate.get() }); + } else if ((inputRank == 4 || inputRank == 5) && parentDims[1] == 1) { // WA. We should force planar layout since it provides better performance - MKLDNNMemoryDesc in_candidate{parentDims, inputDataType, parentDims.ndims() == 5 ? memory::format_tag::ncdhw : memory::format_tag::nchw}; - MKLDNNMemoryDesc out_candidate{childDims, outputDataType, parentDims.ndims() == 5 ? memory::format_tag::ncdhw : memory::format_tag::nchw}; - createDescriptor({ in_candidate }, { out_candidate }); + const auto in_candidate = MKLDNNPlugin::make_unique(parentDims, inputDataType, inputRank == 5 ? + memory::format_tag::ncdhw : memory::format_tag::nchw); + const auto out_candidate = MKLDNNPlugin::make_unique(childDims, outputDataType, inputRank == 5 ? 
+ memory::format_tag::ncdhw : memory::format_tag::nchw); + createDescriptor({ in_candidate.get() }, { out_candidate.get() }); } else { if (inputDataType != memory::data_type::bf16) { inputDataType = memory::data_type::f32; outputDataType = memory::data_type::f32; } // It doesn't support any format - for (auto format : getAvailableFormatsForDims(parentDims)) { - MKLDNNMemoryDesc in_candidate{parentDims, inputDataType, format}; - MKLDNNMemoryDesc out_candidate{childDims, outputDataType, format}; - createDescriptor({in_candidate}, {out_candidate}); + for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getShape())) { + const auto in_candidate = MKLDNNPlugin::make_unique(parentDims, inputDataType, format); + const auto out_candidate = MKLDNNPlugin::make_unique(childDims, outputDataType, format); + createDescriptor({in_candidate.get()}, {out_candidate.get()}); } } } @@ -172,10 +179,10 @@ bool MKLDNNPoolingNode::created() const { return getType() == Pooling; } -void MKLDNNPoolingNode::createDescriptor(const std::vector &inputDesc, - const std::vector &outputDesc) { - MKLDNNMemoryDesc in_candidate(inputDesc[0]); - MKLDNNMemoryDesc out_candidate(outputDesc[0]); +void MKLDNNPoolingNode::createDescriptor(const std::vector &inputDesc, + const std::vector &outputDesc) { + MKLDNNMemoryDesc in_candidate = MemoryDescUtils::convertToMKLDNNMemoryDesc(*inputDesc[0]); + MKLDNNMemoryDesc out_candidate = MemoryDescUtils::convertToMKLDNNMemoryDesc(*outputDesc[0]); mkldnn::algorithm alg; if (algorithm == PoolingAvg) { @@ -240,21 +247,23 @@ void MKLDNNPoolingNode::initSupportedPrimitiveDescriptors() { for (auto& desc : descs) { auto itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr); while (static_cast(itpd)) { - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; for (size_t i = 0; i < descInputNumbers(desc); i++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(getSrcMemDesc(itpd, i)); + dataConfig.desc = MemoryDescUtils::applyUndefinedOffset(*getSrcMemDesc(itpd, i)); + dataConfig.desc = getSrcMemDesc(itpd, i); config.inConfs.push_back(dataConfig); } for (size_t i = 0; i < descOutputNumbers(desc); i++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = canBeInPlace() ? 
0 : -1; dataConfig.constant = false; - dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(getDstMemDesc(itpd, i)); + dataConfig.desc = MemoryDescUtils::applyUndefinedOffset(*getDstMemDesc(itpd, i)); + dataConfig.desc = getDstMemDesc(itpd, i); config.outConfs.push_back(dataConfig); } impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str()); @@ -266,23 +275,23 @@ void MKLDNNPoolingNode::initSupportedPrimitiveDescriptors() { } } -void MKLDNNPoolingNode::initDescriptor(const InferenceEngine::LayerConfig &config) { +void MKLDNNPoolingNode::initDescriptor(const NodeConfig& config) { auto* selectedPD = getSelectedPrimitiveDescriptor(); if (!selectedPD) { return; } - std::vector inDescs; + std::vector inDescs; for (const auto& inConf : config.inConfs) - inDescs.push_back(inConf.desc); - std::vector outDescs; + inDescs.push_back(inConf.desc.get()); + std::vector outDescs; for (const auto& outConf : config.outConfs) - outDescs.push_back(outConf.desc); + outDescs.push_back(outConf.desc.get()); createDescriptor({inDescs}, {outDescs}); mkldnn::primitive_attr attr; setPostOps(attr); - InferenceEngine::LayerConfig rightConfig = selectedPD->getConfig(); + NodeConfig rightConfig = selectedPD->getConfig(); size_t selected_count = 0; for (size_t j = 0; j < descs.size(); j++) { const auto &desc = descs[j]; @@ -291,10 +300,10 @@ void MKLDNNPoolingNode::initDescriptor(const InferenceEngine::LayerConfig &confi itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr); while (itpd) { - InferenceEngine::LayerConfig cfg; + NodeConfig cfg; cfg.dynBatchSupport = true; for (size_t i = 0; i < descInputNumbers(desc); i++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = canBeInPlace() ? 0 : -1; dataConfig.constant = false; dataConfig.desc = getSrcMemDesc(itpd, i); @@ -302,7 +311,7 @@ void MKLDNNPoolingNode::initDescriptor(const InferenceEngine::LayerConfig &confi } for (size_t i = 0; i < descOutputNumbers(desc); i++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; dataConfig.desc = getDstMemDesc(itpd, i); @@ -332,20 +341,18 @@ void MKLDNNPoolingNode::initDescriptor(const InferenceEngine::LayerConfig &confi return; for (size_t i = 0; i < selectedConfig.inConfs.size(); i++) { - if (selectedConfig.inConfs[i].desc.getLayout() != InferenceEngine::Layout::ANY && - !MKLDNNExtensionUtils::initTensorsAreEqual(selectedConfig.inConfs[i].desc, config.inConfs[i].desc)) + if (!selectedConfig.inConfs[i].desc->isCompatible(*config.inConfs[i].desc)) IE_THROW() << "Incorrect descriptor for node: " << getName(); } for (size_t i = 0; i < selectedConfig.outConfs.size(); i++) { - if (selectedConfig.outConfs[i].desc.getLayout() != InferenceEngine::Layout::ANY && - !MKLDNNExtensionUtils::initTensorsAreEqual(selectedConfig.outConfs[i].desc, config.outConfs[i].desc)) + if (!selectedConfig.outConfs[i].desc->isCompatible(*config.outConfs[i].desc)) IE_THROW() << "Incorrect descriptor for node: " << getName(); } rightConfig = config; } - selectedPD->getConfig() = rightConfig; + selectedPD->setConfig(rightConfig); } void MKLDNNPoolingNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h index 1f6acf58b78682..a594e774e47709 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h +++ 
b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h @@ -16,12 +16,12 @@ class MKLDNNPoolingNode : public MKLDNNNode { public: MKLDNNPoolingNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); - void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) override; - std::vector getAvailableFormatsForDims(const MKLDNNDims &dims) const override; + void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) override; + std::vector getAvailableFormatsForDims(const Shape &dims) const override; void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; - void initDescriptor(const InferenceEngine::LayerConfig &config) override; + void initDescriptor(const NodeConfig& config) override; void createPrimitive() override; bool created() const override; bool canBeInPlace() const override { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_proposal_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_proposal_node.cpp index 584960373aeb2e..e7421d82f1270c 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_proposal_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_proposal_node.cpp @@ -1,7 +1,6 @@ // Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" #include #include @@ -141,17 +140,17 @@ void MKLDNNProposalNode::initSupportedPrimitiveDescriptors() { return; if (store_prob) { - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } else { - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } } @@ -166,8 +165,8 @@ void MKLDNNProposalNode::execute(mkldnn::stream strm) { if (store_prob) outProbData = reinterpret_cast (getChildEdgesAtPort(PROBABILITIES_OUT_IDX)[0]->getMemoryPtr()->GetPtr()); - auto inProbDims = getParentEdgeAt(0)->getDims().ToSizeVector(); - const size_t imgInfoSize = getParentEdgeAt(2)->getDims()[0]; + auto inProbDims = getParentEdgeAt(0)->getShape().getStaticDims(); + const size_t imgInfoSize = getParentEdgeAt(2)->getShape().getStaticDims()[0]; // input image height & width const float imgHeight = imgInfoData[0]; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.cpp index 393ef27921a7e6..e56d6d2c245563 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.cpp @@ -12,7 +12,7 @@ #include #include "mkldnn_psroi_pooling_node.h" #include -#include +#include using namespace MKLDNNPlugin; using namespace InferenceEngine; @@ 
-139,27 +139,27 @@ void MKLDNNPSROIPoolingNode::initSupportedPrimitiveDescriptors() { auto dataPrecision = getOriginalInputPrecisionAtPort(0) == Precision::BF16 ? Precision::BF16 : Precision::FP32; if (getAlgorithm() == Algorithm::PSROIPoolingAverage || getAlgorithm() == Algorithm::PSROIPoolingBilinear) { - std::vector> dataFomats{ - {TensorDescCreatorTypes::ncsp, TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::nspc, TensorDescCreatorTypes::nspc}, - {TensorDescCreatorTypes::nCsp16c, TensorDescCreatorTypes::nCsp16c}, - {TensorDescCreatorTypes::nCsp8c, TensorDescCreatorTypes::nCsp8c} + std::vector> dataFomats{ + {LayoutType::ncsp, LayoutType::ncsp}, + {LayoutType::nspc, LayoutType::nspc}, + {LayoutType::nCsp16c, LayoutType::nCsp16c}, + {LayoutType::nCsp8c, LayoutType::nCsp8c} }; for (const auto &df : dataFomats) { - addSupportedPrimDesc({{df.first, dataPrecision}, {TensorDescCreatorTypes::ncsp, Precision::FP32}}, + addSupportedPrimDesc({{df.first, dataPrecision}, {LayoutType::ncsp, Precision::FP32}}, {{df.second, dataPrecision}}, impl_type); } } else if (getAlgorithm() == Algorithm::PSROIPoolingBilinearDeformable && noTrans) { - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, dataPrecision}, {TensorDescCreatorTypes::ncsp, Precision::FP32}}, - {{TensorDescCreatorTypes::ncsp, dataPrecision}}, + addSupportedPrimDesc({{LayoutType::ncsp, dataPrecision}, {LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, dataPrecision}}, impl_type); } else if (getAlgorithm() == Algorithm::PSROIPoolingBilinearDeformable) { - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, dataPrecision}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, - {{TensorDescCreatorTypes::ncsp, dataPrecision}}, + addSupportedPrimDesc({{LayoutType::ncsp, dataPrecision}, + {LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, dataPrecision}}, impl_type); } } @@ -182,19 +182,18 @@ inline float bilinearInterp(const inputType* data, const float x, const float y, return value; } -void MKLDNNPSROIPoolingNode::unpackParams(const TensorDesc& srcDesc, const TensorDesc& dstDesc, +void MKLDNNPSROIPoolingNode::unpackParams(const BlockedMemoryDesc& srcDesc, const BlockedMemoryDesc& dstDesc, int& hInputStride, int& wInputStride, int& hOutputStride, int& wOutputStride, - Layout& inFmt, Layout& outFmt, int& inBlockSize, int& outBlockSize, int& outBlockCount, unsigned long& inputChannelsPadding, unsigned long& outputChannelsPadding) { - inFmt = srcDesc.getLayout(); - outFmt = dstDesc.getLayout(); - int expectedInBlockDimsSize = (inFmt == Layout::BLOCKED ? 5 : 4); - int expectedOutBlockDimsSize = (outFmt == Layout::BLOCKED ? 5 : 4); - auto inBlkDims = srcDesc.getBlockingDesc().getBlockDims(); - auto outBlkDims = dstDesc.getBlockingDesc().getBlockDims(); + const bool inpIsBlk = srcDesc.hasLayoutType(LayoutType::nCsp16c) || srcDesc.hasLayoutType(LayoutType::nCsp8c); + const bool outIsBlk = dstDesc.hasLayoutType(LayoutType::nCsp16c) || dstDesc.hasLayoutType(LayoutType::nCsp8c); + int expectedInBlockDimsSize = (inpIsBlk ? 5 : 4); + int expectedOutBlockDimsSize = (outIsBlk ? 
5 : 4); + auto inBlkDims = srcDesc.getBlockDims(); + auto outBlkDims = dstDesc.getBlockDims(); if (inBlkDims.size() != expectedInBlockDimsSize) IE_THROW() << errorPrefix << " has unexpected size of blocking dims in input (given " << inBlkDims.size() << ", expected " << expectedInBlockDimsSize << ")"; @@ -202,15 +201,15 @@ void MKLDNNPSROIPoolingNode::unpackParams(const TensorDesc& srcDesc, const Tenso IE_THROW() << errorPrefix << " has unexpected size of blocking dims in output (given " << outBlkDims.size() << ", expected " << expectedOutBlockDimsSize << ")"; - inBlockSize = (inFmt == Layout::BLOCKED ? srcDesc.getBlockingDesc().getBlockDims()[4] : 1); - outBlockSize = (outFmt == Layout::BLOCKED ? dstDesc.getBlockingDesc().getBlockDims()[4] : 1); - inputChannelsPadding = srcDesc.getBlockingDesc().getBlockDims()[1] * inBlockSize; - outputChannelsPadding = dstDesc.getBlockingDesc().getBlockDims()[1] * outBlockSize; + inBlockSize = (inpIsBlk ? srcDesc.getBlockDims()[4] : 1); + outBlockSize = (outIsBlk ? dstDesc.getBlockDims()[4] : 1); + inputChannelsPadding = srcDesc.getBlockDims()[1] * inBlockSize; + outputChannelsPadding = dstDesc.getBlockDims()[1] * outBlockSize; outBlockCount = outputChannelsPadding / outBlockSize; int hOutStrIndex = 0, wOutStrIndex = 0, hInStrIndex = 0, wInStrIndex = 0; - const auto& outOrder = dstDesc.getBlockingDesc().getOrder(); - const auto& inOrder = srcDesc.getBlockingDesc().getOrder(); + const auto& outOrder = dstDesc.getOrder(); + const auto& inOrder = srcDesc.getOrder(); for (int i = 0; i < outOrder.size(); i++) { if (outOrder[i] == 2) hOutStrIndex = i; if (outOrder[i] == 3) wOutStrIndex = i; @@ -219,21 +218,20 @@ void MKLDNNPSROIPoolingNode::unpackParams(const TensorDesc& srcDesc, const Tenso if (inOrder[i] == 2) hInStrIndex = i; if (inOrder[i] == 3) wInStrIndex = i; } - hInputStride = srcDesc.getBlockingDesc().getStrides()[hInStrIndex]; - wInputStride = srcDesc.getBlockingDesc().getStrides()[wInStrIndex]; - hOutputStride = dstDesc.getBlockingDesc().getStrides()[hOutStrIndex]; - wOutputStride = dstDesc.getBlockingDesc().getStrides()[wOutStrIndex]; + hInputStride = srcDesc.getStrides()[hInStrIndex]; + wInputStride = srcDesc.getStrides()[wInStrIndex]; + hOutputStride = dstDesc.getStrides()[hOutStrIndex]; + wOutputStride = dstDesc.getStrides()[wOutStrIndex]; } template void MKLDNNPSROIPoolingNode::executeAverage(const inputType *srcData, outputType *dstData, const float *bottomRois, const int n, const int roiBatchInd, - const TensorDesc& srcDesc, const TensorDesc& dstDesc) { - Layout inFmt, outFmt; + const BlockedMemoryDesc& srcDesc, const BlockedMemoryDesc& dstDesc) { int inBlockSize, outBlockSize, outBlockCount, hInputStride, wInputStride, hOutputStride, wOutputStride; unsigned long inputChannelsPadding, outputChannelsPadding; unpackParams(srcDesc, dstDesc, hInputStride, wInputStride, hOutputStride, wOutputStride, - inFmt, outFmt, inBlockSize, outBlockSize, outBlockCount, inputChannelsPadding, outputChannelsPadding); + inBlockSize, outBlockSize, outBlockCount, inputChannelsPadding, outputChannelsPadding); const float roiStartW = static_cast(round(bottomRois[1])) * spatialScale; const float roiStartH = static_cast(round(bottomRois[2])) * spatialScale; const float roiEndW = static_cast(round(bottomRois[3] + 1.0f)) * spatialScale; @@ -273,7 +271,7 @@ void MKLDNNPSROIPoolingNode::executeAverage(const inputType *srcData, outputType dstData[dstIndex] = outSum / binArea; } }; - if (inFmt == Layout::NHWC) { + if (srcDesc.hasLayoutType(LayoutType::nspc)) { 
parallel_for2d(nh, nw, [&](int h, int w) { const int binOffsetOutput = n * nc * nh * nw; const int binOffsetInput = roiBatchInd * channels * height * width; @@ -282,10 +280,10 @@ void MKLDNNPSROIPoolingNode::executeAverage(const inputType *srcData, outputType avgPsroi(c, h, w, 0, 0, binOffsetInput + gc, binOffsetOutput + c); } }); - } else if (inFmt == Layout::NCHW) { + } else if (srcDesc.hasLayoutType(LayoutType::ncsp)) { parallel_for3d(nc, nh, nw, [&](int c, int h, int w) { const int gc = (c * groupSize + h) * groupSize + w; - const int outputBlockResidual = (outFmt == Layout::NCHW ? 0 : c % inBlockSize); + const int outputBlockResidual = (dstDesc.hasLayoutType(LayoutType::ncsp) ? 0 : c % inBlockSize); const int outputBlockIdx = (c / outBlockSize) * outBlockSize; const int binOffsetInput = (roiBatchInd * inputChannelsPadding + gc) * height * width; const int binOffsetOutput = (n * outputChannelsPadding + outputBlockIdx) * nh * nw; @@ -297,8 +295,8 @@ void MKLDNNPSROIPoolingNode::executeAverage(const inputType *srcData, outputType int cEnd = (blkIdx == outBlockCount - 1 ? nc : cStart + outBlockSize); for (int c = cStart; c < cEnd; c++) { const int gc = (c * groupSize + h) * groupSize + w; - const int inputBlockResidual = (inFmt == Layout::NCHW ? 0 : gc % inBlockSize); - const int outputBlockResidual = (outFmt == Layout::NCHW ? 0 : c % inBlockSize); + const int inputBlockResidual = (srcDesc.hasLayoutType(LayoutType::ncsp) ? 0 : gc % inBlockSize); + const int outputBlockResidual = (dstDesc.hasLayoutType(LayoutType::ncsp) ? 0 : c % inBlockSize); const int inputBlockIdx = (gc / inBlockSize) * inBlockSize; const int outputBlockIdx = (c / outBlockSize) * outBlockSize; const int binOffsetInput = (roiBatchInd * inputChannelsPadding + inputBlockIdx) * height * width; @@ -312,12 +310,11 @@ void MKLDNNPSROIPoolingNode::executeAverage(const inputType *srcData, outputType template void MKLDNNPSROIPoolingNode::executeBilinear(const inputType *srcData, outputType *dstData, const float *bottomRois, const int currentRoi, const int roiBatchInd, - const TensorDesc& srcDesc, const TensorDesc& dstDesc) { - Layout inFmt, outFmt; + const BlockedMemoryDesc& srcDesc, const BlockedMemoryDesc& dstDesc) { int inBlockSize, outBlockSize, outBlockCount, hInputStride, wInputStride, hOutputStride, wOutputStride; unsigned long inputChannelsPadding, outputChannelsPadding; unpackParams(srcDesc, dstDesc, hInputStride, wInputStride, hOutputStride, wOutputStride, - inFmt, outFmt, inBlockSize, outBlockSize, outBlockCount, inputChannelsPadding, outputChannelsPadding); + inBlockSize, outBlockSize, outBlockCount, inputChannelsPadding, outputChannelsPadding); const float roiStartW = bottomRois[1] * spatialScale; const float roiStartH = bottomRois[2] * spatialScale; const float roiEndW = bottomRois[3] * spatialScale; @@ -340,13 +337,14 @@ void MKLDNNPSROIPoolingNode::executeBilinear(const inputType *srcData, outputTyp const float inY = nh > 1 ? (h * heightScale + boxYmin * (height - 1)) : 0.5f * (boxYmin + boxYmax) * (height - 1); for (size_t binX = 0; binX < spatialBinsX; binX++) { size_t gc = c + (binY * spatialBinsX + binX) * nc; - if (inFmt == Layout::NHWC) { + if (srcDesc.hasLayoutType(LayoutType::nspc)) { binOffIn = roiBatchInd * channels * height * width + gc; inBlkRes = 0; } else { // nchw, nChw16c, nChw8c const int inputBlockIdx = (gc / inBlockSize) * inBlockSize; binOffIn = (roiBatchInd * inputChannelsPadding + inputBlockIdx) * height * width; - inBlkRes = (inFmt == Layout::BLOCKED ? 
gc % inBlockSize : 0); + inBlkRes = ((srcDesc.hasLayoutType(LayoutType::nCsp16c) || srcDesc.hasLayoutType(LayoutType::nCsp8c)) + ? gc % inBlockSize : 0); } const auto *bottomData = srcData + binOffIn; @@ -386,14 +384,14 @@ void MKLDNNPSROIPoolingNode::executeBilinear(const inputType *srcData, outputTyp dstData[dstIndex] = accum; }; - if (inFmt == Layout::NHWC) { + if (srcDesc.hasLayoutType(LayoutType::nspc)) { const int binOffsetOutput = currentRoi * nc * nh * nw; parallel_for2d(nh, nw, [&](int h, int w) { for (int c = 0; c < nc; c++) { bilinearPsroi(c, h, w, 0, binOffsetOutput + c); } }); - } else if (inFmt == Layout::NCHW) { + } else if (srcDesc.hasLayoutType(LayoutType::ncsp)) { parallel_for3d(nc, nh, nw, [&](int c, int h, int w) { bilinearPsroi(c, h, w, 0, (currentRoi * outputChannelsPadding + c) * binCount); }); @@ -404,7 +402,8 @@ void MKLDNNPSROIPoolingNode::executeBilinear(const inputType *srcData, outputTyp for (int c = cStart; c < cEnd; c++) { const int outputBlockIdx = (c / inBlockSize) * inBlockSize; const int binOffsetOutput = (currentRoi * outputChannelsPadding + outputBlockIdx) * binCount; - const int outputBlockResidual = (inFmt == Layout::BLOCKED ? c % inBlockSize : 0); + const int outputBlockResidual = ((srcDesc.hasLayoutType(LayoutType::nCsp16c) || srcDesc.hasLayoutType(LayoutType::nCsp8c)) + ? c % inBlockSize : 0); bilinearPsroi(c, h, w, outputBlockResidual, binOffsetOutput); } }); @@ -480,8 +479,8 @@ void MKLDNNPSROIPoolingNode::executeSpecified() { const auto *bottomRoisBeginning = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - auto srcDesc = getParentEdgeAt(0)->getDesc(); - auto dstDesc = getChildEdgeAt(0)->getDesc(); + auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); int realRois = 0; for (; realRois < nn; realRois++) { @@ -497,7 +496,7 @@ void MKLDNNPSROIPoolingNode::executeSpecified() { int channelsEachClass = outputDim; if (!noTrans) { bottomTrans = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->GetPtr()); - numClasses = static_cast(getParentEdgeAt(2)->getDesc().getDims()[1]) / 2; + numClasses = static_cast(getParentEdgeAt(2)->getShape().getStaticDims()[1]) / 2; channelsEachClass /= numClasses; } @@ -534,8 +533,8 @@ struct MKLDNNPSROIPoolingNode::PSROIPoolingExecute { }; void MKLDNNPSROIPoolingNode::execute(mkldnn::stream strm) { - auto inputPrec = getParentEdgesAtPort(0)[0]->getDesc().getPrecision(); - auto outputPrec = getChildEdgesAtPort(0)[0]->getDesc().getPrecision(); + auto inputPrec = getParentEdgesAtPort(0)[0]->getMemory().GetDesc().getPrecision(); + auto outputPrec = getChildEdgesAtPort(0)[0]->getMemory().GetDesc().getPrecision(); if (!((inputPrec == Precision::BF16 && outputPrec == Precision::BF16) || (inputPrec == Precision::FP32 && outputPrec == Precision::FP32))) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.h index 24e015d3a6da97..45f275fe1ddff5 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.h @@ -50,10 +50,9 @@ class MKLDNNPSROIPoolingNode : public MKLDNNNode { std::string errorPrefix; - void unpackParams(const InferenceEngine::TensorDesc& srcDesc, const InferenceEngine::TensorDesc& dstDesc, + void unpackParams(const BlockedMemoryDesc& srcDesc, const 
BlockedMemoryDesc& dstDesc, int& hInputStride, int& wInputStride, int& hOutputStride, int& wOutputStride, - InferenceEngine::Layout& inFmt, InferenceEngine::Layout& outFmt, int& inBlockSize, int& outBlockSize, int& outBlockCount, unsigned long& inputChannelsPadding, unsigned long& outputChannelsPadding); @@ -61,12 +60,12 @@ class MKLDNNPSROIPoolingNode : public MKLDNNNode { template void executeAverage(const inputType *srcData, outputType *dstData, const float *bottomRois, const int n, const int roiBatchInd, - const InferenceEngine::TensorDesc& srcDesc, const InferenceEngine::TensorDesc& dstDesc); + const BlockedMemoryDesc& srcDesc, const BlockedMemoryDesc& dstDesc); template void executeBilinear(const inputType *srcData, outputType *dstData, const float *bottomRois, const int currentRoi, const int roiBatchInd, - const InferenceEngine::TensorDesc& srcDesc, const InferenceEngine::TensorDesc& dstDesc); + const BlockedMemoryDesc& srcDesc, const BlockedMemoryDesc& dstDesc); template void executeBilinearDeformable(const inputType *srcData, outputType *dstData, const float *bottomRois, diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_range_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_range_node.cpp index 33e625fce6f88a..86818d36140967 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_range_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_range_node.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" - #include #include @@ -65,8 +63,8 @@ void MKLDNNRangeNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - std::vector inDataConf; - std::vector outDataConf; + std::vector inDataConf; + std::vector outDataConf; if (!(getOriginalInputPrecisionAtPort(RANGE_START) == Precision::I32 && getOriginalInputPrecisionAtPort(RANGE_LIMIT) == Precision::I32 && @@ -78,23 +76,23 @@ void MKLDNNRangeNode::initSupportedPrimitiveDescriptors() { getOriginalOutputPrecisionAtPort(0) == Precision::FP32)) { inDataConf.reserve(getOriginalInputsNumber()); for (int i = 0; i < getOriginalInputsNumber(); ++i) - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::FP32); + inDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); outDataConf.reserve(1); - outDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::FP32); + outDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); addSupportedPrimDesc(inDataConf, outDataConf, impl_desc_type::ref_any); } else { inDataConf.reserve(getOriginalInputsNumber()); for (int i = 0; i < getOriginalInputsNumber(); ++i) - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp); + inDataConf.emplace_back(LayoutType::ncsp); outDataConf.reserve(1); - outDataConf.emplace_back(TensorDescCreatorTypes::ncsp); + outDataConf.emplace_back(LayoutType::ncsp); addSupportedPrimDesc(inDataConf, outDataConf, impl_desc_type::ref_any); } } void MKLDNNRangeNode::execute(mkldnn::stream strm) { StatusCode retcode = OK; - switch (getParentEdgeAt(0)->getDesc().getPrecision()) { + switch (getParentEdgeAt(0)->getMemory().GetDesc().getPrecision()) { case Precision::FP32: retcode = rangeKernel(); break; @@ -112,7 +110,7 @@ void MKLDNNRangeNode::execute(mkldnn::stream strm) { template InferenceEngine::StatusCode MKLDNNRangeNode::rangeKernel() noexcept { - size_t dst_size = (getChildEdgesAtPort(0)[0]->getDims())[0]; + size_t dst_size = (getChildEdgesAtPort(0)[0]->getShape().getStaticDims())[0]; data_t* dst_data = 
reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); data_t start = reinterpret_cast(getParentEdgeAt(RANGE_START)->getMemoryPtr()->GetPtr())[0]; data_t limit = reinterpret_cast(getParentEdgeAt(RANGE_LIMIT)->getMemoryPtr()->GetPtr())[0]; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.cpp index 7828bc55f276cf..c76156ec4ae464 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.cpp @@ -1405,18 +1405,18 @@ void MKLDNNReduceNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << " gets incorrect number of output edges!"; - if (getParentEdgeAt(REDUCE_INDEXES)->getDims().ndims() != 1) { + if (getParentEdgeAt(REDUCE_INDEXES)->getShape().getRank() != 1) { IE_THROW() << errorPrefix << " gets incorrect index vector dimension! Index vector should be 1 dimension."; } if (keep_dims) { - if (getParentEdgeAt(REDUCE_DATA)->getDims().ndims() != getChildEdgeAt(0)->getDims().ndims()) + if (getParentEdgeAt(REDUCE_DATA)->getShape().getRank() != getChildEdgeAt(0)->getShape().getRank()) IE_THROW() << errorPrefix << " gets incorrect number of input/output dimensions!"; } else { // In fact, after the Reduce operation, the shape must be a scalar if the previous one was 1d. // But for now, 0d tensor (scalar) is emulated as 1d tensor. Skip checking in such cases. - bool is_emulated_0d_as_1d = getParentEdgeAt(REDUCE_DATA)->getDims().ndims() == 1 && getChildEdgeAt(0)->getDims().ndims() == 1; - if (getParentEdgeAt(REDUCE_DATA)->getDims().ndims() <= getChildEdgeAt(0)->getDims().ndims() && !is_emulated_0d_as_1d) + bool is_emulated_0d_as_1d = getParentEdgeAt(REDUCE_DATA)->getShape().getRank() == 1 && getChildEdgeAt(0)->getShape().getRank() == 1; + if (getParentEdgeAt(REDUCE_DATA)->getShape().getRank() <= getChildEdgeAt(0)->getShape().getRank() && !is_emulated_0d_as_1d) IE_THROW() << errorPrefix << "gets incorrect number of input/output dimensions!"; } } @@ -1436,7 +1436,7 @@ void MKLDNNReduceNode::initSupportedPrimitiveDescriptors() { Precision inputPrecision = getOriginalInputPrecisionAtPort(REDUCE_DATA); Precision outputPrecision = getOriginalOutputPrecisionAtPort(0); - jit_mode = (mayiuse(cpu::x64::sse41)) && getParentEdgeAt(REDUCE_DATA)->getDims().ndims() <= 5 && + jit_mode = (mayiuse(cpu::x64::sse41)) && getParentEdgeAt(REDUCE_DATA)->getShape().getRank() <= 5 && std::find(std::begin(supportedPrecisions), std::end(supportedPrecisions), inputPrecision) != std::end(supportedPrecisions) && std::find(std::begin(supportedPrecisions), std::end(supportedPrecisions), outputPrecision) != std::end(supportedPrecisions); @@ -1461,7 +1461,7 @@ void MKLDNNReduceNode::initSupportedPrimitiveDescriptors() { src_data_size = MKLDNNExtensionUtils::sizeOfDataType(inputDataType); dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(outputDataType); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; config.inConfs.resize(2); config.outConfs.resize(1); @@ -1474,10 +1474,12 @@ void MKLDNNReduceNode::initSupportedPrimitiveDescriptors() { auto pushDesc = [&](memory::format_tag inFormat, memory::format_tag outFormat, memory::data_type inDataType, memory::data_type outDataType, impl_desc_type impl_type) { - config.inConfs[REDUCE_DATA].desc = MKLDNNMemoryDesc(getParentEdgeAt(REDUCE_DATA)->getDims(), inDataType, inFormat); - config.inConfs[REDUCE_INDEXES].desc = 
MKLDNNMemoryDesc(getParentEdgeAt(REDUCE_INDEXES)->getDims(), memory::data_type::s32, memory::format_tag::x); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outDataType, outFormat); - supportedPrimitiveDescriptors.push_back({config, impl_type, outFormat}); + config.inConfs[REDUCE_DATA].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(REDUCE_DATA)->getShape().getStaticDims(), + inDataType, inFormat); + config.inConfs[REDUCE_INDEXES].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(REDUCE_INDEXES)->getShape().getStaticDims(), + memory::data_type::s32, memory::format_tag::x); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outDataType, outFormat); + supportedPrimitiveDescriptors.push_back({config, impl_type}); }; if (jit_mode) { @@ -1488,16 +1490,16 @@ void MKLDNNReduceNode::initSupportedPrimitiveDescriptors() { impl_type = impl_desc_type::jit_avx2; } - pushDesc(MKLDNNMemory::GetPlainFormat(memory::dims(getParentEdgeAt(REDUCE_DATA)->getDims().ndims())), - MKLDNNMemory::GetPlainFormat(memory::dims(getChildEdgeAt(0)->getDims().ndims())), inputDataType, outputDataType, impl_type); + pushDesc(MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(REDUCE_DATA)->getShape().getRank()), + MKLDNNMemory::GetPlainFormatByRank(getChildEdgeAt(0)->getShape().getRank()), inputDataType, outputDataType, impl_type); if (keep_dims) { - if (getParentEdgeAt(REDUCE_DATA)->getDims().ndims() == 4 && getParentEdgeAt(REDUCE_DATA)->getDims().ToSizeVector()[1] > 1) { + if (getParentEdgeAt(REDUCE_DATA)->getShape().getRank() == 4 && getParentEdgeAt(REDUCE_DATA)->getShape().getStaticDims()[1] > 1) { if (mayiuse(cpu::x64::avx512_common)) { pushDesc(memory::format_tag::nChw16c, memory::format_tag::nChw16c, inputDataType, outputDataType, impl_type); } else if (mayiuse(cpu::x64::avx2) || mayiuse(cpu::x64::sse41)) { pushDesc(memory::format_tag::nChw8c, memory::format_tag::nChw8c, inputDataType, outputDataType, impl_type); } - } else if (getParentEdgeAt(REDUCE_DATA)->getDims().ndims() == 5 && getParentEdgeAt(REDUCE_DATA)->getDims().ToSizeVector()[1] > 1) { + } else if (getParentEdgeAt(REDUCE_DATA)->getShape().getRank() == 5 && getParentEdgeAt(REDUCE_DATA)->getShape().getStaticDims()[1] > 1) { if (mayiuse(cpu::x64::avx512_common)) { pushDesc(memory::format_tag::nCdhw16c, memory::format_tag::nCdhw16c, inputDataType, outputDataType, impl_type); } else if (mayiuse(cpu::x64::avx2) || mayiuse(cpu::x64::sse41)) { @@ -1506,8 +1508,8 @@ void MKLDNNReduceNode::initSupportedPrimitiveDescriptors() { } } } else { - pushDesc(MKLDNNMemory::GetPlainFormat(memory::dims(getParentEdgeAt(REDUCE_DATA)->getDims().ndims())), - MKLDNNMemory::GetPlainFormat(memory::dims(getChildEdgeAt(0)->getDims().ndims())), + pushDesc(MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(REDUCE_DATA)->getShape().getRank()), + MKLDNNMemory::GetPlainFormatByRank(getChildEdgeAt(0)->getShape().getRank()), memory::data_type::f32, memory::data_type::f32, impl_desc_type::ref); } } @@ -1524,11 +1526,11 @@ void MKLDNNReduceNode::createPrimitive() { IE_THROW() << errorPrefix << " has nullable preferable primitive descriptor"; auto selectedPD = getSelectedPrimitiveDescriptor(); - planar_layout = getParentEdgeAt(REDUCE_DATA)->getMemory().GetDesc().isPlainFormat(); + planar_layout = getParentEdgeAt(REDUCE_DATA)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp); auto jcp = jit_reduce_config_params(); - jcp.src_dt = 
MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().inConfs[REDUCE_DATA].desc.getPrecision()); - jcp.dst_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().outConfs[0].desc.getPrecision()); + jcp.src_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().inConfs[REDUCE_DATA].desc->getPrecision()); + jcp.dst_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().outConfs[0].desc->getPrecision()); jcp.src_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.src_dt); jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.dst_dt); jcp.planar_layout = planar_layout; @@ -1564,8 +1566,8 @@ void MKLDNNReduceNode::execute(mkldnn::stream strm) { const auto idx_data = reinterpret_cast(srcIndexesMemPtr->GetData()); size_t dst_size = dstMemPtr->GetSize(); - src_dims = getParentEdgeAt(REDUCE_DATA)->getDesc().getDims(); - src_strides = getParentEdgeAt(REDUCE_DATA)->getDesc().getBlockingDesc().getStrides(); + src_dims = getParentEdgeAt(REDUCE_DATA)->getShape().getStaticDims(); + src_strides = getParentEdgeAt(REDUCE_DATA)->getMemory().GetDescWithType().getStrides(); dims_size = src_dims.size(); calc_process_dst_dims(idx_data); @@ -1930,9 +1932,9 @@ inline void MKLDNNReduceNode::init_dst_data(uint8_t *out_ptr, size_t dst_size) { inline void MKLDNNReduceNode::calc_process_dst_dims(const int32_t *idx_data) { SizeVector out_dims; - SizeVector dst_dims = getChildEdgeAt(0)->getDesc().getDims(); + SizeVector dst_dims = getChildEdgeAt(0)->getShape().getStaticDims(); std::set axes; - for (size_t i = 0; i < getParentEdgeAt(REDUCE_INDEXES)->getDims()[0]; i++) { + for (size_t i = 0; i < getParentEdgeAt(REDUCE_INDEXES)->getShape().getStaticDims()[0]; i++) { int32_t axis = idx_data[i]; if (axis < 0) axis += src_dims.size(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.cpp index e4950732ab6f15..f7ddad8b6794a4 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.cpp @@ -6,6 +6,7 @@ #include #include #include +#include "common/blocked_desc_creator.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -28,45 +29,32 @@ void MKLDNNReferenceNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - InferenceEngine::LayerConfig config; - for (size_t i = 0; i < inDims.size(); i++) { - InferenceEngine::DataConfig dataConfig; - dataConfig.inPlace = -1; - dataConfig.constant = false; - - dataConfig.desc = MKLDNNMemoryDesc(inDims[i], - MKLDNNExtensionUtils::IEPrecisionToDataType(convertPrecision(ngraphOp->get_input_element_type(i))), - MKLDNNMemory::GetPlainFormat(inDims[i])); - - config.inConfs.push_back(dataConfig); + std::vector inputConfigurators; + inputConfigurators.reserve(inputShapes.size()); + for (size_t i = 0; i < inputShapes.size(); i++) { + inputConfigurators.emplace_back(LayoutType::ncsp, convertPrecision(ngraphOp->get_input_element_type(i)), inputShapes[i]); } - for (size_t i = 0; i < outDims.size(); i++) { - InferenceEngine::DataConfig dataConfig; - dataConfig.inPlace = -1; - dataConfig.constant = false; - - dataConfig.desc = MKLDNNMemoryDesc(outDims[i], - MKLDNNExtensionUtils::IEPrecisionToDataType(convertPrecision(ngraphOp->get_output_element_type(i))), - MKLDNNMemory::GetPlainFormat(outDims[i])); - - config.outConfs.push_back(dataConfig); + std::vector outputConfigurators; + 
outputConfigurators.reserve(inputShapes.size()); + for (size_t i = 0; i < outputShapes.size(); i++) { + outputConfigurators.emplace_back(LayoutType::ncsp, convertPrecision(ngraphOp->get_output_element_type(i)), outputShapes[i]); } - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::ref, memory::format_tag::undef}); + addSupportedPrimDesc(inputConfigurators, outputConfigurators, impl_desc_type::ref); } void MKLDNNReferenceNode::createPrimitive() {} void MKLDNNReferenceNode::execute(mkldnn::stream strm) { ngraph::HostTensorVector inputs; - for (size_t i = 0; i < inDims.size(); i++) { + for (size_t i = 0; i < inputShapes.size(); i++) { void *srcDataPtr = getParentEdgesAtPort(i)[0]->getMemory().GetPtr(); inputs.push_back(std::make_shared(ngraphOp->get_input_element_type(i), ngraphOp->get_input_shape(i), srcDataPtr)); } ngraph::HostTensorVector outputs; - for (size_t i = 0; i < outDims.size(); i++) { + for (size_t i = 0; i < outputShapes.size(); i++) { void *dstDataPtr = getChildEdgesAtPort(i)[0]->getMemory().GetPtr(); outputs.push_back(std::make_shared(ngraphOp->get_output_element_type(i), ngraphOp->get_output_shape(i), dstDataPtr)); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.cpp index af1159bb07d195..c140baa88c533c 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.cpp @@ -8,7 +8,7 @@ #include #include "ie_parallel.hpp" #include "mkldnn_region_yolo_node.h" -#include +#include #include #include "common/cpu_convert.h" #include @@ -291,8 +291,8 @@ void MKLDNNRegionYoloNode::initSupportedPrimitiveDescriptors() { impl_type = impl_desc_type::ref; } - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, input_prec}}, - {{TensorDescCreatorTypes::ncsp, output_prec}}, + addSupportedPrimDesc({{LayoutType::ncsp, input_prec}}, + {{LayoutType::ncsp, output_prec}}, impl_type); } @@ -367,13 +367,10 @@ inline void MKLDNNRegionYoloNode::calculate_logistic(size_t start_index, int cou } void MKLDNNRegionYoloNode::execute(mkldnn::stream strm) { - auto inputDesc = getParentEdgeAt(0)->getDesc(); - auto outputDesc = getChildEdgeAt(0)->getDesc(); - - size_t B = (inputDesc.getDims().size() > 0) ? inputDesc.getDims()[0] : 1; - size_t IC = (inputDesc.getDims().size() > 1) ? inputDesc.getDims()[1] : 1; - size_t IH = (inputDesc.getDims().size() > 2) ? inputDesc.getDims()[2] : 1; - size_t IW = (inputDesc.getDims().size() > 3) ? inputDesc.getDims()[3] : 1; + size_t B = (getParentEdgeAt(0)->getShape().getRank() > 0) ? getParentEdgeAt(0)->getShape().getStaticDims()[0] : 1; + size_t IC = (getParentEdgeAt(0)->getShape().getRank() > 1) ? getParentEdgeAt(0)->getShape().getStaticDims()[1] : 1; + size_t IH = (getParentEdgeAt(0)->getShape().getRank() > 2) ? getParentEdgeAt(0)->getShape().getStaticDims()[2] : 1; + size_t IW = (getParentEdgeAt(0)->getShape().getRank() > 3) ? 
getParentEdgeAt(0)->getShape().getStaticDims()[3] : 1; size_t mask_size = mask.size(); int end_index = 0; @@ -400,7 +397,8 @@ void MKLDNNRegionYoloNode::execute(mkldnn::stream strm) { const auto *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto *dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - cpu_convert(src_data, dst_data, inputDesc.getPrecision(), outputDesc.getPrecision(), output_size); + cpu_convert(src_data, dst_data, getParentEdgeAt(0)->getMemory().GetDesc().getPrecision(), + getChildEdgeAt(0)->getMemory().GetDesc().getPrecision(), output_size); for (int b = 0; b < B; b++) { for (int n = 0; n < num_; n++) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp index c318468ef1eed7..99bd606a9a08cf 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp @@ -25,10 +25,6 @@ MKLDNNReorderNode::MKLDNNReorderNode(const std::string& name, const mkldnn::engi MKLDNNNode("Reorder", name, eng, w_cache) { } void MKLDNNReorderNode::getSupportedDescriptors() { - if (outDims.empty() && output.getLayout() != InferenceEngine::Layout::ANY) - outDims.push_back(MKLDNNDims(output.getDims())); - if (inDims.empty() && input.getLayout() != InferenceEngine::Layout::ANY) - inDims.push_back(MKLDNNDims(input.getDims())); if (getParentEdges().size() != 1) IE_THROW() << "Incorrect number of input edges for layer " << getName(); if (getChildEdges().empty()) @@ -39,13 +35,10 @@ void MKLDNNReorderNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - auto inputDataType = MKLDNNMemoryDesc(input).getDataType(); - auto outputDataType = MKLDNNMemoryDesc(output).getDataType(); - auto parent = getParentEdgeAt(0)->getParent(); auto child = getChildEdgeAt(0)->getChild(); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; config.inConfs.resize(1); config.outConfs.resize(1); @@ -57,19 +50,18 @@ void MKLDNNReorderNode::initSupportedPrimitiveDescriptors() { config.inConfs[0].inPlace = 0; config.outConfs[0].inPlace = 0; } - if (input.getLayout() != InferenceEngine::Layout::ANY && output.getLayout() != InferenceEngine::Layout::ANY) { - config.inConfs[0].desc = input; - config.outConfs[0].desc = output; + if (input && output) { + config.inConfs[0].desc = input->clone(); + config.outConfs[0].desc = output->clone(); } else if (parent->getSelectedPrimitiveDescriptor() != nullptr && child->getSelectedPrimitiveDescriptor() != nullptr) { - config.inConfs[0].desc = parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc; - config.outConfs[0].desc = child->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc; + config.inConfs[0].desc = parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc->clone(); + config.outConfs[0].desc = child->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc->clone(); } else { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::any); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::any); + IE_THROW() << "Cannot initialize supported PDs for Reorder node with name `" << getName() << "`"; } - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::reorder, MKLDNNMemory::Convert(config.outConfs[0].desc.getLayout())); + 
supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::reorder); } void MKLDNNReorderNode::createPrimitive() { @@ -82,21 +74,23 @@ void MKLDNNReorderNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; + auto inDims = getParentEdgeAt(0)->getShape().getStaticDims(); + if (!isOptimized) { - if (MKLDNNPlugin::one_of(getParentEdgeAt(0)->getDims().ndims(), 4, 5) && - getParentEdgeAt(0)->getDims()[1] <= 64 && - getParentEdgeAt(0)->getDims()[1] >= 16 && - (getParentEdgeAt(0)->getMemory().GetElementsCount() / getParentEdgeAt(0)->getDims()[1]) >= 128 && - getParentEdgeAt(0)->getMemory().GetDesc().isTailCFormat() && - getChildEdgeAt(0)->getMemory().GetDesc().isPlainFormat() && - getParentEdgeAt(0)->getMemory().GetDesc().getDataType() == memory::data_type::f32 && - getChildEdgeAt(0)->getMemory().GetDesc().getDataType() == memory::data_type::f32) { + if (MKLDNNPlugin::one_of(inDims.size(), 4, 5) && + inDims[1] <= 64 && + inDims[1] >= 16 && + (getParentEdgeAt(0)->getMemory().GetElementsCount() / inDims[1]) >= 128 && + getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc) && + getChildEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp) && + getParentEdgeAt(0)->getMemory().GetDesc().getPrecision() == Precision::FP32 && + getChildEdgeAt(0)->getMemory().GetDesc().getPrecision() == Precision::FP32) { // oneDNN JIT reorder shows bad perf for nspc to ncsp reorder case so we fallback on simple c++ implementation canUseOptimizedNspc2Ncsp = true; } else if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx2) && - MKLDNNPlugin::one_of(getParentEdgeAt(0)->getDims().ndims(), 4, 5) && - getParentEdgeAt(0)->getMemory().GetDesc().isPlainFormat() && - getChildEdgeAt(0)->getMemory().GetDesc().isTailCFormat() && + MKLDNNPlugin::one_of(inDims.size(), 4, 5) && + getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp) && + getChildEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc) && getParentEdgeAt(0)->getMemory().GetDataType() == getChildEdgeAt(0)->getMemory().GetDataType() && MKLDNNExtensionUtils::sizeOfDataType(getParentEdgeAt(0)->getMemory().GetDataType()) == 1) { // oneDNN doesn't provide JIT reorder impl for non-avx2 targets so we fallback on simple c++ implementation which shows better perf @@ -110,29 +104,12 @@ void MKLDNNReorderNode::createPrimitive() { void MKLDNNReorderNode::createReorderPrimitive(const mkldnn::memory::desc &srcDesc, void* srcPtr, const mkldnn::memory::desc &dstDesc, void* dstPtr) { src_blocked = std::make_shared(getEngine()); - src_blocked->Create(srcDesc, srcPtr, false); + src_blocked->Create(MKLDNNMemoryDesc(srcDesc), srcPtr, false); dst_blocked = std::make_shared(getEngine()); - dst_blocked->Create(dstDesc, dstPtr, false); + dst_blocked->Create(MKLDNNMemoryDesc(dstDesc), dstPtr, false); mkldnn::primitive_attr attr; - - if (_scales) { - std::vector scales; - - float* scaleData = static_cast(_scales->buffer()); - - for (size_t i = 0; i < _scales->size(); i++) { - scales.push_back(scaleData[i]); - } - - int mask = 0; - int oc_dim_id = 1; - mask = 1 << oc_dim_id; - - attr.set_output_scales(mask, scales); - } - auto createReorder = [&]() -> bool { // No autoblocking. 
Reorder can be applied as is reorder::primitive_desc pd = mkldnn::reorder::primitive_desc(src_blocked->GetPrimitive(), dst_blocked->GetPrimitive(), attr, true); @@ -159,13 +136,13 @@ void MKLDNNReorderNode::createReorderPrimitive(const mkldnn::memory::desc &srcDe // MKLDNN doesn't support direct reorders from planar data formats to grouped weights formats. // Code block below tries to detect such cases and reinterpret data planar formats (e.g. nchw) // as grouped weights planar formats (e.g. goihw) since they have same physical memory layout. - if (src_blocked->GetDesc().isPlainFormat() && + if (src_blocked->GetDesc().hasLayoutType(LayoutType::ncsp) && src_blocked->GetDims().size() + 1 == dst_blocked->GetDims().size()) { const auto newDims = dst_blocked->GetDims(); - const auto newFormat = MKLDNNMemory::GetPlainFormat(newDims); + const auto newFormat = MKLDNNMemory::GetPlainFormatByRank(newDims.size()); auto newDesc = mkldnn::memory::desc(newDims, src_blocked->GetDataType(), newFormat); - src_blocked->Create(newDesc, srcPtr, false); + src_blocked->Create(MKLDNNMemoryDesc(newDesc), srcPtr, false); success = createReorder(); } @@ -192,12 +169,14 @@ bool MKLDNNReorderNode::created() const { void MKLDNNReorderNode::optimizedNcsp2Nspc() { auto parentEdge = getParentEdgeAt(0); auto childEdge = getChildEdgeAt(0); - const int ndims = parentEdge->getDims().ndims(); - const size_t DIM0 = parentEdge->getDims()[0]; - const size_t DIM1 = parentEdge->getDims()[1]; - const size_t DIM2 = ndims == 5 ? parentEdge->getDims()[ndims - 3] : 1; - const size_t DIM3 = parentEdge->getDims()[ndims - 2]; - const size_t DIM4 = parentEdge->getDims()[ndims - 1]; + + auto inDims = parentEdge->getShape().getStaticDims(); + const size_t ndims = inDims.size(); + const size_t DIM0 = inDims[0]; + const size_t DIM1 = inDims[1]; + const size_t DIM2 = ndims == 5 ? inDims[ndims - 3] : 1; + const size_t DIM3 = inDims[ndims - 2]; + const size_t DIM4 = inDims[ndims - 1]; auto src_data = reinterpret_cast(parentEdge->getMemoryPtr()->GetPtr()); auto dst_data = reinterpret_cast(childEdge->getMemoryPtr()->GetPtr()); @@ -221,12 +200,14 @@ void MKLDNNReorderNode::optimizedNcsp2Nspc() { void MKLDNNReorderNode::optimizedNspc2Ncsp() { auto parentEdge = getParentEdgeAt(0); auto childEdge = getChildEdgeAt(0); - const int ndims = parentEdge->getDims().ndims(); - const size_t DIM0 = parentEdge->getDims()[0]; - const size_t DIM1 = parentEdge->getDims()[1]; - const size_t DIM2 = ndims == 5 ? parentEdge->getDims()[ndims - 3] : 1; - const size_t DIM3 = parentEdge->getDims()[ndims - 2]; - const size_t DIM4 = parentEdge->getDims()[ndims - 1]; + + auto inDims = parentEdge->getShape().getStaticDims(); + const size_t ndims = inDims.size(); + const size_t DIM0 = inDims[0]; + const size_t DIM1 = inDims[1]; + const size_t DIM2 = ndims == 5 ? inDims[ndims - 3] : 1; + const size_t DIM3 = inDims[ndims - 2]; + const size_t DIM4 = inDims[ndims - 1]; auto src_data = reinterpret_cast(parentEdge->getMemoryPtr()->GetPtr()); auto dst_data = reinterpret_cast(childEdge->getMemoryPtr()->GetPtr()); @@ -279,4 +260,20 @@ void MKLDNNReorderNode::setDynamicBatchLim(int lim) { createReorderPrimitive(src_d, src_data_hdl, dst_d, dst_data_hdl); } } + +std::string MKLDNNReorderNode::getReorderArgs(const MemoryDesc &parentDesc, const MemoryDesc &childDesc) { + std::string inArgs, outArgs; + if (parentDesc.getPrecision() != childDesc.getPrecision()) { + inArgs += (inArgs.empty() ? "" : "_") + std::string(parentDesc.getPrecision().name()); + outArgs += (outArgs.empty() ? 
"" : "_") + std::string(childDesc.getPrecision().name()); + } + auto formatSrc = parentDesc.serializeFormat(); + auto formatDst = childDesc.serializeFormat(); + if (formatSrc != formatDst || one_of(std::string("undef"), formatSrc, formatDst)) { + inArgs += (inArgs.empty() ? "" : "_") + formatSrc; + outArgs += (outArgs.empty() ? "" : "_") + formatDst; + } + return inArgs + "_" + outArgs; +} + REG_MKLDNN_PRIM_FOR(MKLDNNReorderNode, Reorder); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h index 729097453fbe4e..da821878035e37 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h @@ -9,6 +9,7 @@ #include #include #include +#include namespace MKLDNNPlugin { @@ -24,9 +25,14 @@ class MKLDNNReorderNode : public MKLDNNNode { bool created() const override; const std::vector& getPrimitivesPriority() override; - void setDescs(const InferenceEngine::TensorDesc& input, const InferenceEngine::TensorDesc& output) { - this->input = input; - this->output = output; + void setDescs(const MemoryDesc& input, const MemoryDesc& output) { + this->input = input.clone(); + inputShapes.clear(); + inputShapes.push_back(this->input->getShape()); + + this->output = output.clone(); + outputShapes.clear(); + outputShapes.push_back(this->output->getShape()); } void setOptimized(bool isOptimized) { @@ -39,17 +45,14 @@ class MKLDNNReorderNode : public MKLDNNNode { return false; } - const InferenceEngine::TensorDesc& getInput() { return input; } - const InferenceEngine::TensorDesc& getOutput() { return output; } + const MemoryDesc& getInput() { return *input; } + const MemoryDesc& getOutput() { return *output; } - /** - * @brief A pointer to a scales blob - */ - InferenceEngine::Blob::Ptr _scales; + static std::string getReorderArgs(const MemoryDesc &parentDesc, const MemoryDesc &childDesc); private: - InferenceEngine::TensorDesc input; - InferenceEngine::TensorDesc output; + std::unique_ptr input; + std::unique_ptr output; MKLDNNMemoryPtr dst_blocked; MKLDNNMemoryPtr src_blocked; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorg_yolo_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorg_yolo_node.cpp index 3db7470e92fba9..48e2eaf9992fc9 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorg_yolo_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorg_yolo_node.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" - #include #include @@ -48,8 +46,8 @@ void MKLDNNReorgYoloNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}}, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } @@ -57,10 +55,10 @@ void MKLDNNReorgYoloNode::execute(mkldnn::stream strm) { const auto *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto *dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); - int IW = (getParentEdgeAt(0)->getDesc().getDims().size() > 3) ? getParentEdgeAt(0)->getDims()[3] : 1; - int IH = (getParentEdgeAt(0)->getDesc().getDims().size() > 2) ? getParentEdgeAt(0)->getDims()[2] : 1; - int IC = (getParentEdgeAt(0)->getDesc().getDims().size() > 1) ? 
getParentEdgeAt(0)->getDims()[1] : 1; - int B = (getParentEdgeAt(0)->getDesc().getDims().size() > 0) ? getParentEdgeAt(0)->getDims()[0] : 1; + int IW = (getParentEdgeAt(0)->getShape().getRank() > 3) ? getParentEdgeAt(0)->getShape().getStaticDims()[3] : 1; + int IH = (getParentEdgeAt(0)->getShape().getRank() > 2) ? getParentEdgeAt(0)->getShape().getStaticDims()[2] : 1; + int IC = (getParentEdgeAt(0)->getShape().getRank() > 1) ? getParentEdgeAt(0)->getShape().getStaticDims()[1] : 1; + int B = (getParentEdgeAt(0)->getShape().getRank() > 0) ? getParentEdgeAt(0)->getShape().getStaticDims()[0] : 1; int ic_off = IC / (stride * stride); int ih_off = IH * stride; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp index 543e0a86bcb7c8..81175dcaf41a96 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp @@ -35,18 +35,18 @@ void MKLDNNReshapeNode::initSupportedPrimitiveDescriptors() { if (inputDataType != outputDataType) inputDataType = outputDataType; - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; config.inConfs.resize(getParentEdges().size()); for (size_t i = 0; i getDims(), inputDataType); + config.inConfs[i].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(i)->getShape().getStaticDims(), inputDataType); } config.outConfs.resize(1); config.outConfs[0].inPlace = 0; config.outConfs[0].constant = false; - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType); supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reverse_sequence_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reverse_sequence_node.cpp index 5f6e6083e90c4a..ffa831a670db77 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reverse_sequence_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reverse_sequence_node.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" - #include #include @@ -85,9 +83,9 @@ void MKLDNNReverseSequenceNode::initSupportedPrimitiveDescriptors() { if (lengthsPrecision != Precision::I32 && lengthsPrecision != Precision::FP32) lengthsPrecision = Precision::I32; - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, lengthsPrecision}}, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, lengthsPrecision}}, + {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } @@ -96,7 +94,7 @@ void MKLDNNReverseSequenceNode::execute(mkldnn::stream strm) { const float *src_data = reinterpret_cast(getParentEdgeAt(REVERSESEQUENCE_DATA)->getMemoryPtr()->GetPtr()); float* dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); - switch (getParentEdgeAt(REVERSESEQUENCE_LENGTHS)->getDesc().getPrecision()) { + switch (getParentEdgeAt(REVERSESEQUENCE_LENGTHS)->getMemory().GetDesc().getPrecision()) { case Precision::FP32: { float *seq_lengths_data = reinterpret_cast(getParentEdgeAt(REVERSESEQUENCE_LENGTHS)->getMemoryPtr()->GetPtr()); for (i = 0; i < src_dims[batch_axis]; i++) { @@ -171,7 +169,7 @@ void 
MKLDNNReverseSequenceNode::execute(mkldnn::stream strm) { break; default: IE_THROW() << "ReverseSequence layer does not support " - << getParentEdgeAt(REVERSESEQUENCE_LENGTHS)->getDesc().getPrecision() << " precision"; + << getParentEdgeAt(REVERSESEQUENCE_LENGTHS)->getMemory().GetDesc().getPrecision() << " precision"; } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp index a85544e9e96aa7..91201da8592dc6 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp @@ -260,19 +260,19 @@ void MKLDNNRNN::initCell(const std::shared_ptr& op) { Gb = (cell_type != mkldnn::algorithm::lbr_gru) ? G : G + 1; // Expected shapes - MKLDNNDims D_shape {N, DC}, S_shape {N, SC}, S_4D_shape {L, D, N, SC}; + std::vector D_shape {N, DC}, S_shape {N, SC}, S_4D_shape {L, D, N, SC}; - if (in_data_dims != D_shape.ToSizeVector() - || in_h_state_dims != S_shape.ToSizeVector() - || out_h_state_dims != S_shape.ToSizeVector()) + if (in_data_dims != D_shape + || in_h_state_dims != S_shape + || out_h_state_dims != S_shape) IE_THROW() << "Incorrect shape of input/output ports for layer " << getName(); if (S == 2) { auto in_c_state_dims = op->get_input_shape(2); auto out_c_state_dims = op->get_output_shape(1); - if (in_c_state_dims != S_shape.ToSizeVector() - || out_c_state_dims != S_shape.ToSizeVector()) + if (in_c_state_dims != S_shape + || out_c_state_dims != S_shape) IE_THROW() << "Incorrect shape of input/output ports for layer " << getName(); } } @@ -281,52 +281,57 @@ void MKLDNNRNN::fillCellDesc() { runtimePrecision = getOriginalInputPrecisionAtPort(0); auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(runtimePrecision); - MKLDNNDims S_4D_shape {L, D, N, SC}; + std::vector S_4D_shape {L, D, N, SC}; // layer input plus states - in_data_d.resize(S + 1); - out_data_d.resize(S + 1); + in_data_d.reserve(S + 1); + out_data_d.reserve(S + 1); // Shapes and Attributes are correct. Can start internal stuff initialization. 
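// [Editorial sketch, not part of this patch] The hunk below replaces indexed assignment
// of brace-initialised MKLDNNMemoryDesc objects with in-place construction, which is why
// the vectors above are now reserve()d rather than resize()d. A hedged illustration of
// the new pattern (assuming the dims vector is std::vector<size_t>, matching the size_t
// members introduced in mkldnn_rnn.h further down in this patch):
//
//   in_data_d.reserve(S + 1);
//   in_data_d.emplace_back(std::vector<size_t>{T, N, DC}, dataType,
//                          memory::format_tag::tnc);   // RNNInOutKind::Layer
//   in_data_d.emplace_back(S_4D_shape, dataType,
//                          memory::format_tag::ldnc);  // RNNInOutKind::HiddenState
//
// Because the elements are appended, their order must still follow the RNNInOutKind
// enum (Layer, HiddenState, CellState), which the hunk below preserves.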
- in_data_d[RNNInOutKind::Layer] = {MKLDNNDims{T, N, DC}, dataType, memory::format_tag::tnc}; - out_data_d[RNNInOutKind::Layer] = {MKLDNNDims{T, N, SC}, dataType, memory::format_tag::tnc}; + in_data_d.emplace_back(std::vector{T, N, DC}, dataType, memory::format_tag::tnc); + out_data_d.emplace_back(std::vector{T, N, SC}, dataType, memory::format_tag::tnc); - in_data_d[RNNInOutKind::HiddenState] = {S_4D_shape, dataType, memory::format_tag::ldnc}; - out_data_d[RNNInOutKind::HiddenState] = {S_4D_shape, dataType, memory::format_tag::ldnc}; + in_data_d.emplace_back(S_4D_shape, dataType, memory::format_tag::ldnc); + out_data_d.emplace_back(S_4D_shape, dataType, memory::format_tag::ldnc); if (haveCellState(cell_type)) { - in_data_d[RNNInOutKind::CellState] = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc}; - out_data_d[RNNInOutKind::CellState] = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc}; + in_data_d.emplace_back(S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc); + out_data_d.emplace_back(S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc); } - w_data_d = {{L, D, DC, G, SC}, dataType, memory::format_tag::ldigo}; - w_state_d = {{L, D, SC, G, SC}, dataType, memory::format_tag::ldigo}; + w_data_d = MKLDNNPlugin::make_unique(std::vector{L, D, DC, G, SC}, dataType, memory::format_tag::ldigo); + w_state_d = MKLDNNPlugin::make_unique(std::vector{L, D, SC, G, SC}, dataType, memory::format_tag::ldigo); // Add 5th input - w_bias_d = {{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo}; + w_bias_d = MKLDNNPlugin::make_unique(std::vector{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo); copyWeightsData(); // Expected shapes - MKLDNNDims D_shape {N, DC}, S_shape {N, SC}, WShape {SC * G, DC}, RShape {SC * G, SC}, BShape {SC * Gb}; - std::vector in_candidate, out_candidate; + std::vector D_shape {N, DC}, S_shape {N, SC}, WShape {SC * G, DC}, RShape {SC * G, SC}, BShape {SC * Gb}; + std::vector in_candidate, out_candidate; in_candidate.reserve(6); - in_candidate.emplace_back(MKLDNNMemoryDesc {D_shape, dataType, memory::format_tag::nc}); - in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, dataType, memory::format_tag::nc}); - out_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, dataType, memory::format_tag::nc}); + in_candidate.emplace_back(D_shape, dataType, memory::format_tag::nc); + in_candidate.emplace_back(S_shape, dataType, memory::format_tag::nc); + out_candidate.emplace_back(S_shape, dataType, memory::format_tag::nc); if (haveCellState(cell_type)) { - in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::data_type::f32, memory::format_tag::nc}); - out_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::data_type::f32, memory::format_tag::nc}); + in_candidate.emplace_back(S_shape, memory::data_type::f32, memory::format_tag::nc); + out_candidate.emplace_back(S_shape, memory::data_type::f32, memory::format_tag::nc); } if (one_of(cell_type, mkldnn::algorithm::vanilla_rnn, mkldnn::algorithm::vanilla_gru, mkldnn::algorithm::lbr_gru, mkldnn::algorithm::vanilla_lstm)) { - in_candidate.emplace_back(MKLDNNMemoryDesc {WShape, memory::data_type::f32, memory::format_tag::nc}); - in_candidate.emplace_back(MKLDNNMemoryDesc {RShape, memory::data_type::f32, memory::format_tag::nc}); - in_candidate.emplace_back(MKLDNNMemoryDesc {BShape, memory::data_type::f32, memory::format_tag::x}); + in_candidate.emplace_back(WShape, memory::data_type::f32, memory::format_tag::nc); + in_candidate.emplace_back(RShape, 
memory::data_type::f32, memory::format_tag::nc); + in_candidate.emplace_back(BShape, memory::data_type::f32, memory::format_tag::x); } - createDescriptor(in_candidate, out_candidate); + std::vector in_candidate_ptrs(in_candidate.size()); + std::vector out_candidate_ptrs(out_candidate.size()); + std::transform(in_candidate.begin(), in_candidate.end(), in_candidate_ptrs.begin(), [](const MKLDNNMemoryDesc& item) { return &item; }); + std::transform(out_candidate.begin(), out_candidate.end(), out_candidate_ptrs.begin(), [](const MKLDNNMemoryDesc& item) { return &item; }); + + createDescriptor(in_candidate_ptrs, out_candidate_ptrs); } void MKLDNNRNN::initSeq(const std::shared_ptr& op) { @@ -373,64 +378,71 @@ void MKLDNNRNN::initSeq(const std::shared_ptr& op) { Gb = (cell_type != mkldnn::algorithm::lbr_gru) ? G : G + 1; // layer input plus states - in_data_d.resize(S + 1); - out_data_d.resize(S + 1); + in_data_d.reserve(S + 1); + out_data_d.reserve(S + 1); } void MKLDNNRNN::fillSeqDesc() { runtimePrecision = getOriginalInputPrecisionAtPort(0); auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(runtimePrecision); - MKLDNNDims S_4D_shape {L, D, N, SC}; + std::vector S_4D_shape {L, D, N, SC}; // Try to create descriptor and corresponding configuration - in_data_d[RNNInOutKind::Layer] = {MKLDNNDims{in_data_dims}, dataType, memory::format_tag::tnc}; - out_data_d[RNNInOutKind::Layer] = {MKLDNNDims{out_data_dims}, dataType, memory::format_tag::tnc}; + in_data_d.emplace_back(std::vector{in_data_dims}, dataType, memory::format_tag::tnc); + out_data_d.emplace_back(std::vector{out_data_dims}, dataType, memory::format_tag::tnc); - in_data_d[RNNInOutKind::HiddenState] = {MKLDNNDims{S_4D_shape}, dataType, memory::format_tag::ldnc}; - out_data_d[RNNInOutKind::HiddenState] = {MKLDNNDims{S_4D_shape}, dataType, memory::format_tag::ldnc}; + in_data_d.emplace_back(std::vector{S_4D_shape}, dataType, memory::format_tag::ldnc); + out_data_d.emplace_back(std::vector{S_4D_shape}, dataType, memory::format_tag::ldnc); if (haveCellState(cell_type)) { - in_data_d[RNNInOutKind::CellState] = {MKLDNNDims{S_4D_shape}, memory::data_type::f32, memory::format_tag::ldnc}; - out_data_d[RNNInOutKind::CellState] = {MKLDNNDims{S_4D_shape}, memory::data_type::f32, memory::format_tag::ldnc}; + in_data_d.emplace_back(std::vector{S_4D_shape}, memory::data_type::f32, memory::format_tag::ldnc); + out_data_d.emplace_back(std::vector{S_4D_shape}, memory::data_type::f32, memory::format_tag::ldnc); } - w_data_d = {{L, D, DC, G, SC}, dataType, memory::format_tag::ldigo}; - w_state_d = {{L, D, SC, G, SC}, dataType, memory::format_tag::ldigo}; + w_data_d = MKLDNNPlugin::make_unique(std::vector{L, D, DC, G, SC}, dataType, memory::format_tag::ldigo); + w_state_d = MKLDNNPlugin::make_unique(std::vector{L, D, SC, G, SC}, dataType, memory::format_tag::ldigo); - w_bias_d = {{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo}; + w_bias_d = MKLDNNPlugin::make_unique(std::vector{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo); copyWeightsData(); - std::vector in_candidate; + std::vector in_candidate; + in_candidate.reserve(7); if (nativeOrder) - in_candidate.push_back(MKLDNNMemoryDesc{inDims[RNNInOutKind::Layer], dataType, memory::format_tag::tnc}); + in_candidate.emplace_back(inputShapes[RNNInOutKind::Layer].getStaticDims(), dataType, memory::format_tag::tnc); else - in_candidate.push_back(MKLDNNMemoryDesc{{N, T, DC}, dataType, memory::format_tag::ntc}); + in_candidate.emplace_back(std::vector{N, T, DC}, dataType, 
memory::format_tag::ntc); - in_candidate.push_back(MKLDNNMemoryDesc{{N, D, SC}, dataType, memory::format_tag::ntc}); // initial hidden state + in_candidate.emplace_back(std::vector{N, D, SC}, dataType, memory::format_tag::ntc); // initial hidden state if (haveCellState(cell_type)) - in_candidate.push_back(MKLDNNMemoryDesc{{N, D, SC}, memory::data_type::f32, memory::format_tag::ntc}); // initial cell state - in_candidate.push_back(MKLDNNMemoryDesc{{N}, memory::data_type::s32, memory::format_tag::x}); // sequence lengths - in_candidate.push_back(MKLDNNMemoryDesc{{D, G * SC, DC}, memory::data_type::f32, memory::format_tag::ntc}); // W - in_candidate.push_back(MKLDNNMemoryDesc{{D, G * SC, SC}, memory::data_type::f32, memory::format_tag::ntc}); // R - in_candidate.push_back(MKLDNNMemoryDesc{{D, Gb * SC}, memory::data_type::f32, memory::format_tag::nc}); // B + in_candidate.emplace_back(std::vector{N, D, SC}, memory::data_type::f32, memory::format_tag::ntc); // initial cell state + in_candidate.emplace_back(std::vector{N}, memory::data_type::s32, memory::format_tag::x); // sequence lengths + in_candidate.emplace_back(std::vector{D, G * SC, DC}, memory::data_type::f32, memory::format_tag::ntc); // W + in_candidate.emplace_back(std::vector{D, G * SC, SC}, memory::data_type::f32, memory::format_tag::ntc); // R + in_candidate.emplace_back(std::vector{D, Gb * SC}, memory::data_type::f32, memory::format_tag::nc); // B - std::vector out_candidate; + std::vector out_candidate; + out_candidate.reserve(3); if (nativeOrder) { - out_candidate.push_back(out_data_d[RNNInOutKind::Layer]); + out_candidate.emplace_back(out_data_d[RNNInOutKind::Layer]); } else { // TODO reorder ntc -> ndtc does not work, thus use tnc(plain) + transformation reshape-transpose-reshape for now. 
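// [Editorial sketch, not part of this patch] With the tnc workaround above, the first
// output descriptor is published in plain tnc order and the ntc -> ndtc permutation is
// left to the reshape-transpose-reshape graph transformation mentioned in the TODO.
// The candidate descriptors are then handed to createDescriptor() as raw pointers via
// std::transform, mirroring the calls used elsewhere in this patch. The pointer element
// type is assumed here to be const MKLDNNMemoryDesc*; it may equally be the MemoryDesc
// base class, which the stripped template arguments in this diff do not show:
//
//   std::vector<const MKLDNNMemoryDesc*> out_candidate_ptrs(out_candidate.size());
//   std::transform(out_candidate.begin(), out_candidate.end(), out_candidate_ptrs.begin(),
//                  [](const MKLDNNMemoryDesc& item) { return &item; });
//   createDescriptor(in_candidate_ptrs, out_candidate_ptrs);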
- out_candidate.push_back(MKLDNNMemoryDesc{{T, N, SC}, dataType, memory::format_tag::tnc}); + out_candidate.emplace_back(std::vector{T, N, SC}, dataType, memory::format_tag::tnc); } - out_candidate.push_back(MKLDNNMemoryDesc{{N, D, SC}, dataType, memory::format_tag::ntc}); + out_candidate.emplace_back(std::vector{N, D, SC}, dataType, memory::format_tag::ntc); if (haveCellState(cell_type)) - out_candidate.push_back(MKLDNNMemoryDesc{{N, D, SC}, memory::data_type::f32, memory::format_tag::ntc}); + out_candidate.emplace_back(std::vector{N, D, SC}, memory::data_type::f32, memory::format_tag::ntc); + + std::vector in_candidate_ptrs(in_candidate.size()); + std::vector out_candidate_ptrs(out_candidate.size()); + std::transform(in_candidate.begin(), in_candidate.end(), in_candidate_ptrs.begin(), [](const MKLDNNMemoryDesc& item) { return &item; }); + std::transform(out_candidate.begin(), out_candidate.end(), out_candidate_ptrs.begin(), [](const MKLDNNMemoryDesc& item) { return &item; }); - createDescriptor(in_candidate, out_candidate); + createDescriptor(in_candidate_ptrs, out_candidate_ptrs); } bool MKLDNNRNN::verifyWeightsPrecision(const Precision &layerPrec, const Precision &weightsPrec) { @@ -447,14 +459,14 @@ void MKLDNNRNN::fillWeights(const int *gate_map, const size_t wIdx, const size_t } // create weight blobs (data and state part) auto w_data_mem = std::make_shared(getEngine()); - w_data_mem->Create(w_data_d); + w_data_mem->Create(*w_data_d); internalBlobMemory.push_back(w_data_mem); auto w_state_mem = std::make_shared(getEngine()); - w_state_mem->Create(w_state_d); + w_state_mem->Create(*w_state_d); internalBlobMemory.push_back(w_state_mem); - const size_t ie_w_vec_size = getParentEdgesAtPort(wIdx)[0]->getDims().size(); - const size_t ie_r_vec_size = getParentEdgesAtPort(rIdx)[0]->getDims().size(); + const size_t ie_w_vec_size = getParentEdgesAtPort(wIdx)[0]->getShape().getElementsCount(); + const size_t ie_r_vec_size = getParentEdgesAtPort(rIdx)[0]->getShape().getElementsCount(); auto *wInputNode = dynamic_cast(getParentEdgesAtPort(wIdx)[0]->getParent().get()); auto wConstBlob = wInputNode->getMemoryPtr(); @@ -504,7 +516,7 @@ void MKLDNNRNN::fillBiases(const int *gate_map) { } auto w_bias_mem = std::make_shared(getEngine()); - w_bias_mem->Create(w_bias_d); + w_bias_mem->Create(*w_bias_d); internalBlobMemory.push_back(w_bias_mem); auto *constInputNode = dynamic_cast(getParentEdgesAtPort(bIdx)[0]->getParent().get()); @@ -590,18 +602,17 @@ void MKLDNNRNN::copyWeightsData() { if (runtimePrecision == Precision::BF16 || runtimePrecision == Precision::FP32) fillBiases(gate_map); } - -void MKLDNNRNN::createDescriptor(const std::vector &inputDesc, - const std::vector &outputDesc) { +void MKLDNNRNN::createDescriptor(const std::vector &inputDesc, + const std::vector &outputDesc) { switch (cell_type) { case mkldnn::algorithm::vanilla_rnn: { MKLDNNDescriptor desc(std::shared_ptr( new vanilla_rnn_forward::desc(prop_kind::forward_scoring, cell_act, direction, /* In Data */ in_data_d[RNNInOutKind::Layer], /* In State */ in_data_d[RNNInOutKind::HiddenState], - /* Weights data */ w_data_d, - /* Weights state */ w_state_d, - /* Bias */ w_bias_d, + /* Weights data */ *w_data_d, + /* Weights state */ *w_state_d, + /* Bias */ *w_bias_d, /* Out Data */ out_data_d[RNNInOutKind::Layer], /* Out State */ out_data_d[RNNInOutKind::HiddenState]))); descs.push_back(desc); @@ -611,9 +622,9 @@ void MKLDNNRNN::createDescriptor(const std::vector &inputDesc, new gru_forward::desc(prop_kind::forward_scoring, direction, /* In 
Data */ in_data_d[RNNInOutKind::Layer], /* In State */ in_data_d[RNNInOutKind::HiddenState], - /* Weights data */ w_data_d, - /* Weights state */ w_state_d, - /* Bias */ w_bias_d, + /* Weights data */ *w_data_d, + /* Weights state */ *w_state_d, + /* Bias */ *w_bias_d, /* Out Data */ out_data_d[RNNInOutKind::Layer], /* Out State */ out_data_d[RNNInOutKind::HiddenState]))); descs.push_back(desc); @@ -623,9 +634,9 @@ void MKLDNNRNN::createDescriptor(const std::vector &inputDesc, new lbr_gru_forward::desc(prop_kind::forward_scoring, direction, /* In Data */ in_data_d[RNNInOutKind::Layer], /* In State */ in_data_d[RNNInOutKind::HiddenState], - /* Weights data */ w_data_d, - /* Weights state */ w_state_d, - /* Bias */ w_bias_d, + /* Weights data */ *w_data_d, + /* Weights state */ *w_state_d, + /* Bias */ *w_bias_d, /* Out Data */ out_data_d[RNNInOutKind::Layer], /* Out State */ out_data_d[RNNInOutKind::HiddenState]))); descs.push_back(desc); @@ -636,9 +647,9 @@ void MKLDNNRNN::createDescriptor(const std::vector &inputDesc, /* In Data */ in_data_d[RNNInOutKind::Layer], /* In State */ in_data_d[RNNInOutKind::HiddenState], /* In State C */ in_data_d[RNNInOutKind::CellState], - /* Weights data */ w_data_d, - /* Weights state */ w_state_d, - /* Bias */ w_bias_d, + /* Weights data */ *w_data_d, + /* Weights state */ *w_state_d, + /* Bias */ *w_bias_d, /* Out Data */ out_data_d[RNNInOutKind::Layer], /* Out State */ out_data_d[RNNInOutKind::HiddenState], /* Out State C */ out_data_d[RNNInOutKind::CellState]))); @@ -649,21 +660,21 @@ void MKLDNNRNN::createDescriptor(const std::vector &inputDesc, } // Fill supported config - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; for (size_t i = 0; i < inputDesc.size(); i++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = inputDesc[i]; + dataConfig.desc = inputDesc[i]->clone(); config.inConfs.push_back(dataConfig); } for (size_t i = 0; i < outputDesc.size(); i++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = outputDesc[i]; + dataConfig.desc = outputDesc[i]->clone(); config.outConfs.push_back(dataConfig); } @@ -705,9 +716,9 @@ void MKLDNNRNN::execute(mkldnn::stream strm) { args[state_o_tags[s]] = getChildEdgesAtPort(s)[0]->getMemoryPtr()->GetPrimitive(); } } else { - ptrdiff_t n_ports_with_init_states = outDims.size() - 1; // first is a sequence data + size_t n_ports_with_init_states = outputShapes.size() - 1; // first is a sequence data for (size_t s = 0; s < std::min(S, n_ports_with_init_states); s++) { - if (s < inDims.size()) { + if (s < outputShapes.size()) { args[state_o_tags[s]] = getChildEdgesAtPort(s+1)[0]->getMemoryPtr()->GetPrimitive(); } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h index 7b42760a425255..0a2bd93d3d9d3a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h @@ -19,8 +19,8 @@ class MKLDNNRNN : public MKLDNNNode { void getSupportedDescriptors() override; void createPrimitive() override; bool created() const override; - void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) override; + void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) override; void execute(mkldnn::stream strm) override; @@ -40,6 
+40,8 @@ class MKLDNNRNN : public MKLDNNNode { void copyWeightsData(); private: + using MKLDNNMemoryDescPtr = std::unique_ptr; + InferenceEngine::Precision runtimePrecision; /** Specify mode Cell or Seq. true - Cell, false - Seq */ bool is_cell = false; @@ -57,15 +59,15 @@ class MKLDNNRNN : public MKLDNNNode { mkldnn::algorithm cell_act = mkldnn::algorithm::eltwise_tanh; // Internal attributes - ptrdiff_t N = 0; /**< Batch value */ - ptrdiff_t T = 0; /**< Sequence value */ - ptrdiff_t DC = 0; /**< Input data channel size */ - ptrdiff_t SC = 0; /**< State channel size value */ - ptrdiff_t G = 0; /**< Gate size. LSTM - 4, GRU - 3, RNN - 1 */ - ptrdiff_t Gb = 0; /**< Gate size for biases. Gb = GRU_lbr ? G+1 : G */ - ptrdiff_t S = 2; /**< Num of state. LSTM - 2, GRU & RNN - 1 */ - const ptrdiff_t L = 1; /**< What is it??. Constant for mkldnn impl */ - const ptrdiff_t D = 1; /**< Num of direction. 1 or 2 */ + size_t N = 0; /**< Batch value */ + size_t T = 0; /**< Sequence value */ + size_t DC = 0; /**< Input data channel size */ + size_t SC = 0; /**< State channel size value */ + size_t G = 0; /**< Gate size. LSTM - 4, GRU - 3, RNN - 1 */ + size_t Gb = 0; /**< Gate size for biases. Gb = GRU_lbr ? G+1 : G */ + size_t S = 2; /**< Num of state. LSTM - 2, GRU & RNN - 1 */ + const size_t L = 1; /**< What is it??. Constant for mkldnn impl */ + const size_t D = 1; /**< Num of direction. 1 or 2 */ std::vector in_data_d; std::vector out_data_d; @@ -76,9 +78,9 @@ class MKLDNNRNN : public MKLDNNNode { CellState = 2 }; - MKLDNNMemoryDesc w_data_d; - MKLDNNMemoryDesc w_state_d; - MKLDNNMemoryDesc w_bias_d; + MKLDNNMemoryDescPtr w_data_d; + MKLDNNMemoryDescPtr w_state_d; + MKLDNNMemoryDescPtr w_bias_d; std::vector in_data_dims; std::vector out_data_dims; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.cpp index 1aa7752f4560f1..0517350e09c6c1 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.cpp @@ -73,31 +73,31 @@ void MKLDNNROIAlignNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getChildEdges().size(); - if (getParentEdgeAt(0)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getParentEdgeAt(0)->getDims().ndims(); + if (getParentEdgeAt(0)->getShape().getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getParentEdgeAt(0)->getShape().getRank(); } - if (getParentEdgeAt(1)->getDims().ndims() != 2) { - IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getDims().ndims(); + if (getParentEdgeAt(1)->getShape().getRank() != 2) { + IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getShape().getRank(); } - if (getParentEdgeAt(2)->getDims().ndims() != 1) { - IE_THROW() << errorPrefix << "doesn't support 2nd input with rank: " << getParentEdgeAt(2)->getDims().ndims(); + if (getParentEdgeAt(2)->getShape().getRank() != 1) { + IE_THROW() << errorPrefix << "doesn't support 2nd input with rank: " << getParentEdgeAt(2)->getShape().getRank(); } - if (getChildEdgeAt(0)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getDims().ndims(); + if (getChildEdgeAt(0)->getShape().getRank() != 4) { + IE_THROW() << 
errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getShape().getRank(); } - if (getParentEdgeAt(1)->getDims()[1] != 4) { + if (getParentEdgeAt(1)->getShape().getStaticDims()[1] != 4) { IE_THROW() << errorPrefix << "has invalid shape on 1st input: [" - << getParentEdgeAt(1)->getDims()[0] << "," << getParentEdgeAt(1)->getDims()[1] << "]"; + << getParentEdgeAt(1)->getShape().getStaticDims()[0] << "," << getParentEdgeAt(1)->getShape().getStaticDims()[1] << "]"; } - if (getParentEdgeAt(1)->getDims()[0] != getParentEdgeAt(2)->getDims()[0]) { + if (getParentEdgeAt(1)->getShape().getStaticDims()[0] != getParentEdgeAt(2)->getShape().getStaticDims()[0]) { IE_THROW() << errorPrefix << "has different sizes of inputs for proposals (" - << getParentEdgeAt(1)->getDims()[0] << ") and indexes (" - << getParentEdgeAt(2)->getDims()[0] << ")"; + << getParentEdgeAt(1)->getShape().getStaticDims()[0] << ") and indexes (" + << getParentEdgeAt(2)->getShape().getStaticDims()[0] << ")"; } } @@ -116,7 +116,7 @@ void MKLDNNROIAlignNode::initSupportedPrimitiveDescriptors() { auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(inputPrec0); auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(outputPrec); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; config.inConfs.resize(3); config.outConfs.resize(1); @@ -129,11 +129,13 @@ void MKLDNNROIAlignNode::initSupportedPrimitiveDescriptors() { }; for (auto fmts : supportedFormats) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, fmts.first); - config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(1)->getDims(), memory::data_type::f32, memory::format_tag::nc); - config.inConfs[2].desc = MKLDNNMemoryDesc(getParentEdgeAt(2)->getDims(), memory::data_type::s32, memory::format_tag::x); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, fmts.second); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, fmts.second}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, fmts.first); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), memory::data_type::f32, + memory::format_tag::nc); + config.inConfs[2].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(2)->getShape().getStaticDims(), memory::data_type::s32, + memory::format_tag::x); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, fmts.second); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } } @@ -178,8 +180,8 @@ void MKLDNNROIAlignNode::executeSpecified() { auto dstBlockDesc = dstMemory.GetDescriptor().data.format_desc.blocking; int blockSize = srcBlockDesc.inner_nblks > 0 ? 
srcBlockDesc.inner_blks[0] : 1; - auto isPlainFmt = srcMemory0.GetDesc().isPlainFormat(); - auto isNhwcFmt = srcMemory0.GetDesc().isTailCFormat(); + auto isPlainFmt = srcMemory0.GetDesc().hasLayoutType(LayoutType::ncsp); + auto isNhwcFmt = srcMemory0.GetDesc().hasLayoutType(LayoutType::nspc); const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); const auto *srcRoi = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp index a1a7f8329a5c52..23fd252ae2ba38 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp @@ -354,21 +354,21 @@ void MKLDNNROIPoolingNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getChildEdges().size(); - if (getParentEdgeAt(0)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getParentEdgeAt(0)->getDims().ndims(); + if (getParentEdgeAt(0)->getShape().getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getParentEdgeAt(0)->getShape().getRank(); } - if (getParentEdgeAt(1)->getDims().ndims() != 2) { - IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getDims().ndims(); + if (getParentEdgeAt(1)->getShape().getRank() != 2) { + IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getShape().getRank(); } - if (getChildEdgeAt(0)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getDims().ndims(); + if (getChildEdgeAt(0)->getShape().getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getShape().getRank(); } - if (getParentEdgeAt(1)->getDims()[1] != 5) { + if (getParentEdgeAt(1)->getShape().getStaticDims()[1] != 5) { IE_THROW() << errorPrefix << "has invalid shape on 1st input: [" - << getParentEdgeAt(1)->getDims()[0] << "," << getParentEdgeAt(1)->getDims()[1] << "]"; + << getParentEdgeAt(1)->getShape().getStaticDims()[0] << "," << getParentEdgeAt(1)->getShape().getStaticDims()[1] << "]"; } } @@ -388,7 +388,7 @@ void MKLDNNROIPoolingNode::initSupportedPrimitiveDescriptors() { src_data_size = MKLDNNExtensionUtils::sizeOfDataType(dataType); dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(dataType); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; config.inConfs.resize(2); config.inConfs[0].constant = false; @@ -400,7 +400,7 @@ void MKLDNNROIPoolingNode::initSupportedPrimitiveDescriptors() { config.outConfs[0].constant = false; config.outConfs[0].inPlace = -1; - auto parentDims = getParentEdgeAt(0)->getDims(); + auto parentDims = getParentEdgeAt(0)->getShape().getStaticDims(); auto format = mayiuse(avx512_common) ? 
memory::format_tag::nChw16c : memory::format_tag::nChw8c; impl_desc_type impl_type; if (mayiuse(cpu::x64::avx512_common)) { @@ -413,10 +413,10 @@ void MKLDNNROIPoolingNode::initSupportedPrimitiveDescriptors() { impl_type = impl_desc_type::ref; } - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), dataType, format); - config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(1)->getDims(), dataType, memory::format_tag::nc); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), dataType, format); - supportedPrimitiveDescriptors.push_back({config, impl_type, format}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), dataType, format); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), dataType, memory::format_tag::nc); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), dataType, format); + supportedPrimitiveDescriptors.push_back({config, impl_type}); } void MKLDNNROIPoolingNode::createPrimitive() { @@ -428,8 +428,8 @@ void MKLDNNROIPoolingNode::createPrimitive() { const int simd_w = mayiuse(cpu::x64::avx512_common) ? 16 : 8; jpp.c_block = simd_w; - auto inDims = config.inConfs[0].desc.getDims(); - auto outDims = config.outConfs[0].desc.getDims(); + auto inDims = config.inConfs[0].desc->getShape().getStaticDims(); + auto outDims = config.outConfs[0].desc->getShape().getStaticDims(); jpp.mb = outDims[0]; jpp.c = rnd_up(inDims[1], simd_w); @@ -447,8 +447,8 @@ void MKLDNNROIPoolingNode::createPrimitive() { jpp.nb_c_blocking = mayiuse(cpu::x64::avx512_common) ? 15 : 7; auto selectedPD = getSelectedPrimitiveDescriptor(); - jpp.src_prc = selectedPD->getConfig().inConfs[0].desc.getPrecision(); - jpp.dst_prc = selectedPD->getConfig().outConfs[0].desc.getPrecision(); + jpp.src_prc = selectedPD->getConfig().inConfs[0].desc->getPrecision(); + jpp.dst_prc = selectedPD->getConfig().outConfs[0].desc->getPrecision(); jpp.src_data_size = jpp.src_prc.size(); jpp.dst_data_size = jpp.dst_prc.size(); @@ -481,9 +481,9 @@ void MKLDNNROIPoolingNode::execute() { IE_THROW() << "CPU ROI Pooling node with name '" << getName() << "' doesn't have primitive descriptors."; auto config = selectedPrimitiveDescriptor->getConfig(); - auto src_strides = config.inConfs[0].desc.getBlockingDesc().getStrides(); - auto dst_strides = config.outConfs[0].desc.getBlockingDesc().getStrides(); - size_t src_roi_step = config.inConfs[1].desc.getBlockingDesc().getStrides()[0]; + auto src_strides = srcMemory0.GetDescWithType().getStrides(); + auto dst_strides = dstMemory.GetDescWithType().getStrides(); + size_t src_roi_step = srcMemory1.GetDescWithType().getStrides()[0]; int cb_work = impl::utils::div_up(jpp.nb_c, jpp.nb_c_blocking); int MB = jpp.mb; @@ -512,13 +512,18 @@ void MKLDNNROIPoolingNode::execute() { if (roi_pooling_kernel) { arg.bin_area = 0; arg.dst = &dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3]]; + (*roi_pooling_kernel)(&arg); } else { - for (int c = 0; c < c_block; c++) { - dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c] = 0; + for (int cbb_cur = 0; cbb_cur < cb_num; cbb_cur++) { + int ch_blk_cur = cbb * cb_num + cbb_cur; + if (ch_blk_cur >= jpp.nb_c) { + break; // current block work is done + } + for (int c = 0; c < c_block; c++) { + dst[n * dst_strides[0] + ch_blk_cur * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c] = 0; + } } } 
- - (*roi_pooling_kernel)(&arg); } else { size_t roi_off = n * src_roi_step; const auto *src_roi_ptr = &src_roi[roi_off]; @@ -568,18 +573,23 @@ void MKLDNNROIPoolingNode::execute() { arg.kh = hend - hstart; arg.kw = wend - wstart; } else { - for (int c = 0; c < c_block; c++) { - const size_t pool_index = n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c; - if ((hend <= hstart) || (wend <= wstart)) { - dst[pool_index] = 0; - } else { - for (int h = hstart; h < hend; ++h) { - for (int w = wstart; w < wend; ++w) { - float batch_data = src_data[roi_batch_ind * src_strides[0] + cb * src_strides[1] + - h * src_strides[2] + w * src_strides[3] + c]; - - if (batch_data > dst[pool_index]) { - dst[pool_index] = batch_data; + for (int cbb_cur = 0; cbb_cur < cb_num; cbb_cur++) { + int ch_blk_cur = cbb * cb_num + cbb_cur; + if (ch_blk_cur >= jpp.nb_c) { + break; // current block work is done + } + for (int c = 0; c < c_block; c++) { + const size_t pool_index = n * dst_strides[0] + ch_blk_cur * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c; + if ((hend <= hstart) || (wend <= wstart)) { + dst[pool_index] = 0; + } else { + dst[pool_index] = src_data[roi_batch_ind * src_strides[0] + ch_blk_cur * src_strides[1] + + hstart * src_strides[2] + wstart * src_strides[3] + c]; + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + float batch_data = src_data[roi_batch_ind * src_strides[0] + ch_blk_cur * src_strides[1] + + h * src_strides[2] + w * src_strides[3] + c]; + dst[pool_index] = std::fmax(batch_data, dst[pool_index]); } } } @@ -595,18 +605,35 @@ void MKLDNNROIPoolingNode::execute() { float height_scale = (jpp.pooled_h > 1 ? ((roi_end_h_ - roi_start_h_) * (jpp.ih - 1)) / (jpp.pooled_h - 1) : 0); float width_scale = (jpp.pooled_w > 1 ? ((roi_end_w_ - roi_start_w_) * (jpp.iw - 1)) / (jpp.pooled_w - 1) : 0); - float in_y = (jpp.pooled_h > 1 ? (oh * height_scale + roi_start_h_ * (jpp.ih - 1)) : - 0.5 * (roi_start_h_ + roi_end_h_) * (jpp.ih - 1)); - float in_x = (jpp.pooled_w > 1 ? (ow * width_scale + roi_start_w_ * (jpp.iw - 1)) : - 0.5 * (roi_start_w_ + roi_end_w_) * (jpp.iw - 1)); + float in_y, in_x; + // because floating point arithmetic is not exact, some proposals can violate the inequality: + // ((end_h - start_h) * (input_h - 1) / (pooled_h - 1)) * (pooled_h - 1) <= (end_h - start_h) * (input_h - 1), + // and as a result the computed value can exceed the right limit of the proposal, + // if the border case (current_h == pooled_h - 1) is not handled explicitly + if (jpp.pooled_h > 1) { + in_y = (oh == jpp.pooled_h - 1 ? roi_end_h_ * (jpp.ih - 1) : (oh * height_scale + roi_start_h_ * (jpp.ih - 1))); + } else { + in_y = 0.5 * (roi_start_h_ + roi_end_h_) * (jpp.ih - 1); + } + if (jpp.pooled_w > 1) { + in_x = (ow == jpp.pooled_w - 1 ?
roi_end_w_ * (jpp.iw - 1) : (ow * width_scale + roi_start_w_ * (jpp.iw - 1))); + } else { + in_x = 0.5 * (roi_start_w_ + roi_end_w_) * (jpp.iw - 1); + } if (in_y < 0 || in_y > jpp.ih - 1 || in_x < 0 || in_x > jpp.iw - 1) { if (roi_pooling_kernel) { arg.bin_area = 0; arg.dst = &dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3]]; } else { - for (int c = 0; c < c_block; c++) { - dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c] = 0; + for (int cbb_cur = 0; cbb_cur < cb_num; cbb_cur++) { + int ch_blk_cur = cbb * cb_num + cbb_cur; + if (ch_blk_cur >= jpp.nb_c) { + break; // current block work is done + } + for (int c = 0; c < c_block; c++) { + dst[n * dst_strides[0] + ch_blk_cur * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c] = 0; + } } } } else { @@ -635,21 +662,27 @@ void MKLDNNROIPoolingNode::execute() { arg.bin_area = 1; } else { - for (int c = 0; c < 1; c++) { - const float top_left = src_data[roi_batch_ind * src_strides[0] + cb * src_strides[1] + - top_y_index * src_strides[2] + left_x_index * src_strides[3] + c]; - const float top_right = src_data[roi_batch_ind * src_strides[0] + cb * src_strides[1] + - top_y_index * src_strides[2] + right_x_index * src_strides[3] + c]; - const float bottom_left = src_data[roi_batch_ind * src_strides[0] + cb * src_strides[1] + - bottom_y_index * src_strides[2] + left_x_index * src_strides[3] + c]; - const float bottom_right = src_data[roi_batch_ind * src_strides[0] + cb * src_strides[1] + - bottom_y_index * src_strides[2] + right_x_index * src_strides[3] + c]; - - const float top = top_left + (top_right - top_left) * (in_x - left_x_index); - const float bottom = bottom_left + (bottom_right - bottom_left) * (in_x - left_x_index); - - dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c] = - top + (bottom - top) * (in_y - top_y_index); + for (int cbb_cur = 0; cbb_cur < cb_num; cbb_cur++) { + int ch_blk_cur = cbb * cb_num + cbb_cur; + if (ch_blk_cur >= jpp.nb_c) { + break; // current block work is done + } + for (int c = 0; c < c_block; c++) { + const float top_left = src_data[roi_batch_ind * src_strides[0] + ch_blk_cur * src_strides[1] + + top_y_index * src_strides[2] + left_x_index * src_strides[3] + c]; + const float top_right = src_data[roi_batch_ind * src_strides[0] + ch_blk_cur * src_strides[1] + + top_y_index * src_strides[2] + right_x_index * src_strides[3] + c]; + const float bottom_left = src_data[roi_batch_ind * src_strides[0] + ch_blk_cur * src_strides[1] + + bottom_y_index * src_strides[2] + left_x_index * src_strides[3] + c]; + const float bottom_right = src_data[roi_batch_ind * src_strides[0] + ch_blk_cur * src_strides[1] + + bottom_y_index * src_strides[2] + right_x_index * src_strides[3] + c]; + + const float top = top_left + (top_right - top_left) * (in_x - left_x_index); + const float bottom = bottom_left + (bottom_right - bottom_left) * (in_x - left_x_index); + + dst[n * dst_strides[0] + ch_blk_cur * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c] = + top + (bottom - top) * (in_y - top_y_index); + } } } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.cpp index 136ccba9c647d8..410051c7be4b78 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.cpp @@ -41,7 +41,7 @@ MKLDNNRollNode::MKLDNNRollNode(const 
std::shared_ptr& op, const mk IE_THROW() << layerErrorPrefix << " has incorrect number of input/output edges!"; } - shape = inDims[DATA_INDEX].ToSizeVector(); + shape = inputShapes[DATA_INDEX].getStaticDims(); const auto &dataPrecision = getOriginalInputPrecisionAtPort(DATA_INDEX); if (std::find(supportedPrecisionSizes.begin(), supportedPrecisionSizes.end(), dataPrecision.size()) == supportedPrecisionSizes.end()) @@ -52,7 +52,7 @@ MKLDNNRollNode::MKLDNNRollNode(const std::shared_ptr& op, const mk } numOfDims = shape.size(); - if (shape != outDims[0].ToSizeVector()) { + if (shape != outputShapes[0].getStaticDims()) { IE_THROW() << layerErrorPrefix << " has different 'data' input and output dimensions"; } @@ -62,7 +62,7 @@ MKLDNNRollNode::MKLDNNRollNode(const std::shared_ptr& op, const mk IE_THROW() << layerErrorPrefix << " has unsupported 'axes' input precision: " << axesTensorPrec.name(); } - const auto axesTensorRank = inDims[AXES_INDEX].ndims(); + const auto axesTensorRank = inputShapes[AXES_INDEX].getRank(); if (axesTensorRank > 1) { IE_THROW() << layerErrorPrefix << " doesn't support 'axes' input tensor with rank: " << axesTensorRank; } @@ -73,7 +73,7 @@ MKLDNNRollNode::MKLDNNRollNode(const std::shared_ptr& op, const mk IE_THROW() << layerErrorPrefix << " has unsupported 'shift' input precision: " << shiftTensorPrec.name(); } - const auto shiftTensorRank = inDims[SHIFT_INDEX].ndims(); + const auto shiftTensorRank = inputShapes[SHIFT_INDEX].getRank(); if (shiftTensorRank > 1) { IE_THROW() << layerErrorPrefix << " doesn't support 'shift' input tensor with rank: " << shiftTensorRank; } @@ -92,32 +92,31 @@ void MKLDNNRollNode::initSupportedPrimitiveDescriptors() { auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - auto srcDims = getParentEdgeAt(0)->getDims(); + auto srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); - auto dataMemoryFormat = MKLDNNMemory::GetPlainFormat(getParentEdgeAt(0)->getDims()); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; - auto createDataConfig = [](const MKLDNNDims& dims, memory::data_type dataType) -> InferenceEngine::DataConfig { - InferenceEngine::DataConfig dataConfig; + auto createDataConfig = [](const Shape& dims, memory::data_type dataType) -> PortConfig { + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = MKLDNNMemoryDesc(dims, dataType, MKLDNNMemory::GetPlainFormat(dims)); + dataConfig.desc = MKLDNNPlugin::make_unique(dims.getStaticDims(), dataType, MKLDNNMemory::GetPlainFormatByRank(dims.getRank())); return dataConfig; }; - config.inConfs.push_back(createDataConfig(getParentEdgeAt(0)->getDims(), dataType)); - config.inConfs.push_back(createDataConfig(getParentEdgeAt(1)->getDims(), memory::data_type::s32)); - config.inConfs.push_back(createDataConfig(getParentEdgeAt(2)->getDims(), memory::data_type::s32)); + config.inConfs.push_back(createDataConfig(getParentEdgeAt(0)->getShape(), dataType)); + config.inConfs.push_back(createDataConfig(getParentEdgeAt(1)->getShape(), memory::data_type::s32)); + config.inConfs.push_back(createDataConfig(getParentEdgeAt(2)->getShape(), memory::data_type::s32)); - config.outConfs.push_back(createDataConfig(getChildEdgeAt(0)->getDims(), dataType)); + config.outConfs.push_back(createDataConfig(getChildEdgeAt(0)->getShape(), dataType)); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::ref, dataMemoryFormat}); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::ref}); } 
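The recurring change in these hunks is that a node's port configs now own polymorphic memory descriptors, created with make_unique and duplicated via clone(), instead of holding value-type descriptors. Below is a minimal standalone C++ sketch of that ownership pattern; MemoryDescStub, BlockedMemoryDescStub and PortConfigStub are illustrative stand-ins, not the plugin's real classes.

#include <memory>
#include <vector>
#include <cstddef>

struct MemoryDescStub {                               // stand-in for an abstract memory descriptor
    virtual ~MemoryDescStub() = default;
    virtual std::unique_ptr<MemoryDescStub> clone() const = 0;
};

struct BlockedMemoryDescStub : MemoryDescStub {       // stand-in for a concrete blocked descriptor
    std::vector<std::size_t> dims;
    explicit BlockedMemoryDescStub(std::vector<std::size_t> d) : dims(std::move(d)) {}
    std::unique_ptr<MemoryDescStub> clone() const override {
        return std::make_unique<BlockedMemoryDescStub>(*this);
    }
};

struct PortConfigStub {                               // stand-in for a port config entry
    int inPlace = -1;
    bool constant = false;
    std::unique_ptr<MemoryDescStub> desc;             // owned, polymorphic descriptor
};

int main() {
    PortConfigStub in;
    in.desc = std::make_unique<BlockedMemoryDescStub>(std::vector<std::size_t>{1, 3, 224, 224});

    PortConfigStub out;
    out.desc = in.desc->clone();                      // descriptors are cloned, never copied by value
    return 0;
}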
void MKLDNNRollNode::execute(mkldnn::stream strm) { - const auto dataPrecision = getParentEdgeAt(DATA_INDEX)->getDesc().getPrecision(); + const auto dataPrecision = getParentEdgeAt(DATA_INDEX)->getMemory().GetDesc().getPrecision(); const auto& dataTypeSize = dataPrecision.size(); switch (dataTypeSize) { case sizeof(PrecisionTrait::value_type): { @@ -156,7 +155,7 @@ void MKLDNNRollNode::rollImpl() { auto *output = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); std::vector shiftsVector(numOfDims, 0); - const size_t axesLength = axesEdge->getDims()[0]; + const size_t axesLength = axesEdge->getShape().getStaticDims()[0]; for (size_t dim = 0; dim < axesLength ; ++dim) { int32_t currentAxis = axes[dim] < 0 ? axes[dim] + numOfDims : axes[dim]; int32_t shiftSum = shiftsVector[currentAxis] + shifts[dim]; @@ -171,7 +170,7 @@ void MKLDNNRollNode::rollImpl() { const size_t elementSize = sizeof(DataType); const size_t nIterations = totalElements / blockSize; - const auto strides = dataEdge->getDesc().getBlockingDesc().getStrides(); + const auto strides = dataEdge->getMemory().GetDescWithType().getStrides(); parallel_for(nIterations, [&](size_t iter) { size_t start = iter * blockSize; size_t leftBlockStartOffset = start; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp index 5b9692fc562903..af7b36dd7f361b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp @@ -52,9 +52,9 @@ void MKLDNNScatterUpdateNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << " has incorrect number of output edges"; - if (getParentEdgeAt(DATA_ID)->getDims().ndims() < 1 || - getParentEdgeAt(INDICES_ID)->getDims().ndims() < 1 || - getParentEdgeAt(UPDATE_ID)->getDims().ndims() < 1) { + if (getParentEdgeAt(DATA_ID)->getShape().getRank() < 1 || + getParentEdgeAt(INDICES_ID)->getShape().getRank() < 1 || + getParentEdgeAt(UPDATE_ID)->getShape().getRank() < 1) { IE_THROW() << errorPrefix << " do not support scalar input"; } @@ -77,15 +77,15 @@ void MKLDNNScatterUpdateNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - auto srcDataDim = getParentEdgeAt(DATA_ID)->getDims(); - auto indicesDim = getParentEdgeAt(INDICES_ID)->getDims(); - auto updateDim = getParentEdgeAt(UPDATE_ID)->getDims(); - auto dstDataDim = getChildEdgeAt(0)->getDims(); + auto srcDataDim = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); + auto indicesDim = getParentEdgeAt(INDICES_ID)->getShape().getStaticDims(); + auto updateDim = getParentEdgeAt(UPDATE_ID)->getShape().getStaticDims(); + auto dstDataDim = getChildEdgeAt(0)->getShape().getStaticDims(); - size_t srcRank = srcDataDim.ndims(); - size_t indicesRank = indicesDim.ndims(); - size_t updateRank = updateDim.ndims(); - size_t dstRank = dstDataDim.ndims(); + size_t srcRank = srcDataDim.size(); + size_t indicesRank = indicesDim.size(); + size_t updateRank = updateDim.size(); + size_t dstRank = dstDataDim.size(); // common check if (srcRank != dstRank) { @@ -179,7 +179,7 @@ void MKLDNNScatterUpdateNode::initSupportedPrimitiveDescriptors() { bool canBeInplace = getParentEdgeAt(DATA_ID)->getParent()->getChildEdges().size() == 1 && !getParentEdgeAt(DATA_ID)->getParent()->isConstant(); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; if (axisRelaxed) { 
config.inConfs.resize(4); @@ -201,20 +201,22 @@ void MKLDNNScatterUpdateNode::initSupportedPrimitiveDescriptors() { } auto pushDesc = [&](memory::format_tag inFormat, memory::format_tag idxFormat, memory::format_tag updateFormat, memory::format_tag outFormat) { - config.inConfs[DATA_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(DATA_ID)->getDims(), dataType, inFormat); - config.inConfs[INDICES_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(INDICES_ID)->getDims(), indicesType, idxFormat); - config.inConfs[UPDATE_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(UPDATE_ID)->getDims(), dataType, updateFormat); + config.inConfs[DATA_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(DATA_ID)->getShape().getStaticDims(), dataType, inFormat); + config.inConfs[INDICES_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(INDICES_ID)->getShape().getStaticDims(), indicesType, + idxFormat); + config.inConfs[UPDATE_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(UPDATE_ID)->getShape().getStaticDims(), dataType, + updateFormat); if (axisRelaxed) - config.inConfs[AXIS_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(AXIS_ID)->getDims(), + config.inConfs[AXIS_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(AXIS_ID)->getShape().getStaticDims(), MKLDNNExtensionUtils::IEPrecisionToDataType(axisPrec), memory::format_tag::x); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), dataType, outFormat); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, outFormat}); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), dataType, outFormat); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); }; - pushDesc(MKLDNNMemory::GetPlainFormat(memory::dims(getParentEdgeAt(DATA_ID)->getDims())), - MKLDNNMemory::GetPlainFormat(memory::dims(getParentEdgeAt(INDICES_ID)->getDims())), - MKLDNNMemory::GetPlainFormat(memory::dims(getParentEdgeAt(UPDATE_ID)->getDims())), - MKLDNNMemory::GetPlainFormat(memory::dims(getChildEdgeAt(0)->getDims()))); + pushDesc(MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(DATA_ID)->getShape().getRank()), + MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(INDICES_ID)->getShape().getRank()), + MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(UPDATE_ID)->getShape().getRank()), + MKLDNNMemory::GetPlainFormatByRank(getChildEdgeAt(0)->getShape().getRank())); } void MKLDNNScatterUpdateNode::createPrimitive() { @@ -272,8 +274,8 @@ void MKLDNNScatterUpdateNode::execute(mkldnn::stream strm) { uint8_t *indicesPtr = reinterpret_cast(indicesMemPtr->GetPtr()); uint8_t *updatePtr = reinterpret_cast(updateMemPtr->GetPtr()); - SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getDesc().getDims(); - SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getDesc().getDims(); + SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); + SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getShape().getStaticDims(); size_t srcRank = srcDataDim.size(); int axis = 0; if (axisRelaxed) { @@ -309,8 +311,8 @@ void MKLDNNScatterUpdateNode::execute(mkldnn::stream strm) { }); if (scatterUpdateMode == ScatterUpdateMode::ScatterUpdate) { - SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getDesc().getDims(); - SizeVector updateDim = getParentEdgeAt(UPDATE_ID)->getDesc().getDims(); + SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getShape().getStaticDims(); + SizeVector updateDim = getParentEdgeAt(UPDATE_ID)->getShape().getStaticDims(); size_t indicesRank = indicesDim.size(); size_t 
updateRank = updateDim.size(); SizeVector expectUpdateShape = {}; @@ -370,9 +372,9 @@ void MKLDNNScatterUpdateNode::execute(mkldnn::stream strm) { // and indices tensor of shape [i_0, i_1, ..., i_k]. // Updates tensor shape should be [d_0, d_1, ... d_(axis - 1), i_0, i_1, ..., i_k, d_(axis + 1), ..., d_n]. void MKLDNNScatterUpdateNode::scatterUpdate(uint8_t *indices, uint8_t *update, int axis, uint8_t *dstData) { - SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getDesc().getDims(); - SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getDesc().getDims(); - SizeVector updateDim = getParentEdgeAt(UPDATE_ID)->getDesc().getDims(); + SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); + SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getShape().getStaticDims(); + SizeVector updateDim = getParentEdgeAt(UPDATE_ID)->getShape().getStaticDims(); size_t indicesRank = indicesDim.size(); std::vector srcBlockND = getBlockND(srcDataDim); @@ -403,8 +405,8 @@ void MKLDNNScatterUpdateNode::scatterUpdate(uint8_t *indices, uint8_t *update, i // k is indices.shape[-1] and should not be greater than rank of input, q is rank of indicies. // updates is a (q-1)-dimension tensor of replacement-slice-values void MKLDNNScatterUpdateNode::scatterNDUpdate(uint8_t *indices, uint8_t *update, uint8_t *dstData) { - SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getDesc().getDims(); - SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getDesc().getDims(); + SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); + SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getShape().getStaticDims(); size_t indicesRank = indicesDim.size(); std::vector srcBlockND = getBlockND(srcDataDim); @@ -433,9 +435,9 @@ void MKLDNNScatterUpdateNode::scatterNDUpdate(uint8_t *indices, uint8_t *update, // output[i][indices[i][j][k]][k] = updates[i][j][k] if axis = 1, // output[i][j][indices[i][j][k]] = updates[i][j][k] if axis = 2. 
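The comment above states the ScatterElementsUpdate rule element by element. As a quick standalone illustration (plain C++, not the plugin code), the same rule for a small 2-D case with axis = 1 reduces to output[i][indices[i][j]] = updates[i][j]:

#include <array>
#include <cstddef>
#include <cstdio>

int main() {
    // data is 2x3, indices/updates are 2x2; indices select positions along axis 1
    std::array<std::array<float, 3>, 2> data    = {{{1, 2, 3}, {4, 5, 6}}};
    std::array<std::array<int,   2>, 2> indices = {{{0, 2},    {1, 0}}};
    std::array<std::array<float, 2>, 2> updates = {{{10, 30},  {50, 40}}};

    for (std::size_t i = 0; i < data.size(); ++i)
        for (std::size_t j = 0; j < indices[i].size(); ++j)
            data[i][indices[i][j]] = updates[i][j];   // scatter along axis 1

    // data is now {{10, 2, 30}, {40, 50, 6}}
    for (const auto& row : data)
        std::printf("%g %g %g\n", row[0], row[1], row[2]);
    return 0;
}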
void MKLDNNScatterUpdateNode::scatterElementsUpdate(uint8_t *indices, uint8_t *update, int axis, uint8_t *dstData) { - SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getDesc().getDims(); - SizeVector updateDim = getParentEdgeAt(UPDATE_ID)->getDesc().getDims(); - SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getDesc().getDims(); + SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); + SizeVector updateDim = getParentEdgeAt(UPDATE_ID)->getShape().getStaticDims(); + SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getShape().getStaticDims(); size_t updateRank = updateDim.size(); std::vector srcBlockND = getBlockND(srcDataDim); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.cpp index c67a4394ed8cc3..093ee7e82557b4 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.cpp @@ -7,7 +7,7 @@ #include #include "ie_parallel.hpp" #include "mkldnn_select_node.h" -#include +#include #include #include #include "common/cpu_memcpy.h" @@ -129,10 +129,10 @@ void MKLDNNSelectNode::initSupportedPrimitiveDescriptors() { if (inputPrecisionSize != 1 && inputPrecisionSize != 2 && inputPrecisionSize != 4 && inputPrecisionSize != 8) IE_THROW() << errorPrefix << " has unsupported precision: " << inputPrecision << " on 'Then' and 'Else' inputs"; - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, conditionPrecision}, - {TensorDescCreatorTypes::ncsp, inputPrecision}, - {TensorDescCreatorTypes::ncsp, inputPrecision}}, - {{TensorDescCreatorTypes::ncsp, inputPrecision}}, + addSupportedPrimDesc({{LayoutType::ncsp, conditionPrecision}, + {LayoutType::ncsp, inputPrecision}, + {LayoutType::ncsp, inputPrecision}}, + {{LayoutType::ncsp, inputPrecision}}, impl_desc_type::ref_any); } @@ -180,8 +180,8 @@ void MKLDNNSelectNode::execute_impl() { } void MKLDNNSelectNode::execute(mkldnn::stream strm) { - const size_t condPrecSize = getParentEdgeAt(CONDITION)->getDesc().getPrecision().size(); - const size_t inputsPrecSize = getParentEdgeAt(THEN)->getDesc().getPrecision().size(); + const size_t condPrecSize = getParentEdgeAt(CONDITION)->getMemory().GetDesc().getPrecision().size(); + const size_t inputsPrecSize = getParentEdgeAt(THEN)->getMemory().GetDesc().getPrecision().size(); switch (condPrecSize) { case 1: { @@ -192,7 +192,7 @@ void MKLDNNSelectNode::execute(mkldnn::stream strm) { case 8: { execute_impl(); break; } default: IE_THROW() << "Select layer doesn't support 'Then' and 'Else' inputs' precision: " - + std::string(getParentEdgeAt(THEN)->getDesc().getPrecision().name()); + + std::string(getParentEdgeAt(THEN)->getMemory().GetDesc().getPrecision().name()); } break; } @@ -204,13 +204,13 @@ void MKLDNNSelectNode::execute(mkldnn::stream strm) { case 8: { execute_impl(); break; } default: IE_THROW() << "Select layer doesn't support 'Then' and 'Else' inputs' precision: " - + std::string(getParentEdgeAt(THEN)->getDesc().getPrecision().name()); + + std::string(getParentEdgeAt(THEN)->getMemory().GetDesc().getPrecision().name()); } break; } default: { IE_THROW() << "Select layer doesn't support 'Condition' inputs' precision: " - + std::string(getParentEdgeAt(CONDITION)->getDesc().getPrecision().name()); + + std::string(getParentEdgeAt(CONDITION)->getMemory().GetDesc().getPrecision().name()); } } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_shuffle_channels_node.cpp 
b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_shuffle_channels_node.cpp index 95b00af386be31..f83ddfed0d0a67 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_shuffle_channels_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_shuffle_channels_node.cpp @@ -7,7 +7,7 @@ #include #include #include -#include "common/tensor_desc_creator.h" +#include "common/blocked_desc_creator.h" #include "common/cpu_memcpy.h" #include "utils/general_utils.h" @@ -95,8 +95,8 @@ void MKLDNNShuffleChannelsNode::initSupportedPrimitiveDescriptors() { } // use ncsp as default for non-quantized networks and nspc for quantized - auto firstCreatorType = isInQuantizedGraph ? TensorDescCreatorTypes::nspc : TensorDescCreatorTypes::ncsp; - auto secondCreatorType = isInQuantizedGraph ? TensorDescCreatorTypes::ncsp : TensorDescCreatorTypes::nspc; + auto firstCreatorType = isInQuantizedGraph ? LayoutType::nspc : LayoutType::ncsp; + auto secondCreatorType = isInQuantizedGraph ? LayoutType::ncsp : LayoutType::nspc; addSupportedPrimDesc({{firstCreatorType, precision}}, {{firstCreatorType, precision}}, @@ -106,11 +106,11 @@ void MKLDNNShuffleChannelsNode::initSupportedPrimitiveDescriptors() { impl_type, supportDynamicBatch_); // canUseBlocked if (axis_ != 1) { - addSupportedPrimDesc({{TensorDescCreatorTypes::nCsp8c, precision}}, - {{TensorDescCreatorTypes::nCsp8c, precision}}, + addSupportedPrimDesc({{LayoutType::nCsp8c, precision}}, + {{LayoutType::nCsp8c, precision}}, impl_type, supportDynamicBatch_); - addSupportedPrimDesc({{TensorDescCreatorTypes::nCsp16c, precision}}, - {{TensorDescCreatorTypes::nCsp16c, precision}}, + addSupportedPrimDesc({{LayoutType::nCsp16c, precision}}, + {{LayoutType::nCsp16c, precision}}, impl_type, supportDynamicBatch_); } } @@ -127,7 +127,8 @@ void MKLDNNShuffleChannelsNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) THROW_SHCH_ERROR << "has unidentified preferable primitive descriptor"; - const bool isBlocked = getParentEdgeAt(0)->getMemory().GetDesc().isBlockedCFormat(); + const bool isBlocked = getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c) || + getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c); int batchRank = axis_; int spatialRank = dataRank_ - axis_ - 1; @@ -135,7 +136,7 @@ void MKLDNNShuffleChannelsNode::createPrimitive() { // 2 for decomposed axis dim, 1 for composed spatial dim int reshapedRank = batchRank + 2 + static_cast(spatialRank != 0) + static_cast(isBlocked && (spatialRank == 0)); PermuteParams params; - params.data_size = getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc.getPrecision().size(); + params.data_size = getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc->getPrecision().size(); params.order.resize(reshapedRank, 0); params.src_block_order.resize(reshapedRank); params.dst_block_order.resize(reshapedRank); @@ -158,9 +159,10 @@ void MKLDNNShuffleChannelsNode::createPrimitive() { const int channelDim = 1; if (isBlocked) { - size_t blkSize = getParentEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims().back(); + const auto blkDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + size_t blkSize = blkDesc.getBlockDims().back(); size_t CB = div_up(inShape_[1], blkSize); - SizeVector srcBlockedDims = getParentEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims(); + SizeVector srcBlockedDims = blkDesc.getBlockDims(); if (axis_ > channelDim) { // axis on spatial for (int i = 0; i < batchRank; i++) { params.order[i] = i; @@ -179,7 +181,7 @@ void 
MKLDNNShuffleChannelsNode::createPrimitive() { params.order[2] = 2; params.src_block_dims[2] = spatialShapeSize; } - } else if (getParentEdgeAt(0)->getMemory().GetDesc().isTailCFormat()) { + } else if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc)) { if (axis_ == channelDim) { // axis on channel params.order[0] = 0; params.src_block_dims[0] = inShape_[0]; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp index 53dda785e69115..9fe05e475fc1dc 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp @@ -7,6 +7,7 @@ #include #include #include +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -37,19 +38,20 @@ void MKLDNNSoftMaxNode::getSupportedDescriptors() { if (!getChildEdges().size()) IE_THROW() << "Incorrect number of output edges for layer " << getName(); - if (getParentEdgeAt(0)->getDims().ndims() == 3) { - MKLDNNMemoryDesc in_candidate(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::abc); - createDescriptor({in_candidate}, {}); + if (getParentEdgeAt(0)->getShape().getRank() == 3) { + MemoryDescPtr in_candidate = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + memory::format_tag::abc); + createDescriptor({in_candidate.get()}, {}); } - for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getDims())) { - MKLDNNDims dims = getParentEdgeAt(0)->getDims(); + for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getShape())) { + const auto dims = getParentEdgeAt(0)->getShape().getStaticDims(); if (MKLDNNMemoryDesc(dims, inputDataType, format).blocksExtended()) continue; - MKLDNNMemoryDesc in_candidate(dims, inputDataType, format); + MemoryDescPtr in_candidate = MKLDNNPlugin::make_unique(dims, inputDataType, format); - createDescriptor({in_candidate}, {}); + createDescriptor({in_candidate.get()}, {}); } } @@ -63,7 +65,7 @@ void MKLDNNSoftMaxNode::createPrimitive() { descs[0] = desc; std::shared_ptr selected_desc_ptr = descs[0]; - const PrimitiveDescInfo *selected_pd = getSelectedPrimitiveDescriptor(); + const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) IE_THROW() << "Preferable primitive descriptor is not set for node " << getName() << "."; @@ -92,33 +94,34 @@ bool MKLDNNSoftMaxNode::created() const { return getType() == Softmax; } -void MKLDNNSoftMaxNode::initOptimalPrimitiveDescriptor() { - auto selected_pd = getSelectedPrimitiveDescriptor(); - if (selected_pd == nullptr) - IE_THROW() << "Preferable primitive descriptor is not set."; - auto config = selected_pd->getConfig(); - if (isInitConfig(config)) - return; - - if (config.inConfs.size() != 1 || config.outConfs.size() != 1 || - (!isUninitTensorDesc(config.inConfs[0].desc) && - !isUninitTensorDesc(config.outConfs[0].desc) && config.inConfs[0].desc != config.outConfs[0].desc)) - IE_THROW() << "Layer " << getName() << " has incorrect selected config!"; - - if (!isUninitTensorDesc(config.inConfs[0].desc)) { - config.outConfs[0].desc = config.inConfs[0].desc; - } else if (!isUninitTensorDesc(config.outConfs[0].desc)) { - config.inConfs[0].desc = config.outConfs[0].desc; - } else { - config.outConfs[0].desc = config.inConfs[0].desc = getConfiguredInputDesc(config, 0); - } - - initDescriptor(config); -} - -void MKLDNNSoftMaxNode::createDescriptor(const std::vector &inputDesc, - const 
std::vector &outputDesc) { - MKLDNNMemoryDesc in_candidate(inputDesc[0]); + void MKLDNNSoftMaxNode::initOptimalPrimitiveDescriptor() { + auto selected_pd = getSelectedPrimitiveDescriptor(); + if (selected_pd == nullptr) + IE_THROW() << "Preferable primitive descriptor is not set."; + auto config = selected_pd->getConfig(); + if (isConfigDefined(config)) + return; + + if (config.inConfs.size() != 1 || config.outConfs.size() != 1 || + (config.inConfs[0].desc->isDefined() && + config.outConfs[0].desc->isDefined() && !config.inConfs[0].desc->isCompatible(*config.outConfs[0].desc))) + IE_THROW() << "Layer " << getName() << " has incorrect selected config!"; + + if (config.inConfs[0].desc->isDefined()) { + config.outConfs[0].desc = config.inConfs[0].desc->clone(); + } else if (config.outConfs[0].desc->isDefined()) { + config.inConfs[0].desc = config.outConfs[0].desc->clone(); + } else { + config.inConfs[0].desc = getDefinedInputDesc(config, 0); + config.outConfs[0].desc = config.inConfs[0].desc->clone(); + } + + initDescriptor(config); + } + +void MKLDNNSoftMaxNode::createDescriptor(const std::vector &inputDesc, + const std::vector &outputDesc) { + MKLDNNMemoryDesc in_candidate = MemoryDescUtils::convertToMKLDNNMemoryDesc(*inputDesc[0]); MKLDNNDescriptor desc(std::shared_ptr( new softmax_forward::desc(prop_kind::forward_scoring, in_candidate, axis))); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.h index b422eb3f0307cc..fd200cdb1457fa 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.h @@ -17,8 +17,8 @@ class MKLDNNSoftMaxNode : public MKLDNNNode { MKLDNNSoftMaxNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); void initOptimalPrimitiveDescriptor() override; - void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) override; + void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) override; void getSupportedDescriptors() override; void createPrimitive() override; bool created() const override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.cpp index 4702f97e0fb841..1861799f97c32b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.cpp @@ -10,7 +10,7 @@ #include "utils/bfloat16.hpp" #include #include "mkldnn_space_to_batch_node.h" -#include +#include #include using namespace MKLDNNPlugin; @@ -67,32 +67,32 @@ void MKLDNNSpaceToBatchNode::initSupportedPrimitiveDescriptors() { if (supported_precision_sizes.find(precision.size()) == supported_precision_sizes.end()) IE_THROW() << errorPrefix << " has unsupported precision: " << precision.name(); - addSupportedPrimDesc({{TensorDescCreatorTypes::nspc, precision}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}}, - {{TensorDescCreatorTypes::nspc, precision}}, + addSupportedPrimDesc({{LayoutType::nspc, precision}, + {LayoutType::ncsp}, + {LayoutType::ncsp}, + {LayoutType::ncsp}}, + {{LayoutType::nspc, precision}}, impl_desc_type::ref_any); - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, precision}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}}, - 
{{TensorDescCreatorTypes::ncsp, precision}}, + addSupportedPrimDesc({{LayoutType::ncsp, precision}, + {LayoutType::ncsp}, + {LayoutType::ncsp}, + {LayoutType::ncsp}}, + {{LayoutType::ncsp, precision}}, impl_desc_type::ref_any); if (inDims[1] % 8 == 0) { - addSupportedPrimDesc({{TensorDescCreatorTypes::nCsp8c, precision}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}}, - {{TensorDescCreatorTypes::nCsp8c, precision}}, + addSupportedPrimDesc({{LayoutType::nCsp8c, precision}, + {LayoutType::ncsp}, + {LayoutType::ncsp}, + {LayoutType::ncsp}}, + {{LayoutType::nCsp8c, precision}}, impl_desc_type::ref_any); } if (inDims[1] % 16 == 0) { - addSupportedPrimDesc({{TensorDescCreatorTypes::nCsp16c, precision}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}}, - {{TensorDescCreatorTypes::nCsp16c, precision}}, + addSupportedPrimDesc({{LayoutType::nCsp16c, precision}, + {LayoutType::ncsp}, + {LayoutType::ncsp}, + {LayoutType::ncsp}}, + {{LayoutType::nCsp16c, precision}}, impl_desc_type::ref_any); } } @@ -112,15 +112,15 @@ void MKLDNNSpaceToBatchNode::SpaceToBatchKernel() { const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - const auto layout = getParentEdgeAt(0)->getDesc().getLayout(); - const bool blocked = layout != NCHW && layout != NCDHW && layout != NHWC && layout != NDHWC; + const bool blocked = getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c) || + getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c); const auto dimsSize = inDims.size(); auto inShape5D = getShape5D(outDims); auto outShape5D = getShape5D(inDims); auto blockShape = getShape5D(blockShapeIn); - if (layout == NHWC || layout == NDHWC) { + if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc)) { inShape5D.push_back(inShape5D[1]); inShape5D.erase(inShape5D.begin() + 1); outShape5D.push_back(outShape5D[1]); @@ -129,9 +129,10 @@ void MKLDNNSpaceToBatchNode::SpaceToBatchKernel() { blockShape.erase(blockShape.begin() + 1); } - const size_t blockSize = blocked ? getChildEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims().back() : 1lu; - const size_t blockCountInput = getChildEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims()[1]; - const size_t blockCountOutput = getParentEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims()[1]; + const auto outBlkDims = getChildEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); + const size_t blockSize = blocked ? outBlkDims.back() : 1lu; + const size_t blockCountInput = outBlkDims[1]; + const size_t blockCountOutput = getParentEdgeAt(0)->getMemory().GetDescWithType().getBlockDims()[1]; const auto blockRemainder = inShape5D[1] % blockSize; const auto lastBlock = blockRemainder == 0 ? blockSize : blockRemainder; @@ -172,7 +173,7 @@ void MKLDNNSpaceToBatchNode::SpaceToBatchKernel() { oAdd[2] = dimsSize == 5 ? bIdx % blockShapeIn[2] - padsBeginIn[2] : 0lu; bIdx = dimsSize == 5 ? 
bIdx / blockShapeIn[2] : bIdx; oAdd[1] = bIdx % blockShapeIn[1] - padsBeginIn[1]; - if (layout == NHWC || layout == NDHWC) { + if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc)) { oAdd.push_back(oAdd[1]); oAdd.erase(oAdd.begin() + 1); } @@ -226,12 +227,13 @@ void MKLDNNSpaceToBatchNode::SpaceToBatchKernel() { } void MKLDNNSpaceToBatchNode::execute(mkldnn::stream strm) { - switch (getParentEdgeAt(0)->getDesc().getPrecision().size()) { + switch (getParentEdgeAt(0)->getMemory().GetDesc().getPrecision().size()) { case 1: SpaceToBatchKernel::value_type>(); break; case 2: SpaceToBatchKernel::value_type>(); break; case 4: SpaceToBatchKernel::value_type>(); break; default: - IE_THROW() << "SpaceToBatch layer does not support precision '" + std::string(getParentEdgeAt(0)->getDesc().getPrecision().name()) + "'"; + IE_THROW() << "SpaceToBatch layer does not support precision '" + std::string(getParentEdgeAt(0)->getMemory().GetDesc().getPrecision().name()) + + "'"; } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.cpp index 69c3356a2f018e..25003088139af9 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.cpp @@ -6,7 +6,7 @@ #include #include -#include "common/tensor_desc_creator.h" +#include "common/blocked_desc_creator.h" #include #include @@ -64,13 +64,13 @@ MKLDNNSpaceToDepthNode::MKLDNNSpaceToDepthNode(const std::shared_ptr 5) THROW_ERROR << "doesn't support dimensions with rank greater than 5"; - SizeVector dstDims = outDims[0].ToSizeVector(); + SizeVector dstDims = outputShapes[0].getStaticDims(); if (srcDims.size() != dstDims.size()) THROW_ERROR << "has incorrect number of input/output dimensions"; @@ -98,8 +98,8 @@ void MKLDNNSpaceToDepthNode::initSupportedPrimitiveDescriptors() { return; InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(0); - auto srcDims = getParentEdgeAt(0)->getDims(); - const size_t nDims = srcDims.ndims(); + auto srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); + const size_t nDims = srcDims.size(); impl_desc_type impl_type; if (mayiuse(impl::cpu::x64::avx512_common)) { @@ -112,7 +112,7 @@ void MKLDNNSpaceToDepthNode::initSupportedPrimitiveDescriptors() { impl_type = impl_desc_type::ref; } - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; config.inConfs.resize(1); config.outConfs.resize(1); @@ -121,26 +121,26 @@ void MKLDNNSpaceToDepthNode::initSupportedPrimitiveDescriptors() { config.outConfs[0].inPlace = -1; config.outConfs[0].constant = false; - std::vector supportedTypes; + std::vector supportedTypes; if (nDims > 2) { auto canUseBlocked = [=](const size_t block) { return srcDims[1] % block == 0 && (mode == Mode::DEPTH_FIRST ? 
block % blockStep == 0 : true); }; - supportedTypes.push_back(TensorDescCreatorTypes::nspc); + supportedTypes.push_back(LayoutType::nspc); if (canUseBlocked(8lu)) - supportedTypes.push_back(TensorDescCreatorTypes::nCsp8c); + supportedTypes.push_back(LayoutType::nCsp8c); if (canUseBlocked(16lu)) - supportedTypes.push_back(TensorDescCreatorTypes::nCsp16c); + supportedTypes.push_back(LayoutType::nCsp16c); } - supportedTypes.push_back(TensorDescCreatorTypes::ncsp); - auto creators = TensorDescCreator::getCommonCreators(); - auto range = TensorDescCreator::makeFilteredRange(creators, nDims, supportedTypes); + supportedTypes.push_back(LayoutType::ncsp); + auto creators = BlockedDescCreator::getCommonCreators(); + auto range = BlockedDescCreator::makeFilteredRange(creators, nDims, supportedTypes); for (auto itr = range.first; itr != range.second; ++itr) { - config.inConfs[0].desc = itr->second->createDesc(precision, getParentEdgeAt(0)->getDims().ToSizeVector()); - config.outConfs[0].desc = itr->second->createDesc(precision, getChildEdgeAt(0)->getDims().ToSizeVector()); - supportedPrimitiveDescriptors.emplace_back(config, impl_type, MKLDNNMemoryDesc(config.outConfs.front().desc).getFormat()); + config.inConfs[0].desc = itr->second->createUniqueDesc(precision, getParentEdgeAt(0)->getShape().getStaticDims()); + config.outConfs[0].desc = itr->second->createUniqueDesc(precision, getChildEdgeAt(0)->getShape().getStaticDims()); + supportedPrimitiveDescriptors.emplace_back(config, impl_type); } } @@ -154,18 +154,19 @@ void MKLDNNSpaceToDepthNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR << "has unidentified preferable primitive descriptor"; - SizeVector srcDims = getParentEdgeAt(0)->getBlob()->getTensorDesc().getDims(); - SizeVector dstDims = getChildEdgeAt(0)->getBlob()->getTensorDesc().getDims(); + SizeVector srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); + SizeVector dstDims = getChildEdgeAt(0)->getShape().getStaticDims(); size_t nDims = srcDims.size(); const size_t nSpatialDims = nDims - 2; - const bool isBlocked = getParentEdgeAt(0)->getMemory().GetDesc().isBlockedCFormat(); + const bool isBlocked = getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c) || + getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c); const size_t reshapedRank = nDims + nSpatialDims + static_cast(isBlocked) + static_cast(isBlocked && mode == Mode::DEPTH_FIRST); const size_t lastIdx = reshapedRank - 1; size_t firstSpatialOrder = 2; PermuteParams params; - params.data_size = getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc.getPrecision().size(); + params.data_size = getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc->getPrecision().size(); params.order.resize(reshapedRank, 0); params.src_block_order.resize(reshapedRank); params.dst_block_order.resize(reshapedRank); @@ -190,8 +191,8 @@ void MKLDNNSpaceToDepthNode::createPrimitive() { }; if (isBlocked) { - SizeVector srcBlockedDims = getParentEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims(); - SizeVector dstBlockedDims = getChildEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims(); + SizeVector srcBlockedDims = getParentEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); + SizeVector dstBlockedDims = getChildEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); size_t orderShiftForBlocks, orderShiftForDims; if (mode == Mode::BLOCKS_FIRST) { @@ -218,7 +219,7 @@ void MKLDNNSpaceToDepthNode::createPrimitive() { } 
reshapeAndSetPermOrder(orderShiftForBlocks, orderShiftForDims, firstSpatialOrder, dstBlockedDims); - } else if (getParentEdgeAt(0)->getMemory().GetDesc().isTailCFormat()) { + } else if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc)) { srcDims.push_back(srcDims[1]); dstDims.push_back(dstDims[1]); srcDims.erase(srcDims.begin() + 1); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp index 201bebf4e638a6..a95bd0c4f758e3 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp @@ -4,12 +4,13 @@ #include "mkldnn_split_node.h" #include "common/cpu_memcpy.h" -#include "common/tensor_desc_creator.h" +#include "common/blocked_desc_creator.h" #include #include #include #include #include "utils/general_utils.h" +#include #define THROW_ERROR IE_THROW() << "Split layer with name '" << getName() <<"' " @@ -74,17 +75,17 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - auto srcDims = getParentEdgeAt(0)->getDims(); + auto srcShape = getParentEdgeAt(0)->getShape(); auto axis_size = 0; - auto dstFirstDims = getChildEdgeAt(0)->getDims(); - for (size_t i = 0; i < outDims.size(); i++) { - auto o_Dims = outDims[i]; - if (dstFirstDims.ndims() != o_Dims.ndims()) { + auto dstFirstDims = getChildEdgeAt(0)->getShape().getStaticDims(); + for (size_t i = 0; i < outputShapes.size(); i++) { + auto o_Dims = outputShapes[i].getStaticDims(); + if (dstFirstDims.size() != o_Dims.size()) { THROW_ERROR << "only supports output blobs with equal number of dimensions"; } axis_size += o_Dims[axis]; - for (size_t j = 0; j < dstFirstDims.ndims(); j++) { + for (size_t j = 0; j < dstFirstDims.size(); j++) { if (j == axis) continue; if (o_Dims[j] != dstFirstDims[j]) @@ -92,7 +93,7 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { } } dstFirstDims[axis] = axis_size; - if (dstFirstDims.size() != srcDims.size()) + if (std::accumulate(dstFirstDims.begin(), dstFirstDims.end(), 1, std::multiplies()) != srcShape.getElementsCount()) THROW_ERROR << "sizes of input blob and sum of output blobs are not equal."; InferenceEngine::Precision inpPrecision = getOriginalInputPrecisionAtPort(0); @@ -105,18 +106,18 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { } //Set plain and tailC formats - std::vector tdCreatorTypes{ TensorDescCreatorTypes::ncsp, TensorDescCreatorTypes::nspc }; + std::vector tdCreatorTypes{ LayoutType::ncsp, LayoutType::nspc }; //Support channel blocked format - if (srcDims.ndims() > 2) { - for (auto item : { std::make_pair(8lu, TensorDescCreatorTypes::nCsp8c), std::make_pair(16lu, TensorDescCreatorTypes::nCsp16c) }) { - SizeVector blkDims = srcDims.ToSizeVector(); + if (srcShape.getRank() > 2) { + for (auto item : { std::make_pair(8lu, LayoutType::nCsp8c), std::make_pair(16lu, LayoutType::nCsp16c) }) { + SizeVector blkDims = srcShape.getStaticDims(); if (blkDims[channelsPos] % item.first) continue; bool blocked = true; - for (size_t i = 0; i < outDims.size(); i++) { - if (outDims[i].ToSizeVector()[channelsPos] % item.first) { + for (size_t i = 0; i < outputShapes.size(); i++) { + if (outputShapes[i].getStaticDims()[channelsPos] % item.first) { blocked = false; break; } @@ -129,43 +130,37 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { std::vector pdIndexesToReuse; - auto& creatorsMap = 
TensorDescCreator::getCommonCreators(); - auto itrRange = TensorDescCreator::makeFilteredRange(creatorsMap, static_cast(srcDims.ndims()), tdCreatorTypes); + auto& creatorsMap = BlockedDescCreator::getCommonCreators(); + auto itrRange = BlockedDescCreator::makeFilteredRange(creatorsMap, static_cast(srcShape.getRank()), tdCreatorTypes); for (auto itr = itrRange.first; itr != itrRange.second; ++itr) { - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = dynBatchSupport; config.inConfs.resize(INPUTS_NUM); config.inConfs[0].inPlace = -1; config.inConfs[0].constant = false; - config.inConfs[0].desc = itr->second->createDesc(inpPrecision, srcDims.ToSizeVector()); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(itr->second->createDesc(inpPrecision, srcShape.getStaticDims())); config.inConfs[1].inPlace = -1; config.inConfs[1].constant = true; - config.inConfs[1].desc.setDims({1}); - config.inConfs[1].desc.setPrecision(axisPrecision); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(axisPrecision, SizeVector{1}); if (INPUTS_NUM == 3) { - config.inConfs[2].desc = TensorDesc(axisPrecision, SizeVector{outDims.size()}, TensorDesc::getLayoutByDims(SizeVector{outDims.size()})); + config.inConfs[2].desc = MKLDNNPlugin::make_unique(axisPrecision, SizeVector{outputShapes.size()}); config.inConfs[2].constant = true; } - config.outConfs.resize(outDims.size()); - - std::vector outFormats; - - for (size_t i = 0; i < outDims.size(); i++) { - auto o_Dims = outDims[i]; + config.outConfs.resize(outputShapes.size()); + for (size_t i = 0; i < outputShapes.size(); i++) { config.outConfs[i].inPlace = -1; config.outConfs[i].constant = false; - config.outConfs[i].desc = itr->second->createDesc(inpPrecision, o_Dims.ToSizeVector()); - outFormats.push_back(MKLDNNMemoryDesc(config.outConfs[i].desc).getFormat()); + config.outConfs[i].desc = MKLDNNPlugin::make_unique(itr->second->createDesc(inpPrecision, outputShapes[i].getStaticDims())); } - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, outFormats); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref); - if (itr->first == TensorDescCreatorTypes::ncsp) { + if (itr->first == LayoutType::ncsp) { // at least the plain layout can be optimized inplace. 
pdIndexesToReuse.emplace_back(supportedPrimitiveDescriptors.size() - 1); - } else if (itr->first == TensorDescCreatorTypes::nCsp8c || itr->first == TensorDescCreatorTypes::nCsp16c) { + } else if (itr->first == LayoutType::nCsp8c || itr->first == LayoutType::nCsp16c) { if (axis < 2) { pdIndexesToReuse.emplace_back(supportedPrimitiveDescriptors.size() - 1); } @@ -176,12 +171,11 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { for (auto refPdIndex : pdIndexesToReuse) { const auto& refConfig = supportedPrimitiveDescriptors[refPdIndex].getConfig(); auto config = refConfig; - - const auto& order = refConfig.inConfs[0].desc.getBlockingDesc().getOrder(); - const auto& blkDims = refConfig.inConfs[0].desc.getBlockingDesc().getBlockDims(); + const auto inBlockingDesc = refConfig.inConfs[0].desc->as(); + const auto& order = inBlockingDesc->getOrder(); + const auto& blkDims = inBlockingDesc->getBlockDims(); auto numOfDim = blkDims.size(); - std::vector outFormats; SizeVector offsets(numOfDim, 0lu); SizeVector strides(numOfDim); strides.back() = 1lu; @@ -195,49 +189,43 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { } } - config.inConfs[0].desc = TensorDesc(inpPrecision, srcDims.ToSizeVector(), {blkDims, order, offset, offsets, strides}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(inpPrecision, srcShape.getStaticDims(), blkDims, order, offset, offsets, strides); - for (size_t i = 0; i < outDims.size(); i++) { - const auto& outBlkDims = refConfig.outConfs[i].desc.getBlockingDesc().getBlockDims(); - const auto& dims = refConfig.outConfs[i].desc.getDims(); + for (size_t i = 0; i < outputShapes.size(); i++) { + auto outBlockingDesc = refConfig.outConfs[i].desc->as(); + const auto& outBlkDims = outBlockingDesc->getBlockDims(); + const auto& dims = outBlockingDesc->getShape().getStaticDims(); config.outConfs[i].inPlace = 0; - config.outConfs[i].desc = TensorDesc(outPrecision, dims, {outBlkDims, order, offset, offsets, strides}); - outFormats.emplace_back(MKLDNNMemoryDesc(config.outConfs[i].desc).getFormat()); + config.outConfs[i].desc = MKLDNNPlugin::make_unique(outPrecision, dims, outBlkDims, order, offset, offsets, strides); } - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, outFormats); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } // Special nspc -> ncsp case when splitting channels - if (axis == 1 && (dstFirstDims.ndims() == 4 || dstFirstDims.ndims() == 5)) { - InferenceEngine::LayerConfig config; + if (axis == 1 && (dstFirstDims.size() == 4 || dstFirstDims.size() == 5)) { + NodeConfig config; config.dynBatchSupport = dynBatchSupport; config.inConfs.resize(INPUTS_NUM); config.inConfs[0].inPlace = -1; config.inConfs[0].constant = false; - config.inConfs[0].desc = creatorsMap.at(TensorDescCreatorTypes::nspc)->createDesc(inpPrecision, srcDims.ToSizeVector()); + config.inConfs[0].desc = creatorsMap.at(LayoutType::nspc)->createUniqueDesc(inpPrecision, srcShape.getStaticDims()); config.inConfs[1].inPlace = -1; config.inConfs[1].constant = true; - config.inConfs[1].desc.setDims({1}); - config.inConfs[1].desc.setPrecision(axisPrecision); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(axisPrecision, SizeVector{1}); if (INPUTS_NUM == 3) { - config.inConfs[2].desc = TensorDesc(axisPrecision, SizeVector{outDims.size()}, TensorDesc::getLayoutByDims(SizeVector{outDims.size()})); + config.inConfs[2].desc = MKLDNNPlugin::make_unique(axisPrecision, SizeVector{outputShapes.size()}); config.inConfs[2].constant = 
true; } - config.outConfs.resize(outDims.size()); - - std::vector outFormats; - - for (size_t i = 0; i < outDims.size(); i++) { - auto o_Dims = outDims[i]; + config.outConfs.resize(outputShapes.size()); + for (size_t i = 0; i < outputShapes.size(); i++) { config.outConfs[i].inPlace = -1; config.outConfs[i].constant = false; - config.outConfs[i].desc = creatorsMap.at(TensorDescCreatorTypes::ncsp)->createDesc(inpPrecision, o_Dims.ToSizeVector()); - outFormats.push_back(MKLDNNMemoryDesc(config.outConfs[i].desc).getFormat()); + config.outConfs[i].desc = creatorsMap.at(LayoutType::ncsp)->createUniqueDesc(inpPrecision, outputShapes[i].getStaticDims()); } - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, outFormats); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref); } } @@ -252,18 +240,16 @@ void MKLDNNSplitNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR << "Preferable primitive descriptor is not set."; - canUseOptimizedNspc2Ncsp = true; - if (axis != 1) - canUseOptimizedNspc2Ncsp = false; - - if (getParentEdgeAt(0)->getBlob()->getTensorDesc().getLayout() != NHWC && - getParentEdgeAt(0)->getBlob()->getTensorDesc().getLayout() != NDHWC) - canUseOptimizedNspc2Ncsp = false; + auto& memDesc = getParentEdgeAt(0)->getMemoryPtr()->GetDesc(); - for (size_t i = 0; i < getChildEdges().size(); i++) { - if (getChildEdgeAt(i)->getBlob()->getTensorDesc().getLayout() != NCHW && - getChildEdgeAt(i)->getBlob()->getTensorDesc().getLayout() != NCDHW) - canUseOptimizedNspc2Ncsp = false; + canUseOptimizedNspc2Ncsp = false; + if (axis == 1 && one_of(memDesc.getShape().getRank(), 4, 5) && memDesc.hasLayoutType(LayoutType::nspc)) { + canUseOptimizedNspc2Ncsp = true; + for (size_t i = 0; i < getChildEdges().size(); i++) { + auto& childMemDesc = getChildEdgeAt(i)->getMemoryPtr()->GetDesc(); + if (!childMemDesc.hasLayoutType(LayoutType::ncsp)) + canUseOptimizedNspc2Ncsp = false; + } } if (!isOptimized()) { @@ -288,7 +274,7 @@ void MKLDNNSplitNode::execute(mkldnn::stream strm) { } uint8_t* srcData = reinterpret_cast(this->getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - size_t batch = this->getParentEdgeAt(0)->getDims()[0]; + size_t batch = this->getParentEdgeAt(0)->getShape().getStaticDims()[0]; if (batch != MB) optimizedParams.countStrides = optimizedParams.countStrides / batch * MB; @@ -320,50 +306,47 @@ void MKLDNNSplitNode::initOptimalPrimitiveDescriptor() { if (selected_pd == nullptr) THROW_ERROR << "Preferable primitive descriptor is not set."; auto config = selected_pd->getConfig(); - if (isInitConfig(config)) + if (isConfigDefined(config)) return; for (size_t i = 0; i < config.inConfs.size(); i++) { - if (config.inConfs[i].desc.getLayout() == InferenceEngine::Layout::ANY || - !isUninitTensorDesc(config.inConfs[i].desc)) + if (config.inConfs[i].desc->isDefined()) continue; int num = getParentEdgeAt(i)->getOutputNum(); if (getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()) { if (num >= 0) { - if (isUninitTensorDesc(getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc) && - getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].inPlace >= 0) + const auto& parentConfig = getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num]; + if (!parentConfig.desc->isDefined() && parentConfig.inPlace >= 0) getParentEdgeAt(i)->getParent()->initOptimalPrimitiveDescriptor(); - if 
(!isUninitTensorDesc(getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc) && - MKLDNNExtensionUtils::initTensorsAreEqual( - getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc, - config.inConfs[i].desc)) { - config.inConfs[i].desc = getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc; + if (parentConfig.desc->isDefined() && parentConfig.desc->isCompatible(*config.inConfs[i].desc)) { + config.inConfs[i].desc = parentConfig.desc->clone(); continue; } } } - config.inConfs[i].desc = InferenceEngine::TensorDesc(config.inConfs[i].desc.getPrecision(), - config.inConfs[i].desc.getDims(), { - config.inConfs[i].desc.getBlockingDesc().getBlockDims(), - config.inConfs[i].desc.getBlockingDesc().getOrder() - }); + + // reset undefined offsets + config.inConfs[i].desc = MemoryDescUtils::resetOffset(config.inConfs[i].desc.get()); } - if (config.outConfs.size() != outDims.size()) + if (config.outConfs.size() != outputShapes.size()) THROW_ERROR << "has invalid config"; + + auto firstInBlockingDesc = MemoryDescUtils::convertToBlockedDescriptor(*config.inConfs[0].desc); size_t offset = 0; - for (size_t i = 0; i < outDims.size(); i++) { - config.outConfs[i].desc = InferenceEngine::TensorDesc(config.outConfs[i].desc.getPrecision(), - config.outConfs[i].desc.getDims(), { - config.outConfs[i].desc.getBlockingDesc().getBlockDims(), - config.outConfs[i].desc.getBlockingDesc().getOrder(), - config.inConfs[0].desc.getBlockingDesc().getOffsetPadding() + offset, - config.inConfs[0].desc.getBlockingDesc().getOffsetPaddingToData(), - config.inConfs[0].desc.getBlockingDesc().getStrides() - }); + for (size_t i = 0; i < outputShapes.size(); i++) { + auto outBlockingDesc = MemoryDescUtils::convertToBlockedDescriptor(*config.outConfs[i].desc); + config.outConfs[i].desc = MKLDNNPlugin::make_unique(outBlockingDesc.getPrecision(), + outBlockingDesc.getShape().getStaticDims(), + outBlockingDesc.getBlockDims(), + outBlockingDesc.getOrder(), + firstInBlockingDesc.getOffsetPadding() + offset, + firstInBlockingDesc.getOffsetPaddingToData(), + firstInBlockingDesc.getStrides()); + size_t axisSize = 1; - for (size_t j = axis; j < config.outConfs[i].desc.getBlockingDesc().getBlockDims().size(); j++) { - axisSize *= config.outConfs[i].desc.getBlockingDesc().getBlockDims()[j]; + for (size_t j = axis; j < outBlockingDesc.getBlockDims().size(); j++) { + axisSize *= outBlockingDesc.getBlockDims()[j]; } offset += axisSize; } @@ -375,10 +358,9 @@ void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() { // This is needed mostly for the testing purposes, since for the planar layout Split works always in place, we need to enforce // the reference implementation when it is selected in a test to test that piece of code. 
if (!implPriorities.empty() && implPriorities[0] == impl_desc_type::ref) { - auto plain = PartialBlkDesc::makePlain(getParentEdgeAt(0)->getDims().ToSizeVector()); for (size_t i = 0; i < supportedPrimitiveDescriptors.size(); ++i) { auto& pd = supportedPrimitiveDescriptors[i]; - if (PartialBlkDesc::extractFrom(pd.getConfig().inConfs[0].desc) == plain && + if (pd.getConfig().inConfs[0].desc->hasLayoutType(LayoutType::ncsp) && impl_desc_type::ref == pd.getImplementationType()) { selectPrimitiveDescriptorByIndex(static_cast(i)); return; @@ -399,9 +381,7 @@ void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() { if (inNum < 0 || inNum >= parent_spd->getConfig().outConfs.size()) { inNum = 0; } - if (MKLDNNExtensionUtils::initTensorsAreEqual( - supportedPrimitiveDescriptors[i].getConfig().inConfs[0].desc, - parent_spd->getConfig().outConfs[inNum].desc)) { + if (supportedPrimitiveDescriptors[i].getConfig().inConfs[0].desc->isCompatible(*parent_spd->getConfig().outConfs[inNum].desc)) { canSelectPrimitive.push_back(i); } } @@ -425,7 +405,7 @@ void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() { auto childEdge = getChildEdgeAt(i); auto childPtr = childEdge->getChild(); auto& vecChildSpd = childPtr->getSupportedPrimitiveDescriptors(); - const auto& outputDesc = supportedPrimitiveDescriptors[indx].getConfig().outConfs[i].desc; + const auto& outputDesc = supportedPrimitiveDescriptors[indx].getConfig().outConfs[childEdge->getInputNum()].desc; if (!vecChildSpd.empty()) { int inNum = childEdge->getOutputNum(); @@ -437,7 +417,7 @@ void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() { if (inNum >= childSpd.getConfig().inConfs.size()) { inNum = 0; } - if (MKLDNNExtensionUtils::initTensorsAreEqual(outputDesc, childSpd.getConfig().inConfs[inNum].desc)) { + if (outputDesc->isCompatible(*childSpd.getConfig().inConfs[inNum].desc)) { hasMatchDesc = true; break; } @@ -480,11 +460,11 @@ void MKLDNNSplitNode::prepareOptimizedParams() { auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); if (!selectedPrimitiveDescriptor) IE_THROW() << "CPU Split node with name '" << getName() << "' doesn't have primitive descriptors."; - const auto& inpTensorDesc = selectedPrimitiveDescriptor->getConfig().inConfs[0].desc; - const auto outputPortsCount = outDims.size(); + const auto inpTensorDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + const auto outputPortsCount = outputShapes.size(); //find axis order position - const auto& order = inpTensorDesc.getBlockingDesc().getOrder(); + const auto& order = inpTensorDesc.getOrder(); unsigned axisOrderPos = std::numeric_limits::max(); for (size_t i = 0; i < order.size(); ++i) { if (order[i] == axis) { @@ -497,8 +477,8 @@ void MKLDNNSplitNode::prepareOptimizedParams() { } uint8_t srcDataSize = inpTensorDesc.getPrecision().size(); - const auto& srcDims = inpTensorDesc.getBlockingDesc().getBlockDims(); - const auto nDims = srcDims.size(); + const auto& srcDims = inpTensorDesc.getBlockDims(); + const auto getRank = srcDims.size(); optimizedParams.countStrides = 1; for (int i = 0; i < axisOrderPos; i++) @@ -511,8 +491,9 @@ void MKLDNNSplitNode::prepareOptimizedParams() { auto outputEdge = this->getChildEdgesAtPort(i).front(); optimizedParams.dataSize[i] = srcDataSize; - for (size_t j = axisOrderPos; j < nDims; j++) - optimizedParams.dataSize[i] *= outputEdge->getDesc().getBlockingDesc().getBlockDims()[j]; + auto desc = outputEdge->getMemory().GetDesc().as(); + for (size_t j = axisOrderPos; j < getRank; j++) + optimizedParams.dataSize[i] *= 
desc->getBlockDims()[j]; optimizedParams.srcDataStride += optimizedParams.dataSize[i]; } @@ -526,31 +507,32 @@ void MKLDNNSplitNode::prepareOptimizedParams() { void MKLDNNSplitNode::optimizedNspc2Ncsp(size_t MB) { auto parentEdge = getParentEdgeAt(0); - const int ndims = parentEdge->getDims().ndims(); - const size_t IC = parentEdge->getDims()[1]; - const size_t D = ndims == 5 ? parentEdge->getDims()[ndims - 3] : 1; - const size_t H = parentEdge->getDims()[ndims - 2]; - const size_t W = parentEdge->getDims()[ndims - 1]; + const int rank = parentEdge->getShape().getRank(); + const auto parentDims = parentEdge->getShape().getStaticDims(); + const size_t IC = parentDims[1]; + const size_t D = rank == 5 ? parentDims[rank - 3] : 1; + const size_t H = parentDims[rank - 2]; + const size_t W = parentDims[rank - 1]; - auto srcBlob = parentEdge->getBlob(); - auto srcData = srcBlob->cbuffer().as(); - const auto dataSize = srcBlob->getTensorDesc().getPrecision().size(); + auto& srcMem = parentEdge->getMemory(); + auto srcData = reinterpret_cast(srcMem.GetData()); + const auto dataSize = srcMem.GetDesc().getPrecision().size(); const size_t DHW = D*H*W; const size_t strideIB = DHW * IC * dataSize; const size_t strideIW = IC*dataSize; const size_t strideOC = DHW * dataSize; - for (size_t i = 0, sIdx = 0; i < outDims.size(); i++) { + for (size_t i = 0, sIdx = 0; i < outputShapes.size(); i++) { auto dstData = dstMemPtrs[i]; size_t innerSize = 1; - auto dims = outDims[i].ToSizeVector(); + auto dims = outputShapes[i].getStaticDims(); for (size_t j = axis; j < dims.size(); j++) { innerSize *= dims[j]; } - auto srcPtr = srcData + srcBlob->getTensorDesc().offset(sIdx) * dataSize; + auto srcPtr = srcData + srcMem.GetDesc().getElementOffset(sIdx) * dataSize; const size_t OC = dims[1]; const size_t strideOB = OC * strideOC; @@ -572,7 +554,7 @@ void MKLDNNSplitNode::optimizedNspc2Ncsp(size_t MB) { void MKLDNNSplitNode::initializeDstMemPtrs() { dstMemPtrs.clear(); - for (size_t i = 0; i < outDims.size(); ++i) { + for (size_t i = 0; i < outputShapes.size(); ++i) { auto outputEdges = this->getChildEdgesAtPort(i); if (uint8_t* dstData = reinterpret_cast(outputEdges.front()->getMemoryPtr()->GetPtr())) { dstMemPtrs.push_back(dstData); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp index 1b70de9f0f8341..4f98fc1099f2b2 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp @@ -10,7 +10,7 @@ #include "ie_parallel.hpp" #include "caseless.hpp" #include "common/cpu_memcpy.h" -#include "common/tensor_desc_creator.h" +#include "common/blocked_desc_creator.h" #include "utils/general_utils.h" #include "mkldnn_input_node.h" @@ -54,7 +54,7 @@ MKLDNNStridedSliceNode::MKLDNNStridedSliceNode(const std::shared_ptr(op); - const size_t nDims = std::max(inDims[DATA_ID].ndims(), outDims[0].ndims()); + const size_t nDims = std::max(inputShapes[DATA_ID].getRank(), outputShapes[0].getRank()); auto createMask = [&](const std::vector &origMask, const int bit = 0, bool needReverse = false) { std::vector mask(origMask.begin(), origMask.end()); @@ -92,8 +92,8 @@ void MKLDNNStridedSliceNode::getSupportedDescriptors() { params.parametersAreConstant = isConstantNode(getParentEdgesAtPort(BEGIN_ID)[0]->getParent()) && isConstantNode(getParentEdgesAtPort(END_ID)[0]->getParent()); - const SizeVector srcDims = inDims[DATA_ID].ToSizeVector(); - 
const SizeVector dstDims = outDims[0].ToSizeVector(); + const SizeVector srcDims = inputShapes[DATA_ID].getStaticDims(); + const SizeVector dstDims = outputShapes[0].getStaticDims(); const size_t nSrcDims = srcDims.size(); const size_t nDims = std::max(nSrcDims, dstDims.size()); @@ -102,21 +102,21 @@ void MKLDNNStridedSliceNode::getSupportedDescriptors() { if (!getChildEdges().size()) THROW_ERROR << "has incorrect number of output edges"; - beginDims = inDims[BEGIN_ID].ToSizeVector(); + beginDims = inputShapes[BEGIN_ID].getStaticDims(); if (beginDims.size() != 1) THROW_ERROR << " should have begin vector with 1 dimension"; - endDims = inDims[END_ID].ToSizeVector(); + endDims = inputShapes[END_ID].getStaticDims(); if (endDims.size() != 1) THROW_ERROR << "should have end vector with 1 dimension"; if (beginDims[0] != endDims[0]) THROW_ERROR << "should have begin vector with size equal to end vector size"; - if (inDims.size() > STRIDE_ID) { + if (inputShapes.size() > STRIDE_ID) { if (!isConstantNode(getParentEdgesAtPort(STRIDE_ID)[0]->getParent())) params.parametersAreConstant = false; - strideDims = inDims[STRIDE_ID].ToSizeVector(); + strideDims = inputShapes[STRIDE_ID].getStaticDims(); if (strideDims.size() > 1) THROW_ERROR << "should have stride vector with 1 dimension"; if (beginDims[0] != strideDims[0]) @@ -206,11 +206,11 @@ void MKLDNNStridedSliceNode::initSupportedPrimitiveDescriptors() { if (hasStrides) stridePrecision = getOriginalInputPrecisionAtPort(STRIDE_ID); - auto srcDims = getParentEdgeAt(DATA_ID)->getDims(); - auto dstDims = getChildEdgeAt(0)->getDims(); - size_t nDims = srcDims.ndims(); + auto srcDims = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); + auto dstDims = getChildEdgeAt(0)->getShape().getStaticDims(); + size_t nDims = srcDims.size(); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; config.inConfs.resize(getParentEdges().size()); config.inConfs[DATA_ID].inPlace = -1; @@ -225,33 +225,35 @@ void MKLDNNStridedSliceNode::initSupportedPrimitiveDescriptors() { } config.outConfs.resize(1); - std::vector supportedTypes; + std::vector supportedTypes; if (nDims > 2 && params.equalDims) { auto canUseBlocked = [=](const size_t blockSize) { return srcDims[1] % blockSize == 0 && abs(stride[1]) == 1 && (begin[1] > srcDims[1] || begin[1] % blockSize == 0); }; - supportedTypes.push_back(TensorDescCreatorTypes::nspc); + supportedTypes.push_back(LayoutType::nspc); if (canUseBlocked(8lu)) - supportedTypes.push_back(TensorDescCreatorTypes::nCsp8c); + supportedTypes.push_back(LayoutType::nCsp8c); if (canUseBlocked(16lu)) - supportedTypes.push_back(TensorDescCreatorTypes::nCsp16c); + supportedTypes.push_back(LayoutType::nCsp16c); } - supportedTypes.push_back(TensorDescCreatorTypes::ncsp); - auto creators = TensorDescCreator::getCommonCreators(); - auto range = TensorDescCreator::makeFilteredRange(creators, nDims, supportedTypes); + supportedTypes.push_back(LayoutType::ncsp); + auto creators = BlockedDescCreator::getCommonCreators(); + auto range = BlockedDescCreator::makeFilteredRange(creators, nDims, supportedTypes); for (auto itr = range.first; itr != range.second; ++itr) { - config.inConfs[0].desc = itr->second->createDesc(dataPrecision, getParentEdgeAt(DATA_ID)->getDims().ToSizeVector()); - config.inConfs[BEGIN_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(BEGIN_ID)->getDims(), beginDataType, mkldnn::memory::format_tag::x); - config.inConfs[END_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(END_ID)->getDims(), endDataType, 
mkldnn::memory::format_tag::x); + config.inConfs[0].desc = itr->second->createUniqueDesc(dataPrecision, getParentEdgeAt(DATA_ID)->getShape().getStaticDims()); + config.inConfs[BEGIN_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(BEGIN_ID)->getShape().getStaticDims(), beginDataType, + mkldnn::memory::format_tag::x); + config.inConfs[END_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(END_ID)->getShape().getStaticDims(), endDataType, + mkldnn::memory::format_tag::x); if (hasStrides) - config.inConfs[STRIDE_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(STRIDE_ID)->getDims(), + config.inConfs[STRIDE_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(STRIDE_ID)->getShape().getStaticDims(), MKLDNNExtensionUtils::IEPrecisionToDataType(stridePrecision), mkldnn::memory::format_tag::x); - config.outConfs[0].desc = itr->second->createDesc(dataPrecision, getChildEdgeAt(DATA_ID)->getDims().ToSizeVector()); - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, MKLDNNMemoryDesc(config.outConfs.front().desc).getFormat()); + config.outConfs[0].desc = itr->second->createUniqueDesc(dataPrecision, getChildEdgeAt(DATA_ID)->getShape().getStaticDims()); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref); } } @@ -265,16 +267,16 @@ void MKLDNNStridedSliceNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR << "has unidentified preferable primitive descriptor."; - auto srcBlockingDesc = getParentEdgeAt(DATA_ID)->getDesc().getBlockingDesc(); - auto dstBlockingDesc = getChildEdgeAt(0)->getDesc().getBlockingDesc(); + auto srcBlockingDesc = getParentEdgeAt(DATA_ID)->getMemory().GetDescWithType(); + auto dstBlockingDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); auto srcOrder = srcBlockingDesc.getOrder(); params.srcDims = srcBlockingDesc.getBlockDims(); params.dstDims = dstBlockingDesc.getBlockDims(); - params.dataSize = getSelectedPrimitiveDescriptor()->getConfig().inConfs[DATA_ID].desc.getPrecision().size(); + params.dataSize = getSelectedPrimitiveDescriptor()->getConfig().inConfs[DATA_ID].desc->getPrecision().size(); if (params.parametersAreConstant) { size_t realNDims = params.dstDims.size(); - if (!getParentEdgeAt(DATA_ID)->getMemory().GetDesc().isPlainFormat()) + if (!getParentEdgeAt(DATA_ID)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp)) orderParametersByLayouts(); SizeVector newSrcDims, newDstDims; @@ -287,9 +289,10 @@ void MKLDNNStridedSliceNode::createPrimitive() { } void MKLDNNStridedSliceNode::orderParametersByLayouts() { - const bool isPerChannelLayout = getParentEdgeAt(DATA_ID)->getMemory().GetDesc().isTailCFormat(); - const bool isBlockedLayout = getParentEdgeAt(DATA_ID)->getMemory().GetDesc().isBlockedCFormat(); - auto srcOrder = getParentEdgeAt(DATA_ID)->getDesc().getBlockingDesc().getOrder(); + const bool isPerChannelLayout = getParentEdgeAt(DATA_ID)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc); + const bool isBlockedLayout = getParentEdgeAt(DATA_ID)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c) || + getParentEdgeAt(DATA_ID)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c); + auto srcOrder = getParentEdgeAt(DATA_ID)->getMemory().GetDescWithType().getOrder(); if (isBlockedLayout) { const size_t blk = params.srcDims.back(); @@ -553,9 +556,9 @@ void MKLDNNStridedSliceNode::indicesCalculation() { void MKLDNNStridedSliceNode::execute(mkldnn::stream strm) { if (!params.parametersAreConstant) { - auto srcDims = getParentEdgeAt(DATA_ID)->getDims(); - auto dstDims = 
getChildEdgeAt(0)->getDims(); - const size_t nDims = std::max(srcDims.ndims(), dstDims.ndims()); + auto srcDims = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); + auto dstDims = getChildEdgeAt(0)->getShape().getStaticDims(); + const size_t nDims = std::max(srcDims.size(), dstDims.size()); const size_t ellipsisMaskCounter = std::accumulate(ellipsisMask.begin(), ellipsisMask.end(), 0); auto fillingInParameters = [&](std::vector ¶meter, const size_t type, const size_t size, const int value) { @@ -574,15 +577,15 @@ void MKLDNNStridedSliceNode::execute(mkldnn::stream strm) { if (strideDims.size()) fillingInParameters(stride, STRIDE_ID, strideDims[0], 1); - if (srcDims.ndims() > 3 && params.equalDims && ellipsisMaskCounter != 0) - addHiddenDims(srcDims.ndims()); + if (srcDims.size() > 3 && params.equalDims && ellipsisMaskCounter != 0) + addHiddenDims(srcDims.size()); - if (!getParentEdgeAt(DATA_ID)->getMemory().GetDesc().isPlainFormat()) + if (!getParentEdgeAt(DATA_ID)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp)) orderParametersByLayouts(); SizeVector newSrcDims, newDstDims; dimsNormalization(newSrcDims, newDstDims); - dimsGluing(dstDims.ndims(), newSrcDims, newDstDims); + dimsGluing(dstDims.size(), newSrcDims, newDstDims); if (params.dstDims.size() == 1 || params.nDimsForWork != 1) indicesCalculation(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp index d1d80e1b7cba7b..2e1a9f426ef55a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp @@ -10,6 +10,7 @@ #include #include #include +#include "common/blocked_desc_creator.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -17,15 +18,16 @@ using namespace InferenceEngine::details; namespace MKLDNNPlugin { -static InferenceEngine::LayerConfig make_plain_config(const std::shared_ptr& op) { - InferenceEngine::LayerConfig config; +static NodeConfig make_plain_config(const std::shared_ptr& op) { + NodeConfig config; for (size_t i = 0; i < op->get_input_size(); i++) { const auto& dims = op->get_input_shape(i); const auto prec = InferenceEngine::details::convertPrecision(op->get_input_element_type(i)); - InferenceEngine::DataConfig data_conf {}; - data_conf.desc = InferenceEngine::TensorDesc { prec, dims, InferenceEngine::TensorDesc::getLayoutByDims(dims) }; + PortConfig data_conf {}; + auto descCreator = BlockedDescCreator::getCommonCreators().at(LayoutType::ncsp); + data_conf.desc = descCreator->createUniqueDesc(prec, dims); config.inConfs.push_back(data_conf); } @@ -33,8 +35,9 @@ static InferenceEngine::LayerConfig make_plain_config(const std::shared_ptrget_output_shape(i); const auto prec = InferenceEngine::details::convertPrecision(op->get_output_element_type(i)); - InferenceEngine::DataConfig data_conf {}; - data_conf.desc = InferenceEngine::TensorDesc { prec, dims, InferenceEngine::TensorDesc::getLayoutByDims(dims) }; + PortConfig data_conf {}; + auto descCreator = BlockedDescCreator::getCommonCreators().at(LayoutType::ncsp); + data_conf.desc = descCreator->createUniqueDesc(prec, dims); config.outConfs.push_back(data_conf); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.h index 32e5eac70b2047..3ba49ae9ad9dbd 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.h +++ 
b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.h @@ -95,7 +95,7 @@ class MKLDNNTensorIteratorNode : public MKLDNNNode { int loopTripCountIdx = -1; int loopExecutionConditionIdx = -1; - InferenceEngine::LayerConfig config; + NodeConfig config; const std::shared_ptr ngraphOp; }; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.cpp index 663f3a376f8e91..c92193c6e927c0 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.cpp @@ -85,20 +85,18 @@ void MKLDNNTileNode::initSupportedPrimitiveDescriptors() { precision.size() != sizeof(PrecisionTrait::value_type)) { IE_THROW() << errorPrefix << " has unsupported input precision: " << precision; } - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - auto& inDims = getParentEdgeAt(0)->getDims(); - memory::format_tag fmt = MKLDNNMemory::GetPlainFormat(inDims); + auto descCreator = BlockedDescCreator::getCommonCreators().at(LayoutType::ncsp); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; config.inConfs.resize(2); config.outConfs.resize(1); - config.inConfs[TILE_INPUT].desc = MKLDNNMemoryDesc(getParentEdgeAt(TILE_INPUT)->getDims(), inputDataType, fmt); - config.inConfs[TILE_REPEATS].desc = MKLDNNMemoryDesc(getParentEdgeAt(TILE_REPEATS)->getDims(), memory::data_type::s32, memory::format_tag::x); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), inputDataType, fmt); + config.inConfs[TILE_INPUT].desc = descCreator->createUniqueDesc(precision, getParentEdgeAt(TILE_INPUT)->getShape().getStaticDims()); + config.inConfs[TILE_REPEATS].desc = descCreator->createUniqueDesc(Precision::I32, getParentEdgeAt(TILE_REPEATS)->getShape().getStaticDims()); + config.outConfs[0].desc = descCreator->createUniqueDesc(precision, getChildEdgeAt(0)->getShape().getStaticDims()); config.outConfs[0].inPlace = noTiling ? 
0 : -1; - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, fmt}); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } void MKLDNNTileNode::createPrimitive() { @@ -135,13 +133,13 @@ void MKLDNNTileNode::execute(mkldnn::stream strm) { m_inner_dim *= batchToProcess(); } - if (m_inner_dim == 1 && m_outer_dim % 8 == 0 && srcMemory.GetDesc().isBlockedCFormat(8)) { + if (m_inner_dim == 1 && m_outer_dim % 8 == 0 && srcMemory.GetDesc().hasLayoutType(LayoutType::nCsp8c)) { /* * We may enable tile processing directly to appropriate output format (nChw8c) */ m_inner_dim *= 8; m_outer_dim /= 8; - } else if (m_inner_dim == 1 && m_outer_dim % 16 == 0 && srcMemory.GetDesc().isBlockedCFormat(16)) { + } else if (m_inner_dim == 1 && m_outer_dim % 16 == 0 && srcMemory.GetDesc().hasLayoutType(LayoutType::nCsp16c)) { /* * We may enable tile processing directly to appropriate output format (nChw16c) */ @@ -149,7 +147,7 @@ void MKLDNNTileNode::execute(mkldnn::stream strm) { m_outer_dim /= 16; } - m_inner_dim *= srcMemory.GetDesc().GetElementSize(); + m_inner_dim *= srcMemory.GetDesc().getPrecision().size(); for (int i = 0; i < m_outer_dim; ++i) { for (int t = 0; t < tiles; ++t) { cpu_memcpy(dst_ptr, src_ptr, m_inner_dim); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_topk_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_topk_node.cpp index 1c78c44b48df5a..f3fa2e69b5fa8d 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_topk_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_topk_node.cpp @@ -84,14 +84,14 @@ void MKLDNNTopKNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - std::vector outDataConf; + std::vector outDataConf; outDataConf.reserve(getOriginalOutputsNumber()); - outDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::FP32); + outDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); for (int i = 1; i < getOriginalOutputsNumber(); ++i) - outDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::I32); + outDataConf.emplace_back(LayoutType::ncsp, Precision::I32); - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::I32}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::I32}}, outDataConf, impl_desc_type::ref_any); } @@ -102,24 +102,24 @@ void MKLDNNTopKNode::execute(mkldnn::stream strm) { float* dst_data = nullptr; int* dst_idx = nullptr; - if (outDims.size() == 1) { + if (outputShapes.size() == 1) { if (getOriginalOutputPrecisionAtPort(0) == Precision::FP32) { dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); } else { dst_idx = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); } - SizeVector dstDims = getChildEdgesAtPort(0)[0]->getDims().ToSizeVector(); + SizeVector dstDims = getChildEdgesAtPort(0)[0]->getShape().getStaticDims(); if (dstDims[axis] != static_cast(src_k)) { std::string errorMsg = "Output tensor dimension mismatch"; IE_THROW() << errorMsg; } - } else if (outDims.size() == 2) { + } else if (outputShapes.size() == 2) { dst_data = reinterpret_cast(getChildEdgesAtPort(TOPK_VALUE)[0]->getMemoryPtr()->GetPtr()); - SizeVector dst_data_dims = getChildEdgesAtPort(TOPK_VALUE)[0]->getDims().ToSizeVector(); + SizeVector dst_data_dims = getChildEdgesAtPort(TOPK_VALUE)[0]->getShape().getStaticDims(); dst_idx = 
reinterpret_cast(getChildEdgesAtPort(TOPK_INDEX)[0]->getMemoryPtr()->GetPtr()); - SizeVector dst_idx_dims = getChildEdgesAtPort(TOPK_INDEX)[0]->getDims().ToSizeVector(); + SizeVector dst_idx_dims = getChildEdgesAtPort(TOPK_INDEX)[0]->getShape().getStaticDims(); if (dst_idx_dims[axis] != static_cast(src_k) || dst_data_dims[axis] != static_cast(src_k)) { std::string errorMsg = "Output tensors dimension mismatch"; @@ -133,7 +133,7 @@ void MKLDNNTopKNode::execute(mkldnn::stream strm) { if (src_dims[axis] < static_cast(src_k)) src_k = src_dims[axis]; - SizeVector in_dims = getParentEdgeAt(TOPK_DATA)->getDims().ToSizeVector(); + SizeVector in_dims = getParentEdgeAt(TOPK_DATA)->getShape().getStaticDims(); if (src_k == 1) { if (is_last_dim) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.cpp index 49bc1bb695dd1b..5ea5b902e3e212 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.cpp @@ -10,7 +10,7 @@ #include #include "ie_parallel.hpp" #include "utils/bfloat16.hpp" - +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -66,7 +66,7 @@ void MKLDNNTransposeNode::initSupportedPrimitiveDescriptors() { auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(prec); auto inputOrderDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(1)); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; config.inConfs.resize(2); config.outConfs.resize(1); @@ -74,53 +74,66 @@ void MKLDNNTransposeNode::initSupportedPrimitiveDescriptors() { config.inConfs[0].constant = false; config.outConfs[0].inPlace = -1; config.outConfs[0].constant = false; - config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(1)->getDims(), inputOrderDataType, memory::format_tag::x); - if (getParentEdgeAt(0)->getDims().ndims() == 4) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nchw); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nchw); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nchw}); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), inputOrderDataType, + memory::format_tag::x); + if (getParentEdgeAt(0)->getShape().getRank() == 4) { + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + memory::format_tag::nchw); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, + memory::format_tag::nchw); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); - auto srcDims = getParentEdgeAt(0)->getDims(); + auto srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); if (srcDims[1] % 8 == 0) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nChw8c); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nChw8c}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + memory::format_tag::nChw8c); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } if (srcDims[1] % 16 == 0) { - 
config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nChw16c); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nChw16c}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + memory::format_tag::nChw16c); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } if (prec == Precision::FP32 || prec == Precision::I8 || prec == Precision::U8) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nhwc); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nhwc); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nhwc}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + memory::format_tag::nhwc); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, + memory::format_tag::nhwc); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } - } else if (getParentEdgeAt(0)->getDims().ndims() == 5) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::ncdhw); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::ncdhw); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::ncdhw}); + } else if (getParentEdgeAt(0)->getShape().getRank() == 5) { + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + memory::format_tag::ncdhw); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, + memory::format_tag::ncdhw); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); - auto srcDims = getParentEdgeAt(0)->getDims(); + auto srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); if (srcDims[1] % 8 == 0) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nCdhw8c); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nCdhw8c}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + memory::format_tag::nCdhw8c); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } if (srcDims[1] % 16 == 0) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nCdhw16c); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nCdhw16c}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + memory::format_tag::nCdhw16c); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } if (prec == Precision::FP32 || prec == Precision::I8 || prec == Precision::U8) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::ndhwc); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::ndhwc); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, 
memory::format_tag::ndhwc}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + memory::format_tag::ndhwc); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, + memory::format_tag::ndhwc); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } } else { // general plain case - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType); supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } } @@ -135,23 +148,22 @@ void MKLDNNTransposeNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; - if (getParentEdgeAt(0)->getMemory().GetDesc().isPlainFormat() && + if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp) && std::find(optimizedOrders.begin(), optimizedOrders.end(), order) != optimizedOrders.end()) { isOptimized = true; return; } PermuteParams params; - params.data_size = getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc.getPrecision().size(); + params.data_size = getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc->getPrecision().size(); params.order = order; + auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + params.src_block_dims = srcDesc.getBlockDims(); + params.src_block_order = srcDesc.getOrder(); - auto srcDesc = getParentEdgeAt(0)->getDesc(); - params.src_block_dims = srcDesc.getBlockingDesc().getBlockDims(); - params.src_block_order = srcDesc.getBlockingDesc().getOrder(); - - auto dstDesc = getChildEdgeAt(0)->getDesc(); - params.dst_block_dims = dstDesc.getBlockingDesc().getBlockDims(); - params.dst_block_order = dstDesc.getBlockingDesc().getOrder(); + auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + params.dst_block_dims = dstDesc.getBlockDims(); + params.dst_block_order = dstDesc.getOrder(); permuteKernel = std::unique_ptr(new PermuteKernel(params)); } @@ -263,7 +275,7 @@ void MKLDNNTransposeNode::execute(mkldnn::stream strm) { int MB = batchToProcess(); if (isOptimized) { - const size_t dataSize = getParentEdgeAt(0)->getDesc().getPrecision().size(); + const size_t dataSize = getParentEdgeAt(0)->getMemory().GetDesc().getPrecision().size(); TransposeContext ctx = {this, srcMemPtr, dstMemPtr, MB}; OV_SWITCH(MKLDNNPlugin, TransposeOptimizedEmitter, ctx, dataSize, OV_CASE(1, PrecisionTrait::value_type), diff --git a/inference-engine/src/mkldnn_plugin/normalize_preprocess.cpp b/inference-engine/src/mkldnn_plugin/normalize_preprocess.cpp index 7007c6ad00a13f..3aa58888b58b0e 100644 --- a/inference-engine/src/mkldnn_plugin/normalize_preprocess.cpp +++ b/inference-engine/src/mkldnn_plugin/normalize_preprocess.cpp @@ -5,6 +5,7 @@ #include "normalize_preprocess.h" #include "ie_parallel.hpp" #include "nodes/common/cpu_memcpy.h" +#include "utils/general_utils.h" using namespace MKLDNNPlugin; using namespace InferenceEngine; @@ -12,7 +13,7 @@ using namespace InferenceEngine; NormalizePreprocess::NormalizePreprocess() : meanBuffer(nullptr) { } -void NormalizePreprocess::Load(const MKLDNNDims& inputDims, 
InputInfo::Ptr inputInfo) { +void NormalizePreprocess::Load(const Shape& inputShape, InputInfo::Ptr inputInfo) { PreProcessInfo &pp = inputInfo->getPreProcess(); size_t inChannels = pp.getNumberOfChannels(); if (inChannels == 0) { @@ -20,7 +21,7 @@ void NormalizePreprocess::Load(const MKLDNNDims& inputDims, InputInfo::Ptr input return; } - if (inChannels != inputDims[1]) { + if (!dimsEqualStrong(inChannels, inputShape.getDims()[1])) { IE_THROW() << "channels mismatch between mean and input"; } @@ -76,10 +77,11 @@ void NormalizePreprocess::Load(const MKLDNNDims& inputDims, InputInfo::Ptr input } } -void NormalizePreprocess::NormalizeImage(const MKLDNNDims &inputDims, float *input, InferenceEngine::Layout layout) { +void NormalizePreprocess::NormalizeImage(const Shape &inputShape, float *input, InferenceEngine::Layout layout) { IE_ASSERT(input != nullptr); - if (inputDims.ndims() != 4) { + const auto inputDims = inputShape.getStaticDims(); + if (inputDims.size() != 4) { IE_THROW() << "Expecting input as 4 dimension blob with format NxCxHxW."; } @@ -88,7 +90,7 @@ void NormalizePreprocess::NormalizeImage(const MKLDNNDims &inputDims, float *inp } int MB = inputDims[0]; - int srcSize = inputDims.size() / MB; + int srcSize = inputShape.getElementsCount() / MB; if (meanBuffer && meanBuffer->size()) { const float * meanBufferValues = meanBuffer->readOnly(); diff --git a/inference-engine/src/mkldnn_plugin/normalize_preprocess.h b/inference-engine/src/mkldnn_plugin/normalize_preprocess.h index 1bc6d8431957fd..72ba9fd27a83a3 100644 --- a/inference-engine/src/mkldnn_plugin/normalize_preprocess.h +++ b/inference-engine/src/mkldnn_plugin/normalize_preprocess.h @@ -6,7 +6,7 @@ #include "ie_input_info.hpp" -#include "mkldnn_dims.h" +#include "cpu_shape.h" #include "ie_parallel.hpp" #include #include @@ -18,14 +18,15 @@ class NormalizePreprocess { NormalizePreprocess(); public: - void Load(const MKLDNNDims& inputDims, InferenceEngine::InputInfo::Ptr inputInfo); - void NormalizeImage(const MKLDNNDims &inputDims, float *input, InferenceEngine::Layout layout); + void Load(const Shape& inputShape, InferenceEngine::InputInfo::Ptr inputInfo); + void NormalizeImage(const Shape &inputShape, float *input, InferenceEngine::Layout layout); template::value>::type* = nullptr> - void NormalizeImage(const MKLDNNDims &inputDims, T *input, InferenceEngine::Layout layout) { + void NormalizeImage(const Shape &inputShape, T *input, InferenceEngine::Layout layout) { IE_ASSERT(input != nullptr); - if (inputDims.ndims() != 4) { + const auto inputDims = inputShape.getStaticDims(); + if (inputDims.size() != 4) { IE_THROW() << "Expecting input as 4 dimension blob with format NxCxHxW."; } @@ -34,7 +35,7 @@ class NormalizePreprocess { } int MB = inputDims[0]; - int srcSize = inputDims.size() / MB; + int srcSize = inputShape.getElementsCount() / MB; if (meanBuffer && meanBuffer->size()) { const float * meanBufferValues = meanBuffer->readOnly(); diff --git a/inference-engine/src/mkldnn_plugin/perf_count.h b/inference-engine/src/mkldnn_plugin/perf_count.h index 3fce79b5e689d0..0f230c4c76f301 100644 --- a/inference-engine/src/mkldnn_plugin/perf_count.h +++ b/inference-engine/src/mkldnn_plugin/perf_count.h @@ -46,4 +46,5 @@ class PerfHelper { } // namespace MKLDNNPlugin -#define PERF(_counter) PerfHelper __helper##__counter (_counter->PerfCounter()); +#define GET_PERF(_counter) std::unique_ptr(new PerfHelper(_counter->PerfCounter())) +#define PERF(_need, _counter) auto pc = _need ? 
GET_PERF(_counter) : nullptr; diff --git a/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp b/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp index 17b13034f7f3be..1272183c68beea 100644 --- a/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp +++ b/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp @@ -5,10 +5,13 @@ #include "blob_dump.h" #include "blob_factory.hpp" #include "mkldnn_memory.h" +#include "mkldnn_extension_utils.h" +#include #include "common/memory_desc_wrapper.hpp" #include +#include using namespace InferenceEngine; @@ -35,7 +38,7 @@ struct IEB_HEADER { unsigned long scaling_data_size; }; -static IEB_HEADER prepare_header(const TensorDesc& desc) { +static IEB_HEADER prepare_header(const MemoryDesc& desc) { IEB_HEADER header = {}; header.magic[0] = IEB_MAGIC[0]; @@ -49,19 +52,20 @@ static IEB_HEADER prepare_header(const TensorDesc& desc) { header.precision = desc.getPrecision(); - if (desc.getDims().size() > 7) + if (desc.getShape().getRank() > 7) IE_THROW() << "Dumper support max 7D blobs"; - header.ndims = desc.getDims().size(); + header.ndims = desc.getShape().getRank(); + const auto &dims = desc.getShape().getStaticDims(); for (int i = 0; i < header.ndims; i++) - header.dims[i] = desc.getDims()[i]; + header.dims[i] = dims[i]; header.scaling_axis = NO_SCALES; return header; } -static TensorDesc parse_header(IEB_HEADER &header) { +static MKLDNNMemoryDesc parse_header(IEB_HEADER &header) { if (header.magic[0] != IEB_MAGIC[0] || header.magic[1] != IEB_MAGIC[1] || header.magic[2] != IEB_MAGIC[2] || @@ -72,175 +76,126 @@ static TensorDesc parse_header(IEB_HEADER &header) { header.ver[1] != 1) IE_THROW() << "Dumper cannot parse file. Unsupported IEB format version."; - Precision prc = Precision(static_cast(header.precision)); + const auto prc = MKLDNNExtensionUtils::IEPrecisionToDataType(Precision(static_cast(header.precision))); SizeVector dims(header.ndims); for (int i = 0; i < header.ndims; i++) dims[i] = header.dims[i]; - return TensorDesc {prc, dims, TensorDesc::getLayoutByDims(dims) }; + return MKLDNNMemoryDesc{dims, prc, MKLDNNMemory::GetPlainFormatByRank(dims.size()) }; } +void BlobDumper::prepare_plain_data(const MKLDNNMemoryPtr &memory, std::vector &data) const { + const auto &desc = memory->GetDesc(); + size_t data_size = desc.getShape().getElementsCount(); + const auto size = data_size * desc.getPrecision().size(); + data.resize(size); -bool is_plain(const Blob::Ptr &blob) { - bool res = true; - - auto orig_strides = blob->getTensorDesc().getBlockingDesc().getStrides(); - auto orig_order = blob->getTensorDesc().getBlockingDesc().getOrder(); - auto dims = blob->getTensorDesc().getDims(); - - for (int stride = 1, i = dims.size() - 1; i >= 0; --i) { - if (stride != orig_strides[i] || i != orig_order[i]) res = false; - stride *= dims[i]; - } - - return res; -} - -static Blob::Ptr prepare_plain_data(Blob::Ptr blob) { // check if it already plain - if (is_plain(blob)) return blob; - - Blob::Ptr pln_blob = make_plain_blob(blob->getTensorDesc().getPrecision(), blob->getTensorDesc().getDims()); - pln_blob->allocate(); + if (desc.hasLayoutType(LayoutType::ncsp)) { + cpu_memcpy(data.data(), reinterpret_cast(memory->GetPtr()), size); + return; + } // Copy to plain - MKLDNNMemoryDesc mdesc(blob->getTensorDesc()); - mkldnn::memory::desc desc = mdesc; - mkldnn::impl::memory_desc_wrapper blob_wrp(desc.data); + const void *ptr = memory->GetData(); - size_t data_size = blob->size(); - - // TODO: make it with blob_copy utility - switch 
(blob->getTensorDesc().getPrecision()) { + switch (desc.getPrecision()) { case Precision::FP32: case Precision::I32: { - auto *pln_blob_ptr = pln_blob->buffer().as(); - auto *blob_ptr = blob->buffer().as(); + auto *pln_blob_ptr = reinterpret_cast(data.data()); + auto *blob_ptr = reinterpret_cast(ptr); for (size_t i = 0; i < data_size; i++) - pln_blob_ptr[i] = blob_ptr[blob_wrp.off_l(i)]; + pln_blob_ptr[i] = blob_ptr[desc.getElementOffset(i)]; break; } - case Precision::I16: - case Precision::U16: case Precision::BF16: { - auto *pln_blob_ptr = pln_blob->buffer().as(); - auto *blob_ptr = blob->buffer().as(); - for (size_t i = 0; i < data_size; i++) pln_blob_ptr[i] = blob_ptr[blob_wrp.off_l(i)]; + auto *pln_blob_ptr = reinterpret_cast(data.data()); + auto *blob_ptr = reinterpret_cast(ptr); + for (size_t i = 0; i < data_size; i++) + pln_blob_ptr[i] = blob_ptr[desc.getElementOffset(i)]; break; } case Precision::I8: case Precision::U8: { - auto *pln_blob_ptr = pln_blob->buffer().as(); - auto *blob_ptr = blob->buffer().as(); + auto *pln_blob_ptr = reinterpret_cast(data.data()); + auto *blob_ptr = reinterpret_cast(ptr); for (size_t i = 0; i < data_size; i++) - pln_blob_ptr[i] = blob_ptr[blob_wrp.off_l(i)]; + pln_blob_ptr[i] = blob_ptr[desc.getElementOffset(i)]; break; } default: IE_THROW() << "Dumper. Unsupported precision"; } - - return pln_blob; } void BlobDumper::dump(std::ostream &stream) const { - if (!_blob) - IE_THROW() << "Dumper cannot dump empty Blob"; - - if (_blob->buffer().as() == nullptr) - IE_THROW() << "Dumper cannot dump. Blob is not allocated."; + if (memory == nullptr) - IE_THROW() << "Dumper cannot dump. Memory is not allocated."; - IEB_HEADER header = prepare_header(_blob->getTensorDesc()); - Blob::Ptr pln_blob = prepare_plain_data(_blob); + IEB_HEADER header = prepare_header(memory->GetDesc()); + std::vector data; + prepare_plain_data(this->memory, data); header.data_offset = sizeof(header); - header.data_size = pln_blob->byteSize(); + header.data_size = data.size(); header.scaling_data_offset = 0; header.scaling_data_size = 0; - if (_scales) { - header.scaling_axis = 1; - header.scaling_data_offset = header.data_offset + header.data_size; - header.scaling_data_size = _scales->byteSize(); - } - - stream.write(reinterpret_cast<const char*>(&header), sizeof(header)); - stream.write(pln_blob->buffer().as(), pln_blob->byteSize()); - - if (_scales) { - stream.write(_scales->buffer().as(), _scales->byteSize()); - } + stream.write(reinterpret_cast<const char*>(&header), sizeof(header)); + stream.write(reinterpret_cast<const char*>(data.data()), data.size()); } void BlobDumper::dumpAsTxt(std::ostream &stream) const { - if (!_blob) - IE_THROW() << "Dumper cannot dump empty Blob"; - - if (_blob->buffer().as() == nullptr) - IE_THROW() << "Dumper cannot dump.
Memory is not allocated."; - SizeVector dims = _blob->getTensorDesc().getDims(); + const auto dims = memory->GetDims(); + const auto &desc = memory->GetDesc(); + size_t data_size = desc.getShape().getElementsCount(); // Header like "U8 4D shape: 2 3 224 224 () - stream << _blob->getTensorDesc().getPrecision().name() << " " + stream << memory->GetDesc().getPrecision().name() << " " << dims.size() << "D " << "shape: "; for (size_t d : dims) stream << d << " "; - stream << "(" << _blob->size() << ")" << - " by address 0x" << std::hex << _blob->buffer().as() << std::dec << std::endl; - MKLDNNMemoryDesc mdesc(_blob->getTensorDesc()); - mkldnn::memory::desc desc = mdesc; - mkldnn::impl::memory_desc_wrapper blob_wrp(desc.data); - - size_t data_size = _blob->size(); - switch (_blob->getTensorDesc().getPrecision()) { - case Precision::FP32: { - auto *blob_ptr = _blob->buffer().as(); + stream << "(" << data_size << ")" << + " by address 0x" << std::hex << reinterpret_cast<size_t>(memory->GetData()) << std::dec << std::endl; + const void *ptr = memory->GetData(); + + switch (desc.getPrecision()) { + case Precision::FP32 : { + auto *blob_ptr = reinterpret_cast<const float *>(ptr); for (size_t i = 0; i < data_size; i++) - stream << blob_ptr[blob_wrp.off_l(i)] << std::endl; + stream << blob_ptr[desc.getElementOffset(i)] << std::endl; break; } - case Precision::BF16: - { - auto *blob_ptr = _blob->buffer().as(); + case Precision::BF16: { + auto *blob_ptr = reinterpret_cast<const int16_t *>(ptr); for (size_t i = 0; i < data_size; i++) { - int i16n = blob_ptr[blob_wrp.off_l(i)]; + int i16n = blob_ptr[desc.getElementOffset(i)]; i16n = i16n << 16; - float fn = *(reinterpret_cast<float *>(&i16n)); + float fn = *(reinterpret_cast<float *>(&i16n)); stream << fn << std::endl; } break; } case Precision::I32: { - auto *blob_ptr = _blob->buffer().as(); - for (size_t i = 0; i < data_size; i++) - stream << blob_ptr[blob_wrp.off_l(i)] << std::endl; - break; - } - case Precision::I16: { - auto *blob_ptr = _blob->buffer().as(); - for (size_t i = 0; i < data_size; i++) - stream << static_cast<int>(blob_ptr[blob_wrp.off_l(i)]) << std::endl; - break; - } - case Precision::U16: { - auto *blob_ptr = _blob->buffer().as(); + auto *blob_ptr = reinterpret_cast<const int32_t *>(ptr); for (size_t i = 0; i < data_size; i++) - stream << static_cast<int>(blob_ptr[blob_wrp.off_l(i)]) << std::endl; + stream << blob_ptr[desc.getElementOffset(i)] << std::endl; break; } case Precision::I8: { - auto *blob_ptr = _blob->buffer().as(); + auto *blob_ptr = reinterpret_cast<const int8_t *>(ptr); for (size_t i = 0; i < data_size; i++) - stream << static_cast<int>(blob_ptr[blob_wrp.off_l(i)]) << std::endl; + stream << static_cast<int>(blob_ptr[desc.getElementOffset(i)]) << std::endl; break; } case Precision::U8: { - auto *blob_ptr = _blob->buffer().as(); + auto *blob_ptr = reinterpret_cast<const uint8_t *>(ptr); for (size_t i = 0; i < data_size; i++) - stream << static_cast<int>(blob_ptr[blob_wrp.off_l(i)]) << std::endl; + stream << static_cast<int>(blob_ptr[desc.getElementOffset(i)]) << std::endl; break; } default: @@ -252,29 +207,12 @@ BlobDumper BlobDumper::read(std::istream &stream) { IEB_HEADER header; stream.read(reinterpret_cast<char *>(&header), sizeof(header)); - TensorDesc desc = parse_header(header); - Blob::Ptr blob = make_blob_with_precision(desc); - blob->allocate(); + const auto desc = parse_header(header); + BlobDumper res(desc); stream.seekg(header.data_offset, stream.beg); - stream.read(blob->buffer().as(), header.data_size); + stream.read(reinterpret_cast<char *>(res.getDataPtr()), header.data_size); - BlobDumper res(blob); - - // Parse scales fields.
- if (header.scaling_axis != NO_SCALES) { - if (header.scaling_axis != 1) - IE_THROW() << "Dumper support scaling only for channel dims."; - - size_t scl_size = header.scaling_data_size / sizeof(float); - auto scl = make_blob_with_precision({Precision::FP32, {scl_size}, C}); - scl->allocate(); - - stream.seekg(header.scaling_data_offset, stream.beg); - stream.read(scl->buffer().as(), header.scaling_data_size); - - res._scales = scl; - } return res; } @@ -309,73 +247,4 @@ void BlobDumper::dumpAsTxt(const std::string& dump_path) const { dump_file.close(); } -Blob::Ptr BlobDumper::get() { - return _blob; -} - -template -static void plain_copy(const Blob::Ptr &from, const Blob::Ptr &scls, Blob::Ptr &to) { - auto dims = from->getTensorDesc().getDims(); - - size_t data_size = from->size(); - size_t outer_size = dims[0]; - size_t c_size = dims.size() > 1 ? dims[1] : 1; - size_t inner_size = dims.size() == 4 ? dims[2]*dims[3] : - dims.size() == 3 ? dims[2] : 1; - - auto to_data = to->buffer().as(); - auto from_data = from->buffer().as(); - - if (scls) { - auto scls_data = scls->buffer().as(); - - for (size_t o=0; o < outer_size; o++) - for (size_t c=0; c < c_size; c++) - for (size_t i=0; i < inner_size; i++) - *to_data++ = static_cast(*from_data++) * scls_data[c]; - } else { - for (size_t i=0; i < data_size; i++) - *to_data++ = static_cast(*from_data++); - } -} - -Blob::Ptr BlobDumper::getRealValue() { - if (_blob->getTensorDesc().getPrecision() == Precision::FP32 && !_scales) - return _blob; - - auto res = make_plain_blob(Precision::FP32, _blob->getTensorDesc().getDims()); - res->allocate(); - - switch (_blob->getTensorDesc().getPrecision()) { - case Precision::U8: plain_copy(_blob, _scales, res); break; - case Precision::FP32: plain_copy(_blob, _scales, res); break; - case Precision::I8: plain_copy(_blob, _scales, res); break; - default: IE_THROW() << "Unsupported precesion for getRealValue method."; - } - - return res; -} - - -BlobDumper& BlobDumper::withScales(InferenceEngine::Blob::Ptr scales) { - if ( _blob->getTensorDesc().getDims().size() < 2 || - scales->getTensorDesc().getDims().size() != 1 || - scales->getTensorDesc().getDims()[0] != _blob->getTensorDesc().getDims()[1] || - scales->getTensorDesc().getPrecision() != Precision::FP32) - IE_THROW() << "Dumper cannot use passed scales. Blob has incompatible shape."; - - _scales = scales; - return *this; -} - -BlobDumper& BlobDumper::withoutScales() { - _scales.reset(); - return *this; -} - - -const InferenceEngine::Blob::Ptr& BlobDumper::getScales() const { - return _scales; -} - } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/utils/blob_dump.h b/inference-engine/src/mkldnn_plugin/utils/blob_dump.h index c2cc793e42107b..5271f351d6b492 100644 --- a/inference-engine/src/mkldnn_plugin/utils/blob_dump.h +++ b/inference-engine/src/mkldnn_plugin/utils/blob_dump.h @@ -4,7 +4,7 @@ #pragma once -#include "ie_blob.h" +#include "mkldnn_memory.h" #include @@ -19,15 +19,21 @@ namespace MKLDNNPlugin { * NB! Channel is a second dimension for all blob types. 
*/ class BlobDumper { - InferenceEngine::Blob::Ptr _blob; - InferenceEngine::Blob::Ptr _scales; + MKLDNNMemoryPtr memory; + + void prepare_plain_data(const MKLDNNMemoryPtr &memory, std::vector &data) const; public: BlobDumper() = default; + BlobDumper(const MKLDNNMemoryDesc &desc) { + mkldnn::engine eng(mkldnn::engine::kind::cpu, 0); + memory = std::make_shared(eng); + memory->Create(desc); + } BlobDumper(const BlobDumper&) = default; BlobDumper& operator = (BlobDumper&&) = default; - explicit BlobDumper(const InferenceEngine::Blob::Ptr blob):_blob(blob) {} + explicit BlobDumper(const MKLDNNMemoryPtr &_memory) : memory(_memory) {} static BlobDumper read(const std::string &file_path); static BlobDumper read(std::istream &stream); @@ -38,13 +44,9 @@ class BlobDumper { void dumpAsTxt(const std::string &file_path) const; void dumpAsTxt(std::ostream &stream) const; - BlobDumper& withScales(InferenceEngine::Blob::Ptr scales); - BlobDumper& withoutScales(); - - const InferenceEngine::Blob::Ptr& getScales() const; - - InferenceEngine::Blob::Ptr get(); - InferenceEngine::Blob::Ptr getRealValue(); + void *getDataPtr() const { + return memory->GetPtr(); + } }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/utils/cpu_utils.hpp b/inference-engine/src/mkldnn_plugin/utils/cpu_utils.hpp index 0754e346a6e2d2..0cd3975c39a88c 100644 --- a/inference-engine/src/mkldnn_plugin/utils/cpu_utils.hpp +++ b/inference-engine/src/mkldnn_plugin/utils/cpu_utils.hpp @@ -90,5 +90,4 @@ inline InferenceEngine::Precision normalizeToSupportedPrecision(InferenceEngine: } return precision; } - } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/utils/general_utils.h b/inference-engine/src/mkldnn_plugin/utils/general_utils.h index 952bf43dbf579e..35640212a5555b 100644 --- a/inference-engine/src/mkldnn_plugin/utils/general_utils.h +++ b/inference-engine/src/mkldnn_plugin/utils/general_utils.h @@ -6,6 +6,7 @@ #include #include +#include "cpu_shape.h" namespace MKLDNNPlugin { @@ -40,6 +41,11 @@ constexpr inline bool implication(bool cause, bool cond) { return !cause || !!cond; } +template +std::unique_ptr make_unique(Args&&... 
args) { + return std::unique_ptr(new T(std::forward(args)...)); +} + inline std::string getExceptionDescWithoutStatus(const InferenceEngine::Exception& ex) { std::string desc = ex.what(); IE_SUPPRESS_DEPRECATED_START @@ -70,4 +76,62 @@ std::string vec2str(const std::vector &vec) { return std::string("()"); } +/** + * @brief Compares that two dims are equal and defined + * @param lhs + * first dim + * @param rhs + * second dim + * @return result of comparison + */ +inline bool dimsEqualStrong(size_t lhs, size_t rhs) { + return (lhs == rhs && lhs != Shape::UNDEFINED_DIM && rhs != Shape::UNDEFINED_DIM); +} + +/** + * @brief Compares that two dims are equal or undefined + * @param lhs + * first dim + * @param rhs + * second dim + * @return result of comparison + */ +inline bool dimsEqualWeak(size_t lhs, size_t rhs) { + return (lhs == Shape::UNDEFINED_DIM || rhs == Shape::UNDEFINED_DIM || lhs == rhs); +} + +/** + * @brief Compares that two shapes are equal or undefined + * @param lhs + * first shape + * @param rhs + * second shape + * @param skipAxis + * marks shape axis which shouldn't be validated + * @return order + */ +inline bool dimsEqualWeak(const std::vector& lhs, const std::vector& rhs, size_t skipAxis = Shape::UNDEFINED_DIM) { + if (lhs.size() != rhs.size()) + return false; + + for (size_t i = 0; i < lhs.size(); i++) { + if (i != skipAxis && !dimsEqualWeak(lhs[i], rhs[i])) + return false; + } + + return true; +} + +inline InferenceEngine::Precision getMaxPrecision(std::vector precisions) { + if (!precisions.empty()) { + std::sort(precisions.begin(), precisions.end(), + [](const InferenceEngine::Precision &lhs, const InferenceEngine::Precision &rhs) { + return lhs.size() > rhs.size(); + }); + return precisions[0]; + } + + return InferenceEngine::Precision::UNSPECIFIED; +} + } // namespace MKLDNNPlugin \ No newline at end of file diff --git a/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp b/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp index 1cfbae1ab5ff09..2e0b06c0e4d362 100644 --- a/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp +++ b/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp @@ -9,6 +9,7 @@ #include "ie_common.h" #include "utils/blob_dump.h" #include "utils/debug_capabilities.h" +#include "cpu_memory_desc_utils.h" #include #include @@ -65,14 +66,11 @@ void NodeDumper::dumpInputBlobs(const MKLDNNNodePtr& node) const { auto dump_file = dumpDirName + "/#" + exec_order + "_" + file_name; std::cout << "Dump inputs: " << dump_file << std::endl; - TensorDesc desc = prEdge->getDesc(); + auto& desc = prEdge->getMemory().GetDesc(); if (desc.getPrecision() == Precision::BIN) continue; - BlobDumper dumper(prEdge->getBlob()); - if (pr->ext_scales) - dumper.withScales(pr->ext_scales); - + BlobDumper dumper(prEdge->getMemoryPtr()); dump(dumper, dump_file); } @@ -101,14 +99,11 @@ void NodeDumper::dumpOutputBlobs(const MKLDNNNodePtr& node) const { auto dump_file = dumpDirName + "/#" + exec_order + "_" + file_name; std::cout << "Dump outputs: " << dump_file << std::endl; - TensorDesc desc = childEdge->getDesc(); + auto& desc = childEdge->getMemory().GetDesc(); if (desc.getPrecision() == Precision::BIN) continue; - BlobDumper dumper(childEdge->getBlob()); - if (node->ext_scales) - dumper.withScales(node->ext_scales); - + BlobDumper dumper(childEdge->getMemoryPtr()); dump(dumper, dump_file); } } @@ -126,7 +121,9 @@ void NodeDumper::dumpInternalBlobs(const MKLDNNNodePtr& node) const { if (desc.getPrecision() == Precision::BIN) continue; - BlobDumper 
dumper(blb); + MKLDNNMemoryPtr memory = std::make_shared(node->getEngine()); + memory->Create(MemoryDescUtils::convertToMKLDNNMemoryDesc(desc), blb->buffer()); + BlobDumper dumper(memory); dump(dumper, dump_file); } } diff --git a/inference-engine/src/offline_transformations/src/moc_transformations.cpp b/inference-engine/src/offline_transformations/src/moc_transformations.cpp index 0b7d66f3743080..a8768172c59b8f 100644 --- a/inference-engine/src/offline_transformations/src/moc_transformations.cpp +++ b/inference-engine/src/offline_transformations/src/moc_transformations.cpp @@ -18,6 +18,19 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include NGRAPH_RTTI_DEFINITION(ngraph::pass::MOCTransformations, "MOCTransformations", 0); @@ -38,16 +51,47 @@ bool ngraph::pass::MOCTransformations::run_on_function(std::shared_ptr(); manager.register_pass(); + auto transpose_sinking = manager.register_pass(); + transpose_sinking->add_matcher(); + // SplitSqueezeConcatFusion should work in same GraphRewrite as TransposesSinking, + // because it replaces pattern that may contain Transposes which must be optimized before + // the transformation and it also inserts Transpose that can be optimized by TransposeSinking + transpose_sinking->add_matcher(); + + auto eliminations = manager.register_pass(); + eliminations->add_matcher(); + eliminations->set_name("ngraph::pass::CommonEliminations"); + auto common_fusions = manager.register_pass(); + common_fusions->add_matcher(); + common_fusions->add_matcher(); common_fusions->add_matcher(); common_fusions->add_matcher(); common_fusions->add_matcher(); common_fusions->add_matcher(); common_fusions->add_matcher(); + common_fusions->add_matcher(); common_fusions->add_matcher(); + common_fusions->add_matcher(); + common_fusions->add_matcher(); common_fusions->add_matcher(); common_fusions->set_name("ngraph::pass::CommonFusions"); + manager.register_pass(); + manager.register_pass(); + + auto decomp = manager.register_pass(); + decomp->add_matcher(); + + manager.register_pass(); + + auto conv_fusions = manager.register_pass(); + conv_fusions->add_matcher(); + conv_fusions->add_matcher(); + conv_fusions->add_matcher(); + conv_fusions->add_matcher(); + conv_fusions->set_name("ngraph::pass::ConvFusions"); + manager.run_passes(f); // Restore original shapes to the nGraph Function diff --git a/inference-engine/src/offline_transformations/src/pruning/propagate_masks.cpp b/inference-engine/src/offline_transformations/src/pruning/propagate_masks.cpp index 271b200f31b5bc..e944ffff57bd09 100644 --- a/inference-engine/src/offline_transformations/src/pruning/propagate_masks.cpp +++ b/inference-engine/src/offline_transformations/src/pruning/propagate_masks.cpp @@ -19,12 +19,12 @@ namespace mask_propagation { class Convolution; class GroupConvolution; +class GroupConvolutionReshape; class Elementwise; class PassThrough; class StopPropagation; class FakeQuantize; class Concat; -class Reshape; } // namespace mask_propagation } // namespace pass @@ -192,9 +192,9 @@ class ngraph::pass::mask_propagation::GroupConvolution : public MatcherPass { } }; -class ngraph::pass::mask_propagation::Reshape : public MatcherPass { +class ngraph::pass::mask_propagation::GroupConvolutionReshape : public MatcherPass { public: - Reshape() { + GroupConvolutionReshape() { auto input = pattern::any_input(pattern::has_static_shape()); auto shape = pattern::any_input(); // Working only for Reshapes on Group 
Convolution weights @@ -258,10 +258,12 @@ class ngraph::pass::mask_propagation::Reshape : public MatcherPass { ngraph::replace_node(old_shape_const, new_const); setMask(m_output, output_mask); - return true; + // This transformation propagates only Reshape mask and doesn't do anything with GroupConvolution. + // So, not to disable GroupConvolution mask propagation we return false here. + return false; }; - auto m = std::make_shared(reshape, "ReshapeMaskPropagation"); + auto m = std::make_shared(gconv, "ReshapeMaskPropagation"); register_matcher(m, callback); } }; @@ -419,13 +421,12 @@ class ngraph::pass::mask_propagation::FakeQuantize : public MatcherPass{ auto fq_node = std::dynamic_pointer_cast(m_output.get_node_shared_ptr()); size_t idx = 0; if (fq_node->get_auto_broadcast() != ngraph::op::AutoBroadcastType::NONE) { - for (auto const_node : fq_params_nodes) { + for (auto node : fq_params_nodes) { + auto const_node = std::dynamic_pointer_cast(node); + if (!const_node) throw ngraph_error("Unexpected operation type."); auto new_shape = broadcast_shape_to_rank(const_node->get_shape(), m_input.get_partial_shape().rank().get_length()); - auto const_copy = const_node->clone_with_new_inputs(const_node->input_values()); - auto new_const = std::dynamic_pointer_cast(const_copy); - new_const->set_data_shape(new_shape); - new_const->validate_and_infer_types(); + auto new_const = std::make_shared(*const_node, new_shape); new_const->set_friendly_name(const_node->get_friendly_name()); ngraph::copy_runtime_info(const_node, new_const); ngraph::replace_node(const_node, new_const); @@ -605,11 +606,11 @@ class ngraph::pass::mask_propagation::StopPropagation : public MatcherPass { ngraph::pass::PropagateMasks::PropagateMasks() { add_matcher(); + add_matcher(); add_matcher(); add_matcher(); add_matcher(); add_matcher(); add_matcher(); - add_matcher(); add_matcher(); } diff --git a/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.hpp b/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.hpp index ffefeed06f0c2b..3af55071aa9c89 100644 --- a/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.hpp +++ b/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.hpp @@ -349,7 +349,7 @@ template - const int operator()(type_to_type) { return cv_type_to_depth::depth; } + int operator()(type_to_type) { return cv_type_to_depth::depth; } }; } // namespace diff --git a/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp b/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp index cc00ec77114e7c..5faf7bc37c46d0 100644 --- a/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp +++ b/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp @@ -716,9 +716,9 @@ V10Parser::V10Parser::GenericLayerParams XmlDeserializer::parseGenericParams( int64_t dim = 0; const pugi::char_t* dimVal = node.child_value(); std::stringstream ss(dimVal); - if (!(ss >> dim) || dim < 0) { + if (!(ss >> dim) || dim < -1) { IE_THROW() << "dimension (" << dimVal << ") in node " << node.name() - << " must be a non-negative integer: at offset " + << " must be greater or equal to -1: at offset " << node.offset_debug(); } port.dims.push_back(dim); @@ -855,7 +855,7 @@ std::shared_ptr XmlDeserializer::createNode( size_t index{0}; for (const auto & output_params : params.outputPorts) { - ngraphNode->set_output_type(index, output_params.precision, ngraph::Shape(output_params.dims)); + ngraphNode->set_output_type(index, output_params.precision, ngraph::PartialShape(output_params.dims)); ++index; } } diff --git 
a/inference-engine/src/readers/ir_reader/ie_ir_parser.hpp b/inference-engine/src/readers/ir_reader/ie_ir_parser.hpp index 540f845488730c..15ac63f531e01f 100644 --- a/inference-engine/src/readers/ir_reader/ie_ir_parser.hpp +++ b/inference-engine/src/readers/ir_reader/ie_ir_parser.hpp @@ -67,7 +67,7 @@ class V10Parser : public IParser { struct GenericLayerParams { struct LayerPortData { size_t portId; - SizeVector dims; + std::vector dims; ngraph::element::Type_t precision; std::unordered_set names; }; diff --git a/inference-engine/src/transformations/include/ngraph_ops/nms_static_shape_ie.hpp b/inference-engine/src/transformations/include/ngraph_ops/nms_static_shape_ie.hpp new file mode 100644 index 00000000000000..3bed4a37e6adb7 --- /dev/null +++ b/inference-engine/src/transformations/include/ngraph_ops/nms_static_shape_ie.hpp @@ -0,0 +1,114 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include + +#include "ngraph/op/op.hpp" + +namespace ngraph { +namespace op { +namespace internal { + +template +class NmsStaticShapeIE : public BaseNmsOp { +public: + NGRAPH_RTTI_DECLARATION; + + using Attributes = typename BaseNmsOp::Attributes; + + /// \brief Constructs a NmsStaticShapeIE operation + /// + /// \param boxes Node producing the box coordinates + /// \param scores Node producing the box scores + /// \param attrs Attributes of the operation + NmsStaticShapeIE(const Output& boxes, + const Output& scores, + const Attributes& attrs) : BaseNmsOp(boxes, scores, attrs) { + this->constructor_validate_and_infer_types(); + } + void validate_and_infer_types() override; + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override { + return std::make_shared(new_args.at(0), new_args.at(1), this->m_attrs); + } +}; + +template +void NmsStaticShapeIE::validate_and_infer_types() { + const auto boxes_ps = this->get_input_partial_shape(0); + const auto scores_ps = this->get_input_partial_shape(1); + + auto first_dim_shape = Dimension::dynamic(); + + if (boxes_ps.rank().is_static() && scores_ps.rank().is_static()) { + const auto num_boxes_boxes = boxes_ps[1]; + if (num_boxes_boxes.is_static() && scores_ps[0].is_static() && scores_ps[1].is_static()) { + const auto num_boxes = num_boxes_boxes.get_length(); + auto num_classes = scores_ps[1].get_length(); + if (this->m_attrs.background_class >=0 && this->m_attrs.background_class <= num_classes) { + num_classes = num_classes - 1; + } + int64_t max_output_boxes_per_class = 0; + if (this->m_attrs.nms_top_k >= 0) + max_output_boxes_per_class = std::min(num_boxes, static_cast(this->m_attrs.nms_top_k)); + else + max_output_boxes_per_class = num_boxes; + + auto max_output_boxes_per_batch = max_output_boxes_per_class * num_classes; + if (this->m_keep_top_k >= 0) + max_output_boxes_per_batch = + std::min(max_output_boxes_per_batch, static_cast(this->m_attrs.keep_top_k)); + + first_dim_shape = max_output_boxes_per_batch * scores_ps[0].get_length(); + } + } + + // 'selected_outputs' have the following format: + // [number of selected boxes, [class_id, box_score, xmin, ymin, xmax, ymax]] + this->set_output_type(0, element::f32, {first_dim_shape, 6}); + // 'selected_indices' have the following format: + // [number of selected boxes, 1] + this->set_output_type(1, this->m_attrs.output_type, {first_dim_shape, 1}); + // 'selected_num' have the following format: + // [num_batches, ] + if (boxes_ps.rank().is_static() && boxes_ps.rank().get_length() > 0) { + 
this->set_output_type(2, this->m_attrs.output_type, {boxes_ps[0]}); + } else { + this->set_output_type(2, this->m_attrs.output_type, {Dimension::dynamic()}); + } +} + +template +const ::ngraph::Node::type_info_t& NmsStaticShapeIE::get_type_info() const { return get_type_info_static(); } + +template +const ::ngraph::Node::type_info_t& NmsStaticShapeIE::get_type_info_static() { + auto BaseNmsOpTypeInfoPtr = &BaseNmsOp::get_type_info_static(); + + // TODO: it should be static const std::string name = std::string("NmsStaticShapeIE_") + BaseNmsOpTypeInfoPtr->name; + // but currently it will not pass conversion ot Legacy Opset correctly + static const std::string name = BaseNmsOpTypeInfoPtr->name; + + static const ::ngraph::Node::type_info_t type_info_static{ + name.c_str(), BaseNmsOpTypeInfoPtr->version, BaseNmsOpTypeInfoPtr}; + return type_info_static; +} + +template +const ::ngraph::Node::type_info_t NmsStaticShapeIE::type_info = NmsStaticShapeIE::get_type_info_static(); + +#ifdef __clang__ +extern template class TRANSFORMATIONS_API op::internal::NmsStaticShapeIE; +extern template class TRANSFORMATIONS_API op::internal::NmsStaticShapeIE; +#endif // __clang__ + +} // namespace internal +} // namespace op +} // namespace ngraph diff --git a/inference-engine/src/transformations/include/transformations/common_optimizations/leaky_relu_fusion.hpp b/inference-engine/src/transformations/include/transformations/common_optimizations/leaky_relu_fusion.hpp new file mode 100644 index 00000000000000..79e203485fa383 --- /dev/null +++ b/inference-engine/src/transformations/include/transformations/common_optimizations/leaky_relu_fusion.hpp @@ -0,0 +1,32 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include + +#include + +namespace ngraph { +namespace pass { + +class TRANSFORMATIONS_API LeakyReluFusion; + +} // namespace pass +} // namespace ngraph + +/** + * @ingroup ie_transformation_common_api + * @brief LeakyReluFusion transformation replaces following graph: + * Multiply->Maximum to LeakyRelu + */ + +class ngraph::pass::LeakyReluFusion: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + LeakyReluFusion(); +}; diff --git a/inference-engine/src/transformations/include/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp b/inference-engine/src/transformations/include/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp new file mode 100644 index 00000000000000..080a08683222d9 --- /dev/null +++ b/inference-engine/src/transformations/include/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp @@ -0,0 +1,26 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include +#include + +namespace ngraph { +namespace pass { + +class TRANSFORMATIONS_API ConvertMatrixNmsToMatrixNmsIE; + +} // namespace pass +} // namespace ngraph + +class ngraph::pass::ConvertMatrixNmsToMatrixNmsIE: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + ConvertMatrixNmsToMatrixNmsIE(); +}; diff --git a/inference-engine/src/transformations/include/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp b/inference-engine/src/transformations/include/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp new file mode 100644 index 00000000000000..b639364b24e978 --- /dev/null +++ 
b/inference-engine/src/transformations/include/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp @@ -0,0 +1,26 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include +#include + +namespace ngraph { +namespace pass { + +class TRANSFORMATIONS_API ConvertMulticlassNmsToMulticlassNmsIE; + +} // namespace pass +} // namespace ngraph + +class ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + ConvertMulticlassNmsToMulticlassNmsIE(); +}; diff --git a/inference-engine/src/transformations/src/ngraph_ops/nms_static_shape_ie.cpp b/inference-engine/src/transformations/src/ngraph_ops/nms_static_shape_ie.cpp new file mode 100644 index 00000000000000..8f173eafcae271 --- /dev/null +++ b/inference-engine/src/transformations/src/ngraph_ops/nms_static_shape_ie.cpp @@ -0,0 +1,19 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "ngraph/ops.hpp" +#include "ngraph_ops/nms_static_shape_ie.hpp" + +namespace ngraph { +namespace op { +namespace internal { + +template class TRANSFORMATIONS_API op::internal::NmsStaticShapeIE; +template class TRANSFORMATIONS_API op::internal::NmsStaticShapeIE; + +} // namespace internal +} // namespace op +} // namespace ngraph diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp index 415ecb11610901..44b2f5d7f40be7 100644 --- a/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp @@ -21,6 +21,7 @@ #include "transformations/common_optimizations/swish_fusion.hpp" #include "transformations/common_optimizations/normalize_l2_fusion.hpp" #include "transformations/common_optimizations/pull_transpose_through_fq.hpp" +#include "transformations/common_optimizations/leaky_relu_fusion.hpp" #include "transformations/common_optimizations/lin_op_sequence_fusion.hpp" #include "transformations/common_optimizations/remove_filtering_boxes_by_size.hpp" #include "transformations/common_optimizations/hsigmoid_fusion.hpp" @@ -133,6 +134,7 @@ bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptradd_matcher(); common_fusions->add_matcher(); common_fusions->add_matcher(); + common_fusions->add_matcher(); common_fusions->set_name("ngraph::pass::CommonFusions"); manager.register_pass(); diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/leaky_relu_fusion.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/leaky_relu_fusion.cpp new file mode 100644 index 00000000000000..388d2f171041f3 --- /dev/null +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/leaky_relu_fusion.cpp @@ -0,0 +1,50 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/common_optimizations/leaky_relu_fusion.hpp" +#include "transformations/utils/utils.hpp" + +#include +#include + +#include +#include +#include +#include "itt.hpp" + + +NGRAPH_RTTI_DEFINITION(ngraph::pass::LeakyReluFusion, "LeakyReluFusion", 0); + +ngraph::pass::LeakyReluFusion::LeakyReluFusion() { + 
MATCHER_SCOPE(LeakyReluFusion); + auto data_pattern = ngraph::pattern::any_input(); + auto alpha_pattern = ngraph::pattern::any_input(pattern::has_static_shape()); + auto multiply_pattern = ngraph::pattern::wrap_type({data_pattern, alpha_pattern}, pattern::consumers_count(1)); + auto max_pattern = ngraph::pattern::wrap_type({data_pattern, multiply_pattern}); + + ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) { + auto pattern_map = m.get_pattern_value_map(); + auto data = pattern_map.at(data_pattern); + const auto & original_alpha_pattern = pattern_map.at(alpha_pattern); + + if (shape_size(original_alpha_pattern.get_shape()) != 1) + return false; + + auto leaky_relu = register_new_node(data, original_alpha_pattern); + auto maximum = pattern_map.at(max_pattern); + leaky_relu->set_friendly_name(maximum.get_node()->get_friendly_name()); + + copy_runtime_info({ + pattern_map.at(multiply_pattern).get_node_shared_ptr(), + maximum.get_node_shared_ptr() + }, + leaky_relu); + replace_node(maximum.get_node_shared_ptr(), leaky_relu); + + return true; + }; + + auto m = std::make_shared(max_pattern, matcher_name); + this->register_matcher(m, callback); +} diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/normalize_l2_fusion.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/normalize_l2_fusion.cpp index 905356b4d5fd7a..22aac2e1c71d33 100644 --- a/inference-engine/src/transformations/src/transformations/common_optimizations/normalize_l2_fusion.cpp +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/normalize_l2_fusion.cpp @@ -25,10 +25,10 @@ ngraph::pass::NormalizeL2FusionWithMax::NormalizeL2FusionWithMax() { auto pow = std::make_shared(input, exp); auto axes = ngraph::pattern::wrap_type(); auto reduce_sum = std::make_shared(pow, axes); - auto sqrt = std::make_shared(reduce_sum); auto eps_const = ngraph::pattern::wrap_type(); - auto sqrt_max_eps = std::make_shared(sqrt, eps_const); - auto divide = std::make_shared(input, sqrt_max_eps); + auto max = std::make_shared(reduce_sum, eps_const); + auto sqrt = std::make_shared(max); + auto divide = std::make_shared(input, sqrt); ngraph::matcher_pass_callback matcher_pass_callback = [=](ngraph::pattern::Matcher& m) { auto& pattern_to_output = m.get_pattern_value_map(); @@ -52,12 +52,14 @@ ngraph::pass::NormalizeL2FusionWithMax::NormalizeL2FusionWithMax() { const auto eps_attr_value = eps_attr->cast_vector()[0]; auto normalize_l2 = std::make_shared(data_input, axes_input, eps_attr_value, op::EpsMode::MAX); + if (transformation_callback(normalize_l2)) + return false; normalize_l2->set_friendly_name(m.get_match_root()->get_friendly_name()); ngraph::copy_runtime_info({pattern_to_output.at(pow).get_node_shared_ptr(), pattern_to_output.at(reduce_sum).get_node_shared_ptr(), pattern_to_output.at(sqrt).get_node_shared_ptr(), - pattern_to_output.at(sqrt_max_eps).get_node_shared_ptr(), + pattern_to_output.at(max).get_node_shared_ptr(), pattern_to_output.at(divide).get_node_shared_ptr() }, normalize_l2); @@ -79,10 +81,10 @@ ngraph::pass::NormalizeL2FusionWithAdd::NormalizeL2FusionWithAdd() { auto pow = std::make_shared(input, exp); auto axes = ngraph::pattern::wrap_type(); auto reduce_sum = std::make_shared(pow, axes); - auto sqrt = std::make_shared(reduce_sum); auto eps_const = ngraph::pattern::wrap_type(); - auto sqrt_add_eps = std::make_shared(sqrt, eps_const); - auto divide = std::make_shared(input, sqrt_add_eps); + auto add = 
std::make_shared(reduce_sum, eps_const); + auto sqrt = std::make_shared(add); + auto divide = std::make_shared(input, sqrt); ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { auto& pattern_to_output = m.get_pattern_value_map(); @@ -106,12 +108,14 @@ ngraph::pass::NormalizeL2FusionWithAdd::NormalizeL2FusionWithAdd() { const auto eps_attr_value = op::util::has_constant_value(exp_input, 2.0f); auto normalize_l2 = std::make_shared(data_input, axes_input, eps_attr_value, op::EpsMode::ADD); + if (transformation_callback(normalize_l2)) + return false; normalize_l2->set_friendly_name(m.get_match_root()->get_friendly_name()); ngraph::copy_runtime_info({pattern_to_output.at(pow).get_node_shared_ptr(), pattern_to_output.at(reduce_sum).get_node_shared_ptr(), pattern_to_output.at(sqrt).get_node_shared_ptr(), - pattern_to_output.at(sqrt_add_eps).get_node_shared_ptr(), + pattern_to_output.at(add).get_node_shared_ptr(), pattern_to_output.at(divide).get_node_shared_ptr() }, normalize_l2); diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/transpose_sinking.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/transpose_sinking.cpp index 9a52445bf76f02..21211a7be462cb 100644 --- a/inference-engine/src/transformations/src/transformations/common_optimizations/transpose_sinking.cpp +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/transpose_sinking.cpp @@ -212,8 +212,9 @@ ngraph::pass::TransposeFuse::TransposeFuse() { auto new_order = ngraph::opset7::Constant::create(element::i64, {order2.size()}, order2); auto new_transpose = register_new_node(input, new_order); + new_transpose->set_friendly_name(m.get_match_root()->get_friendly_name()); ngraph::copy_runtime_info({ transpose1, transpose2 }, new_transpose); - ngraph::replace_node(transpose2, new_transpose); + ngraph::replace_node(m.get_match_root(), new_transpose); } return true; diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.cpp new file mode 100644 index 00000000000000..34163fc48601d7 --- /dev/null +++ b/inference-engine/src/transformations/src/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.cpp @@ -0,0 +1,66 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "itt.hpp" +#include +#include + +#include +#include +#include + +#include +#include + +#include "ngraph_ops/nms_static_shape_ie.hpp" +#include "transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp" + +NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertMatrixNmsToMatrixNmsIE, "ConvertMatrixNmsToMatrixNmsIE", 0); + +ngraph::pass::ConvertMatrixNmsToMatrixNmsIE::ConvertMatrixNmsToMatrixNmsIE() { + MATCHER_SCOPE(ConvertMatrixNmsToMatrixNmsIE); + auto nms = ngraph::pattern::wrap_type(); + + ngraph::matcher_pass_callback callback = [](pattern::Matcher &m) { + auto nms = std::dynamic_pointer_cast(m.get_match_root()); + if (!nms) { + return false; + } + + const auto new_args = nms->input_values(); + // vector of new nGraph operations + NodeVector new_ops; + auto attrs = nms->get_attrs(); + attrs.output_type = element::i32; + auto nms_new = std::make_shared>( + new_args.at(0), + new_args.at(1), + attrs); + new_ops.emplace_back(nms_new); + + Output output_0 = nms_new->output(0); + Output output_1 = nms_new->output(1); + 
Output output_2 = nms_new->output(2); + + if (nms->output(1).get_element_type() != output_1.get_element_type()) { + output_1 = std::make_shared(output_1, nms->output(1).get_element_type()); + output_1.get_node_shared_ptr()->set_friendly_name(nms->get_friendly_name() + "/convert.1"); + new_ops.emplace_back(output_1.get_node_shared_ptr()); + } + + if (nms->output(2).get_element_type() != output_2.get_element_type()) { + output_2 = std::make_shared(output_2, nms->output(2).get_element_type()); + output_2.get_node_shared_ptr()->set_friendly_name(nms->get_friendly_name() + "/convert.2"); + new_ops.emplace_back(output_2.get_node_shared_ptr()); + } + + nms_new->set_friendly_name(nms->get_friendly_name()); + ngraph::copy_runtime_info(nms, new_ops); + ngraph::replace_node(nms, {output_0, output_1, output_2}); + return true; + }; + + auto m = std::make_shared(nms, matcher_name); + this->register_matcher(m, callback); +} diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.cpp new file mode 100644 index 00000000000000..1f236610e53ed7 --- /dev/null +++ b/inference-engine/src/transformations/src/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.cpp @@ -0,0 +1,67 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "itt.hpp" +#include +#include + +#include +#include +#include + +#include +#include + +#include "ngraph_ops/nms_static_shape_ie.hpp" +#include "transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp" + +NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE, "ConvertMulticlassNmsToMulticlassNmsIE", 0); + +ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE::ConvertMulticlassNmsToMulticlassNmsIE() { + MATCHER_SCOPE(ConvertMulticlassNmsToMulticlassNmsIE); + auto nms = ngraph::pattern::wrap_type(); + + ngraph::matcher_pass_callback callback = [](pattern::Matcher &m) { + auto nms = std::dynamic_pointer_cast(m.get_match_root()); + if (!nms) { + return false; + } + + const auto new_args = nms->input_values(); + // vector of new nGraph operations + NodeVector new_ops; + auto attrs = nms->get_attrs(); + attrs.output_type = element::i32; + + auto nms_new = std::make_shared>( + new_args.at(0), + new_args.at(1), + attrs); + new_ops.emplace_back(nms_new); + + Output output_0 = nms_new->output(0); + Output output_1 = nms_new->output(1); + Output output_2 = nms_new->output(2); + + if (nms->output(1).get_element_type() != output_1.get_element_type()) { + output_1 = std::make_shared(output_1, nms->output(1).get_element_type()); + output_1.get_node_shared_ptr()->set_friendly_name(nms->get_friendly_name() + "/convert.1"); + new_ops.emplace_back(output_1.get_node_shared_ptr()); + } + + if (nms->output(2).get_element_type() != output_2.get_element_type()) { + output_2 = std::make_shared(output_2, nms->output(2).get_element_type()); + output_2.get_node_shared_ptr()->set_friendly_name(nms->get_friendly_name() + "/convert.2"); + new_ops.emplace_back(output_2.get_node_shared_ptr()); + } + + nms_new->set_friendly_name(nms->get_friendly_name()); + ngraph::copy_runtime_info(nms, new_ops); + ngraph::replace_node(nms, {output_0, output_1, output_2}); + return true; + }; + + auto m = std::make_shared(nms, matcher_name); + this->register_matcher(m, callback); +} diff --git 
a/inference-engine/src/transformations/src/transformations/serialize.cpp b/inference-engine/src/transformations/src/transformations/serialize.cpp index 93f9c24e4b81bb..0ce92c208ea430 100644 --- a/inference-engine/src/transformations/src/transformations/serialize.cpp +++ b/inference-engine/src/transformations/src/transformations/serialize.cpp @@ -642,8 +642,6 @@ bool resolve_dynamic_shapes(const ngraph::Function& f) { [](const Dimension& d) -> Dimension { return d.get_max_length(); }); - NGRAPH_CHECK(PartialShape(out_shape).is_static(), - "Dynamic dimension cannot be resolved in ", op); return out_shape; }; @@ -685,6 +683,7 @@ void ngfunction_2_irv10(pugi::xml_node& netXml, create_layer_ids(f); std::unordered_set unique_names; + // TODO remove resolve_dynamic_shapes function completely when support for -1 will be implemented in the MO bool has_dynamic_shapes = resolve_dynamic_shapes(f); const bool exec_graph = is_exec_graph(f); @@ -711,9 +710,6 @@ void ngfunction_2_irv10(pugi::xml_node& netXml, if (node->get_input_size() > 0) { pugi::xml_node input = layer.append_child("input"); for (const auto & i : node->inputs()) { - NGRAPH_CHECK(i.get_partial_shape().is_static(), - "Unsupported dynamic input shape in ", node); - // WA for LSTMCellv0, peephole input shall not be serialized if (i.get_index() == 6 && dynamic_cast(node)) { port_id++; @@ -724,10 +720,14 @@ void ngfunction_2_irv10(pugi::xml_node& netXml, port.append_attribute("id").set_value(port_id++); port.append_attribute("precision") .set_value(get_precision_name(i.get_element_type()).c_str()); - for (auto d : i.get_shape()) { + for (auto d : i.get_partial_shape()) { pugi::xml_node dim = port.append_child("dim"); - dim.append_child(pugi::xml_node_type::node_pcdata) - .set_value(std::to_string(d).c_str()); + if (d.is_dynamic()) { + dim.append_child(pugi::xml_node_type::node_pcdata).set_value("-1"); + } else { + dim.append_child(pugi::xml_node_type::node_pcdata) + .set_value(std::to_string(d.get_length()).c_str()); + } } } @@ -739,9 +739,6 @@ void ngfunction_2_irv10(pugi::xml_node& netXml, if ((node->get_output_size() > 0) && !ngraph::op::is_output(node)) { pugi::xml_node output = layer.append_child("output"); for (const auto & o : node->outputs()) { - NGRAPH_CHECK(o.get_partial_shape().is_static(), - "Unsupported dynamic output shape in ", node); - pugi::xml_node port = output.append_child("port"); port.append_attribute("id").set_value(port_id++); port.append_attribute("precision") @@ -762,10 +759,14 @@ void ngfunction_2_irv10(pugi::xml_node& netXml, port.append_attribute("names").set_value(names.c_str()); } - for (auto d : o.get_shape()) { + for (auto d : o.get_partial_shape()) { pugi::xml_node dim = port.append_child("dim"); - dim.append_child(pugi::xml_node_type::node_pcdata) - .set_value(std::to_string(d).c_str()); + if (d.is_dynamic()) { + dim.append_child(pugi::xml_node_type::node_pcdata).set_value("-1"); + } else { + dim.append_child(pugi::xml_node_type::node_pcdata) + .set_value(std::to_string(d.get_length()).c_str()); + } } } if (node_type_name == "TensorIterator" || node_type_name == "Loop") { @@ -851,7 +852,7 @@ bool pass::Serialize::run_on_function(std::shared_ptr f) { try { serializeFunc(xml_file, bin_file); - } catch (const ngraph::CheckFailure& e) { + } catch (const ngraph::CheckFailure&) { // optimization decission was made to create .bin file upfront and // write to it directly instead of buffering its content in memory, // hence we need to delete it here in case of failure diff --git 
a/inference-engine/src/vpu/CMakeLists.txt b/inference-engine/src/vpu/CMakeLists.txt index 959ad02186c000..3a11a33509736c 100644 --- a/inference-engine/src/vpu/CMakeLists.txt +++ b/inference-engine/src/vpu/CMakeLists.txt @@ -20,6 +20,9 @@ if(ENABLE_MYRIAD) install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/custom_kernels/ DESTINATION ${IE_CPACK_LIBRARY_PATH}/vpu_custom_kernels COMPONENT myriad) + install(DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/vpu_custom_kernels/ + DESTINATION ${IE_CPACK_LIBRARY_PATH}/vpu_custom_kernels + COMPONENT myriad) install(DIRECTORY ${VPU_CLC_MA2X8X_ROOT}/ DESTINATION deployment_tools/tools/cl_compiler COMPONENT myriad diff --git a/inference-engine/src/vpu/common/CMakeLists.txt b/inference-engine/src/vpu/common/CMakeLists.txt index 71c727b631ab0f..d8b55be48257e8 100644 --- a/inference-engine/src/vpu/common/CMakeLists.txt +++ b/inference-engine/src/vpu/common/CMakeLists.txt @@ -15,7 +15,7 @@ function(add_common_target TARGET_NAME STATIC_IE) UNITY ) - if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + if(CMAKE_COMPILER_IS_GNUCXX) # TODO: enable some day and fix all warnings # target_compile_options(${TARGET_NAME} PRIVATE "-Wall") target_compile_options(${TARGET_NAME} PRIVATE "-Werror=unused-function") diff --git a/inference-engine/src/vpu/common/include/vpu/utils/containers.hpp b/inference-engine/src/vpu/common/include/vpu/utils/containers.hpp index 745613c977ece8..ada40a74d84498 100644 --- a/inference-engine/src/vpu/common/include/vpu/utils/containers.hpp +++ b/inference-engine/src/vpu/common/include/vpu/utils/containers.hpp @@ -11,24 +11,33 @@ namespace vpu { -template class Map> -inline std::vector getKeys(const Map& map) { +template