diff --git a/external/basis_universal/.github/workflows/static_checks.yml b/external/basis_universal/.github/workflows/static_checks.yml index 1ed054a877..8a09cc2be6 100644 --- a/external/basis_universal/.github/workflows/static_checks.yml +++ b/external/basis_universal/.github/workflows/static_checks.yml @@ -1,5 +1,6 @@ name: 📊 Static Checks -on: [push, pull_request] +on: + workflow_dispatch: concurrency: group: ci-${{ github.actor }}-${{ github.head_ref || github.run_number }}-${{ github.ref }}-static diff --git a/external/basis_universal/.gitrepo b/external/basis_universal/.gitrepo index 60f5917763..36aaefb9db 100644 --- a/external/basis_universal/.gitrepo +++ b/external/basis_universal/.gitrepo @@ -5,8 +5,8 @@ ; [subrepo] remote = https://github.com/KhronosGroup/basis_universal.git - branch = fixes_for_ktx_v5 - commit = e72b15c65b4a60668972c92f57d0ddd965f140da - parent = bc8d0c29096027c5802433aa4081acd11bcc39b0 + branch = fixes_for_ktx + commit = a660e7be16d667a2569890430e630ce66d31ac59 + parent = b0e5077581382bd6e92c191a5082ce7822acb2f9 method = merge cmdver = 0.4.9 diff --git a/external/basis_universal/CMakeLists.txt b/external/basis_universal/CMakeLists.txt index 3b7d698b39..9a82ea9f2d 100644 --- a/external/basis_universal/CMakeLists.txt +++ b/external/basis_universal/CMakeLists.txt @@ -1,24 +1,78 @@ # Important: The Basis Universal encoder and transcoder libraries must be compiled with -fno-strict-aliasing (MSVC's default, and also the Linux kernel). # It should also work without this option, but we do not test with it. + +# Changes from the upstream basis_universal CMakeLists.txt +# +# Enhancements +# 1. Able to configure and build with any multi-config CMake generator, not just Visual Studio. +# 2. Able to build with CMake on and for Windows with compilers other than MSVC, i.e compilers +# for which CMake's MSVC variable is not set: ClangCL, clang, gcc. +# 3. Able to disable building of the `basisu` tool. +# 4. 
Expose the basisu_encoder target's public dependencies, compile and link options in its +# target interface so they are automatically exported to _any_ application that links with +# the target. No need to understand and manually set them on every application target. +# 5. Able to build on Android NDK and MinGW by handling potential absence of libpthread. +# 6. Use add_compile_{definitions,options} and add_link_options instead of CMAKE_ global +# variables. +# 7. This one is likely of interest only to a minority. Set CMAKE_OSX_DEPLOYMENT_TARGET so +# programs will run on an earlier version of macOS than the target of the Xcode SDK used +# to build it. +# +# Fixes +# - C++ compile options are only set when compiling C++ files. +# - The default value of `BASISU_STATIC` is set to TRUE because, as with upstream, +# basisu_encoder is always built as a static library. A side effect of this change is to stop +# a link-time warning about setting of an rpath, something done when BASISU_STATIC is FALSE. +# - The libraries added when `BASISU_STATIC` is TRUE are only needed for MinGW and are now added +# only for MinGW. +# +# Known Issues (also in upstream) +# - BASISU_SSE is set `if (MSVC)` so it is incorrectly set on Windows ARM and not set when +# compilers other than MSVC are being used, including non-Windows platforms. Users must be +# aware and must manually set the correct value for their circumstance. +# - basisu_encoder is always built as a static library so the `BASISU_STATIC` option is +# currently pointless. +# - There is no way to build with OpenCL for Windows arm64. +# +# Due primarily to the changes related to enhancements 1 and 4, this has about 140 fewer lines of +# CMake code, this comment not included. + cmake_minimum_required(VERSION 3.20) if (NOT CMAKE_OSX_DEPLOYMENT_TARGET) # Needed otherwise Xcode builds with the default installed SDK which can often be - # more recent than the macOS version being used. 
+ # more recent than the macOS version being used. Must be before project. set(CMAKE_OSX_DEPLOYMENT_TARGET "11.0" CACHE STRING "macOS Deployment Target") endif() project(basisu C CXX) -option(BASISU_TOOL "Include basisu tool in build" TRUE) -option(BASISU_EXAMPLES "Include examples in build" TRUE) +# pybind11: allow old Python finder modules without complaining +if (POLICY CMP0148) + cmake_policy(SET CMP0148 OLD) +endif() -option(BASISU_STATIC "static linking" TRUE) -option(BASISU_SAN "sanitize" FALSE) +if (CMAKE_SYSTEM_NAME STREQUAL "WASI") + set(BASISU_BUILD_WASM TRUE) +else() + set(BASISU_BUILD_WASM FALSE) +endif() -# Using a generator expression here prevents multi-config generators (VS, Xcode, Ninja Multi-Config) -# from appending a per-configuration subdirectory. NOTE: This means the output could be overwritten -# by a subsequent build for a different configuration. +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +option(BASISU_STATIC "static linking" TRUE) # NO-OP. basisu_encoder always built as static library. +option(BASISU_SAN "sanitize" FALSE) +option(BASISU_EXAMPLES "build examples" TRUE) +option(BASISU_TOOL "build basisu tool" TRUE) +option(BASISU_WASM_THREADING "Enable WASI threading support" OFF) +option(BASISU_BUILD_PYTHON "Build native Python module via pybind11" OFF) + +# Using a generator expression here prevents multi-config generators (VS, Xcode, +# Ninja Multi-Config) from appending a per-configuration subdirectory. This is a much simpler +# alternative to specifying RUNTIME_OUTPUT_DIRECTORY_ for each target and each config. +# NOTE: Setting RUNTIME_OUTPUT_DIRECTORY to an undecorated `bin` directory means the output will +# be overwritten by subsequent builds for different build and cmake configs. set(CMAKE_RUNTIME_OUTPUT_DIRECTORY $<1:${CMAKE_CURRENT_SOURCE_DIR}/bin>) # For MSVC builds default to SSE enabled, and determine if it's a 64-bit (-A x64) vs. 32-bit (-A Win32) build. 
@@ -39,14 +93,58 @@ endif() option(BASISU_ZSTD "ZSTD support for KTX2 transcoding/encoding" TRUE) option(BASISU_OPENCL "OpenCL support in encoder" FALSE) -message("Initial BASISU_BUILD_X64=${BASISU_BUILD_X64}") +# Old option to new (BASISU_ prefixed) automatic remapping +foreach(pair + "STATIC;BASISU_STATIC" + "SAN;BASISU_SAN" + "EXAMPLES;BASISU_EXAMPLES" + "WASM_THREADING;BASISU_WASM_THREADING" + "BUILD_PYTHON;BASISU_BUILD_PYTHON" + "BUILD_X64;BASISU_BUILD_X64" + "SSE;BASISU_SSE" + "ZSTD;BASISU_ZSTD" + "OPENCL;BASISU_OPENCL" +) + list(GET pair 0 OLD) + list(GET pair 1 NEW) + + if(DEFINED ${OLD}) + message(WARNING "[BASISU] Legacy option '${OLD}' is deprecated. Use '${NEW}' instead.") + set(${NEW} "${${OLD}}" CACHE BOOL "" FORCE) + endif() +endforeach() + +if (BASISU_BUILD_WASM) + message(STATUS "Configuring for WASM (WASI-SDK)") + + # WASM is always 32-bit + set(BASISU_BUILD_X64 OFF CACHE BOOL "" FORCE) + + # WASM cannot use SSE + set(BASISU_SSE OFF CACHE BOOL "" FORCE) + + # WASM cannot use OpenCL + set(BASISU_OPENCL OFF CACHE BOOL "" FORCE) + + # WASM cannot use dynamic linking + # TODO: Fix this untrue statement. basisu_encoder always built as a static library. 
+ set(BASISU_STATIC OFF CACHE BOOL "" FORCE) + + # WASM cannot use sanitizers + set(BASISU_SAN OFF CACHE BOOL "" FORCE) +endif() + message("Initial CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}") +message("Initial BASISU_BUILD_X64=${BASISU_BUILD_X64}") +message("Initial BASISU_BUILD_WASM=${BASISU_BUILD_WASM}") +message("Initial BASISU_WASM_THREADING=${BASISU_WASM_THREADING}") +message("Initial BASISU_BUILD_PYTHON=${BASISU_BUILD_PYTHON}") message("Initial BASISU_SSE=${BASISU_SSE}") message("Initial BASISU_ZSTD=${BASISU_ZSTD}") message("Initial BASISU_OPENCL=${BASISU_OPENCL}") message("Initial BASISU_SAN=${BASISU_SAN}") -message("initial BASISU_TOOL=${BASISU_TOOL}") -message("initial BASISU_EXAMPLES=${BASISU_EXAMPLES}") +message("Initial BASISU_EXAMPLES=${BASISU_EXAMPLES}") +message("Initial BASISU_TOOL=${BASISU_TOOL}") if(MINGW) # Check if the Threads package is provided; if using Mingw it MIGHT be @@ -88,7 +186,7 @@ else() message("Zstandard disabled") endif() -if (NOT MSVC) +if (NOT MSVC AND NOT BASISU_BUILD_WASM) add_compile_options($<$:-g>) # If you want to set an optimization option for non-debug too, use this instead. #add_compile_options($,-g,-O3>) @@ -105,7 +203,9 @@ if (NOT MSVC) -Wno-unused-value -Wno-unused-parameter -Wno-unused-variable -Wno-misleading-indentation -Wno-maybe-uninitialized -Wno-unused-function - -Wno-stringop-overflow -Wno-unknown-warning-option) + -Wno-stringop-overflow) + add_compile_options("$<$>:-Wno-unknown-warning-option>") + # Add -fPIC ONLY on non-Windows platforms if (NOT WIN32) add_compile_options(-fPIC) @@ -114,6 +214,8 @@ if (NOT MSVC) # AppleClang 14 raises this warning in zstd.cpp. 
add_compile_options("$<$,$,17>>:-Wno-bitwise-instead-of-logical>") + # GCC 15 raises this warning in + add_compile_options("$<$:-Wno-reorder;-Wno-class-memaccess;-Wno-deprecated-copy>") add_compile_options($<$>:-m32>) @@ -121,6 +223,17 @@ if (NOT MSVC) if (EMSCRIPTEN) add_link_options("SHELL:-s ALLOW_MEMORY_GROWTH=1") endif() +elseif (BASISU_BUILD_WASM) + # _WASI_EMULATED_PROCESS_CLOCKS/-lwasi-emulated-process-clocks is only for ZStd + add_compile_definitions(_WASI_EMULATED_PROCESS_CLOCKS) + add_link_options(-lwasi-emulated-process-clocks) + + add_compile_options(-fno-strict-aliasing -fvisibility=hidden -Wall -Wextra -Wno-unknown-warning-option) + + add_compile_options($,-g,-O2>) + + # We need a few MB of stack - don't skip this or WASMTime will silently allow the stack to grow into the heap or static memory, causing corruption. + add_link_options(-Wl,--stack-first -Wl,-z,stack-size=8388608) else() add_compile_options("$<$:-Wno-unused-variable;-Wno-unused-function>") endif() @@ -147,12 +260,11 @@ set(ENCODER_LIB_SRC_LIST encoder/basisu_uastc_hdr_4x4_enc.cpp encoder/basisu_astc_hdr_6x6_enc.cpp encoder/basisu_astc_hdr_common.cpp + encoder/basisu_astc_ldr_common.cpp + encoder/basisu_astc_ldr_encode.cpp encoder/3rdparty/android_astc_decomp.cpp encoder/3rdparty/tinyexr.cpp transcoder/basisu_transcoder.cpp -) - -set(ENCODER_LIB_HDR_LIST encoder/basisu_astc_hdr_6x6_enc.h encoder/basisu_astc_hdr_common.h encoder/basisu_backend.h @@ -175,6 +287,8 @@ set(ENCODER_LIB_HDR_LIST encoder/basisu_ssim.h encoder/basisu_uastc_enc.h encoder/basisu_uastc_hdr_4x4_enc.h + encoder/basisu_astc_ldr_common.h + encoder/basisu_astc_ldr_encode.h encoder/cppspmd_flow.h encoder/cppspmd_math_declares.h encoder/cppspmd_math.h @@ -193,24 +307,23 @@ set(ENCODER_LIB_HDR_LIST transcoder/basisu_transcoder_internal.h transcoder/basisu_transcoder_uastc.h transcoder/basisu_transcoder.h + transcoder/basisu_idct.h transcoder/basisu.h + zstd/zstd.h ) if (BASISU_ZSTD) set(ENCODER_LIB_SRC_LIST 
${ENCODER_LIB_SRC_LIST} zstd/zstd.c) - set(ENCODER_LIB_HDR_LIST ${ENCODER_LIB_HDR_LIST} zstd/zstd.h) endif() # Create the static library -add_library(basisu_encoder STATIC ${ENCODER_LIB_SRC_LIST} ${ENCODER_LIB_HDR_LIST}) +add_library(basisu_encoder STATIC ${ENCODER_LIB_SRC_LIST}) target_include_directories(basisu_encoder INTERFACE $ $ # So KTX-Software can use it. ) -# PUBLIC so it will be exported to dependent programs. -target_compile_features(basisu_encoder PUBLIC cxx_std_17) if (EMSCRIPTEN) target_compile_definitions(basisu_encoder PUBLIC BASISU_SUPPORT_SSE=0) @@ -218,16 +331,22 @@ else() target_compile_definitions(basisu_encoder PUBLIC BASISU_SUPPORT_SSE=$,1,0> ) + target_compile_definitions(basisu_encoder PUBLIC + BASISU_WASI_THREADS=$,1,0> + ) target_compile_options(basisu_encoder PRIVATE "$<$,$>:-msse4.1>" ) + target_compile_options(basisu_encoder PUBLIC + "$<$:-fno-exceptions -fno-rtti>" + ) endif() target_compile_definitions(basisu_encoder PRIVATE "BASISD_SUPPORT_KTX2_ZSTD=$,1,0>") if (BASISU_OPENCL) # basisu uses this to confirm the library has been compiled with OpenCL support hence PUBLIC. target_compile_definitions(basisu_encoder PUBLIC BASISU_SUPPORT_OPENCL=1) - if (NOT WIN32) # True when the target system is Windows. + if (NOT WIN32) # True when the target system is not Windows. # For Non-Windows builds, use the system OpenCL headers/libs, if cmake found them. 
target_include_directories(basisu_encoder PRIVATE ${OpenCL_INCLUDE_DIRS}) target_link_libraries(basisu_encoder PRIVATE ${OpenCL_LIBRARIES}) @@ -244,7 +363,7 @@ else() target_compile_definitions(basisu_encoder PUBLIC BASISU_SUPPORT_OPENCL=0) endif() -if (NOT MSVC) +if (NOT MSVC AND NOT BASISU_BUILD_WASM) # Only link 'm' on non-Windows platforms (Linux, macOS) if (NOT WIN32) target_link_libraries(basisu_encoder INTERFACE m) @@ -259,30 +378,57 @@ if (NOT MSVC) endif() endif() -macro(set_common_executable_properties target) +macro(set_common_executable_properties target link_encoder) #if (MSVC) target_sources(${target} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/basisu.manifest") #endif() - target_link_libraries(${target} PRIVATE basisu_encoder) + if (${link_encoder}) + target_link_libraries(${target} PRIVATE basisu_encoder) + endif() if (NOT BASISU_STATIC AND NOT EMSCRIPTEN AND NOT WIN32) target_link_options(${target} PUBLIC -Wl,-rpath .) endif() + if (BASISU_BUILD_WASM) + # Add proper suffix + set_target_properties(${target} PROPERTIES SUFFIX ".wasm") + if (BASISU_WASM_THREADING) + set_target_properties(${target} PROPERTIES OUTPUT_NAME "$_mt") + else() + set_target_properties(${target} PROPERTIES OUTPUT_NAME "$_st") + endif() + # 256 MB initial, 3.5 GB max safe defaults for BasisU + target_link_options(${target} PRIVATE + -Wl,--initial-memory=268435456 + -Wl,--max-memory=3758096384 + ) + endif() endmacro() if (BASISU_TOOL) - # Create the basisu executable and link against the static library + # Create the basisu executable add_executable(basisu basisu_tool.cpp) - set_common_executable_properties(basisu) + set_common_executable_properties(basisu TRUE) endif() if (BASISU_EXAMPLES) - # Create the new example executable and link against the static library + # Create the example executables add_executable(examples example/example.cpp) - set_common_executable_properties(examples) + set_common_executable_properties(examples TRUE) + + add_executable(example_capi 
example_capi/example_capi.c encoder/basisu_wasm_api.cpp encoder/basisu_wasm_transcoder_api.cpp) + set_common_executable_properties(example_capi TRUE) + + add_executable(example_transcoding example_transcoding/example_transcoding.cpp example_transcoding/utils.cpp zstd/zstddeclib.c transcoder/basisu_transcoder.cpp) + set_common_executable_properties(example_transcoding FALSE) + # As target is not linked with basisu_encoder, these values won't be imported. + target_compile_definitions(example_transcoding PRIVATE "BASISD_SUPPORT_KTX2_ZSTD=$,1,0>") + target_compile_options(example_transcoding PUBLIC + "$<$:-fno-exceptions -fno-rtti>" + ) endif() if (BASISU_TOOL AND NOT EMSCRIPTEN) - if (UNIX) + if (UNIX AND NOT BASISU_BUILD_WASM) if (CMAKE_BUILD_TYPE STREQUAL "Release") if (APPLE) add_custom_command(TARGET basisu POST_BUILD COMMAND strip -X -x ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/basisu) @@ -294,3 +440,135 @@ if (BASISU_TOOL AND NOT EMSCRIPTEN) endif() endif() endif() + +# ------------------------------------------------------------ +# Build WASM WASI API module (single or multi-threaded) +# ------------------------------------------------------------ +if (BASISU_BUILD_WASM) + set(BASISU_WASM_API_SRC + ${CMAKE_CURRENT_SOURCE_DIR}/encoder/basisu_wasm_api.cpp + ) + + # Select output name based on threading flag + if (BASISU_WASM_THREADING) + set(BASISU_WASM_OUTPUT_NAME "basisu_module_mt") + else() + set(BASISU_WASM_OUTPUT_NAME "basisu_module_st") + endif() + + add_executable(${BASISU_WASM_OUTPUT_NAME} ${BASISU_WASM_API_SRC}) + target_link_libraries(${BASISU_WASM_OUTPUT_NAME} PRIVATE basisu_encoder) + + set_target_properties(${BASISU_WASM_OUTPUT_NAME} PROPERTIES SUFFIX ".wasm") + + # Common WASM options + target_link_options(${BASISU_WASM_OUTPUT_NAME} PRIVATE + -Wl,--initial-memory=268435456 + -Wl,--max-memory=3758096384 + -Wl,--stack-first + -Wl,-z,stack-size=8388608 + ) + + set_target_properties(${BASISU_WASM_OUTPUT_NAME} PROPERTIES + RUNTIME_OUTPUT_DIRECTORY 
$<1:${CMAKE_CURRENT_SOURCE_DIR}/python/basisu_py/wasm> + ) + + # Threading options + if (BASISU_WASM_THREADING) + target_compile_options(${BASISU_WASM_OUTPUT_NAME} PRIVATE + -pthread + -matomics + ) + target_link_options(${BASISU_WASM_OUTPUT_NAME} PRIVATE + -pthread + -Wl,--shared-memory + -Wl,--export-memory + ) + + target_compile_definitions(${BASISU_WASM_OUTPUT_NAME} PRIVATE BASISU_WASI_THREADS=1) + endif() +endif() + +if (BASISU_BUILD_WASM) + # Select output name based on threading flag + if (BASISU_WASM_THREADING) + set(BASISU_TRANSCODER_WASM_OUTPUT_NAME "basisu_transcoder_module_mt") + else() + set(BASISU_TRANSCODER_WASM_OUTPUT_NAME "basisu_transcoder_module_st") + endif() + + add_executable(${BASISU_TRANSCODER_WASM_OUTPUT_NAME} + ${CMAKE_CURRENT_SOURCE_DIR}/encoder/basisu_wasm_transcoder_api.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/transcoder/basisu_transcoder.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/zstd/zstddeclib.c) + + set_target_properties(${BASISU_TRANSCODER_WASM_OUTPUT_NAME} PROPERTIES SUFFIX ".wasm") + + target_link_options(${BASISU_TRANSCODER_WASM_OUTPUT_NAME} PRIVATE + -Wl,--initial-memory=16777216 + -Wl,--stack-first + -Wl,-z,stack-size=4194304 + ) + target_compile_options(${BASISU_TRANSCODER_WASM_OUTPUT_NAME} PRIVATE -fno-exceptions -fno-rtti) + + set_target_properties(${BASISU_TRANSCODER_WASM_OUTPUT_NAME} PROPERTIES + RUNTIME_OUTPUT_DIRECTORY $<1:${CMAKE_CURRENT_SOURCE_DIR}/python/basisu_py/wasm> + ) +endif() + + +# ------------------------------------------------------------ +# Optional: Build native Python modules (pybind11) +# ------------------------------------------------------------ +if (BASISU_BUILD_PYTHON AND NOT BASISU_BUILD_WASM) + find_package(pybind11 CONFIG REQUIRED) + + message(STATUS "Building pybind11 Python extension: basisu_python") + + pybind11_add_module(basisu_python + python/basisu_encoder_pybind11.cpp + encoder/basisu_wasm_api.cpp + ) + + # Ensure PIC ONLY for this target + set_property(TARGET basisu_python PROPERTY 
POSITION_INDEPENDENT_CODE ON) + + target_link_libraries(basisu_python PRIVATE basisu_encoder) + + # Put basisu_python so into python/basisu_py + set_target_properties(basisu_python PROPERTIES + LIBRARY_OUTPUT_DIRECTORY $<1:${CMAKE_CURRENT_SOURCE_DIR}/python/basisu_py> + PREFIX "" # Required by Python + OUTPUT_NAME "basisu_python" # Just to be explicit + ) + + if (MSVC) + set_target_properties(basisu_python PROPERTIES SUFFIX ".pyd") + endif() +endif() + +if (BASISU_BUILD_PYTHON AND NOT BASISU_BUILD_WASM) + find_package(pybind11 CONFIG REQUIRED) + + message(STATUS "Building pybind11 Python extension: basisu_transcoder_python") + + pybind11_add_module(basisu_transcoder_python + python/basisu_transcoder_pybind11.cpp + encoder/basisu_wasm_transcoder_api.cpp + transcoder/basisu_transcoder.cpp + zstd/zstddeclib.c + ) + + # Ensure PIC ONLY for this target + set_property(TARGET basisu_transcoder_python PROPERTY POSITION_INDEPENDENT_CODE ON) + + set_target_properties(basisu_transcoder_python PROPERTIES + LIBRARY_OUTPUT_DIRECTORY $<1:${CMAKE_CURRENT_SOURCE_DIR}/python/basisu_py> + PREFIX "" + OUTPUT_NAME "basisu_transcoder_python" + ) + + if (MSVC) + set_target_properties(basisu_transcoder_python PROPERTIES SUFFIX ".pyd") + endif() +endif() diff --git a/external/basis_universal/LICENSE b/external/basis_universal/LICENSE index 30e4b202d8..94ea880430 100644 --- a/external/basis_universal/LICENSE +++ b/external/basis_universal/LICENSE @@ -186,7 +186,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright 2019-2025 Binomial LLC + Copyright 2019-2026 Binomial LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
diff --git a/external/basis_universal/NOTICE b/external/basis_universal/NOTICE new file mode 100644 index 0000000000..a53c09cdf9 --- /dev/null +++ b/external/basis_universal/NOTICE @@ -0,0 +1,15 @@ +NOTICE + +Basis Universal™ Supercompressed GPU Texture Compression Library + +Copyright © 2016–2026 Binomial LLC. +All rights reserved except as granted under the [Apache 2.0 license](https://github.com/BinomialLLC/basis_universal/blob/master/LICENSE). +"Basis Universal" is a trademark of Binomial LLC. + +The documents in the Basis Universal wiki, and the Basis Universal library, example, and tool source code, fall under the Apache 2.0 license, unless otherwise explicitly indicated. + +Redistributions or derivative works must include a readable copy of the attribution notices from this NOTICE file (see Apache License 2.0 § 4(d)). + +If you modify the Basis Universal source code, specifications, or wiki documents and redistribute the files, you must cause any modified files to carry prominent notices stating that you changed the files (see Apache 2.0 §4(b)). + +**This software, documentation and specifications are provided "as is", without warranty of any kind (see Apache 2.0 §§7–8).** diff --git a/external/basis_universal/README.md b/external/basis_universal/README.md index 9a85a8b51a..674c2292b6 100644 --- a/external/basis_universal/README.md +++ b/external/basis_universal/README.md @@ -1,46 +1,67 @@ -# basis_universal -An LDR/HDR portable GPU compressed texture transcoding system. + + -[![Build status](https://ci.appveyor.com/api/projects/status/87eb0o96pjho4sh0?svg=true)](https://ci.appveyor.com/project/BinomialLLC/basis-universal) +# basis_universal v2.1 +An LDR/HDR portable GPU supercompressed texture transcoding system. 
+ +[![Build status](https://img.shields.io/appveyor/build/BinomialLLC/basis-universal/master.svg)](https://ci.appveyor.com/project/BinomialLLC/basis-universal) ---- Intro ----- -Basis Universal is an open source [supercompressed](http://gamma.cs.unc.edu/GST/gst.pdf) LDR/HDR GPU compressed texture interchange system from Binomial LLC that supports two intermediate file formats: the [.KTX2 open standard from the Khronos Group](https://registry.khronos.org/KTX/specs/2.0/ktxspec.v2.html), and our own ".basis" file format. These file formats support rapid transcoding to virtually any compressed [GPU texture format](https://en.wikipedia.org/wiki/Texture_compression) released in the past ~25 years. +Basis Universal™ v2.1 is an open source [supercompressed](http://gamma.cs.unc.edu/GST/gst.pdf) LDR/HDR GPU compressed texture interchange system from Binomial LLC that supports two intermediate file formats: the [.KTX2 open standard from the Khronos Group](https://registry.khronos.org/KTX/specs/2.0/ktxspec.v2.html), and our own ".basis" file format. These file formats support rapid transcoding to virtually any compressed [GPU texture format](https://grokipedia.com/page/texture_compression) released over the past quarter century. -Our overall goal with this project is to simplify the encoding and efficient distribution of *portable* LDR and HDR GPU texture, image, and short texture video content in a way that is compatible with any GPU or rendering/graphics API. +## GPU Textures are Infrastructure -The system supports five modes: ETC1S, UASTC LDR 4x4, UASTC HDR 4x4, UASTC HDR 6x6 (with or without RDO), or UASTC HDR 6x6 Intermediate ("GPU Photo"). The C/C++ encoder and transcoder libaries can be compiled to native code or WebAssembly, and all encoder/transcoder features can be accessed from Javascript via a C++ wrapper library which optionally supports [WASM multithreading](https://web.dev/articles/webassembly-threads) for fast encoding in the browser. 
ETC1S transcoding (which uses no SIMD code) is slightly faster than libjpeg when compiled to native code. +Our overall goal is to simplify the encoding and efficient distribution of *portable* LDR and HDR GPU texture, image, and short [texture video](https://github.com/BinomialLLC/basis_universal/wiki/Encoding-ETC1S-and-XUASTC-LDR-Texture-Video) content in a way that is compatible with any GPU or rendering/graphics API. -Links ------ +The system supports seven modes (or codecs). In the order they were implemented: +1. **ETC1S**: A supercompressed subset of ETC1 designed for very fast transcoding to other LDR texture formats, low/medium quality but high compression, slightly faster transcoding to other LDR texture formats vs. libjpeg. +2. **UASTC LDR 4x4 (with or without RDO)**: Custom ASTC 4x4-like format designed for very fast transcoding to other LDR texture formats, high quality +3. **UASTC HDR 4x4**: Standard ASTC HDR 4x4 texture data, but constrained for very fast transcoding to BC6H +4. **ASTC HDR 6x6 (with or without RDO)**: Standard ASTC HDR 6x6 +5. **UASTC HDR 6x6 Intermediate ("GPU Photo HDR")**: Supercompressed ASTC HDR 6x6 +6. **ASTC LDR 4x4-12x12 (all 14 standard ASTC block sizes, with or without basic windowed RDO)**: Standard ASTC LDR 4x4-12x12 +7. **XUASTC LDR 4x4-12x12 (all 14 standard ASTC block sizes, "GPU Photo LDR/SDR")**: Latent-space supercompressed ASTC LDR with Weight Grid DCT ([Discrete Cosine Transform](https://grokipedia.com/page/Discrete_cosine_transform)) for very high quality, extreme bitrate scalability, optional adaptive deblocking (CPU or using a [simple GPU pixel shader](https://github.com/BinomialLLC/basis_universal/tree/master/shader_deblocking) compatible with mipmapping and filtering), three entropy coding profiles (Zstd, arithmetic or hybrid). 
See [JPEG for ASTC](https://github.com/BinomialLLC/basis_universal/wiki/JPEG-for-ASTC), and the [ASTC and XUASTC LDR Usage Guide](https://github.com/BinomialLLC/basis_universal/wiki/ASTC-and-XUASTC-LDR-Usage-Guide). -- [Release Notes](https://github.com/BinomialLLC/basis_universal/wiki/Release-Notes) +The C/C++ encoder and transcoder libraries can be compiled to native code or WebAssembly (web or WASI), and all encoder/transcoder features can be accessed from JavaScript via a C++ wrapper library which optionally supports [WASM multithreading](https://web.dev/articles/webassembly-threads) for fast encoding in the browser. [WASM WASI](https://wasi.dev/) builds, for the command line tool and the encoder/transcoder as a WASI module using a pure C API, are also supported. -- [Live Compression/Transcoding Testbed](https://subquantumtech.com/bu_6x6/ktx2_encode_test/) +Full Python support for encoding/transcoding is now available, supporting native or WASM modules, but is still in the early stages of development. -- [Live WebGL Examples](https://subquantumtech.com/bu_6x6/) +License/Legal +------------- -- [Javascript API/WASM/WebGL info](https://github.com/BinomialLLC/basis_universal/tree/master/webgl) +The reference encoder library, transcoder, and most specification documents in this repo (unless otherwise explicitly indicated) are Copyright © 2016–2026 Binomial LLC. All rights reserved except as granted under the [Apache 2.0 LICENSE](https://github.com/BinomialLLC/basis_universal/blob/master/LICENSE). Basis Universal™ is a trademark of Binomial LLC. KTX™ is a trademark of [The Khronos Group Inc.](https://www.khronos.org/ktx/) See our Apache 2.0 [NOTICE file](https://github.com/BinomialLLC/basis_universal/wiki/NOTICE). If you modify the Basis Universal reference source code, specifications, or wiki documents and redistribute the files, you must cause any modified files to carry prominent notices stating that you changed the files (see Apache 2.0 §4(b)). 
-- [UASTC HDR 4x4 Example Images](https://github.com/BinomialLLC/basis_universal/wiki/UASTC-HDR-Examples) +See our [DEP5 file](https://github.com/BinomialLLC/basis_universal/blob/master/.reuse/dep5) for the complete list of software and their licenses in this repo. The encoder library is Apache 2.0, but it utilizes some open source 3rd party modules (in 'encoder/3rdparty' and in the 'Zstd' directory) to load [.QOI](https://qoiformat.org/), [.DDS](https://github.com/DeanoC/tiny_dds), [.EXR](https://github.com/syoyo/tinyexr) images, to handle [Zstd](https://github.com/facebook/zstd) compression, and to unpack ASTC texture blocks. See the [LICENSES](https://github.com/BinomialLLC/basis_universal/tree/master/LICENSES) folder. The transcoder utilizes no 3rd party libraries or dependencies, other than Zstd (which is optional but limits the transcoder to non-Zstd utilizing codecs). + +Links +----- -### UASTC 6x6 HDR Specific Links: +- [Wiki/Specifications](https://github.com/BinomialLLC/basis_universal/wiki) +- [Release Notes](https://github.com/BinomialLLC/basis_universal/wiki/Release-Notes) +- [Live Compression/Transcoding Testbed](https://subquantumtech.com/xu/ktx2_encode_test/) - A WASM64 compatible browser is recommended (such as Chrome/Edge/Firefox), especially for XUASTC LDR compression, but it works under plain WASM too (with resolution limits due to less available memory). 
+- [Live WebGL Examples](https://subquantumtech.com/xu/) +- [JavaScript API/WASM/WebGL info](https://github.com/BinomialLLC/basis_universal/tree/master/webgl) +- [XUASTC LDR Specification](https://github.com/BinomialLLC/basis_universal/wiki/XUASTC-LDR-Specification-v1.0) -- [UASTC HDR 6x6 Example Images](https://github.com/BinomialLLC/basis_universal/wiki/ASTC-HDR-6x6-Example-Images) +### UASTC HDR 4x4/6x6 Specific Links: -- [UASTC HDR 6x6 Support Nodes](https://github.com/BinomialLLC/basis_universal/wiki/UASTC-HDR-6x6-Support-Notes) +- [UASTC HDR 4x4 Example Images](https://github.com/BinomialLLC/basis_universal/wiki/UASTC-HDR-Examples) +- [UASTC HDR 6x6 Example Images](https://github.com/BinomialLLC/basis_universal/wiki/ASTC-HDR-6x6-Example-Images) +- [UASTC HDR 6x6 Support Notes](https://github.com/BinomialLLC/basis_universal/wiki/UASTC-HDR-6x6-Support-Notes) +- [Quick comparison of ARM's astcenc HDR 6x6 encoder vs. ours](https://github.com/richgel999/junkdrawer/wiki/ASTC-HDR-6x6-Encoder-Comparisons) -- [Quick comparison of ARM's astcenc HDR 6x6 encoder vs. 
ours](https://github.com/richgel999/junkdrawer/wiki/ASTC-6x6-HDR:-astcenc-%E2%80%90thorough-%E2%80%90exhausive-vs.-basis-universal-comp_level-3) +---- Supported LDR GPU Texture Formats --------------------------------- -ETC1S and UASTC LDR 4x4 files can be transcoded to: - +ETC1S, UASTC LDR 4x4, XUASTC LDR 4x4-12x12 and ASTC LDR 4x4-12x12 files can be transcoded to: - ASTC LDR 4x4 L/LA/RGB/RGBA 8bpp +- ASTC LDR 4x4-12x12 (XUASTC/ASTC), 0.89-8bpp - BC1-5 RGB/RGBA/X/XY - BC7 RGB/RGBA - ETC1 RGB, ETC2 RGBA, and ETC2 EAC R11/RG11 @@ -51,39 +72,61 @@ ETC1S and UASTC LDR 4x4 files can be transcoded to: Supported HDR GPU Texture Formats --------------------------------- -UASTC HDR 4x4 and UASTC HDR 6x6 files can be transcoded to: +UASTC HDR 4x4, ASTC HDR 6x6, and UASTC HDR 6x6 files can be transcoded to: - ASTC HDR 4x4 (8bpp, UASTC HDR 4x4 only) - ASTC HDR 6x6 RGB (3.56bpp, ASTC HDR 6x6 or UASTC HDR 6x6 intermediate only) - BC6H RGB (8bpp, either UASTC HDR 4x4 or UASTC HDR 6x6) - Uncompressed HDR raster image formats: RGB_16F/RGBA_16F (half float/FP16 RGB, 48 or 64bpp), or 32-bit/pixel shared exponent [RGB_9E5](https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt) -Supported Texture Compression Modes ------------------------------------ +---- + +Supported Texture Compression/Supercompression Modes +---------------------------------------------------- -1. [ETC1S](https://github.com/BinomialLLC/basis_universal/wiki/.basis-File-Format-and-ETC1S-Texture-Video-Specification): A roughly .3-3bpp low to medium quality supercompressed mode based off a subset of [ETC1](https://en.wikipedia.org/wiki/Ericsson_Texture_Compression) called "ETC1S". This mode supports variable quality vs. file size levels (like JPEG), alpha channels, built-in compression, and texture arrays optionally compressed as a video sequence using skip blocks ([Conditional Replenishment](https://en.wikipedia.org/wiki/MPEG-1)). 
This mode can be rapidly transcoded to all of the supported LDR texture formats. +1. **[ETC1S](https://github.com/BinomialLLC/basis_universal/wiki/.basis-File-Format-and-ETC1S-Texture-Video-Specification)**: A roughly .3-3bpp low to medium quality supercompressed mode based on a subset of [ETC1](https://en.wikipedia.org/wiki/Ericsson_Texture_Compression) called "ETC1S". This mode supports variable quality vs. file size levels (like JPEG), alpha channels, built-in compression, and texture arrays optionally compressed as a video sequence using skip blocks ([Conditional Replenishment](https://en.wikipedia.org/wiki/MPEG-1)). This mode can be rapidly transcoded to all of the supported LDR texture formats. -2. [UASTC LDR 4x4](https://github.com/BinomialLLC/basis_universal/wiki/UASTC-LDR-4x4-Texture-Specification): An 8 bits/pixel LDR high quality mode. UASTC LDR is a 19 mode subset of the standard [ASTC LDR](https://en.wikipedia.org/wiki/Adaptive_scalable_texture_compression) 4x4 (8bpp) texture format, but with a custom block format containing transcoding hints. Transcoding UASTC LDR to ASTC LDR and BC7 are particularly fast and simple, because UASTC LDR is a common subset of both BC7 and ASTC. The transcoders for the other texture formats are accelerated by several format-specific hint bits present in each UASTC LDR block. +2. **[UASTC LDR 4x4](https://github.com/BinomialLLC/basis_universal/wiki/UASTC-LDR-4x4-Texture-Specification)**: An 8 bits/pixel LDR high quality mode. UASTC LDR is a 19 mode subset of the standard [ASTC LDR](https://en.wikipedia.org/wiki/Adaptive_scalable_texture_compression) 4x4 (8bpp) texture format, but with a custom block format containing transcoding hints. Transcoding UASTC LDR to ASTC LDR and BC7 is particularly fast and simple, because UASTC LDR is a common subset of both BC7 and ASTC. The transcoders for the other texture formats are accelerated by several format-specific hint bits present in each UASTC LDR block. 
-This mode supports an optional [Rate-Distortion Optimizated (RDO)](https://en.wikipedia.org/wiki/Rate%E2%80%93distortion_optimization) post-process stage that conditions the encoded UASTC LDR texture data in the .KTX2/.basis file so it can be more effectively LZ compressed. More details [here](https://github.com/BinomialLLC/basis_universal/wiki/UASTC-implementation-details). +This mode supports an optional [Rate-Distortion Optimized (RDO)](https://en.wikipedia.org/wiki/Rate%E2%80%93distortion_optimization) post-process stage that conditions the encoded UASTC LDR texture data in the .KTX2/.basis file so it can be more effectively LZ compressed. More details [here](https://github.com/BinomialLLC/basis_universal/wiki/UASTC-implementation-details). Here is the [UASTC LDR 4x4 specification document](https://github.com/BinomialLLC/basis_universal/wiki/UASTC-LDR-4x4-Texture-Specification). -3. [UASTC HDR 4x4](https://github.com/BinomialLLC/basis_universal/wiki/UASTC-HDR-4x4-Texture-Specification-v1.0): An 8 bits/pixel HDR high quality mode. This is a 24 mode subset of the standard [ASTC HDR](https://en.wikipedia.org/wiki/Adaptive_scalable_texture_compression) 4x4 (8bpp) texture format. It's designed to be high quality, supporting the 27 partition patterns in common between BC6H and ASTC, and fast to transcode with very little loss (typically a fraction of a dB PSNR) to the BC6H HDR texture format. Notably, **UASTC HDR 4x4 data is 100% standard ASTC texture data**, so no transcoding at all is required on devices or API's supporting ASTC HDR. This mode can also be transcoded to various 32-64bpp uncompressed HDR texture/image formats. +3. **[UASTC HDR 4x4](https://github.com/BinomialLLC/basis_universal/wiki/UASTC-HDR-4x4-Texture-Specification)**: An 8 bits/pixel HDR high quality mode. This is a 24 mode subset of the standard [ASTC HDR](https://en.wikipedia.org/wiki/Adaptive_scalable_texture_compression) 4x4 (8bpp) texture format. 
It's designed to be high quality, supporting the 27 partition patterns in common between BC6H and ASTC, and fast to transcode with very little loss (typically a fraction of a dB PSNR) to the BC6H HDR texture format. Notably, **UASTC HDR 4x4 data is 100% standard ASTC texture data**, so no transcoding at all is required on devices or APIs that support ASTC HDR. This mode can also be transcoded to various 32-64bpp uncompressed HDR texture/image formats. Here is the [UASTC HDR 4x4 specification document](https://github.com/BinomialLLC/basis_universal/wiki/UASTC-HDR-4x4-Texture-Specification-v1.0), and here are some compressed [example images](https://github.com/BinomialLLC/basis_universal/wiki/UASTC-HDR-Examples). -4. UASTC HDR 6x6 or RDO UASTC HDR 6x6: A 3.56 bits/pixel (or less with RDO+Zstd) HDR high quality mode. Just like mode #3, **UASTC HDR 6x6 data is 100% standard ASTC texture data**. Here's a [page with details](https://github.com/BinomialLLC/basis_universal/wiki/UASTC-HDR-6x6-Support-Notes). The current encoder supports weight grid upsampling, 1-3 subsets, single or dual planes, CEM's 7 and 11, and all unique ASTC partition patterns. +4. **ASTC HDR 6x6 or RDO ASTC HDR 6x6**: A 3.56 bits/pixel (or less with RDO+Zstd) HDR high quality mode. Just like mode #3, **ASTC HDR 6x6 data is 100% standard ASTC texture data**. Here's a [page with details](https://github.com/BinomialLLC/basis_universal/wiki/UASTC-HDR-6x6-Support-Notes). The current encoder supports weight grid upsampling, 1-3 subsets, single or dual planes, CEM's 7 and 11, and all unique ASTC partition patterns. + +The ASTC HDR decoder, used in the transcoder module, supports the entire ASTC HDR format. + +5. **UASTC HDR 6x6 Intermediate ("GPU Photo HDR")**: A custom compressed intermediate format that can be rapidly transcoded to ASTC HDR 6x6, BC6H, and various uncompressed HDR formats. 
The custom compressed file format is [described here](https://github.com/BinomialLLC/basis_universal/wiki/UASTC-HDR-6x6-Intermediate-File-Format-(Basis-GPU-Photo-6x6)). The format supports 75 unique ASTC configurations, weight grid upsampling, 1-3 subsets, single or dual planes, CEM's 7 and 11, and all unique ASTC partition patterns. One of the first HDR GPU texture codecs supporting the [delta E ITP (ICtCp) colorspace metric](https://www.portrait.com/resource-center/about-deltae-e/) and perceptual saliency maps. + +6. **Standard ASTC LDR 4x4-12x12**. Supports all 14 standard ASTC block sizes. Transcodable from any ASTC block size to any other supported LDR texture format with adaptive deblocking, including BC7 using the [bc7f "one-shot" analytical BC7 encoder](https://github.com/BinomialLLC/basis_universal/wiki/Transcoder-Internals-Analytical-Real-Time-Encoders) (supporting all BC7 modes/features) and ETC1 (using etc1f, which also supports the entire ETC1 format). + +The ASTC LDR decoder, used in the transcoder module, supports the entire standard ASTC LDR format (i.e. not just ASTC texture blocks generated using our encoder). The ASTC LDR transcoder can transcode any block size ASTC (4x4 - 12x12) to the other LDR texture formats. + +7. **XUASTC LDR 4x4-12x12 ("GPU Photo LDR/SDR")**: Supercompressed ASTC with **Weight Grid DCT**, supporting all 14 standard ASTC block sizes, with adaptive deblocking when transcoding to other texture/pixel formats. Bitrates range from approximately 0.3–5.7 bpp, depending on content, profile, block size, windowed RDO, and Weight Grid DCT quality settings. Typical XUASTC LDR 4×4 (**8 bpp in memory**) transmission/on-disk bitrate with Weight Grid DCT (where it is least effective) is **1.15–3.5 bpp (typical ≈2.25 bpp)**, with larger block sizes achieving even lower usable bitrates, down to approximately 0.3 bpp.
Like ASTC LDR, the XUASTC LDR transcoder can transcode any block size ASTC (4x4 - 12x12) to the other LDR texture formats, but with additional block-size specific optimizations. + +Supports three profiles: context-based range/arithmetic coding (for higher compression ratios), Zstd (for faster and simpler transcoding), or a hybrid profile using both approaches. Transcodable to all other supported LDR texture formats, including fully featured (all 8 modes, all dual-plane channel configurations, all mode settings) BC7. Certain common block sizes (4×4, 6×6, and 8×6) have specializations for particularly fast transcoding directly to BC7, bypassing analytical BC7 encoding (using [bc7f](https://github.com/BinomialLLC/basis_universal/wiki/Transcoder-Internals-Analytical-Real-Time-Encoders)) entirely for the most common ASTC configurations (solid color and single-subset CEMs). -5. UASTC HDR 6x6 Intermediate ("GPU Photo"): A custom compressed intermediate format that can be rapidly transcoded to ASTC HDR 6x6, BC6H, and various uncompressed HDR formats. The custom compressed file format is [described here](https://github.com/BinomialLLC/basis_universal/wiki/UASTC-HDR-6x6-Intermediate-File-Format-(Basis-GPU-Photo-6x6)). The format supports 75 unique ASTC configurations, weight grid upsampling, 1-3 subsets, single or dual planes, CEM's 7 and 11, and all unique ASTC partition patterns. +Weight Grid DCT can be disabled; however, supercompression remains available with optional, configurable windowed RDO. Compatible with all major image and texture content types, including photographic images, lightmaps, albedo/specular textures, various types of normal maps, luminance-only maps, and geospatial mapping signals. -Notes: -- Modes #3 and #4 output 100% standard or plain ASTC texture data (with or without RDO), like any other ASTC encoder. The .KTX2 files are just plain textures. 
-- The other modes (#1, #2, #5) output compressed data in various custom formats, which our transcoder library can convert in real-time to various GPU texture or pixel formats. -- Modes #4 and #5 internally use the same unified HDR 6x6 encoder. +Supports adaptive deblocking when transcoding from larger block sizes; this can be disabled using a transcoder flag. + +One interesting use of XUASTC LDR which works with any of the 14 block sizes: the efficient distribution of texture content compressed to very low bitrates vs. older systems, resulting in game-changing download time reductions. Using the larger XUASTC block sizes (beyond 6x6) with Weight Grid DCT and adaptive deblocking on either the CPU or [GPU using a simple shader](https://github.com/BinomialLLC/basis_universal/tree/master/shader_deblocking), **any developer can now distribute texture and image content destined for BC7 at .35-1.5 bpp**, and **cache the transcoded BC7 data on a modern Gen 4 or 5 (10+ GB/sec.) SSD**. + +XUASTC LDR supports the following ASTC configurations: L/LA/RGB/RGBA CEMs; base+scale or RGB/RGBA direct; base+offset CEMs; Blue Contraction encoding; 1–3 subsets; all partition patterns; and single- or dual-plane modes. Here is the [XUASTC LDR specification](https://github.com/BinomialLLC/basis_universal/wiki/XUASTC-LDR-Specification-v1.0). Also see the [ASTC and XUASTC LDR Usage Guide](https://github.com/BinomialLLC/basis_universal/wiki/ASTC-and-XUASTC-LDR-Usage-Guide). + +Notes: +- Mode #1 (ETC1S) has special support and optimizations for basic temporal supercompression ([texture video](https://github.com/BinomialLLC/basis_universal/wiki/Encoding-ETC1S-and-XUASTC-LDR-Texture-Video)). +- Modes #3 (UASTC HDR 4x4), #4 (RDO ASTC HDR 6x6), and #6 (ASTC LDR 4x4-12x12) output 100% standard ASTC texture data (with or without RDO), like any other ASTC encoder. The .KTX2 files are just plain textures.
+- The other modes (#1, #2, #5, #7) output compressed data in various custom supercompressed formats, which our transcoder library can convert in real-time to various GPU texture or pixel formats. +- Modes #4 (ASTC HDR 6x6) and #5 (UASTC HDR 6x6) internally use the same unified ASTC HDR 6x6 encoder. +- Modes #6 (ASTC LDR 4x4-12x12) and #7 (XUASTC LDR 4x4-12x12) internally use the same unified ASTC LDR encoder. ### Other Features -Both .basis and .KTX2 files support mipmap levels, texture arrays, cubemaps, cubemap arrays, and texture video, in all five modes. Additionally, .basis files support non-uniform texture arrays, where each image in the file can have a different resolution or number of mipmap levels. +Both .basis and .KTX2 files support mipmap levels, texture arrays, cubemaps, cubemap arrays, and texture video, in all modes. Additionally, .basis files support non-uniform texture arrays, where each image in the file can have a different resolution or number of mipmap levels. In ETC1S mode, the compressor is able to exploit color and pattern correlations across all the images in the entire file using global endpoint/selector codebooks, so multiple images with mipmaps can be stored efficiently in a single file. The ETC1S mode also supports skip blocks (Conditional Replenishment) for short video sequences, to prevent sending blocks which haven't changed relative to the previous frame. @@ -91,18 +134,21 @@ The LDR image formats supported for reading are .PNG, [.DDS with mipmaps](https: The system now supports loading basic 2D .DDS files with optional mipmaps, but the .DDS file must be in one of the supported uncompressed formats: 24bpp RGB, 32bpp RGBA/BGRA, half-float RGBA, or float RGBA. Using .DDS files allows the user to control exactly how the mipmaps are generated before compression.
-Building --------- +---- + +Building (Native) +----------------- The encoding library and command line tool have no required 3rd party dependencies that are not already in the repo itself. The transcoder is a single .cpp source file (in `transcoder/basisu_transcoder.cpp`) which has no 3rd party dependencies. We build and test under: -- Windows x86/x64 using Visual Studio 2019/2022, MSVC or clang -- Windows ARM using Visual Studio 2022 ARM v17.13.0 or later -- Mac OSX (M1) with clang v15.0 -- Ubuntu Linux with gcc v11.4 or clang v14 -- Arch Linux ARM, on a [Pinebook Pro](https://pine64.org/devices/pinebook_pro/), with gcc v12.1. +- Windows x86/x64 using Visual Studio 2026, MSVC or clang +- Windows ARM using Visual Studio 2022 ARM 17.13.0 +- Ubuntu Linux 24.04.3 LTS (noble) with gcc 13.3.0 or clang 18.1.3 +- macOS (M1) with clang 16.0.0 +- Arch Linux ARM, on a [Pinebook Pro](https://pine64.org/devices/pinebook_pro/), with gcc 12.1. - Ubuntu Linux 24.04 on RISC-V (Orange PI RV2) +- cmake: 3.28.3, emcc: 4.0.19 Under Windows with Visual Studio you can use the included `basisu.sln` file. Alternatively, you can use cmake to create new VS solution/project files. @@ -115,9 +161,76 @@ cmake .. make ``` -To build with SSE 4.1 support on x86/x64 systems (encoding is roughly 15-30% faster), add `-DSSE=TRUE` to the cmake command line. Add `-DOPENCL=TRUE` to build with (optional) OpenCL support. Use `-DCMAKE_BUILD_TYPE=Debug` to build in debug. To build 32-bit executables, add `-DBUILD_X64=FALSE`. +To build with SSE 4.1 support on x86/x64 systems (ETC1S encoding is roughly 15-30% faster), add `-DBASISU_SSE=TRUE` to the cmake command line. Add `-DBASISU_OPENCL=TRUE` to build with (optional) OpenCL support. Use `-DCMAKE_BUILD_TYPE=Debug` to build in debug. To build 32-bit executables, add `-DBASISU_BUILD_X64=FALSE`. + +After building, the native command line tool used to create, validate, and transcode/unpack .KTX2/.basis files is `bin/basisu`. 
+ +*Note we use C++17 for compiling the software. Anything later is too new for us. Compiling the software with a newer C++ version is not supported by us yet.* + +---- + +Running the Precompiled WASM WASI Executables +--------------------------------------------- + +For smaller images/textures (~4 megatexels or less), there are precompiled, secure, cross-platform 32-bit .WASM WASI executables checked into the `bin` directory: `basisu_mt.wasm` (multithreaded) and `basisu_st.wasm` (single threaded). Quick testing - ETC1S/UASTC LDR 4x4 (all platforms) - multithreaded and single threaded, using [wasmtime](https://wasmtime.dev/): + +Tested with wasmtime v39.0.0: + +``` +cd bin +wasmtime run --dir=. --dir=../test_files --wasm threads=yes --wasi threads=yes ./basisu_mt.wasm -test +wasmtime run --dir=. --dir=../test_files ./basisu_st.wasm -test +``` + +For newer versions of wasmtime such as v42.0.1 add `--wasm shared-memory=yes`: + +``` +wasmtime run --dir=. --dir=../test_files --wasm threads=yes --wasm shared-memory=yes --wasi threads=yes ./basisu_mt.wasm -test +``` + +See the `runwt.sh`, `runwt.bat`, `runw.sh`, or `runw.bat` scripts for examples on how to run the WASM executables using wasmtime. Windows example for XUASTC LDR 6x6 compression using the arithmetic profile, with Weight Grid DCT level 70: + +``` +cd bin +runwt.bat ../test_files/tough.png -xuastc_ldr_6x6 -quality 70 -xuastc_arith +runwt.bat tough.ktx2 +``` + +Linux/macOS: + +``` +cd bin +chmod +x runwt.sh +./runwt.sh ../test_files/tough.png -xuastc_ldr_6x6 -quality 70 -xuastc_arith +./runwt.sh tough.ktx2 +``` + +Unfortunately, 32-bit WASM WASI executables have tradeoffs vs. native executables: Limited memory, and slower performance (somewhat mitigatable using WASM threading, which we support). **32-bit WASM WASI memory constraints limit the maximum image/texture size that can be compressed to ASTC LDR or XUASTC LDR to around 4 megapixels.** (The other codecs have lower memory requirements.) 
For Web, we support both WASM and WASM64 (with or without threading), which greatly improves the WASM memory situation. As far as we know as of 2/2026, wasmtime supports WASM64, but the WASI SDK still [doesn't officially support the wasm64-wasi target](https://github.com/WebAssembly/wasi-sdk/issues/212); once it does, we'll support it. + +Building (WASM WASI) +-------------------- + +To build the WASM WASI executables, you will need the [WASM WASI SDK](https://github.com/WebAssembly/wasi-sdk) installed. The `WASI_SDK_PATH` environment variable must be set to the correct path where the SDK is installed. + +Multithreaded: +``` +mkdir build_wasm_mt +cd build_wasm_mt +cmake -DCMAKE_TOOLCHAIN_FILE=$WASI_SDK_PATH/share/cmake/wasi-sdk-pthread.cmake -DCMAKE_BUILD_TYPE=Release -DBASISU_WASM_THREADING=ON .. +make +``` + +Single threaded: +``` +mkdir build_wasm_st +cd build_wasm_st +cmake -DCMAKE_TOOLCHAIN_FILE=$WASI_SDK_PATH/share/cmake/wasi-sdk.cmake -DCMAKE_BUILD_TYPE=Release -DBASISU_WASM_THREADING=OFF .. +make +``` -After building, the native command line tool used to create, validate, and transcode/unpack .basis/.KTX2 files is `bin/basisu`. +The WASM WASI executables will be placed in the `bin` directory. These platform-independent executables are fully functional, and can be executed using a WASM WASI runtime such as [wasmtime](https://github.com/bytecodealliance/wasmtime).
+ +---- ### Testing the Codec @@ -129,26 +242,60 @@ basisu -test basisu -test_hdr_4x4 basisu -test_hdr_6x6 basisu -test_hdr_6x6i +basisu -test_xuastc_ldr ``` -To test the codec in OpenCL mode (must have OpenCL libs/headers/drivers installed and have compiled OpenCL support in by running cmake with `-DOPENCL=TRUE`): +To test the codec in OpenCL mode (must have OpenCL libs/headers/drivers installed and have compiled OpenCL support in by running cmake with `-DBASISU_OPENCL=TRUE`): ``` basisu -test -opencl ``` +---- + Compressing and Unpacking .KTX2/.basis Files -------------------------------------------- -- To compress an LDR sRGB PNG/QOI/TGA/JPEG/DDS image to an ETC1S .KTX2 file, at quality level 255 (the highest): +- To compress an LDR sRGB PNG/QOI/TGA/JPEG/DDS image to a supercompressed XUASTC LDR 6x6 .KTX2 file, at quality level 75 (**valid quality levels 1-100, where higher values=higher quality**), effort level 4 (**valid effort levels 0-10, higher values=slower compression, default effort is 3**): + +`basisu -xuastc_ldr_6x6 -quality 75 -effort 4 x.png` + +`-quality 100` disables Weight Grid DCT, leaving just lossless supercompression of ASTC. An alias for `-xuastc_ldr_6x6` is `-ldr_6x6i` (where 'i'="intermediate"). All **[14 standard ASTC block sizes](https://developer.nvidia.com/astc-texture-compression-for-game-assets) are supported, from 4x4-12x12**: 4x4, 5x4, 5x5, 6x5, 6x6, 8x5, 8x6, 10x5, 10x6, 8x8, 10x8, 10x10, 12x10 and 12x12. The **XUASTC LDR to BC7 transcoder has special optimizations for several common block sizes: 4x4, 6x6 and 8x6**. When transcoding XUASTC LDR at these particular block sizes, most XUASTC blocks are *directly* transcoded to BC7 (i.e. directly from the XUASTC latent to the BC7 latent), skipping the real-time analytical bc7f encoding step. 
+ +More XUASTC LDR specific options (many of these also apply to standard ASTC - see our [ASTC/XUASTC Usage Guide](https://github.com/BinomialLLC/basis_universal/wiki/ASTC-and-XUASTC-LDR-Usage-Guide)): + + - The options `-xuastc_arith`, `-xuastc_zstd` (the default), and `-xuastc_hybrid` control the **XUASTC LDR profile used**. The arithmetic profile trades off transcoding throughput for roughly 5-18% better compression vs. the Zstd profile, and the hybrid profile is a balance between the two. + + - `-ts` or `-srgb` enables the **sRGB profile (the default)**, and `-tl` or `-linear` **enables the linear profile**. Ideally this setting will match how the ASTC texture is sampled by the GPU. Use linear on normal maps. + + - `-weights X Y Z W` sets the unsigned integer **channel error weights**, used to favor certain channels during compression. -`basisu -q 255 x.png` + - Another set of XUASTC specific options overrides the **windowed RDO behavior** (windowed or bounded RDO is a separate and optional perceptual optimization vs. Weight Grid DCT): `-xy` enables and `-xyd` disables windowed RDO. By default, if Weight Grid DCT is not enabled (i.e. `-quality` isn't specified, or is set to 100), windowed RDO is disabled. Windowed RDO is automatically enabled if the quality level is less than 100, unless `-xyd` is specified. Also see the tool's [help text](https://github.com/BinomialLLC/basis_universal/blob/master/cmd_help/cmd_help.txt) for additional windowed RDO options: `-ls_min_psnr`, `-ls_min_alpha_psnr`, `-ls_thresh_psnr`, `-ls_thresh_alpha_psnr`, etc. 
-- For a linear LDR image, in ETC1S mode, at default quality (128): + - `-xs` disables 2-3 subset usage, and `-xp` disables dual plane usage (slightly higher compression, faster direct transcoding to BC7 will occur more often) + - `-higher_quality_transcoding`: Permits slower but higher quality transcoding + - `-no_deblocking`: Disables adaptive deblocking on ASTC block sizes > 8x6 (faster) + - `-force_deblocking`: Always use adaptive deblocking filter, even for block sizes <= 8x6 (slower) + - `-stronger_deblocking`: Use stronger deblocking when it's enabled (same performance) + - `-fast_xuastc_ldr_bc7_transcoding` and `-no_fast_xuastc_ldr_bc7_transcoding`: Controls faster direct XUASTC->BC7 transcoding (defaults to enabled, which is slightly lower quality) + +- To compress an LDR sRGB image to a standard ASTC LDR 6x6 .KTX2 file, using effort level 4 (valid effort levels 0-10): + +`basisu -astc_ldr_6x6 -effort 4 x.png` + +An alias for `-astc_ldr_6x6` is `-ldr_6x6`. + +Just like XUASTC LDR, all 14 standard ASTC block sizes are supported, from 4x4-12x12. Internally the XUASTC LDR encoder is used, but standard ASTC block data is output, instead of supercompressed XUASTC LDR. Most XUASTC LDR options also work in ASTC LDR mode. + +- To compress an LDR sRGB image to an ETC1S .KTX2 file, at quality level 100 (the highest): + +`basisu -quality 100 x.png` + +- For a linear LDR image, in ETC1S mode, at default quality (`-quality 50`, or the older `-q 128`): `basisu -linear x.png` -- To compress to UASTC LDR, which is much higher quality than ETC1S: +- To compress to UASTC LDR 4x4, which is much higher quality than ETC1S, but lower maximum quality vs. 
ASTC/XUASTC LDR 4x4: `basisu -uastc x.png` @@ -156,43 +303,41 @@ Compressing and Unpacking .KTX2/.basis Files `basisu x.exr` -- To compress an HDR 6x6 file: +- To compress a standard ASTC HDR 6x6 file (~3.56 bpp): ``` -basisu -hdr_6x6 x.exr -basisu -hdr_6x6 -lambda 500 x.exr +basisu -hdr_6x6 x.exr +basisu -hdr_6x6 -lambda 500 x.exr basisu -hdr_6x6_level 5 -lambda 500 x.exr ``` -- To compress an HDR 6x6 file using the compressed intermediate format for smaller files: +- To compress a UASTC HDR 6x6i file (using the compressed intermediate format) for smaller files (~1.75-3.0 bpp): ``` -basisu -hdr_6x6i x.exr -basisu -hdr_6x6i -lambda 500 x.exr +basisu -hdr_6x6i x.exr +basisu -hdr_6x6i -lambda 500 x.exr basisu -hdr_6x6i_level 5 -lambda 500 x.exr ``` -Note the .EXR reader we're using is [TinyEXR's](https://github.com/syoyo/tinyexr), which doesn't support all possible .EXR compression modes. Tools like [ImageMagick](https://imagemagick.org/) can be used to create .EXR files that TinyEXR can read. +Note the unified `-quality` and `-effort` options work in HDR, too. These examples use the older non-unified options, which allow more direct/precise control. -Alternatively, LDR images (such as .PNG) can be compressed to an HDR format by specifying `-hdr`, `-hdr_6x6`, or `-hdr_6x6i`. By default LDR images, when compressed to an HDR format, are first upconverted to HDR by converting them from sRGB to linear light and scaled to 100 [nits](https://en.wikipedia.org/wiki/Candela_per_square_metre) (candelas per square meter). The sRGB conversion step can be disabled by specifying `-hdr_ldr_no_srgb_to_linear`, and the normalized RGB linear light to nit multiplier can be changed by specifying `-hdr_ldr_upconversion_nit_multiplier X`. +Be aware that the .EXR reader we use is [TinyEXR's](https://github.com/syoyo/tinyexr), which doesn't support all possible .EXR compression modes. Tools like [ImageMagick](https://imagemagick.org/) can be used to create .EXR files that TinyEXR can read. 
-Note: If you're compressing LDR/SDR image files to an HDR format, the codec's default behavior is to convert the 8-bit image data to linear light (by undoing the sRGB transfer function). It then multiplies the linear light RGB values by the LDR->HDR upconversion multiplier, which is in [nits (candela per sq. meter)](https://en.wikipedia.org/wiki/Candela_per_square_metre). In previous versions of the codec, this multiplier was effectively 1 nit, but it now defaults to 100 nits in all modes. (The typical luminance of LDR monitors is 80-100 nits.) To change this, use the "-hdr_ldr_upconversion_nit_multiplier X" command line option. (This is done because the HDR 6x6 codecs function internally in the [ICtCp HDR colorspace](https://en.wikipedia.org/wiki/ICtCp). LDR/SDR images must be upconverted to linear light HDR images scaled to a proper max. luminance based off how the image data will be displayed on actual SDR/HDR monitors.) +Alternatively, LDR images (such as .PNG) can be compressed to an HDR format by specifying `-hdr`, `-hdr_6x6`, or `-hdr_6x6i`. By default LDR images, when compressed to an HDR format, are first upconverted to HDR by converting them from sRGB to linear light and scaled to 100 [nits - candela per sq. meter, cd/m²](https://grokipedia.com/page/Candela_per_square_metre). The sRGB conversion step can be disabled by specifying `-hdr_ldr_no_srgb_to_linear`, and the normalized RGB linear light to nit multiplier can be changed by specifying `-hdr_ldr_upconversion_nit_multiplier X`. -### Some Useful Command Line Options - -- `-fastest` (which is equivalent to `-uastc_level 0`) puts the UASTC LDR/HDR encoders in their fastest (but lower quality) modes. +Note: If you're compressing LDR/SDR image files to an HDR format, the codec's default behavior is to convert the 8-bit image data to linear light (by undoing the sRGB transfer function). It then multiplies the linear light RGB values by the LDR->HDR upconversion multiplier, which is in nits. 
In previous versions of the codec, this multiplier was effectively 1 nit, but it now defaults to 100 nits in all modes. (The typical luminance of LDR monitors is 80-100 nits.) To change this, use the "-hdr_ldr_upconversion_nit_multiplier X" command line option. (This is done because the HDR 6x6 codecs function internally in the [ICtCp HDR colorspace](https://en.wikipedia.org/wiki/ICtCp). LDR/SDR images must be upconverted to linear light HDR images scaled to a proper max. luminance based on how the image data will be displayed on actual SDR/HDR monitors.) -- `-slower` puts the UASTC LDR/HDR encoders in higher quality but slower modes (equivalent to `-uastc_level 3`). The default level is 1, and the highest is 4 (which is quite slow). +### Some Useful Command Line Options -- `-q X`, where X ranges from [1,255], controls the ETC1S mode's quality vs. file size tradeoff level. 255 is the highest quality, and the default is 128. +- All codecs now support simple unified "quality" and "effort" settings. `-effort X` [0,10] controls how much of the search space (and how slowly) compression proceeds, and `-quality X` [1,100] controls the quality vs. bitrate tradeoff. Internally these settings will be mapped to each codec's specific configuration settings. Almost all the older settings still work, however. Previously, `-q X`, where X ranged from [1,255], controlled the ETC1S quality setting. This option is still available, but `-quality` is preferred now. - `-debug` causes the encoder to print internal and developer-oriented verbose debug information. -- `-stats` to see various quality (PSNR) statistics. +- `-stats` to see various quality (PSNR) statistics. - `-linear`: ETC1S defaults to sRGB colorspace metrics, UASTC LDR currently always uses linear metrics, and UASTC HDR defaults to weighted RGB metrics (with 2,3,1 weights). 
If the input is a normal map, or some other type of non-sRGB (non-photographic) texture content, be sure to use `-linear` to avoid extra unnecessary artifacts. (Angular normal map metrics for UASTC LDR/HDR are definitely doable and on our TODO list.) -- Specifying `-opencl` enables OpenCL mode, which currently only accelerates ETC1S encoding. +- Specifying `-opencl` enables OpenCL mode, which currently only accelerates ETC1S encoding if it's been enabled at compile time. - The compressor is multithreaded by default, which can be disabled using the `-no_multithreading` command line option. The transcoder is currently single threaded, although it is thread safe (i.e. it supports decompressing multiple texture slices in parallel). @@ -205,9 +350,9 @@ More Example Command Lines `-uastc_rdo_l X` controls the RDO ([Rate-Distortion Optimization](https://en.wikipedia.org/wiki/Rate%E2%80%93distortion_optimization)) quality setting. The lower this value, the higher the quality, but the larger the compressed file size. Good values to try are between .2-3.0. The default is 1.0. -- To add automatically generated mipmaps to a ETC1S .KTX2 file, at a higher than default quality level (which ranges from [1,255]): +- To add automatically generated mipmaps to an ETC1S .KTX2 file: -`basisu -mipmap -q 200 x.png` +`basisu -mipmap -quality 75 x.png` There are several mipmap options to change the filter kernel, the filter colorspace for the RGB channels (linear vs. sRGB), the smallest mipmap dimension, etc. The tool also supports generating cubemap files, 2D/cubemap texture arrays, etc. To bypass the automatic mipmap generator, you can create LDR or HDR uncompressed [.DDS texture files](https://learn.microsoft.com/en-us/windows/win32/direct3ddds/dx-graphics-dds-pguide) and feed them to the compressor. 
@@ -215,9 +360,9 @@ There are several mipmap options to change the filter kernel, the filter colorsp `basisu -comp_level 2 x.png` -On some rare images (ones with blue sky gradients come to bind), you may need to increase the ETC1S `-comp_level` setting, which ranges from 1,6. This controls the amount of overall effort the encoder uses to optimize the ETC1S codebooks and the compressed data stream. Higher comp_level's are *significantly* slower. +On some rare images (ones with blue sky gradients come to mind), you may need to increase the ETC1S `-comp_level` setting, which ranges from 1 to 6. This controls the amount of overall effort the encoder uses to optimize the ETC1S codebooks and the compressed data stream. Higher -comp_level's are *significantly* slower. -- To manually set the ETC1S codebook sizes (instead of using -q), with a higher codebook generation level (this is useful with texture video): +- To manually set the ETC1S codebook sizes (instead of using -quality, or the older -q options), with a higher codebook generation level (this is useful with texture video): `basisu x.png -comp_level 2 -max_endpoints 16128 -max_selectors 16128` @@ -238,31 +383,79 @@ See the help text for a complete listing of the tool's command line options. The Unpacking .KTX2/.basis files to .PNG/.EXR/.KTX/.DDS files --------------------------------------------------------- -You can either use the command line tool or [call the transcoder directly](https://github.com/BinomialLLC/basis_universal/wiki/How-to-Use-and-Configure-the-Transcoder) from JavaScript or C/C++ code to decompress .KTX2/.basis files to GPU texture data or uncompressed image data. 
To unpack a .KTX2 or.basis file to multiple .png/.exr/.ktx/.dds files: +You can either use the command line tool or [call the transcoder directly](https://github.com/BinomialLLC/basis_universal/wiki/How-to-Use-and-Configure-the-Transcoder) from JavaScript or C/C++ code to decompress .KTX2/.basis files to GPU texture data or uncompressed image data. To unpack a .KTX2 or .basis file to multiple .png/.exr/.ktx/.dds files: `basisu x.ktx2` -Use the `-no_ktx` and `-etc1_only`/`-format_only` options to unpack to less files. +Use the `-no_ktx` and `-etc1_only`/`-format_only` options to unpack to less files. + +`-info` and `-validate` will just display file information and not output any files. + +The written mipmapped, cubemap, or texture array .KTX/.DDS files will be in a wide variety of compressed GPU texture formats (PVRTC1 4bpp, ETC1-2, BC1-5, BC7, etc.), and to our knowledge there is unfortunately (as of 2024) still no single .KTX or .DDS viewer tool that correctly and reliably supports every GPU texture format that we support. BC1-5 and BC7 files are viewable using AMD's Compressonator, ETC1/2 using Mali's Texture Compression Tool, and PVRTC1 using Imagination Tech's PVRTexTool. [RenderDoc](https://renderdoc.org/) has a useful texture file viewer for many formats. The macOS *Finder* app supports previewing .EXR, .ASTC and .KTX files in various GPU formats, including ASTC LDR/ HDR. The Windows 11 Explorer can preview .DDS files. The [online OpenHDR Viewer](https://viewer.openhdr.org/) is useful for viewing .EXR/.HDR image files. + +---- + +Pixel Shader Deblocking Sample: CPU + GPU Deblocking Everywhere +--------------------------------------------------------------- + +The [shader_deblocking sample](https://github.com/BinomialLLC/basis_universal/blob/master/shader_deblocking/README.md) in the repo demonstrates how to use a simple pixel shader to deblock sampled textures of any block size between 4x4-12x12, greatly reducing block artifacts. 
The sample shader is compatible with mipmapping and bilinear or trilinear filtering. Ultimately, shader deblocking enables the usage of larger ASTC block sizes, reducing bitrate and increasing transcoding speeds. Deblocking is a standard feature of modern image and video codecs, and there's no reason why it can't be used while sampling (or transcoding) GPU textures. Using larger ASTC block sizes can significantly reduce GPU memory bandwidth. If bandwidth is the bottleneck — as it often is — the modest ALU and texture sampling cost of deblocking can be effectively free. + +XUASTC LDR's transcoder supports adaptive deblocking when transcoding to other (non-ASTC) formats like BC7, and GPU shader deblocking can be used for ASTC, resulting in a complete deblocking system for ASTC. + +---- + +Python Support +-------------- + +All key encoder and all transcoder functionality is now available from Python, but this is still in the early stages of development. See the README files in the python directory for how to build the native SO's/PYD's. The Python support module supports both native and WASM modules, which is used as a fallback if native libraries can't be loaded. Python support has been tested under Ubuntu Linux and Windows 11 so far. -`-info` and `-validate` will just display file information and not output any files. +Example: +``` +cd python +python3 -m tests.test_backend_loading +========== BACKEND LOADING TEST ========== + +Testing native backend... +[Encoder] Using native backend + [OK] Native backend loaded +Hello from basisu_wasm_api.cpp version 200 + Native get_version() ? 200 + Native alloc() returned ptr = 190977024 + Native free() OK + [OK] Native basic operations working. + +Testing WASM backend... +[WASM Encoder] Loaded: /mnt/c/dev/xuastc4/python/basisu_py/wasm/basisu_module_st.wasm +[Encoder] Using WASM backend + [OK] WASM backend loaded +Hello from basisu_wasm_api.cpp version 200 + WASM get_version() ? 
200 + WASM alloc() returned ptr = 26920160 + WASM free() OK + [OK] WASM basic operations working. + +========== DONE ========== +``` -The written mipmapped, cubemap, or texture array .KTX/.DDS files will be in a wide variety of compressed GPU texture formats (PVRTC1 4bpp, ETC1-2, BC1-5, BC7, etc.), and to our knowledge there is unfortunately (as of 2024) still no single .KTX or .DDS viewer tool that correctly and reliably supports every GPU texture format that we support. BC1-5 and BC7 files are viewable using AMD's Compressonator, ETC1/2 using Mali's Texture Compression Tool, and PVRTC1 using Imagination Tech's PVRTexTool. [RenderDoc](https://renderdoc.org/) has a useful texture file viewer for many formats. The Mac OSX Finder supports previewing .EXR and .KTX files in various GPU formats. The Windows 11 Explorer can preview .DDS files. The [online OpenHDR Viewer](https://viewer.openhdr.org/) is useful for viewing .EXR/.HDR image files. +---- WebGL Examples -------------- -The 'WebGL' directory contains several simple WebGL demos that use the transcoder and compressor compiled to [WASM](https://webassembly.org/) with [emscripten](https://emscripten.org/). These demos are online [here](https://subquantumtech.com/uastchdr2/). See more details in the readme file [here](webgl/README.md). +The 'WebGL' directory contains several simple WebGL demos that use the transcoder and compressor compiled to [WASM](https://webassembly.org/) with [Emscripten](https://emscripten.org/). These demos are online [here](https://subquantumtech.com/xu/). See more details in the readme file [here](webgl/README.md). 
![Screenshot of 'texture' example running in a browser.](webgl/texture_test/preview.png) ![Screenshot of 'gltf' example running in a browser.](webgl/gltf/preview.png) ![Screenshot of 'encode_test' example running in a browser.](webgl/ktx2_encode_test/preview.png) -Building the WASM Modules with [Emscripten](https://emscripten.org/) +---- + +Building the WASM Modules with [Emscripten](https://emscripten.org/) -------------------------------------------------------------------- -Both the transcoder and encoder may be compiled using emscripten to WebAssembly and used on the web. A set of JavaScript wrappers to the codec, written in C++ with emscripten extensions, is located in `webgl/transcoding/basis_wrappers.cpp`. The JavaScript wrapper supports nearly all features and modes, including texture video. See the README.md and CMakeLists.txt files in `webgl/transcoder` and `webgl/encoder`. +Both the transcoder and encoder may be compiled using Emscripten to WebAssembly and used on the web. A set of JavaScript wrappers to the codec, written in C++ with Emscripten extensions, is located in [`webgl/transcoding/basis_wrappers.cpp`](https://github.com/BinomialLLC/basis_universal/blob/master/webgl/transcoder/basis_wrappers.cpp). The JavaScript wrapper supports nearly all features and modes, including texture video. See the [README.md](https://github.com/BinomialLLC/basis_universal/tree/master/webgl) and CMakeLists.txt files in `webgl/transcoder` and `webgl/encoder`. -To build the WASM transcoder, after installing emscripten: +To build the WASM transcoder, after installing Emscripten: ``` cd webgl/transcoder/build @@ -278,17 +471,16 @@ emcmake cmake .. make ``` -There are two simple encoding/transcoding web demos, located in `webgl/ktx2_encode_test` and `webgl/texture_test`, that show how to use the encoder's and transcoder's Javascript wrapper API's. 
+There are several simple encoding/transcoding web demos, located in `webgl/ktx2_encode_test` and `webgl/texture_test`, that show how to use the encoder's and transcoder's JavaScript wrapper APIs. They are [live on the web here](https://subquantumtech.com/xu/). + +---- Low-level C++ Encoder/Transcoder API Examples --------------------------------------------- -Some simple examples showing how to directly call the C++ encoder and transcoder library API's are in [`example/examples.cpp`](https://github.com/BinomialLLC/basis_universal/blob/master/example/example.cpp). - -ETC1S Texture Video Tips ------------------------- +Some simple examples showing how to directly call the C++ encoder and transcoder library APIs are in [`example/example.cpp`](https://github.com/BinomialLLC/basis_universal/blob/master/example/example.cpp). -See the wiki [here](https://github.com/BinomialLLC/basis_universal/wiki/Encoding-ETC1S-Texture-Video-Tips). +---- Installation using the vcpkg dependency manager ----------------------------------------------- @@ -303,37 +495,46 @@ You can download and install Basis Universal using the [vcpkg](https://github.co The Basis Universal port in vcpkg is kept up to date by Microsoft team members and community contributors. If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository. (9/10/2024: UASTC HDR support is not available here yet.) -License -------- +--- -The transcoder and core encoder libraries are Apache 2.0. The transcoder utilizes no 3rd party libraries or dependencies. See [LICENSE](https://github.com/BinomialLLC/basis_universal/blob/master/LICENSE). 
+Project Policies +---------------- -The encoder library is Apache 2.0, but it utilizes some open source 3rd party modules (in 'encoder/3rdparty' and in the 'Zstd' directory) to load [.QOI](https://qoiformat.org/), [.DDS](https://github.com/DeanoC/tiny_dds), [.EXR](https://github.com/syoyo/tinyexr) images, to handle [Zstd](https://github.com/facebook/zstd) compression, and to unpack ASTC texture blocks. See the [LICENSES](https://github.com/BinomialLLC/basis_universal/tree/master/LICENSES) and [.reuse](https://github.com/BinomialLLC/basis_universal/blob/master/.reuse/dep5) folders. +See our wiki page: [Project Policies: PRs, compiler warnings, release cadence etc.](https://github.com/BinomialLLC/basis_universal/wiki/Project-Policies:-PR's,-compiler-warnings,-release-cadence,-etc.). -Repository Licensing with REUSE -------------------------------- - -The repository has been updated to be compliant with the REUSE license -checking tool (https://reuse.software/). See the `.reuse` subdirectory. +---- -External Tool Links +KTX2 Support Status ------------------- -[Online .EXR HDR Image File Viewer](https://viewer.openhdr.org/) - -[Windows HDR + WCG Image Viewer](https://13thsymphony.github.io/hdrimageviewer/) - A true HDR image viewer for Windows. Also see [the github repo](https://github.com/13thsymphony/HDRImageViewer). +Note as of March 2026 we are working with Khronos on the exact details of how we embed XUASTC LDR supercompressed texture data into the KTX2 file format. KTX2 texture files using our previous codecs (including the recently added UASTC HDR 4x4 and UASTC HDR 6x6i formats) can now be interchanged with other KTX2 tools. See our [KTX2 technical information document](https://github.com/BinomialLLC/basis_universal/wiki/KTX2-File-Format-Support-Technical-Details) for more info. 
-[RenderDoc](https://renderdoc.org/) +Whenever possible, we keep full introspection/transcode compatibility with all of our previously written KTX2 files, even if during standardization a file format change is made. We don't expect how we embed XUASTC LDR into KTX2 in basisu v2.1 to change. -[AMD Compressonator](https://gpuopen.com/gaming-product/compressonator/) +---- -[Microsoft's DirectXTex](https://github.com/microsoft/DirectXTex) +Repository Licensing with REUSE +------------------------------- -[PVRTexTool](https://www.imgtec.com/developers/powervr-sdk-tools/pvrtextool/) +The repository has been updated to be compliant with the REUSE license +checking tool (https://reuse.software/). See the [.reuse](https://github.com/BinomialLLC/basis_universal/tree/master/.reuse) subdirectory. -[Mali Texture Compression Tool](https://community.arm.com/support-forums/f/graphics-gaming-and-vr-forum/52390/announcement-mali-texture-compression-tool-end-of-life) - Now deprecated +External Links +-------------- -For more useful links, papers, and tools/libraries, see the end of the [UASTC HDR texture specification](https://github.com/BinomialLLC/basis_universal/wiki/UASTC-HDR-Texture-Specification-v1.0). +- [btx - KTX2 Command Line Tool](https://github.com/BinomialLLC/KTX-Software-Binomial-Fork) - Our fork of KTX-Software with bug fixes, working HDR quality/effort controls, new options, and new codec integrations. This tool can validate, extract, and compress KTX2 files compatible with our project. +- [ARM's astcenc](https://github.com/ARM-software/astc-encoder) - Crucial official tool from ARM which can unpack ASTC format LDR/HDR .astc and .ktx files to .png or .exr for testing and verification purposes. +- [Online .EXR and .HDR Image File Viewer](https://viewer.openhdr.org/) - OpenHDR Viewer. Has a very well implemented tone mapper, auto-exposure, and HDR histogram. 
+- [Windows HDR + WCG Image Viewer](https://13thsymphony.github.io/hdrimageviewer/) - A true HDR image viewer for Windows which works on HDR monitors. Also see [the github repo](https://github.com/13thsymphony/HDRImageViewer). +- [AMD Compressonator](https://gpuopen.com/compressonator/) - .DDS viewer, can view .KTX files in some formats. +- [PVRTexTool](https://www.imgtec.com/developers/powervr-sdk-tools/pvrtextool/) - Can view .ASTC and .KTX files in some formats. (Note: .DDS viewer seems busted for BC1, doesn't support BC7 at all.) +- [Microsoft's DirectXTex](https://github.com/microsoft/DirectXTex) - Samples contain a basic .DDS viewer. (Note: May still have issues loading .DDS files with texture dimensions that aren't divisible by 4 texels.) +- [RenderDoc](https://renderdoc.org/) - Reliable viewer for LDR/HDR .DDS files in BC1-7 formats. +- [Paint.NET](https://www.getpaint.net/) - Windows app: built-in [.DDS file loading](https://github.com/0xC0000054/pdn-ddsfiletype-plus), supports BC1-7 and [cubemaps](https://github.com/0xC0000054/pdn-ddsfiletype-plus/wiki/Cube-Maps) +- [Mali Texture Compression Tool](https://community.arm.com/support-forums/f/graphics-gaming-and-vr-forum/52390/announcement-mali-texture-compression-tool-end-of-life) - Now deprecated. +- [Our GitHub wiki content statically mirrored as HTML](https://subquantumtech.com/basisu_wiki/Home.html), which lags behind the [live GitHub wiki](https://github.com/BinomialLLC/basis_universal/wiki) + +For more useful links, papers, and tools/libraries, see the end of the [UASTC HDR 4x4 texture specification](https://github.com/BinomialLLC/basis_universal/wiki/UASTC-HDR-4x4-Texture-Specification#papersfurther-reading). 
---- diff --git a/external/basis_universal/all_builds.py b/external/basis_universal/all_builds.py new file mode 100644 index 0000000000..005c73e7ea --- /dev/null +++ b/external/basis_universal/all_builds.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python3 +import subprocess +import shutil +import os +import sys + +# ------------------------------------------------------------------- +# CONFIGURATION - Easily add new build directories and options. +# ------------------------------------------------------------------- +BUILD_CONFIGS = { + "build_python": ["cmake", "-DBASISU_SSE=1 -DBASISU_BUILD_PYTHON=ON", ".."], + "build_wasm_mt": ["cmake", "-DCMAKE_TOOLCHAIN_FILE=$WASI_SDK_PATH/share/cmake/wasi-sdk-pthread.cmake -DCMAKE_BUILD_TYPE=Release -DBASISU_WASM_THREADING=ON", ".."], + "build_wasm_st": ["cmake", "-DCMAKE_TOOLCHAIN_FILE=$WASI_SDK_PATH/share/cmake/wasi-sdk.cmake -DCMAKE_BUILD_TYPE=Release -DBASISU_WASM_THREADING=OFF", ".."], + "build_native": ["cmake", "-DBASISU_SSE=1", ".."] +} +# ------------------------------------------------------------------- + + +def log(msg): + print(f"[INFO] {msg}") + + +def run(cmd, work_dir): + """ + Execute a shell command after changing the working directory. + Always restore the original directory, even on exceptions. 
+ """ + + if isinstance(cmd, list): + cmd = " ".join(cmd) + + original_dir = os.getcwd() + + log(f"Preparing to run command:\n CMD: {cmd}\n IN: {work_dir}") + print(f"[INFO] Current working directory before change: {original_dir}") + + try: + os.chdir(work_dir) + print(f"[INFO] Changed working directory to: {os.getcwd()}") + + log(f"Running command: {cmd}") + subprocess.check_call(cmd, shell=True) + + except subprocess.CalledProcessError: + log(f"ERROR: Command failed: {cmd}") + raise + + finally: + # Always restore the directory + os.chdir(original_dir) + print(f"[INFO] Restored working directory to: {original_dir}") + + +def clean_build_dirs(): + log("Cleaning all build directories...") + for build_dir in BUILD_CONFIGS: + if os.path.isdir(build_dir): + log(f"Deleting directory: {build_dir}") + shutil.rmtree(build_dir) + else: + log(f"Directory not found, skipping: {build_dir}") + log("Clean complete.\n") + + +def create_dir(path): + if not os.path.isdir(path): + log(f"Creating directory: {path}") + os.makedirs(path) + else: + log(f"Directory already exists: {path}") + + +def perform_builds(): + for build_dir, cmake_cmd in BUILD_CONFIGS.items(): + log(f"Starting build in: {build_dir}") + + create_dir(build_dir) + + # Run CMake inside the directory + log(f"Executing CMake for {build_dir}") + run(cmake_cmd, work_dir=build_dir) + + # Run Make inside the directory + log(f"Running make for {build_dir}") + run("make", work_dir=build_dir) + + log(f"Finished build for {build_dir}\n") + + +def main(): + if "--clean" in sys.argv: + clean_build_dirs() + + perform_builds() + log("SUCCESS\n") + + +if __name__ == "__main__": + main() diff --git a/external/basis_universal/appveyor.yml b/external/basis_universal/appveyor.yml index 9e35a55971..ad9be959a2 100644 --- a/external/basis_universal/appveyor.yml +++ b/external/basis_universal/appveyor.yml @@ -11,19 +11,51 @@ environment: APPVEYOR_YML_DISABLE_PS_LINUX: true build_script: + # ============================ + # Windows 
(PowerShell) + # ============================ - ps: | New-Item -Path . -Name "build" -ItemType "directory" cd build + cmake --version + cmake ../ -DCMAKE_BUILD_TYPE:STRING="$env:CONFIGURATION" + if ($LASTEXITCODE -ne 0) { + Write-Host "ERROR: CMake configuration failed" + exit $LASTEXITCODE + } + cmake --build . --config $env:CONFIGURATION + if ($LASTEXITCODE -ne 0) { + Write-Host "ERROR: Build failed" + exit $LASTEXITCODE + } + cd ../ + + + # ============================ + # Linux + macOS (sh) + # ============================ - sh: | mkdir build cd build + cmake --version + cmake ../ -DCMAKE_BUILD_TYPE:STRING="${CONFIGURATION}" + if [ $? -ne 0 ]; then + echo "ERROR: CMake configuration failed" + exit 1 + fi + cmake --build . --config ${CONFIGURATION} + if [ $? -ne 0 ]; then + echo "ERROR: Build failed" + exit 1 + fi + cd ../ artifacts: @@ -32,4 +64,4 @@ artifacts: # MacOS - path: bin/basisu # Windows - - path: bin\$(configuration)\basisu.exe + - path: bin\$(CONFIGURATION)\basisu.exe diff --git a/external/basis_universal/basisu.sln b/external/basis_universal/basisu.sln index 2309ae59ab..566f85c43f 100644 --- a/external/basis_universal/basisu.sln +++ b/external/basis_universal/basisu.sln @@ -1,7 +1,7 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 17 -VisualStudioVersion = 17.8.34322.80 +# Visual Studio Version 18 +VisualStudioVersion = 18.0.11222.15 MinimumVisualStudioVersion = 10.0.40219.1 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "basisu", "basisu.vcxproj", "{59586A07-8E7E-411D-BC3D-387E039AA423}" EndProject @@ -9,6 +9,10 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "example", "example\example. 
EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "encoder_lib", "encoder_lib\encoder_lib.vcxproj", "{97C34996-F458-4030-A402-B32C581872F1}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "example_transcoding", "example_transcoding\example_transcoding.vcxproj", "{13333092-FCFE-4D74-8E76-F10C6037593C}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "example_capi", "example_capi\example_capi.vcxproj", "{BE889347-E4FD-47DD-BBF4-81F98FAA8BA9}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|ARM64EC = Debug|ARM64EC @@ -55,6 +59,30 @@ Global {97C34996-F458-4030-A402-B32C581872F1}.Release|x64.Build.0 = Release|x64 {97C34996-F458-4030-A402-B32C581872F1}.Release|x86.ActiveCfg = Release|Win32 {97C34996-F458-4030-A402-B32C581872F1}.Release|x86.Build.0 = Release|Win32 + {13333092-FCFE-4D74-8E76-F10C6037593C}.Debug|ARM64EC.ActiveCfg = Debug|ARM64EC + {13333092-FCFE-4D74-8E76-F10C6037593C}.Debug|ARM64EC.Build.0 = Debug|ARM64EC + {13333092-FCFE-4D74-8E76-F10C6037593C}.Debug|x64.ActiveCfg = Debug|x64 + {13333092-FCFE-4D74-8E76-F10C6037593C}.Debug|x64.Build.0 = Debug|x64 + {13333092-FCFE-4D74-8E76-F10C6037593C}.Debug|x86.ActiveCfg = Debug|Win32 + {13333092-FCFE-4D74-8E76-F10C6037593C}.Debug|x86.Build.0 = Debug|Win32 + {13333092-FCFE-4D74-8E76-F10C6037593C}.Release|ARM64EC.ActiveCfg = Release|ARM64EC + {13333092-FCFE-4D74-8E76-F10C6037593C}.Release|ARM64EC.Build.0 = Release|ARM64EC + {13333092-FCFE-4D74-8E76-F10C6037593C}.Release|x64.ActiveCfg = Release|x64 + {13333092-FCFE-4D74-8E76-F10C6037593C}.Release|x64.Build.0 = Release|x64 + {13333092-FCFE-4D74-8E76-F10C6037593C}.Release|x86.ActiveCfg = Release|Win32 + {13333092-FCFE-4D74-8E76-F10C6037593C}.Release|x86.Build.0 = Release|Win32 + {BE889347-E4FD-47DD-BBF4-81F98FAA8BA9}.Debug|ARM64EC.ActiveCfg = Debug|ARM64EC + {BE889347-E4FD-47DD-BBF4-81F98FAA8BA9}.Debug|ARM64EC.Build.0 = Debug|ARM64EC + 
{BE889347-E4FD-47DD-BBF4-81F98FAA8BA9}.Debug|x64.ActiveCfg = Debug|x64 + {BE889347-E4FD-47DD-BBF4-81F98FAA8BA9}.Debug|x64.Build.0 = Debug|x64 + {BE889347-E4FD-47DD-BBF4-81F98FAA8BA9}.Debug|x86.ActiveCfg = Debug|Win32 + {BE889347-E4FD-47DD-BBF4-81F98FAA8BA9}.Debug|x86.Build.0 = Debug|Win32 + {BE889347-E4FD-47DD-BBF4-81F98FAA8BA9}.Release|ARM64EC.ActiveCfg = Release|ARM64EC + {BE889347-E4FD-47DD-BBF4-81F98FAA8BA9}.Release|ARM64EC.Build.0 = Release|ARM64EC + {BE889347-E4FD-47DD-BBF4-81F98FAA8BA9}.Release|x64.ActiveCfg = Release|x64 + {BE889347-E4FD-47DD-BBF4-81F98FAA8BA9}.Release|x64.Build.0 = Release|x64 + {BE889347-E4FD-47DD-BBF4-81F98FAA8BA9}.Release|x86.ActiveCfg = Release|Win32 + {BE889347-E4FD-47DD-BBF4-81F98FAA8BA9}.Release|x86.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/external/basis_universal/basisu.vcxproj b/external/basis_universal/basisu.vcxproj index fce24bde5f..1e52b46772 100644 --- a/external/basis_universal/basisu.vcxproj +++ b/external/basis_universal/basisu.vcxproj @@ -1,4 +1,4 @@ - + @@ -36,40 +36,40 @@ Application true MultiByte - v143 + v145 Application false true MultiByte - v143 + v145 Application true Unicode - v143 + v145 Application true Unicode - v143 + v145 Application false true Unicode - v143 + v145 Application false true Unicode - v143 + v145 @@ -129,7 +129,7 @@ true OpenCL _MBCS;%(PreprocessorDefinitions);BASISU_SUPPORT_SSE=1;BASISU_SUPPORT_OPENCL=1;_HAS_EXCEPTIONS=0 - StreamingSIMDExtensions2 + AdvancedVectorExtensions stdcpp17 @@ -147,9 +147,9 @@ true true _MBCS;%(PreprocessorDefinitions);BASISU_SUPPORT_SSE=1;BASISU_SUPPORT_OPENCL=1; - StreamingSIMDExtensions2 - Level4 + AdvancedVectorExtensions stdcpp17 + Level4 Console @@ -186,7 +186,7 @@ NDEBUG;_HAS_EXCEPTIONS=0;%(PreprocessorDefinitions);BASISU_SUPPORT_SSE=1;BASISU_SUPPORT_OPENCL=1 false AnySuitable - StreamingSIMDExtensions2 + AdvancedVectorExtensions Precise false true @@ -217,7 +217,7 @@ Precise 
true Speed - StreamingSIMDExtensions2 + AdvancedVectorExtensions false stdcpp17 @@ -266,7 +266,10 @@ {97c34996-f458-4030-a402-b32c581872f1} + + + - + \ No newline at end of file diff --git a/external/basis_universal/basisu.vcxproj.filters b/external/basis_universal/basisu.vcxproj.filters index a772c989f3..5281d4839a 100644 --- a/external/basis_universal/basisu.vcxproj.filters +++ b/external/basis_universal/basisu.vcxproj.filters @@ -1,4 +1,4 @@ - + @@ -6,4 +6,7 @@ - + + + + \ No newline at end of file diff --git a/external/basis_universal/basisu_tool.cpp b/external/basis_universal/basisu_tool.cpp index 4c63f461dd..67941e6fb1 100644 --- a/external/basis_universal/basisu_tool.cpp +++ b/external/basis_universal/basisu_tool.cpp @@ -1,4 +1,4 @@ -// basisu_tool.cpp +// basisu_tool.cpp // Copyright (C) 2019-2025 Binomial LLC. All Rights Reserved. // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. #if _MSC_VER -// For sprintf(), strcpy() +// For sprintf(), strcpy() #define _CRT_SECURE_NO_WARNINGS (1) #pragma warning(disable:4505) // unreferenced function with internal linkage has been removed #pragma warning(disable:4189) // local variable is initialized but not referenced @@ -30,6 +30,7 @@ #include "transcoder/basisu_transcoder.h" #include "encoder/basisu_ssim.h" #include "encoder/basisu_opencl.h" +#include "encoder/basisu_astc_ldr_common.h" #define MINIZ_HEADER_FILE_ONLY #define MINIZ_NO_ZLIB_COMPATIBLE_NAMES @@ -44,6 +45,7 @@ #ifdef _WIN32 #define WIN32_LEAN_AND_MEAN #include +// Work around Win11 debug console bug. DO NOT SHIP SET TO 1. 
#define CLEAR_WIN32_CONSOLE 0 #endif @@ -55,7 +57,21 @@ using namespace basisu; using namespace buminiz; -#define BASISU_TOOL_VERSION "1.60.0" +#define BASISU_TOOL_VERSION "2.10.0" + +#if defined(DEBUG) +#pragma message("DEBUG defined") +#endif + +#if defined(_DEBUG) +#pragma message("_DEBUG defined") +#endif + +#ifndef NDEBUG +#if !defined(DEBUG) && !defined(_DEBUG) +#pragma message("NDEBUG is NOT defined, but DEBUG or _DEBUG are also NOT defined, which isn't ideal as extra debug assertion checks will not be compiled.") +#endif +#endif // Define to lower the -test and -test_hdr tolerances //#define USE_TIGHTER_TEST_TOLERANCES @@ -79,6 +95,7 @@ enum tool_mode cTestHDR_4x4, cTestHDR_6x6, cTestHDR_6x6i, + cTestXUASTCLDR, cCLBench, cSplitImage, cCombineImages, @@ -89,197 +106,8 @@ static void print_usage() { printf("\nUsage: basisu filename [filename ...] \n"); - puts("\n" - "The default processing mode is compression of one or more .PNG/.BMP/.TGA/.JPG/.QOI/.DDS/.EXR/.HDR files to a LDR or HDR .KTX2 file. 
Alternate modes:\n" - " -unpack: Use transcoder to unpack a .basis/.KTX2 file to one or more .KTX/.PNG files\n" - " -validate: Validate and display information about a .basis/.KTX2 file\n" - " -info: Display high-level information about a .basis/.KTX2 file\n" - " -compare: Compare two LDR PNG/BMP/TGA/JPG/QOI images specified with -file, output PSNR and SSIM statistics and RGB/A delta images\n" - " -compare_hdr: Compare two HDR .EXR/.HDR images specified with -file, output PSNR statistics and RGB delta images\n" - " -tonemap: Tonemap an HDR or EXR image to PNG at multiple exposures, use -file to specify filename\n" - " -version: Print version and exit\n" - "\n" - "--- Notes:\n" - "\nUnless an explicit mode is specified, if one or more files have the .basis or .KTX2 extension this tool defaults to unpack mode.\n" - "\nBy default, the compressor assumes the input is in the sRGB colorspace (like photos/albedo textures).\n" - "If the input is NOT sRGB (like a normal map), be sure to specify -linear for less artifacts. Depending on the content type, some experimentation may be needed.\n" - "\n" - "The TinyEXR library is used to read .EXR images. This library does not support all .EXR compression methods. For unsupported images, you can use ImageMagick to convert them to uncompressed .EXR.\n" - "\n" - "For .DDS source files: Mipmapped or not mipmapped 2D textures (but not cubemaps) are supported. Only uncompressed 32-bit RGBA/BGRA, half float RGBA, or float RGBA .DDS files are supported. In -tex_array mode, if a .DDS file is specified, all source files must be in .DDS format.\n" - "\n" - "Filenames prefixed with a @ symbol are read as filename listing files. Listing text files specify which actual filenames to process (one filename per line).\n" - "\n" - "--- Texture Mode Options:\n" - " -etc1s: Encode to ETC1S LDR (the default for SDR/LDR inputs). Roughly .8-2.5 bpp.\n" - " -uastc: Encode to UASTC LDR 4x4. 
Roughly 5-8 bpp.\n" - " -hdr/-hdr_4x4: Encode input as UASTC HDR 4x4 (the default if any input file has the .EXR or .HDR extension, or if any .DDS file is HDR). Roughly 5-8 bpp.\n" - " -hdr_6x6: Encode input as RDO or highest quality UASTC HDR 6x6. Use -lambda X (try 100-20000 or higher) option to enable RDO UASTC HDR 6x6, where x controls the quality vs. size tradeoff. Roughly 1.2-3.2 bpp.\n" - " -hdr_6x6i: Encode input as UASTC HDR 6x6 intermediate. Use -lambda X (try 100-20000 or higher) option to enable RDO UASTC HDR 6x6, where x controls the quality vs. size tradeoff. Roughly 1-3.2 bpp.\n" - "\n" - "--- Options:\n" - " -ktx2: Write .KTX2 files (the default). By default, UASTC LDR/HDR 4x4 and ASTC 6x6 files will be compressed using Zstandard unless -ktx2_no_zstandard is specified.\n" - " -basis: Write .basis files instead of .KTX2 files (the previous default).\n" - " -file filename.png/tga/jpg/qoi/exr/hdr: Input image filename, multiple images are OK, use -file X for each input filename (prefixing input filenames with -file is optional)\n" - " -alpha_file filename.png/tga/jpg/qoi: Input alpha image filename, multiple images are OK, use -file X for each input filename (must be paired with -file), images converted to REC709 grayscale and used as input alpha\n" - " -output_file filename: Output .basis/.KTX2 filename\n" - " -output_path: Output .basis/.KTX2 files to specified directory.\n" - " -debug or -verbose: Enable codec debug print to stdout (slightly slower).\n" - " -debug_images: Enable codec debug images (much slower).\n" - " -stats: Compute and display image quality metrics (slightly to much slower).\n" - " -individual: Process input images individually and output multiple .basis/.KTX2 files (not as a texture array - this is now the default as of v1.16)\n" - "\n" - " -fastest: Set UASTC LDR 4x4 and HDR 4x4/6x6 to fastest but lowest quality encoding mode (same as -uastc_level 0 or -hdr_6x6_level 0)\n" - " -slower: Set UASTC LDR 4x4 and HDR 4x4/6x6 to 
slower but a higher quality encoding mode (same as -uastc_level 3 or -hdr_6x6_level 5)\n" - " -parallel: Compress multiple textures simumtanously (one per thread), instead of one at a time. Compatible with OpenCL mode. This is much faster, but in OpenCL mode the driver is pushed harder, and the CLI output will be jumbled.\n" - " -linear: Use linear colorspace metrics (instead of the default sRGB or scaled RGB for HDR), and by default linear (not sRGB) mipmap filtering.\n" - " -tex_type <2d, 2darray, 3d, video, cubemap>: Set Basis file header's texture type field. Cubemap arrays require multiples of 6 images, in X+, X-, Y+, Y-, Z+, Z- order, each image must be the same resolutions.\n" - " 2d=arbitrary 2D images, 2darray=2D array, 3D=volume texture slices, video=video frames, cubemap=array of faces. For 2darray/3d/cubemaps/video, each source image's dimensions and # of mipmap levels must be the same.\n" - " For video, the .basis file will be written with the first frame being an I-Frame, and subsequent frames being P-Frames (using conditional replenishment). 
Playback must always occur in order from first to last image.\n" - " -cubemap: same as -tex_type cubemap\n" - " -tex_array: Process input images as a single texture array and write a single .basis/.KTX2 file (the former default before v1.16)\n" - " -fuzz_testing: Use with -validate: Disables CRC16 validation of file contents before transcoding\n" - " -multifile_printf: printf() format strint to use to compose multiple filenames\n" - " -multifile_first: The index of the first file to process, default is 0 (must specify -multifile_printf and -multifile_num)\n" - " -multifile_num: The total number of files to process.\n" - " -opencl: Enable OpenCL usage (currently only accelerates ETC1S encoding)\n" - " -opencl_serialize: Serialize all calls to the OpenCL driver (to work around buggy drivers, only useful with -parallel)\n" - "\n" - "--- ETC1S specific options (-etc1s - the LDR/SDR default):\n" - " -q X: Set ETC1S quality level, 1-255, default is 128, lower=better compression/lower quality/faster, higher=less compression/higher quality/slower, default is 128. For even higher quality, use -max_endpoints/-max_selectors.\n" - " -comp_level X: Set ETC1S encoding speed vs. quality tradeoff. Range is 0-6, default is 1. Higher values=MUCH slower, but slightly higher quality. Higher levels intended for videos. Use -q first!\n" - " -max_endpoints X: ETC1S: Manually set the max number of color endpoint clusters from 1-16128, use instead of -q\n" - " -max_selectors X: ETC1S: Manually set the max number of color selector clusters from 1-16128, use instead of -q\n" - "\n" - "--- UASTC LDR/HDR 4x4 specific options (-uastc):\n" - " -uastc: Enable UASTC LDR 4x4 texture mode, instead of the default ETC1S mode. Significantly higher texture quality, but much larger (~8bpp) files. (Note that UASTC .basis files must be losslessly compressed by the user.)\n" - " -uastc_level: Set UASTC LDR/HDR 4x4 encoding level. LDR Range is [0,4], default is 2, higher=slower but higher quality. 
0=fastest/lowest quality, 3=slowest practical option, 4=impractically slow/highest achievable quality\n" - " UASTC HDR 4x4 range is [0,4] - higher=slower but higher quality. HDR 4x4 default level=1.\n" - " -uastc_rdo_l X: Enable UASTC LDR 4x4 RDO post-processing and set UASTC LDR 4x4 RDO quality scalar (lambda) to X. Lower values=higher quality/larger LZ\n" - " compressed files, higher values=lower quality/smaller LZ compressed files. Good range to try is [.25-10].\n" - " Note: Previous versons used the -uastc_rdo_q option, which was removed because the RDO algorithm was changed.\n" - " -uastc_rdo_d X: Set UASTC LDR 4x4 RDO dictionary size in bytes. Default is 4096, max is 65536. Lower values=faster, but less compression.\n" - " -uastc_rdo_b X: Set UASTC LDR 4x4 RDO max smooth block error scale. Range is [1,300]. Default is 10.0, 1.0=disabled. Larger values suppress more artifacts (and allocate more bits) on smooth blocks.\n" - " -uastc_rdo_s X: Set UASTC LDR 4x4 RDO max smooth block standard deviation. Range is [.01,65536]. Default is 18.0. Larger values expand the range of blocks considered smooth.\n" - " -uastc_rdo_f: Don't favor simpler UASTC LDR 4x4 modes in RDO mode.\n" - " -uastc_rdo_m: Disable RDO multithreading (slightly higher compression, deterministic).\n" - "\n" - "--- UASTC HDR 4x4 specific options (-hdr or -hdr_4x4 - the HDR default):\n" - " -uastc_level X: Sets the UASTC HDR 4x4 compressor's level. Valid range is [0,4] - higher=slower but higher quality. HDR default=1.\n" - " Level 0=fastest/lowest quality, 3=highest practical setting, 4=exhaustive\n" - " -hdr_uber_mode: Allow the UASTC HDR 4x4 encoder to try varying the CEM 11 selectors more for slightly higher quality (slower). 
This may negatively impact BC6H quality, however.\n" - " -hdr_ultra_quant: UASTC HDR 4x4: Try to find better quantized CEM 7/11 endpoint values (slower).\n" - " -hdr_favor_astc: UASTC HDR 4x4: By default the UASTC HDR 4x4 encoder tries to strike a balance or even slightly favor BC6H quality. If this option is specified, ASTC HDR 4x4 quality is favored instead.\n" - "\n" - "--- UASTC HDR 6x6 specific options (-hdr_6x6 or -hdr_6x6i):\n" - " -lambda X: Enables rate distortion optimization (RDO). The higher this value, the lower the quality, but the smaller the file size. Try 100-20000, or higher values on some images.\n" - " -hdr_6x6_level X: Sets the codec to 6x6 HDR mode (same as -hdr_6x6) and controls encoder performance vs. max quality tradeoff. X may range from [0,12]. Default level is 2. Higher values result in better quality but slower encoding. Values above 10 are extremely slow.\n" - " -hdr_6x6i_level X: Sets the codec to 6x6 HDR intermediate mode (same as -hdr_6x6i) and controls encoder performance vs. max quality tradeoff. X may range from [0,12]. Default level is 2.\n" - " -rec_2020: The input image's gamut is Rec. 2020 vs. the default Rec. 709 - for accurate colorspace error calculations.\n" - " -hdr_6x6_jnd X, -hdr_6x6_extra_pats, -hdr_6x6_brute_force_pats, -hdr_6x6_comp_levels X Y or -hdr_6x6i_comp_levels X Y: Low-level control over the encoder's configuration.\n" - "\n" - "--- SDR/LDR->HDR upconversion options (only used when encoding to HDR formats from an LDR/SDR source image):\n" - " -hdr_ldr_no_srgb_to_linear: If specified, LDR images will NOT be converted to normalized linear light (via a sRGB->Linear conversion) during SDR->HDR upconversion before compressing as HDR.\n" - " -hdr_ldr_upconversion_nit_multiplier X: Specify how many nits (candelas per sq. meter) LDR/SDR images are converted to after converting to linear light. Default is 100 nits. 
Note: Previous builds used 1 nit.\n" - "\n" - "--- More options:\n" - " -test: Run an automated LDR ETC1S/UASTC encoding and transcoding test. Returns EXIT_FAILURE if any failures\n" - " -test_hdr_4x4/-test_hdr_6x6/-test_hdr_6x6i: Run automated UASTC HDR encoding and transcoding tests. Returns EXIT_FAILURE if any failures\n" - " -test_dir: Optional directory of test files. Defaults to \"../test_files\".\n" - " -y_flip: Flip input images vertically before compression\n" - " -normal_map: Tunes codec parameters for better quality on normal maps (linear colorspace metrics, linear mipmap filtering, no selector RDO, no sRGB)\n" - " -no_alpha: Always output non-alpha basis files, even if one or more inputs has alpha\n" - " -force_alpha: Always output alpha basis files, even if no inputs has alpha\n" - " -separate_rg_to_color_alpha: Separate input R and G channels to RGB and A (for tangent space XY normal maps)\n" - " -swizzle rgba: Specify swizzle for the 4 input color channels using r, g, b and a (the -separate_rg_to_color_alpha flag is equivalent to rrrg)\n" - " -renorm: Renormalize each input image before any further processing/compression\n" - " -no_multithreading: Disable multithreading\n" - " -max_threads X: Use at most X threads total when multithreading is enabled (this includes the main thread)\n" - " -no_ktx: Disable KTX writing when unpacking (faster, less output files)\n" - " -ktx_only: Only write KTX files when unpacking (faster, less output files)\n" - " -write_out: Write 3dfx OUT files when unpacking FXT1 textures\n" - " -format_only: Only unpack the specified format, by its numeric code.\n" - " -etc1_only: Only unpack to ETC1, skipping the other texture formats during -unpack\n" - " -disable_hierarchical_endpoint_codebooks: Disable hierarchical endpoint codebook usage, slower but higher quality on some compression levels\n" - " -compare_ssim: Compute and display SSIM of image comparison (slow)\n" - " -compare_plot: Display histogram plots in -compare 
mode\n" - " -bench: UASTC benchmark mode, for development only\n" - " -resample X Y: Resample all input textures to XxY pixels using a box filter\n" - " -resample_factor X: Resample all input textures by scale factor X using a box filter\n" - " -no_sse: Forbid all SSE instruction set usage\n" - " -validate_etc1s: Validate internal ETC1S compressor's data structures during compression (slower, intended for development).\n" - " -ktx2_animdata_duration X: Set KTX2animData duration field to integer value X (only valid/useful for -tex_type video, default is 1)\n" - " -ktx2_animdata_timescale X: Set KTX2animData timescale field to integer value X (only valid/useful for -tex_type video, default is 15)\n" - " -ktx2_animdata_loopcount X: Set KTX2animData loopcount field to integer value X (only valid/useful for -tex_type video, default is 0)\n" - " -framerate X: Set framerate in .basis header to X/frames sec.\n" - " -ktx2_no_zstandard: Don't compress UASTC texture data using Zstandard -- store it uncompressed instead.\n" - " -ktx2_zstandard_level X: Set ZStandard compression level to X (see Zstandard documentation, default level is 6)\n" - " -tonemap_dither: Dither tonemapper's 8-bit/component output by adding a small amount of white noise, only used with -tonemap mode\n" - "\n" - "--- Mipmap generation options:\n" - " -mipmap: Generate mipmaps for each source image\n" - " -mip_srgb: Convert image to linear before filtering, then back to sRGB\n" - " -mip_linear: Keep image in linear light during mipmap filtering (i.e. 
do not convert to/from sRGB for filtering purposes)\n" - " -mip_scale X: Set mipmap filter kernel's scale, lower=sharper, higher=more blurry, default is 1.0\n" - " -mip_filter X: Set mipmap filter kernel, default is kaiser, filters: box, tent, bell, blackman, catmullrom, mitchell, etc.\n" - " -mip_renorm: Renormalize normal map to unit length vectors after filtering\n" - " -mip_clamp: Use clamp addressing on borders, instead of wrapping\n" - " -mip_fast: Use faster mipmap generation (resample from previous mip, not always first/largest mip level). The default (as of 1/2021)\n" - " -mip_slow: Always resample each mipmap level starting from the largest mipmap. Higher quality, but slower. Opposite of -mip_fast. Was the prior default before 1/2021.\n" - " -mip_smallest X: Set smallest pixel dimension for generated mipmaps, default is 1 pixel\n" - " By default, textures will be converted from sRGB to linear light before mipmap filtering, then back to sRGB (for the RGB color channels) unless -linear is specified.\n" - " You can override this behavior with -mip_srgb/-mip_linear.\n" - "\n" - "--- ETC1S backend endpoint/selector RDO codec options:\n" - " -no_selector_rdo: Disable backend's selector rate distortion optimizations (slightly faster, less noisy output, but lower quality per output bit)\n" - " -selector_rdo_thresh X: Set selector RDO quality threshold, default is 1.25, lower is higher quality but less quality per output bit (try 1.0-3.0)\n" - " -no_endpoint_rdo: Disable backend's endpoint rate distortion optimizations (slightly faster, less noisy output, but lower quality per output bit)\n" - " -endpoint_rdo_thresh X: Set endpoint RDO quality threshold, default is 1.5, lower is higher quality but less quality per output bit (try 1.0-3.0)\n" - "\n" - "--- Set various fields in the Basis file header:\n" - " -userdata0 X: Set 32-bit userdata0 field in Basis file header to X (X is a signed 32-bit int)\n" - " -userdata1 X: Set 32-bit userdata1 field in Basis file 
header to X (X is a signed 32-bit int)\n" - "\n" - "--- Example LDR ETC1S/UASTC LDR 4x4 command lines:\n" - " basisu x.png : Compress sRGB image x.png to x.ktx2 using default settings (multiple filenames OK, use -tex_array if you want a tex array vs. multiple output files)\n" - " basisu -basis x.qoi : Compress sRGB image x.qoi to x.basis (supports 24-bit or 32-bit .QOI files)\n" - " basisu x.ktx2 : Unpack x.basis to PNG/KTX files (multiple filenames OK)\n" - " basisu x.basis : Unpack x.basis to PNG/KTX files (multiple filenames OK)\n" - " basisu -uastc x.png -uastc_rdo_l 2.0 -ktx2 -stats : Compress to a UASTC .KTX2 file with RDO (rate distortion optimization) to reduce .KTX2 compressed file size\n" - " basisu -file x.png -mipmap -y_flip : Compress a mipmapped x.ktx2 file from an sRGB image named x.png, Y flip each source image\n" - " basisu -validate -file x.basis : Validate x.basis (check header, check file CRC's, attempt to transcode all slices)\n" - " basisu -unpack -file x.basis : Validates, transcodes and unpacks x.basis to mipmapped .KTX and RGB/A .PNG files (transcodes to all supported GPU texture formats)\n" - " basisu -q 255 -file x.png -mipmap -debug -stats : Compress sRGB x.png to x.ktx2 at quality level 255 with compressor debug output/statistics\n" - " basisu -linear -max_endpoints 16128 -max_selectors 16128 -file x.png : Compress non-sRGB x.png to x.ktx2 using the largest supported manually specified codebook sizes\n" - " basisu -basis -comp_level 2 -max_selectors 8192 -max_endpoints 8192 -tex_type video -framerate 20 -multifile_printf \"x%02u.png\" -multifile_first 1 -multifile_num 20 : Compress a 20 sRGB source image video sequence (x01.png, x02.png, x03.png, etc.) to x01.basis\n" - "\n" - "--- Example UASTC HDR 4x4 command lines:\n" - " basisu x.exr : Compress a HDR .EXR (or .HDR) image to a UASTC HDR 4x4 .KTX2 file. LDR/SDR images will be upconverted to linear light HDR before compression. 
See HDR upconversion options, above.\n" - " basisu -hdr_4x4 x.exr : Compress a HDR .EXR image to a UASTC HDR 4x4 .KTX2 file.\n" - " basisu x.hdr -uastc_level 0 : Compress a HDR .hdr image to a UASTC HDR 4x4 .KTX2 file, fastest encoding but lowest quality\n" - " basisu -hdr x.png : Compress a LDR .PNG image to UASTC HDR 4x4 (image is converted from sRGB to linear light first, use -hdr_ldr_no_srgb_to_linear to disable)\n" - " basisu x.hdr -uastc_level 3 : Compress a HDR .hdr image to UASTC HDR 4x4 at higher quality (-uastc_level 4 is highest quality, but very slow encoding)\n" - " basisu x.hdr -uastc_level 3 -mipmap -basis -stats -debug -debug_images : Compress a HDR .hdr image to UASTC HDR 4x4, .basis output file, at higher quality, generate mipmaps, output statistics and debug information, and write tone mapped debug images\n" - " basisu x.hdr -stats -hdr_favor_astc -hdr_uber_mode -uastc_level 4 : Highest achievable ASTC HDR 4x4 quality (very slow encoding, BC6H quality is traded off)\n" - "\n--- Example UASTC HDR 6x6 command lines:\n" - " basisu -hdr_6x6 x.exr : Compress a HDR .EXR (or .HDR) image to a UASTC HDR 6x6 .KTX2 file. LDR/SDR images will be upconverted to linear light HDR before compression. See HDR upconversion options, above.\n" - " basisu -lambda 1000 -hdr_6x6 x.exr : Compress a HDR .EXR (or .HDR) image to a UASTC HDR 6x6 .KTX2 file with rate-distortion optimization (RDO), at lambda level 1000.\n" - " basisu -hdr_6x6i x.exr : Compress a HDR .EXR image to a compressed intermediate format UASTC HDR 6x6 .KTX2 file.\n" - " basisu -lambda 1000 -hdr_6x6i x.exr : Compress a HDR .EXR image to a compressed intermediate format UASTC HDR 6x6 .KTX2 file with rate-distortion optimization (RDO), at lambda level 1000.\n" - "\n" - "--- Video notes: For video use, it's recommended to encode on a machine with many cores. 
Use -comp_level 2 or higher for better codebook\n" - "generation, specify very large codebooks using -max_endpoints and -max_selectors, and reduce the default endpoint RDO threshold\n" - "(-endpoint_rdo_thresh) to around 1.25. Videos may have mipmaps and alpha channels. Videos must always be played back by the transcoder\n" - "in first to last image order.\n" - "Video files currently use I-Frames on the first image, and P-Frames using conditional replenishment on subsequent frames.\n" - "\nETC1S Compression level (-comp_level X) details. This controls the ETC1S speed vs. quality trandeoff. (Use -q to control the quality vs. compressed size tradeoff.):\n" - " Level 0: Fastest, but has marginal quality and can be brittle on complex images. Avg. Y dB: 35.45\n" - " Level 1: Hierarchical codebook searching, faster ETC1S encoding. 36.87 dB, ~1.4x slower vs. level 0. (This is the default setting.)\n" - " Level 2: Use this or higher for video. Hierarchical codebook searching. 36.87 dB, ~1.4x slower vs. level 0. (This is the v1.12's default setting.)\n" - " Level 3: Full codebook searching. 37.13 dB, ~1.8x slower vs. level 0. (Equivalent the the initial release's default settings.)\n" - " Level 4: Hierarchical codebook searching, codebook k-means iterations. 37.15 dB, ~4x slower vs. level 0\n" - " Level 5: Full codebook searching, codebook k-means iterations. 37.41 dB, ~5.5x slower vs. level 0.\n" - " Level 6: Full codebook searching, twice as many codebook k-means iterations, best ETC1 endpoint opt. 37.43 dB, ~12x slower vs. 
level 0\n" + puts( +#include "basisu_tool_help.h" ); } @@ -327,7 +155,7 @@ static bool load_listing_file(const std::string &f, basisu::vector { if (read_filename[0] == ' ') read_filename.erase(0, 1); - else + else break; } @@ -336,7 +164,7 @@ static bool load_listing_file(const std::string &f, basisu::vector const char c = read_filename.back(); if ((c == ' ') || (c == '\n') || (c == '\r')) read_filename.erase(read_filename.size() - 1, 1); - else + else break; } @@ -360,9 +188,347 @@ class command_line_params #define REMAINING_ARGS_CHECK(n) if (num_remaining_args < (n)) { error_printf("Error: Expected %u values to follow %s!\n", n, pArg); return false; } + bool check_for_general_options(const char** arg_v, const char* pArg, int arg_index, const int num_remaining_args, int& arg_count) + { + BASISU_NOTE_UNUSED(arg_v); + BASISU_NOTE_UNUSED(arg_index); + BASISU_NOTE_UNUSED(num_remaining_args); + BASISU_NOTE_UNUSED(arg_count); + + if (strcasecmp(pArg, "-wasi_threads") == 0) + { + REMAINING_ARGS_CHECK(1); + int num_threads = atoi(arg_v[arg_index + 1]); + if ((num_threads < 0) || (num_threads > 256)) + { + error_printf("Invalid number of threads\n"); + exit(EXIT_FAILURE); + } + set_num_wasi_threads(num_threads); + arg_count++; + return true; + } + else if (strcasecmp(pArg, "-higher_quality_transcoding") == 0) + { + m_higher_quality_transcoding = true; + return true; + } + else if (strcasecmp(pArg, "-no_fast_xuastc_ldr_bc7_transcoding") == 0) + { + m_xuastc_ldr_disable_bc7_transcoding = true; + return true; + } + else if (strcasecmp(pArg, "-fast_xuastc_ldr_bc7_transcoding") == 0) + { + m_xuastc_ldr_disable_bc7_transcoding = false; + return true; + } + else if (strcasecmp(pArg, "-no_etc1s_chroma_filtering") == 0) + { + m_no_etc1s_transcoding_chroma_filtering = true; + return true; + } + else if (strcasecmp(pArg, "-force_deblocking") == 0) + { + m_force_deblocking = true; + return true; + } + else if ((strcasecmp(pArg, "-disable_deblocking") == 0) || (strcasecmp(pArg, 
"-no_deblocking") == 0)) + { + m_disable_deblocking = true; + return true; + } + else if (strcasecmp(pArg, "-stronger_deblocking") == 0) + { + m_stronger_deblocking = true; + return true; + } + + return false; + } + + bool check_for_xuastc_options(const char** arg_v, const char* pArg, int arg_index, const int num_remaining_args, int& arg_count) + { + // New unified -quality level which works across all codecs + if (strcasecmp(pArg, "-quality") == 0) + { + REMAINING_ARGS_CHECK(1); + m_quality_level = clamp(atoi(arg_v[arg_index + 1]), 0, 100); + arg_count++; + return true; + } + // New unified -effort level, which works across all codecs + else if (strcasecmp(pArg, "-effort") == 0) + { + REMAINING_ARGS_CHECK(1); + m_effort_level = clamp(atoi(arg_v[arg_index + 1]), 0, 10); + //m_comp_params.m_xuastc_ldr_effort_level = atoi(arg_v[arg_index + 1]); + arg_count++; + return true; + } + else if (strcasecmp(pArg, "-xuastc_blurring") == 0) // experimental, not recommended, very slow + { + m_comp_params.m_xuastc_ldr_blurring = true; + return true; + } + else if (strcasecmp(pArg, "-weights") == 0) + { + REMAINING_ARGS_CHECK(4); + m_comp_params.m_xuastc_ldr_channel_weights[0] = (uint32_t)clamp((float)atof(arg_v[arg_index + 1]), 0.0f, 1024.0f); + m_comp_params.m_xuastc_ldr_channel_weights[1] = (uint32_t)clamp((float)atof(arg_v[arg_index + 2]), 0.0f, 1024.0f); + m_comp_params.m_xuastc_ldr_channel_weights[2] = (uint32_t)clamp((float)atof(arg_v[arg_index + 3]), 0.0f, 1024.0f); + m_comp_params.m_xuastc_ldr_channel_weights[3] = (uint32_t)clamp((float)atof(arg_v[arg_index + 4]), 0.0f, 1024.0f); + arg_count += 4; + return true; + } + else if (strcasecmp(pArg, "-ls_min_psnr") == 0) + { + REMAINING_ARGS_CHECK(1); + m_comp_params.m_ls_min_psnr = (float)atof(arg_v[arg_index + 1]); + arg_count++; + return true; + } + else if (strcasecmp(pArg, "-ls_min_alpha_psnr") == 0) + { + REMAINING_ARGS_CHECK(1); + m_comp_params.m_ls_min_alpha_psnr = (float)atof(arg_v[arg_index + 1]); + arg_count++; + 
return true; + } + else if (strcasecmp(pArg, "-ls_thresh_psnr") == 0) + { + REMAINING_ARGS_CHECK(1); + m_comp_params.m_ls_thresh_psnr = (float)atof(arg_v[arg_index + 1]); + arg_count++; + return true; + } + else if (strcasecmp(pArg, "-ls_thresh_alpha_psnr") == 0) + { + REMAINING_ARGS_CHECK(1); + m_comp_params.m_ls_thresh_alpha_psnr = (float)atof(arg_v[arg_index + 1]); + arg_count++; + return true; + } + else if (strcasecmp(pArg, "-ls_thresh_edge_psnr") == 0) + { + REMAINING_ARGS_CHECK(1); + m_comp_params.m_ls_thresh_edge_psnr = (float)atof(arg_v[arg_index + 1]); + arg_count++; + return true; + } + else if (strcasecmp(pArg, "-ls_thresh_edge_alpha_psnr") == 0) + { + REMAINING_ARGS_CHECK(1); + m_comp_params.m_ls_thresh_edge_alpha_psnr = (float)atof(arg_v[arg_index + 1]); + arg_count++; + return true; + } + else if (strcasecmp(pArg, "-xuastc_arith") == 0) + { + m_comp_params.m_xuastc_ldr_syntax = (int)basist::astc_ldr_t::xuastc_ldr_syntax::cFullArith; + return true; + } + else if (strcasecmp(pArg, "-xuastc_zstd") == 0) + { + m_comp_params.m_xuastc_ldr_syntax = (int)basist::astc_ldr_t::xuastc_ldr_syntax::cFullZStd; + return true; + } + else if (strcasecmp(pArg, "-xuastc_hybrid") == 0) + { + m_comp_params.m_xuastc_ldr_syntax = (int)basist::astc_ldr_t::xuastc_ldr_syntax::cHybridArithZStd; + return true; + } + else if (strcasecmp(pArg, "-xy") == 0) + { + m_comp_params.m_xuastc_ldr_use_lossy_supercompression = true; + return true; + } + else if (strcasecmp(pArg, "-xyd") == 0) + { + m_comp_params.m_xuastc_ldr_use_lossy_supercompression = false; + return true; + } + else if (strcasecmp(pArg, "-xs") == 0) + { + m_comp_params.m_xuastc_ldr_force_disable_subsets = true; + return true; + } + else if (strcasecmp(pArg, "-xsu") == 0) + { + m_comp_params.m_xuastc_ldr_force_disable_subsets = false; + return true; + } + else if (strcasecmp(pArg, "-xp") == 0) + { + m_comp_params.m_xuastc_ldr_force_disable_rgb_dual_plane = true; + return true; + } + else if (strcasecmp(pArg, "-xpu") == 0) 
+ { + m_comp_params.m_xuastc_ldr_force_disable_rgb_dual_plane = false; + return true; + } + else if (strcasecmp(pArg, "-ts") == 0) + { + m_comp_params.m_perceptual = true; + m_comp_params.m_ktx2_and_basis_srgb_transfer_function = true; + return true; + } + else if (strcasecmp(pArg, "-tl") == 0) + { + m_comp_params.m_perceptual = false; + m_comp_params.m_ktx2_and_basis_srgb_transfer_function = false; + return true; + } + // Supercompressed XUASTC LDR 4x4-12x12 + else if ((strcasecmp(pArg, "-ldr_4x4i") == 0) || (strcasecmp(pArg, "-xuastc_ldr_4x4") == 0)) + { + m_comp_params.set_format_mode(basist::basis_tex_format::cXUASTC_LDR_4x4); + return true; + } + else if ((strcasecmp(pArg, "-ldr_5x4i") == 0) || (strcasecmp(pArg, "-xuastc_ldr_5x4") == 0)) + { + m_comp_params.set_format_mode(basist::basis_tex_format::cXUASTC_LDR_5x4); + return true; + } + else if ((strcasecmp(pArg, "-ldr_5x5i") == 0) || (strcasecmp(pArg, "-xuastc_ldr_5x5") == 0)) + { + m_comp_params.set_format_mode(basist::basis_tex_format::cXUASTC_LDR_5x5); + return true; + } + else if ((strcasecmp(pArg, "-ldr_6x5i") == 0) || (strcasecmp(pArg, "-xuastc_ldr_6x5") == 0)) + { + m_comp_params.set_format_mode(basist::basis_tex_format::cXUASTC_LDR_6x5); + return true; + } + else if ((strcasecmp(pArg, "-ldr_6x6i") == 0) || (strcasecmp(pArg, "-xuastc_ldr_6x6") == 0)) + { + m_comp_params.set_format_mode(basist::basis_tex_format::cXUASTC_LDR_6x6); + return true; + } + else if ((strcasecmp(pArg, "-ldr_8x5i") == 0) || (strcasecmp(pArg, "-xuastc_ldr_8x5") == 0)) + { + m_comp_params.set_format_mode(basist::basis_tex_format::cXUASTC_LDR_8x5); + return true; + } + else if ((strcasecmp(pArg, "-ldr_8x6i") == 0) || (strcasecmp(pArg, "-xuastc_ldr_8x6") == 0)) + { + m_comp_params.set_format_mode(basist::basis_tex_format::cXUASTC_LDR_8x6); + return true; + } + else if ((strcasecmp(pArg, "-ldr_10x5i") == 0) || (strcasecmp(pArg, "-xuastc_ldr_10x5") == 0)) + { + m_comp_params.set_format_mode(basist::basis_tex_format::cXUASTC_LDR_10x5); 
+ return true; + } + else if ((strcasecmp(pArg, "-ldr_10x6i") == 0) || (strcasecmp(pArg, "-xuastc_ldr_10x6") == 0)) + { + m_comp_params.set_format_mode(basist::basis_tex_format::cXUASTC_LDR_10x6); + return true; + } + else if ((strcasecmp(pArg, "-ldr_8x8i") == 0) || (strcasecmp(pArg, "-xuastc_ldr_8x8") == 0)) + { + m_comp_params.set_format_mode(basist::basis_tex_format::cXUASTC_LDR_8x8); + return true; + } + else if ((strcasecmp(pArg, "-ldr_10x8i") == 0) || (strcasecmp(pArg, "-xuastc_ldr_10x8") == 0)) + { + m_comp_params.set_format_mode(basist::basis_tex_format::cXUASTC_LDR_10x8); + return true; + } + else if ((strcasecmp(pArg, "-ldr_10x10i") == 0) || (strcasecmp(pArg, "-xuastc_ldr_10x10") == 0)) + { + m_comp_params.set_format_mode(basist::basis_tex_format::cXUASTC_LDR_10x10); + return true; + } + else if ((strcasecmp(pArg, "-ldr_12x10i") == 0) || (strcasecmp(pArg, "-xuastc_ldr_12x10") == 0)) + { + m_comp_params.set_format_mode(basist::basis_tex_format::cXUASTC_LDR_12x10); + return true; + } + else if ((strcasecmp(pArg, "-ldr_12x12i") == 0) || (strcasecmp(pArg, "-xuastc_ldr_12x12") == 0)) + { + m_comp_params.set_format_mode(basist::basis_tex_format::cXUASTC_LDR_12x12); + return true; + } + // Plain ASTC LDR 4x4-12x12 + else if ((strcasecmp(pArg, "-ldr_4x4") == 0) || (strcasecmp(pArg, "-astc_ldr_4x4") == 0)) + { + m_comp_params.set_format_mode(basist::basis_tex_format::cASTC_LDR_4x4); + return true; + } + else if ((strcasecmp(pArg, "-ldr_5x4") == 0) || (strcasecmp(pArg, "-astc_ldr_5x4") == 0)) + { + m_comp_params.set_format_mode(basist::basis_tex_format::cASTC_LDR_5x4); + return true; + } + else if ((strcasecmp(pArg, "-ldr_5x5") == 0) || (strcasecmp(pArg, "-astc_ldr_5x5") == 0)) + { + m_comp_params.set_format_mode(basist::basis_tex_format::cASTC_LDR_5x5); + return true; + } + else if ((strcasecmp(pArg, "-ldr_6x5") == 0) || (strcasecmp(pArg, "-astc_ldr_6x5") == 0)) + { + m_comp_params.set_format_mode(basist::basis_tex_format::cASTC_LDR_6x5); + return true; + } + else 
if ((strcasecmp(pArg, "-ldr_6x6") == 0) || (strcasecmp(pArg, "-astc_ldr_6x6") == 0)) + { + m_comp_params.set_format_mode(basist::basis_tex_format::cASTC_LDR_6x6); + return true; + } + else if ((strcasecmp(pArg, "-ldr_8x5") == 0) || (strcasecmp(pArg, "-astc_ldr_8x5") == 0)) + { + m_comp_params.set_format_mode(basist::basis_tex_format::cASTC_LDR_8x5); + return true; + } + else if ((strcasecmp(pArg, "-ldr_8x6") == 0) || (strcasecmp(pArg, "-astc_ldr_8x6") == 0)) + { + m_comp_params.set_format_mode(basist::basis_tex_format::cASTC_LDR_8x6); + return true; + } + else if ((strcasecmp(pArg, "-ldr_10x5") == 0) || (strcasecmp(pArg, "-astc_ldr_10x5") == 0)) + { + m_comp_params.set_format_mode(basist::basis_tex_format::cASTC_LDR_10x5); + return true; + } + else if ((strcasecmp(pArg, "-ldr_10x6") == 0) || (strcasecmp(pArg, "-astc_ldr_10x6") == 0)) + { + m_comp_params.set_format_mode(basist::basis_tex_format::cASTC_LDR_10x6); + return true; + } + else if ((strcasecmp(pArg, "-ldr_8x8") == 0) || (strcasecmp(pArg, "-astc_ldr_8x8") == 0)) + { + m_comp_params.set_format_mode(basist::basis_tex_format::cASTC_LDR_8x8); + return true; + } + else if ((strcasecmp(pArg, "-ldr_10x8") == 0) || (strcasecmp(pArg, "-astc_ldr_10x8") == 0)) + { + m_comp_params.set_format_mode(basist::basis_tex_format::cASTC_LDR_10x8); + return true; + } + else if ((strcasecmp(pArg, "-ldr_10x10") == 0) || (strcasecmp(pArg, "-astc_ldr_10x10") == 0)) + { + m_comp_params.set_format_mode(basist::basis_tex_format::cASTC_LDR_10x10); + return true; + } + else if ((strcasecmp(pArg, "-ldr_12x10") == 0) || (strcasecmp(pArg, "-astc_ldr_12x10") == 0)) + { + m_comp_params.set_format_mode(basist::basis_tex_format::cASTC_LDR_12x10); + return true; + } + else if ((strcasecmp(pArg, "-ldr_12x12") == 0) || (strcasecmp(pArg, "-astc_ldr_12x12") == 0)) + { + m_comp_params.set_format_mode(basist::basis_tex_format::cASTC_LDR_12x12); + return true; + } + + return false; + } + bool check_for_hdr_options(const char** arg_v, const char* pArg, 
int arg_index, const int num_remaining_args, int& arg_count) { - if ((strcasecmp(pArg, "-hdr") == 0) || (strcasecmp(pArg, "-hdr_4x4") == 0)) + if ((strcasecmp(pArg, "-hdr") == 0) || (strcasecmp(pArg, "-hdr_4x4") == 0) || (strcasecmp(pArg, "-uastc_hdr_4x4") == 0)) { m_comp_params.set_format_mode(basist::basis_tex_format::cUASTC_HDR_4x4); return true; @@ -372,16 +538,28 @@ class command_line_params m_comp_params.m_astc_hdr_6x6_options.m_rec2020_bt2100_color_gamut = true; return true; } - else if (strcasecmp(pArg, "-hdr_6x6") == 0) + else if (strcasecmp(pArg, "-hdr_6x6i_16_compatibility") == 0) + { + // UASTC HDR 6x6i: Write v1.60 compatible files vs. 2.0. + m_comp_params.m_astc_hdr_6x6_options.m_write_basisu_1_6_compatible_files = true; + return true; + } + else if (strcasecmp(pArg, "-hdr_6x6i_20_compatibility") == 0) + { + // UASTC HDR 6x6i: Write v2.00 compatible files vs. 1.60. + m_comp_params.m_astc_hdr_6x6_options.m_write_basisu_1_6_compatible_files = false; + return true; + } + else if ((strcasecmp(pArg, "-hdr_6x6") == 0) || (strcasecmp(pArg, "-astc_hdr_6x6") == 0)) { // max quality (if -lambda=0) or RDO UASTC HDR 6x6 m_comp_params.set_format_mode(basist::basis_tex_format::cASTC_HDR_6x6); return true; } - else if (strcasecmp(pArg, "-hdr_6x6i") == 0) + else if ((strcasecmp(pArg, "-hdr_6x6i") == 0) || (strcasecmp(pArg, "-uastc_hdr_6x6") == 0) || (strcasecmp(pArg, "-uastc_hdr_6x6i") == 0)) { // intermediate format UASTC HDR 6x6 - m_comp_params.set_format_mode(basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE); + m_comp_params.set_format_mode(basist::basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE); return true; } else if (strcasecmp(pArg, "-lambda") == 0) @@ -401,6 +579,8 @@ class command_line_params m_comp_params.m_rdo_uastc_ldr_4x4_quality_scalar = (float)atof(arg_v[arg_index + 1]); m_comp_params.m_rdo_uastc_ldr_4x4 = true; + m_used_old_style_codec_config_param = true; + arg_count++; return true; } @@ -418,6 +598,9 @@ class command_line_params const int level = 
atoi(arg_v[arg_index + 1]); m_comp_params.m_astc_hdr_6x6_options.set_user_level(level); m_comp_params.set_format_mode(basist::basis_tex_format::cASTC_HDR_6x6); + + m_used_old_style_codec_config_param = true; + arg_count++; return true; } @@ -426,7 +609,10 @@ class command_line_params REMAINING_ARGS_CHECK(1); const int level = atoi(arg_v[arg_index + 1]); m_comp_params.m_astc_hdr_6x6_options.set_user_level(level); - m_comp_params.set_format_mode(basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE); + m_comp_params.set_format_mode(basist::basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE); + + m_used_old_style_codec_config_param = true; + arg_count++; return true; } @@ -444,16 +630,19 @@ class command_line_params { REMAINING_ARGS_CHECK(2); + // Intended for low-level/development/testing const int lo_level = clamp(atoi(arg_v[arg_index + 1]), 0, astc_6x6_hdr::ASTC_HDR_6X6_MAX_COMP_LEVEL); const int hi_level = clamp(atoi(arg_v[arg_index + 2]), 0, astc_6x6_hdr::ASTC_HDR_6X6_MAX_COMP_LEVEL); m_comp_params.m_astc_hdr_6x6_options.m_master_comp_level = minimum(lo_level, hi_level); m_comp_params.m_astc_hdr_6x6_options.m_highest_comp_level = maximum(lo_level, hi_level); - + if (strcasecmp(pArg, "-hdr_6x6_comp_levels") == 0) m_comp_params.set_format_mode(basist::basis_tex_format::cASTC_HDR_6x6); else - m_comp_params.set_format_mode(basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE); + m_comp_params.set_format_mode(basist::basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE); + + m_used_old_style_codec_config_param = true; arg_count += 2; return true; @@ -514,36 +703,223 @@ class command_line_params return false; } -public: - command_line_params() : - m_mode(cDefault), - m_ktx2_mode(true), - m_ktx2_zstandard(true), - m_ktx2_zstandard_level(6), - m_ktx2_animdata_duration(1), - m_ktx2_animdata_timescale(15), - m_ktx2_animdata_loopcount(0), - m_format_only(-1), - m_multifile_first(0), - m_multifile_num(0), - m_max_threads(1024), // surely this is high enough - m_individual(true), - 
m_no_ktx(false), - m_ktx_only(false), - m_write_out(false), - m_etc1_only(false), - m_fuzz_testing(false), - m_compare_ssim(false), - m_compare_plot(false), - m_parallel_compression(false), - m_tonemap_dither_flag(false) + // ETC1S or UASTC LDR 4x4 specific options + bool check_for_etc1s_or_uastc_options(const char** arg_v, const char* pArg, int arg_index, const int num_remaining_args, int& arg_count) { - m_comp_params.m_compression_level = basisu::maximum(0, BASISU_DEFAULT_COMPRESSION_LEVEL - 1); + if (strcasecmp(pArg, "-etc1s") == 0) + { + m_comp_params.set_format_mode(basist::basis_tex_format::cETC1S); + return true; + } + else if ((strcasecmp(pArg, "-uastc") == 0) || (strcasecmp(pArg, "-uastc_ldr") == 0) || (strcasecmp(pArg, "-uastc_ldr_4x4") == 0)) + { + m_comp_params.set_format_mode(basist::basis_tex_format::cUASTC_LDR_4x4); + return true; + } + else if (strcasecmp(pArg, "-uastc_level") == 0) + { + REMAINING_ARGS_CHECK(1); - m_comp_params.m_uastc_hdr_4x4_options.set_quality_level(uastc_hdr_4x4_codec_options::cDefaultLevel); + int uastc_level = atoi(arg_v[arg_index + 1]); - m_test_file_dir = "../test_files"; - } + uastc_level = clamp(uastc_level, 0, TOTAL_PACK_UASTC_LEVELS - 1); + + static_assert(TOTAL_PACK_UASTC_LEVELS == 5, "TOTAL_PACK_UASTC_LEVELS==5"); + static const uint32_t s_level_flags[TOTAL_PACK_UASTC_LEVELS] = { cPackUASTCLevelFastest, cPackUASTCLevelFaster, cPackUASTCLevelDefault, cPackUASTCLevelSlower, cPackUASTCLevelVerySlow }; + + m_comp_params.m_pack_uastc_ldr_4x4_flags &= ~cPackUASTCLevelMask; + m_comp_params.m_pack_uastc_ldr_4x4_flags |= s_level_flags[uastc_level]; + + m_comp_params.m_uastc_hdr_4x4_options.set_quality_level(uastc_level); + + m_used_old_style_codec_config_param = true; + + arg_count++; + return true; + } + else if (strcasecmp(pArg, "-uastc_rdo_l") == 0) + { + REMAINING_ARGS_CHECK(1); + m_comp_params.m_rdo_uastc_ldr_4x4_quality_scalar = (float)atof(arg_v[arg_index + 1]); + m_comp_params.m_rdo_uastc_ldr_4x4 = true; + + 
m_used_old_style_codec_config_param = true; + + arg_count++; + return true; + } + else if (strcasecmp(pArg, "-uastc_rdo_d") == 0) + { + REMAINING_ARGS_CHECK(1); + m_comp_params.m_rdo_uastc_ldr_4x4_dict_size = atoi(arg_v[arg_index + 1]); + arg_count++; + return true; + } + else if (strcasecmp(pArg, "-uastc_rdo_b") == 0) + { + REMAINING_ARGS_CHECK(1); + m_comp_params.m_rdo_uastc_ldr_4x4_max_smooth_block_error_scale = (float)atof(arg_v[arg_index + 1]); + arg_count++; + return true; + } + else if (strcasecmp(pArg, "-uastc_rdo_s") == 0) + { + REMAINING_ARGS_CHECK(1); + m_comp_params.m_rdo_uastc_ldr_4x4_smooth_block_max_std_dev = (float)atof(arg_v[arg_index + 1]); + arg_count++; + return true; + } + else if (strcasecmp(pArg, "-uastc_rdo_f") == 0) + { + m_comp_params.m_rdo_uastc_ldr_4x4_favor_simpler_modes_in_rdo_mode = false; + return true; + } + else if (strcasecmp(pArg, "-uastc_rdo_m") == 0) + { + m_comp_params.m_rdo_uastc_ldr_4x4_multithreading = false; + return true; + } + else if (strcasecmp(pArg, "-validate_etc1s") == 0) + { + m_comp_params.m_validate_etc1s = true; + return true; + } + else if (strcasecmp(pArg, "-comp_level") == 0) + { + REMAINING_ARGS_CHECK(1); + m_comp_params.m_etc1s_compression_level = atoi(arg_v[arg_index + 1]); + + m_used_old_style_codec_config_param = true; + + arg_count++; + return true; + } + else if (strcasecmp(pArg, "-max_endpoints") == 0) + { + REMAINING_ARGS_CHECK(1); + m_comp_params.m_etc1s_max_endpoint_clusters = clamp(atoi(arg_v[arg_index + 1]), 1, BASISU_MAX_ENDPOINT_CLUSTERS); + + m_used_old_style_codec_config_param = true; + + arg_count++; + return true; + } + else if (strcasecmp(pArg, "-max_selectors") == 0) + { + REMAINING_ARGS_CHECK(1); + m_comp_params.m_etc1s_max_selector_clusters = clamp(atoi(arg_v[arg_index + 1]), 1, BASISU_MAX_SELECTOR_CLUSTERS); + + m_used_old_style_codec_config_param = true; + + arg_count++; + return true; + } +#if 0 + else if (strcasecmp(pArg, "-gen_global_codebooks") == 0) + { + // TODO + } +#endif + 
else if (strcasecmp(pArg, "-use_global_codebooks") == 0) + { + REMAINING_ARGS_CHECK(1); + m_etc1s_use_global_codebooks_file = arg_v[arg_index + 1]; + arg_count++; + return true; + } + else if (strcasecmp(pArg, "-etc1_only") == 0) + { + m_etc1_only = true; + m_unpack_format_only = (int)basist::transcoder_texture_format::cTFETC1_RGB; + return true; + } + else if (strcasecmp(pArg, "-disable_hierarchical_endpoint_codebooks") == 0) + { + m_comp_params.m_disable_hierarchical_endpoint_codebooks = true; + return true; + } + else if (strcasecmp(pArg, "-q") == 0) // old-style -q, prefer -quality instead + { + REMAINING_ARGS_CHECK(1); + m_comp_params.m_quality_level = clamp(atoi(arg_v[arg_index + 1]), BASISU_QUALITY_MIN, BASISU_QUALITY_MAX); + + m_used_old_style_codec_config_param = true; + + arg_count++; + return true; + } + else if (strcasecmp(pArg, "-no_selector_rdo") == 0) + { + m_comp_params.m_no_selector_rdo = true; + return true; + } + else if (strcasecmp(pArg, "-selector_rdo_thresh") == 0) + { + REMAINING_ARGS_CHECK(1); + m_comp_params.m_selector_rdo_thresh = (float)atof(arg_v[arg_index + 1]); + arg_count++; + return true; + } + else if (strcasecmp(pArg, "-no_endpoint_rdo") == 0) + { + m_comp_params.m_no_endpoint_rdo = true; + return true; + } + else if (strcasecmp(pArg, "-endpoint_rdo_thresh") == 0) + { + REMAINING_ARGS_CHECK(1); + m_comp_params.m_endpoint_rdo_thresh = (float)atof(arg_v[arg_index + 1]); + arg_count++; + return true; + } + + return false; + } + +public: + command_line_params() : + m_mode(cDefault), + m_ktx2_mode(true), + m_ktx2_zstandard(true), + m_ktx2_zstandard_level(6), + m_ktx2_animdata_duration(1), + m_ktx2_animdata_timescale(15), + m_ktx2_animdata_loopcount(0), + m_unpack_format_only(-1), + m_multifile_first(0), + m_multifile_num(0), + m_max_threads(1024), // surely this is high enough + m_individual(true), + m_no_ktx(false), + m_ktx_only(false), + m_write_out(false), + m_etc1_only(false), + m_fuzz_testing(false), + m_compare_ssim(false), + 
m_compare_plot(false), + m_parallel_compression(false), + m_tonemap_dither_flag(false), + m_xuastc_ldr_disable_bc7_transcoding(false), + m_no_etc1s_transcoding_chroma_filtering(false), + m_higher_quality_transcoding(false), + m_force_deblocking(false), + m_disable_deblocking(false), + m_stronger_deblocking(false), + m_effort_level(-1), + m_quality_level(-1), + m_used_old_style_codec_config_param(false) + { + // This command line tool defaults to ETC1S level 1, not 2 which is the API default (for backwards compat). + m_comp_params.m_etc1s_compression_level = maximum((int)BASISU_DEFAULT_ETC1S_COMPRESSION_LEVEL - 1, 0); + + m_comp_params.m_uastc_hdr_4x4_options.set_quality_level(uastc_hdr_4x4_codec_options::cDefaultLevel); + + // Default to sRGB colorspace metrics/transfer functions (independent of the code defaults). + m_comp_params.m_perceptual = true; + m_comp_params.m_ktx2_and_basis_srgb_transfer_function = true; + + m_test_file_dir = "../test_files"; + } bool parse(int arg_c, const char **arg_v) { @@ -559,8 +935,20 @@ class command_line_params print_usage(); exit(EXIT_SUCCESS); } - - if (strcasecmp(pArg, "-ktx2") == 0) + + if (check_for_etc1s_or_uastc_options(arg_v, pArg, arg_index, num_remaining_args, arg_count)) + { + } + else if (check_for_hdr_options(arg_v, pArg, arg_index, num_remaining_args, arg_count)) + { + } + else if (check_for_xuastc_options(arg_v, pArg, arg_index, num_remaining_args, arg_count)) + { + } + else if (check_for_general_options(arg_v, pArg, arg_index, num_remaining_args, arg_count)) + { + } + else if (strcasecmp(pArg, "-ktx2") == 0) { m_ktx2_mode = true; } @@ -626,6 +1014,8 @@ class command_line_params m_mode = cCompSize; else if ((strcasecmp(pArg, "-test") == 0) || (strcasecmp(pArg, "-test_ldr") == 0)) m_mode = cTestLDR; + else if ((strcasecmp(pArg, "-test_xuastc") == 0) || (strcasecmp(pArg, "-test_xuastc_ldr") == 0)) + m_mode = cTestXUASTCLDR; else if (strcasecmp(pArg, "-test_hdr_4x4") == 0) m_mode = cTestHDR_4x4; else if 
(strcasecmp(pArg, "-test_hdr_6x6") == 0) @@ -646,7 +1036,7 @@ class command_line_params g_cpu_supports_sse41 = false; #endif } - else if (strcasecmp(pArg, "-no_status_output") == 0) + else if ((strcasecmp(pArg, "-no_status_output") == 0) || (strcasecmp(pArg, "-quiet") == 0)) { m_comp_params.m_status_output = false; } @@ -680,50 +1070,6 @@ class command_line_params m_multifile_num = atoi(arg_v[arg_index + 1]); arg_count++; } - else if (strcasecmp(pArg, "-uastc") == 0) - { - m_comp_params.set_format_mode(basist::basis_tex_format::cUASTC4x4); - } - else if (strcasecmp(pArg, "-etc1s") == 0) - { - m_comp_params.set_format_mode(basist::basis_tex_format::cETC1S); - } - else if (strcasecmp(pArg, "-fastest") == 0) - { - m_comp_params.m_pack_uastc_ldr_4x4_flags &= ~cPackUASTCLevelMask; - m_comp_params.m_pack_uastc_ldr_4x4_flags |= cPackUASTCLevelFastest; - - m_comp_params.m_uastc_hdr_4x4_options.set_quality_level(0); - - m_comp_params.m_astc_hdr_6x6_options.set_user_level(0); - } - else if (strcasecmp(pArg, "-slower") == 0) - { - m_comp_params.m_pack_uastc_ldr_4x4_flags &= ~cPackUASTCLevelMask; - m_comp_params.m_pack_uastc_ldr_4x4_flags |= cPackUASTCLevelSlower; - - m_comp_params.m_uastc_hdr_4x4_options.set_quality_level(3); - - m_comp_params.m_astc_hdr_6x6_options.set_user_level(5); - } - else if (strcasecmp(pArg, "-uastc_level") == 0) - { - REMAINING_ARGS_CHECK(1); - - int uastc_level = atoi(arg_v[arg_index + 1]); - - uastc_level = clamp(uastc_level, 0, TOTAL_PACK_UASTC_LEVELS - 1); - - static_assert(TOTAL_PACK_UASTC_LEVELS == 5, "TOTAL_PACK_UASTC_LEVELS==5"); - static const uint32_t s_level_flags[TOTAL_PACK_UASTC_LEVELS] = { cPackUASTCLevelFastest, cPackUASTCLevelFaster, cPackUASTCLevelDefault, cPackUASTCLevelSlower, cPackUASTCLevelVerySlow }; - - m_comp_params.m_pack_uastc_ldr_4x4_flags &= ~cPackUASTCLevelMask; - m_comp_params.m_pack_uastc_ldr_4x4_flags |= s_level_flags[uastc_level]; - - m_comp_params.m_uastc_hdr_4x4_options.set_quality_level(uastc_level); - - 
arg_count++; - } else if (strcasecmp(pArg, "-resample") == 0) { REMAINING_ARGS_CHECK(2); @@ -737,44 +1083,15 @@ class command_line_params m_comp_params.m_resample_factor = (float)atof(arg_v[arg_index + 1]); arg_count++; } - else if (strcasecmp(pArg, "-uastc_rdo_l") == 0) - { - REMAINING_ARGS_CHECK(1); - m_comp_params.m_rdo_uastc_ldr_4x4_quality_scalar = (float)atof(arg_v[arg_index + 1]); - m_comp_params.m_rdo_uastc_ldr_4x4 = true; - arg_count++; - } - else if (strcasecmp(pArg, "-uastc_rdo_d") == 0) - { - REMAINING_ARGS_CHECK(1); - m_comp_params.m_rdo_uastc_ldr_4x4_dict_size = atoi(arg_v[arg_index + 1]); - arg_count++; - } - else if (strcasecmp(pArg, "-uastc_rdo_b") == 0) - { - REMAINING_ARGS_CHECK(1); - m_comp_params.m_rdo_uastc_ldr_4x4_max_smooth_block_error_scale = (float)atof(arg_v[arg_index + 1]); - arg_count++; - } - else if (strcasecmp(pArg, "-uastc_rdo_s") == 0) - { - REMAINING_ARGS_CHECK(1); - m_comp_params.m_rdo_uastc_ldr_4x4_smooth_block_max_std_dev = (float)atof(arg_v[arg_index + 1]); - arg_count++; - } - else if (strcasecmp(pArg, "-uastc_rdo_f") == 0) - m_comp_params.m_rdo_uastc_ldr_4x4_favor_simpler_modes_in_rdo_mode = false; - else if (strcasecmp(pArg, "-uastc_rdo_m") == 0) - m_comp_params.m_rdo_uastc_ldr_4x4_multithreading = false; else if (strcasecmp(pArg, "-linear") == 0) + { m_comp_params.m_perceptual = false; + m_comp_params.m_ktx2_and_basis_srgb_transfer_function = false; + } else if (strcasecmp(pArg, "-srgb") == 0) - m_comp_params.m_perceptual = true; - else if (strcasecmp(pArg, "-q") == 0) { - REMAINING_ARGS_CHECK(1); - m_comp_params.m_etc1s_quality_level = clamp(atoi(arg_v[arg_index + 1]), BASISU_QUALITY_MIN, BASISU_QUALITY_MAX); - arg_count++; + m_comp_params.m_perceptual = true; + m_comp_params.m_ktx2_and_basis_srgb_transfer_function = true; } else if (strcasecmp(pArg, "-output_file") == 0) { @@ -793,10 +1110,6 @@ class command_line_params m_comp_params.m_debug = true; enable_debug_printf(true); } - else if (strcasecmp(pArg, 
"-validate_etc1s") == 0) - { - m_comp_params.m_validate_etc1s = true; - } else if (strcasecmp(pArg, "-validate_output") == 0) { m_comp_params.m_validate_output_data = true; @@ -805,39 +1118,12 @@ class command_line_params m_comp_params.m_debug_images = true; else if (strcasecmp(pArg, "-stats") == 0) m_comp_params.m_compute_stats = true; - else if (strcasecmp(pArg, "-gen_global_codebooks") == 0) - { - // TODO - } - else if (strcasecmp(pArg, "-use_global_codebooks") == 0) - { - REMAINING_ARGS_CHECK(1); - m_etc1s_use_global_codebooks_file = arg_v[arg_index + 1]; - arg_count++; - } - else if (strcasecmp(pArg, "-comp_level") == 0) - { - REMAINING_ARGS_CHECK(1); - m_comp_params.m_compression_level = atoi(arg_v[arg_index + 1]); - arg_count++; - } - else if (strcasecmp(pArg, "-max_endpoints") == 0) - { - REMAINING_ARGS_CHECK(1); - m_comp_params.m_etc1s_max_endpoint_clusters = clamp(atoi(arg_v[arg_index + 1]), 1, BASISU_MAX_ENDPOINT_CLUSTERS); - arg_count++; - } - else if (strcasecmp(pArg, "-max_selectors") == 0) - { - REMAINING_ARGS_CHECK(1); - m_comp_params.m_etc1s_max_selector_clusters = clamp(atoi(arg_v[arg_index + 1]), 1, BASISU_MAX_SELECTOR_CLUSTERS); - arg_count++; - } else if (strcasecmp(pArg, "-y_flip") == 0) m_comp_params.m_y_flip = true; else if (strcasecmp(pArg, "-normal_map") == 0) { m_comp_params.m_perceptual = false; + m_comp_params.m_ktx2_and_basis_srgb_transfer_function = false; m_comp_params.m_mip_srgb = false; m_comp_params.m_no_selector_rdo = true; m_comp_params.m_no_endpoint_rdo = true; @@ -894,7 +1180,7 @@ class command_line_params else if (strcasecmp(pArg, "-max_threads") == 0) { REMAINING_ARGS_CHECK(1); - m_max_threads = atoi(arg_v[arg_index + 1]); + m_max_threads = maximum(1, atoi(arg_v[arg_index + 1])); arg_count++; } else if (strcasecmp(pArg, "-mipmap") == 0) @@ -910,19 +1196,9 @@ class command_line_params else if (strcasecmp(pArg, "-format_only") == 0) { REMAINING_ARGS_CHECK(1); - m_format_only = atoi(arg_v[arg_index + 1]); + m_unpack_format_only 
= atoi(arg_v[arg_index + 1]); arg_count++; } - else if (strcasecmp(pArg, "-etc1_only") == 0) - { - m_etc1_only = true; - m_format_only = (int)basist::transcoder_texture_format::cTFETC1_RGB; - } - else if (strcasecmp(pArg, "-disable_hierarchical_endpoint_codebooks") == 0) - m_comp_params.m_disable_hierarchical_endpoint_codebooks = true; - else if (check_for_hdr_options(arg_v, pArg, arg_index, num_remaining_args, arg_count)) - { - } else if (strcasecmp(pArg, "-opencl") == 0) { m_comp_params.m_use_opencl = true; @@ -940,7 +1216,7 @@ class command_line_params { REMAINING_ARGS_CHECK(1); m_comp_params.m_mip_filter = arg_v[arg_index + 1]; - // TODO: Check filter + // TODO: Check filter arg_count++; } else if (strcasecmp(pArg, "-mip_renorm") == 0) @@ -961,22 +1237,6 @@ class command_line_params m_comp_params.m_mip_srgb = true; else if (strcasecmp(pArg, "-mip_linear") == 0) m_comp_params.m_mip_srgb = false; - else if (strcasecmp(pArg, "-no_selector_rdo") == 0) - m_comp_params.m_no_selector_rdo = true; - else if (strcasecmp(pArg, "-selector_rdo_thresh") == 0) - { - REMAINING_ARGS_CHECK(1); - m_comp_params.m_selector_rdo_thresh = (float)atof(arg_v[arg_index + 1]); - arg_count++; - } - else if (strcasecmp(pArg, "-no_endpoint_rdo") == 0) - m_comp_params.m_no_endpoint_rdo = true; - else if (strcasecmp(pArg, "-endpoint_rdo_thresh") == 0) - { - REMAINING_ARGS_CHECK(1); - m_comp_params.m_endpoint_rdo_thresh = (float)atof(arg_v[arg_index + 1]); - arg_count++; - } else if (strcasecmp(pArg, "-userdata0") == 0) { REMAINING_ARGS_CHECK(1); @@ -1068,20 +1328,28 @@ class command_line_params arg_index += arg_count; assert(arg_index <= arg_c); } - - if (m_comp_params.m_etc1s_quality_level != -1) + + if (m_comp_params.m_quality_level != -1) // old-style -q X option { m_comp_params.m_etc1s_max_endpoint_clusters = 0; m_comp_params.m_etc1s_max_selector_clusters = 0; + + // -q also controls XUASTC LDR weight grid DCT quality level + m_comp_params.m_xuastc_ldr_use_dct = true; + + // Automatically 
enable lossy XUASTC supercompression if DCT is enabled. + if (!m_comp_params.m_xuastc_ldr_use_lossy_supercompression.was_changed()) + m_comp_params.m_xuastc_ldr_use_lossy_supercompression = true; } else if ((!m_comp_params.m_etc1s_max_endpoint_clusters) || (!m_comp_params.m_etc1s_max_selector_clusters)) { m_comp_params.m_etc1s_max_endpoint_clusters = 0; m_comp_params.m_etc1s_max_selector_clusters = 0; - m_comp_params.m_etc1s_quality_level = 128; + m_comp_params.m_quality_level = 128; } - + + // Ensure mip_srgb is set to match the perceptual flag if the user didn't explicitly set it. if (!m_comp_params.m_mip_srgb.was_changed()) { // They didn't specify what colorspace to do mipmap filtering in, so choose sRGB if they've specified that the texture is sRGB. @@ -1091,6 +1359,30 @@ class command_line_params m_comp_params.m_mip_srgb = false; } + // Handle new-style unified effort and quality levels across all codecs. + // We have so many codecs now that it's necessary to unify the primary quality/effort controls otherwise it's too confusing. + // If they've specified either -effort or -quality, assume they want the new unified API. + // If they haven't specified either, they get the old parameters/options. + if ((m_effort_level != -1) || (m_quality_level != -1)) + { + if (m_used_old_style_codec_config_param) + { + fmt_printf("WARNING: Mixing old and new-style (-effort and/or -quality) codec configuration parameters.\nNew-style parameters may overwrite your old-style codec configuration settings.\nPrefer using -effort X and -quality X.\n"); + } + + const bool lossy_supercompression_changed = m_comp_params.m_xuastc_ldr_use_lossy_supercompression.was_changed(); + const bool lossy_supercompression_value = m_comp_params.m_xuastc_ldr_use_lossy_supercompression; + + // Set the new-style effort/quality level, but importantly don't override any settings already changed if they haven't explicitly specified -effort or -quality. 
+ m_comp_params.set_format_mode_and_quality_effort(m_comp_params.get_format_mode(), m_quality_level, m_effort_level, false); + + // Allow the user to override the lossy supercompression setting, independent of the quality/effort levels. + if (lossy_supercompression_changed) + { + m_comp_params.m_xuastc_ldr_use_lossy_supercompression = lossy_supercompression_value; + } + } + return true; } @@ -1121,28 +1413,28 @@ class command_line_params new_input_alpha_filenames.push_back(m_input_alpha_filenames[i]); } new_input_alpha_filenames.swap(m_input_alpha_filenames); - + return true; } basis_compressor_params m_comp_params; - + tool_mode m_mode; - + bool m_ktx2_mode; bool m_ktx2_zstandard; int m_ktx2_zstandard_level; uint32_t m_ktx2_animdata_duration; uint32_t m_ktx2_animdata_timescale; uint32_t m_ktx2_animdata_loopcount; - + basisu::vector m_input_filenames; basisu::vector m_input_alpha_filenames; std::string m_output_filename; std::string m_output_path; - int m_format_only; + int m_unpack_format_only; std::string m_multifile_printf; uint32_t m_multifile_first; @@ -1153,9 +1445,9 @@ class command_line_params std::string m_etc1s_use_global_codebooks_file; std::string m_test_file_dir; - + uint32_t m_max_threads; - + bool m_individual; bool m_no_ktx; bool m_ktx_only; @@ -1166,19 +1458,29 @@ class command_line_params bool m_compare_plot; bool m_parallel_compression; bool m_tonemap_dither_flag; + bool m_xuastc_ldr_disable_bc7_transcoding; + bool m_no_etc1s_transcoding_chroma_filtering; + bool m_higher_quality_transcoding; + bool m_force_deblocking; + bool m_disable_deblocking; + bool m_stronger_deblocking; + + int m_effort_level; + int m_quality_level; + bool m_used_old_style_codec_config_param; // true if the user has specified low-level or old-style codec configuration parameters }; static bool expand_multifile(command_line_params &opts) { if (!opts.m_multifile_printf.size()) return true; - + if (!opts.m_multifile_num) { error_printf("-multifile_printf specified, but not 
-multifile_num\n"); return false; } - + std::string fmt(opts.m_multifile_printf); // Workaround for MSVC debugger issues. Questionable to leave in here. size_t x = fmt.find_first_of('!'); @@ -1190,15 +1492,15 @@ static bool expand_multifile(command_line_params &opts) error_printf("Must include C-style printf() format character '%%' in -multifile_printf string\n"); return false; } - + for (uint32_t i = opts.m_multifile_first; i < opts.m_multifile_first + opts.m_multifile_num; i++) { char buf[1024]; -#ifdef _WIN32 +#ifdef _WIN32 sprintf_s(buf, sizeof(buf), fmt.c_str(), i); #else snprintf(buf, sizeof(buf), fmt.c_str(), i); -#endif +#endif if (buf[0]) opts.m_input_filenames.push_back(buf); @@ -1209,8 +1511,8 @@ static bool expand_multifile(command_line_params &opts) struct basis_data { - basis_data() : - m_transcoder() + basis_data() : + m_transcoder() { } uint8_vec m_file_data; @@ -1227,7 +1529,7 @@ static basis_data *load_basis_file(const char *pInput_filename, bool force_etc1s delete p; return nullptr; } - printf("Input file \"%s\"\n", pInput_filename); + printf("\nInput file \"%s\"\n", pInput_filename); if (!basis_data.size()) { error_printf("File is empty!\n"); @@ -1258,6 +1560,24 @@ static basis_data *load_basis_file(const char *pInput_filename, bool force_etc1s return p; } +static uint32_t get_transcode_flags_from_options(const command_line_params& opts) +{ + uint32_t transcode_flags = opts.m_higher_quality_transcoding ? 
basist::cDecodeFlagsHighQuality : 0; + + if (opts.m_disable_deblocking) + transcode_flags |= basist::cDecodeFlagsNoDeblockFiltering; + else if (opts.m_force_deblocking) + transcode_flags |= basist::cDecodeFlagsForceDeblockFiltering; + if (opts.m_stronger_deblocking) + transcode_flags |= basist::cDecodeFlagsStrongerDeblockFiltering; + if (opts.m_no_etc1s_transcoding_chroma_filtering) + transcode_flags |= basist::cDecodeFlagsNoETC1SChromaFiltering; + if (opts.m_xuastc_ldr_disable_bc7_transcoding) + transcode_flags |= basist::cDecodeFlagXUASTCLDRDisableFastBC7Transcoding; + + return transcode_flags; +} + static bool compress_mode(command_line_params &opts) { uint32_t num_threads = 1; @@ -1265,17 +1585,18 @@ static bool compress_mode(command_line_params &opts) if (opts.m_comp_params.m_multithreading) { // We use std::thread::hardware_concurrency() as a hint to determine the default # of threads to put into a pool. - num_threads = std::thread::hardware_concurrency(); + num_threads = get_num_hardware_threads(); if (num_threads < 1) num_threads = 1; if (num_threads > opts.m_max_threads) num_threads = opts.m_max_threads; } + // num_threads is the total thread pool size, *including* the calling thread. So 1=no extra threads. job_pool compressor_jpool(opts.m_parallel_compression ? 
1 : num_threads); if (!opts.m_parallel_compression) opts.m_comp_params.m_pJob_pool = &compressor_jpool; - + if (!expand_multifile(opts)) { error_printf("-multifile expansion failed!\n"); @@ -1287,7 +1608,7 @@ static bool compress_mode(command_line_params &opts) error_printf("No input files to process!\n"); return false; } - + basis_data* pGlobal_codebook_data = nullptr; if (opts.m_etc1s_use_global_codebooks_file.size()) { @@ -1297,7 +1618,7 @@ static bool compress_mode(command_line_params &opts) printf("Loaded global codebooks from .basis file \"%s\"\n", opts.m_etc1s_use_global_codebooks_file.c_str()); } - + basis_compressor_params &params = opts.m_comp_params; if (opts.m_ktx2_mode) @@ -1307,9 +1628,7 @@ static bool compress_mode(command_line_params &opts) params.m_ktx2_uastc_supercompression = basist::KTX2_SS_ZSTANDARD; else params.m_ktx2_uastc_supercompression = basist::KTX2_SS_NONE; - - params.m_ktx2_srgb_transfer_func = opts.m_comp_params.m_perceptual; - + if (params.m_tex_type == basist::basis_texture_type::cBASISTexTypeVideoFrames) { // Create KTXanimData key value entry @@ -1319,7 +1638,7 @@ static bool compress_mode(command_line_params &opts) const char* pAD = "KTXanimData"; kv.m_key.resize(strlen(pAD) + 1); strcpy((char*)kv.m_key.data(), pAD); - + basist::ktx2_animdata ad; ad.m_duration = opts.m_ktx2_animdata_duration; ad.m_timescale = opts.m_ktx2_animdata_timescale; @@ -1330,15 +1649,18 @@ static bool compress_mode(command_line_params &opts) params.m_ktx2_key_values.push_back(kv); } - + // TODO- expose this to command line. params.m_ktx2_zstd_supercompression_level = opts.m_ktx2_zstandard_level; } params.m_read_source_images = true; params.m_write_output_basis_or_ktx2_files = true; - params.m_pGlobal_codebooks = pGlobal_codebook_data ? &pGlobal_codebook_data->m_transcoder.get_lowlevel_etc1s_decoder() : nullptr; - + params.m_pGlobal_codebooks = pGlobal_codebook_data ? 
&pGlobal_codebook_data->m_transcoder.get_lowlevel_etc1s_decoder() : nullptr; + + // Get the transcode/decode flags used when validating the output by calling the transcoder from the options. + params.m_transcode_flags = get_transcode_flags_from_options(opts); + FILE *pCSV_file = nullptr; if (opts.m_csv_file.size()) { @@ -1354,7 +1676,7 @@ static bool compress_mode(command_line_params &opts) } printf("Processing %u total file(s)\n", (uint32_t)opts.m_input_filenames.size()); - + interval_timer all_tm; all_tm.start(); @@ -1403,7 +1725,7 @@ static bool compress_mode(command_line_params &opts) params.m_source_filenames = opts.m_input_filenames; params.m_source_alpha_filenames = opts.m_input_alpha_filenames; } - + if (opts.m_output_filename.size()) params.m_out_filename = opts.m_output_filename; else @@ -1459,9 +1781,11 @@ static bool compress_mode(command_line_params &opts) if (params.m_status_output) { - printf("Compression succeeded to file \"%s\" size %zu bytes in %3.3f secs\n", params.m_out_filename.c_str(), - opts.m_ktx2_mode ? c.get_output_ktx2_file().size() : c.get_output_basis_file().size(), - tm.get_elapsed_secs()); + fmt_printf("Compression succeeded to file \"{}\" size {} bytes in {3.3} secs, {3.3} bits/texel\n", + params.m_out_filename.c_str(), + opts.m_ktx2_mode ? (uint64_t)c.get_output_ktx2_file().size() : (uint64_t)c.get_output_basis_file().size(), + tm.get_elapsed_secs(), + opts.m_ktx2_mode ? 
c.get_ktx2_bits_per_texel() : c.get_basis_bits_per_texel()); } } else @@ -1479,6 +1803,15 @@ static bool compress_mode(command_line_params &opts) switch (ec) { + case basis_compressor::cECFailedInvalidParameters: + { + error_printf("Invalid compressor parameters (internal error)\n"); + + if (opts.m_individual) + exit_flag = false; + + break; + } case basis_compressor::cECFailedReadingSourceImages: { error_printf("Compressor failed reading a source image!\n"); @@ -1497,7 +1830,7 @@ static bool compress_mode(command_line_params &opts) case basis_compressor::cECFailedFrontEnd: error_printf("Compressor frontend stage failed!\n"); break; - case basis_compressor::cECFailedFontendExtract: + case basis_compressor::cECFailedFrontendExtract: error_printf("Compressor frontend data extraction failed!\n"); break; case basis_compressor::cECFailedBackend: @@ -1559,7 +1892,7 @@ static bool compress_mode(command_line_params &opts) fprintf(pCSV_file, "\"%s\", %u, %u, %u, %u, %u, %f, %f, %f, %f, %f, %u, %u, %f, %f, %f, %f, %f, %f, %f\n", params.m_out_filename.c_str(), - c.get_basis_file_size(), + (uint32_t)c.get_basis_file_size(), (uint32_t)c.get_stats().size(), c.get_stats()[0].m_width, c.get_stats()[0].m_height, (uint32_t)c.get_any_source_image_has_alpha(), c.get_basis_bits_per_texel(), @@ -1567,7 +1900,7 @@ static bool compress_mode(command_line_params &opts) c.get_stats()[0].m_basis_rgba_avg_psnr, c.get_stats()[0].m_basis_luma_709_psnr, c.get_stats()[0].m_best_etc1s_luma_709_psnr, - params.m_etc1s_quality_level, (int)params.m_compression_level, tm.get_elapsed_secs(), + params.m_quality_level, (int)params.m_etc1s_compression_level, tm.get_elapsed_secs(), rgb_avg_psnr_min, rgb_avg_psnr_avg, a_avg_psnr_min, a_avg_psnr_avg, luma_709_psnr_min, luma_709_psnr_avg); @@ -1591,7 +1924,7 @@ static bool compress_mode(command_line_params &opts) comp_params_vec, results); BASISU_NOTE_UNUSED(any_failed); - + for (uint32_t i = 0; i < comp_params_vec.size(); i++) { if (results[i].m_error_code != 
basis_compressor::cECSuccess) @@ -1600,8 +1933,8 @@ static bool compress_mode(command_line_params &opts) total_failures++; - error_printf("File %u (first source image: \"%s\", output file: \"%s\") failed with error code %i!\n", i, - comp_params_vec[i].m_source_filenames[0].c_str(), + error_printf("File %u (first source image: \"%s\", output file: \"%s\") failed with error code %i!\n", i, + comp_params_vec[i].m_source_filenames[0].c_str(), comp_params_vec[i].m_out_filename.c_str(), (int)results[i].m_error_code); } @@ -1610,11 +1943,11 @@ static bool compress_mode(command_line_params &opts) total_successes++; } } - + } // if (opts.m_parallel_compression) printf("Total successes: %u failures: %u\n", total_successes, total_failures); - + all_tm.stop(); if (total_files > 1) @@ -1625,7 +1958,7 @@ static bool compress_mode(command_line_params &opts) fclose(pCSV_file); pCSV_file = nullptr; } - delete pGlobal_codebook_data; + delete pGlobal_codebook_data; pGlobal_codebook_data = nullptr; return result; @@ -1641,9 +1974,8 @@ static bool unpack_and_validate_ktx2_file( uint32_t& total_unpack_warnings, uint32_t& total_pvrtc_nonpow2_warnings) { - // TODO - (void)pCSV_file; - (void)file_index; + BASISU_NOTE_UNUSED(pCSV_file); + BASISU_NOTE_UNUSED(file_index); const bool validate_flag = (opts.m_mode == cValidate); @@ -1666,7 +1998,7 @@ static bool unpack_and_validate_ktx2_file( error_printf("ktx2_transcoder::start_transcoding() failed! 
File either uses an unsupported feature, is invalid, was corrupted, or this is a bug.\n"); return false; } - + printf("Resolution: %ux%u\n", dec.get_width(), dec.get_height()); fmt_printf("Block size: {}x{}\n", dec.get_block_width(), dec.get_block_height()); printf("Mipmap Levels: %u\n", dec.get_levels()); @@ -1676,10 +2008,13 @@ static bool unpack_and_validate_ktx2_file( if (dec.is_hdr()) fmt_printf("LDR to HDR upconversion nit multiplier: {}\n", dec.get_ldr_hdr_upconversion_nit_multiplier()); - + const bool is_etc1s = (dec.get_basis_tex_format() == basist::basis_tex_format::cETC1S); - + bool is_hdr = false; + //bool is_xuastc_ldr = false, is_astc_ldr = false; + + std::string fmt_str_temp; const char* pFmt_str = nullptr; switch (dec.get_basis_tex_format()) @@ -1689,7 +2024,7 @@ static bool unpack_and_validate_ktx2_file( pFmt_str = "ETC1S"; break; } - case basist::basis_tex_format::cUASTC4x4: + case basist::basis_tex_format::cUASTC_LDR_4x4: { pFmt_str = "UASTC_LDR_4x4"; break; @@ -1706,10 +2041,56 @@ static bool unpack_and_validate_ktx2_file( pFmt_str = "ASTC_HDR_6x6"; break; } - case basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE: + case basist::basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE: { is_hdr = true; - pFmt_str = "ASTC_HDR_6x6_INTERMEDIATE"; + pFmt_str = "UASTC_HDR_6x6_INTERMEDIATE"; + break; + } + case basist::basis_tex_format::cXUASTC_LDR_4x4: + case basist::basis_tex_format::cXUASTC_LDR_5x4: + case basist::basis_tex_format::cXUASTC_LDR_5x5: + case basist::basis_tex_format::cXUASTC_LDR_6x5: + case basist::basis_tex_format::cXUASTC_LDR_6x6: + case basist::basis_tex_format::cXUASTC_LDR_8x5: + case basist::basis_tex_format::cXUASTC_LDR_8x6: + case basist::basis_tex_format::cXUASTC_LDR_10x5: + case basist::basis_tex_format::cXUASTC_LDR_10x6: + case basist::basis_tex_format::cXUASTC_LDR_8x8: + case basist::basis_tex_format::cXUASTC_LDR_10x8: + case basist::basis_tex_format::cXUASTC_LDR_10x10: + case basist::basis_tex_format::cXUASTC_LDR_12x10: + case 
basist::basis_tex_format::cXUASTC_LDR_12x12: + { + //is_xuastc_ldr = true; + + uint32_t block_width = 0, block_height = 0; + basist::get_basis_tex_format_block_size(dec.get_basis_tex_format(), block_width, block_height); + fmt_str_temp = fmt_string("XUASTC_LDR_{}x{}", block_width, block_height); + pFmt_str = fmt_str_temp.c_str(); + break; + } + case basist::basis_tex_format::cASTC_LDR_4x4: + case basist::basis_tex_format::cASTC_LDR_5x4: + case basist::basis_tex_format::cASTC_LDR_5x5: + case basist::basis_tex_format::cASTC_LDR_6x5: + case basist::basis_tex_format::cASTC_LDR_6x6: + case basist::basis_tex_format::cASTC_LDR_8x5: + case basist::basis_tex_format::cASTC_LDR_8x6: + case basist::basis_tex_format::cASTC_LDR_10x5: + case basist::basis_tex_format::cASTC_LDR_10x6: + case basist::basis_tex_format::cASTC_LDR_8x8: + case basist::basis_tex_format::cASTC_LDR_10x8: + case basist::basis_tex_format::cASTC_LDR_10x10: + case basist::basis_tex_format::cASTC_LDR_12x10: + case basist::basis_tex_format::cASTC_LDR_12x12: + { + //is_astc_ldr = true; + + uint32_t block_width = 0, block_height = 0; + basist::get_basis_tex_format_block_size(dec.get_basis_tex_format(), block_width, block_height); + fmt_str_temp = fmt_string("ASTC_LDR_{}x{}", block_width, block_height); + pFmt_str = fmt_str_temp.c_str(); break; } default: @@ -1718,24 +2099,27 @@ static bool unpack_and_validate_ktx2_file( return false; } } - - printf("Supercompression Format: %s\n", pFmt_str); - - printf("Supercompression Scheme: "); + + printf("KTX2 Supercompression Scheme: "); switch (dec.get_header().m_supercompression_scheme) { case basist::KTX2_SS_NONE: printf("NONE\n"); break; case basist::KTX2_SS_BASISLZ: printf("BASISLZ\n"); break; case basist::KTX2_SS_ZSTANDARD: printf("ZSTANDARD\n"); break; + case basist::KTX2_SS_DEFLATE: printf("DEFLATE\n"); break; + case basist::KTX2_SS_UASTC_HDR_6x6I: printf("UASTC_HDR_6x6I\n"); break; + case basist::KTX2_SS_XUASTC_LDR: printf("XUASTC_LDR\n"); break; default: 
error_printf("Invalid/unknown/unsupported\n"); return false; } - printf("Has Alpha: %u\n", (uint32_t)dec.get_has_alpha()); + printf("Library Supercompression Format: %s\n", pFmt_str); + printf("Has Alpha: %u\n", (uint32_t)dec.get_has_alpha()); + printf("\nKTX2 header vk_format: 0x%X (decimal %u)\n", (uint32_t)dec.get_header().m_vk_format, (uint32_t)dec.get_header().m_vk_format); - + printf("\nData Format Descriptor (DFD):\n"); printf("DFD length in bytes: %zu\n", dec.get_dfd().size()); printf("DFD color model: %u\n", dec.get_dfd_color_model()); @@ -1751,8 +2135,13 @@ static bool unpack_and_validate_ktx2_file( printf("DFD chan1: %s\n", basist::ktx2_get_etc1s_df_channel_id_str(dec.get_dfd_channel_id1())); } else + { printf("DFD chan0: %s\n", basist::ktx2_get_uastc_df_channel_id_str(dec.get_dfd_channel_id0())); + } + // For proper ASTC decoding we must know which ASTC decode profile to apply (sRGB or linear). + const bool actual_ktx2_srgb_transfer_func = (dec.get_dfd_transfer_func() == basist::KTX2_KHR_DF_TRANSFER_SRGB); + printf("DFD hex values:\n"); for (uint32_t i = 0; i < dec.get_dfd().size(); i++) { @@ -1764,6 +2153,25 @@ static bool unpack_and_validate_ktx2_file( } printf("\n"); + // the sRGB transfer function to use while unpacking astc content (ideally we want this to always match what we used during astc encoding) + bool srgb_transfer_func_astc_unpacking = actual_ktx2_srgb_transfer_func; + + // the sRGB transfer function to use when writing out files (we want to indicate to the caller if the data is sRGB or linear) + bool srgb_transfer_func_astc_writing = actual_ktx2_srgb_transfer_func; + + const bool is_uastc_ldr_4x4 = (dec.get_basis_tex_format() == basist::basis_tex_format::cUASTC_LDR_4x4); + if ((is_etc1s) || (is_uastc_ldr_4x4)) + { + // The ETC1S and UASTC LDR 4x4 transcoders supply ASTC LDR 4x4 data assuming the decoder will NOT be using the sRGB read decode profile, which is likely the most common case (in geospatial rendering scenarios). 
+ // Note XUASTC/UASTC LDR 4x4-12x12 supports both linear and sRGB decode profiles throughout the entire pipeline (encoding/transcoding/decoding to raw pixels). + srgb_transfer_func_astc_unpacking = false; + + // This matches the behavior of our original tools. It ensures astcenc uses linear by default when reading our transcoded .KTX files. + srgb_transfer_func_astc_writing = false; + + if (actual_ktx2_srgb_transfer_func) + printf("Note: ETC1S/UASTC LDR 4x4 will always be decoded by this tool using the ASTC linear decode profile, regardless of the KTX2/.basis DFD transfer function field.\n"); + } printf("Total key values: %zu\n", dec.get_key_values().size()); for (uint32_t i = 0; i < dec.get_key_values().size(); i++) @@ -1772,14 +2180,14 @@ static bool unpack_and_validate_ktx2_file( if (dec.get_key_values()[i].m_value.size() > 256) continue; - + bool is_ascii = true; for (uint32_t j = 0; j < dec.get_key_values()[i].m_value.size(); j++) { uint8_t c = dec.get_key_values()[i].m_value[j]; if (!( - ((c >= ' ') && (c < 0x80)) || - ((j == dec.get_key_values()[i].m_value.size() - 1) && (!c)) + (!c) || + ((c >= ' ') && (c < 0x80)) )) { is_ascii = false; @@ -1851,10 +2259,10 @@ static bool unpack_and_validate_ktx2_file( error_printf("Failed retrieving image level information (%u %u %u)!\n", layer_index, level_index, face_index); return false; } - + fmt_printf("--- Level Index: {}, Layer Index: {}, Face Index: {}\n", level_info.m_level_index, level_info.m_layer_index, level_info.m_face_index); - + fmt_printf("Orig width/height: {}x{}\n", level_info.m_orig_width, level_info.m_orig_height); fmt_printf("Width/height: {}x{}\n", level_info.m_width, level_info.m_height); fmt_printf("Block width/height: {}x{}\n", level_info.m_block_width, level_info.m_block_height); @@ -1878,12 +2286,14 @@ static bool unpack_and_validate_ktx2_file( int first_format = 0; int last_format = (int)basist::transcoder_texture_format::cTFTotalTextureFormats; - if (opts.m_format_only > -1) + if 
(opts.m_unpack_format_only > -1) { - first_format = opts.m_format_only; + first_format = opts.m_unpack_format_only; last_format = first_format + 1; } + uint32_t transcode_flags = get_transcode_flags_from_options(opts); + for (int format_iter = first_format; format_iter < last_format; format_iter++) { basist::transcoder_texture_format tex_fmt = static_cast(format_iter); @@ -1934,7 +2344,7 @@ static bool unpack_and_validate_ktx2_file( if ((transcoder_tex_fmt == basist::transcoder_texture_format::cTFPVRTC1_4_RGB) || (transcoder_tex_fmt == basist::transcoder_texture_format::cTFPVRTC1_4_RGBA)) { - if (!is_pow2(level_info.m_width) || !is_pow2(level_info.m_height)) + if (!is_pow2(level_info.m_orig_width) || !is_pow2(level_info.m_orig_height)) { total_pvrtc_nonpow2_warnings++; @@ -1952,13 +2362,11 @@ static bool unpack_and_validate_ktx2_file( // Fill the buffer with psuedo-random bytes, to help more visibly detect cases where the transcoder fails to write to part of the output. fill_buffer_with_random_bytes(gi.get_ptr(), gi.get_size_in_bytes()); - - const uint32_t decode_flags = basist::cDecodeFlagsHighQuality; - + interval_timer tm; tm.start(); - if (!dec.transcode_image_level(level_index, layer_index, face_index, gi.get_ptr(), gi.get_total_blocks(), transcoder_tex_fmt, decode_flags)) + if (!dec.transcode_image_level(level_index, layer_index, face_index, gi.get_ptr(), gi.get_total_blocks(), transcoder_tex_fmt, transcode_flags)) { error_printf("Failed transcoding image level (%u %u %u %u)!\n", layer_index, level_index, face_index, format_iter); return false; @@ -1966,7 +2374,7 @@ static bool unpack_and_validate_ktx2_file( double total_time = tm.get_elapsed_ms(); - printf("Transcode of layer %u level %u face %u res %ux%u format %s succeeded in %3.3f ms\n", layer_index, level_index, face_index, + printf("Transcode of layer %u level %u face %u res %ux%u format %s succeeded in %3.3f ms\n", layer_index, level_index, face_index, level_info.m_orig_width, level_info.m_orig_height, 
basist::basis_get_format_name(transcoder_tex_fmt), total_time); } @@ -1980,14 +2388,14 @@ static bool unpack_and_validate_ktx2_file( if (validate_flag) return true; - // Now write KTX/DDS files and unpack them to individual PNG's/EXR's + // Now write KTX/DDS/ASTC files and unpack them to individual PNG's/EXR's const bool is_cubemap = (dec.get_faces() > 1); const bool is_array = (total_layers > 1); const bool is_cubemap_array = is_cubemap && is_array; const bool is_mipmapped = dec.get_levels() > 1; BASISU_NOTE_UNUSED(is_cubemap_array); BASISU_NOTE_UNUSED(is_mipmapped); - + // The maximum Direct3D array size is 2048. const uint32_t MAX_DDS_TEXARRAY_SIZE = 2048; @@ -1995,7 +2403,7 @@ static bool unpack_and_validate_ktx2_file( { const basist::transcoder_texture_format transcoder_tex_fmt = static_cast(format_iter); const basisu::texture_format tex_fmt = basis_get_basisu_texture_format(transcoder_tex_fmt); - + if (basist::basis_transcoder_format_is_uncompressed(transcoder_tex_fmt)) continue; if (!basis_is_format_supported(transcoder_tex_fmt, dec.get_basis_tex_format())) @@ -2003,8 +2411,10 @@ static bool unpack_and_validate_ktx2_file( if (transcoder_tex_fmt == basist::transcoder_texture_format::cTFBC7_ALT) continue; - // TODO: Could write DDS texture arrays. + const bool is_fmt_astc = basis_is_transcoder_texture_format_astc(transcoder_tex_fmt); + // TODO: Could write DDS texture arrays. + // No KTX tool that we know of supports cubemap arrays, so write individual cubemap files for each layer. 
if ((!opts.m_no_ktx) && (is_cubemap)) { @@ -2015,22 +2425,24 @@ static bool unpack_and_validate_ktx2_file( for (uint32_t face_index = 0; face_index < 6; face_index++) cubemap.push_back(gpu_images[format_iter][face_index][layer_index]); + // Write KTX1 file { std::string ktx_filename(base_filename + string_format("_transcoded_cubemap_%s_layer_%u.ktx", basist::basis_get_format_name(transcoder_tex_fmt), layer_index)); - if (!write_compressed_texture_file(ktx_filename.c_str(), cubemap, true, true)) + if (!write_compressed_texture_file(ktx_filename.c_str(), cubemap, true, is_fmt_astc ? srgb_transfer_func_astc_writing : actual_ktx2_srgb_transfer_func)) { error_printf("Failed writing KTX file \"%s\"!\n", ktx_filename.c_str()); return false; } printf("Wrote .KTX cubemap file \"%s\"\n", ktx_filename.c_str()); } - + + // Write .DDS file if (does_dds_support_format(cubemap[0][0].get_format())) { std::string dds_filename(base_filename + string_format("_transcoded_cubemap_%s_layer_%u.dds", basist::basis_get_format_name(transcoder_tex_fmt), layer_index)); - if (!write_compressed_texture_file(dds_filename.c_str(), cubemap, true, true)) + if (!write_compressed_texture_file(dds_filename.c_str(), cubemap, true, actual_ktx2_srgb_transfer_func)) { error_printf("Failed writing DDS file \"%s\"!\n", dds_filename.c_str()); return false; @@ -2052,7 +2464,7 @@ static bool unpack_and_validate_ktx2_file( std::string dds_filename(base_filename + string_format("_transcoded_array_%s.dds", basist::basis_get_format_name(transcoder_tex_fmt))); - if (!write_compressed_texture_file(dds_filename.c_str(), tex_array, is_cubemap, true)) + if (!write_compressed_texture_file(dds_filename.c_str(), tex_array, is_cubemap, actual_ktx2_srgb_transfer_func)) { error_printf("Failed writing DDS file \"%s\"!\n", dds_filename.c_str()); return false; @@ -2061,7 +2473,7 @@ static bool unpack_and_validate_ktx2_file( } } - // Now unpack each layer and face individually and write KTX/DDS/PNG/EXR files for each + // Now 
unpack each layer and face individually and write KTX/DDS/ASTC/PNG/EXR/OUT files for each for (uint32_t layer_index = 0; layer_index < total_layers; layer_index++) { for (uint32_t face_index = 0; face_index < dec.get_faces(); face_index++) @@ -2090,7 +2502,7 @@ static bool unpack_and_validate_ktx2_file( else ktx_filename = base_filename + string_format("_transcoded_%s_layer_%04u.ktx", basist::basis_get_format_name(transcoder_tex_fmt), layer_index); - if (!write_compressed_texture_file(ktx_filename.c_str(), gi, true)) + if (!write_compressed_texture_file(ktx_filename.c_str(), gi, is_fmt_astc ? srgb_transfer_func_astc_writing : actual_ktx2_srgb_transfer_func)) { error_printf("Failed writing KTX file \"%s\"!\n", ktx_filename.c_str()); return false; @@ -2107,7 +2519,7 @@ static bool unpack_and_validate_ktx2_file( else dds_filename = base_filename + string_format("_transcoded_%s_layer_%04u.dds", basist::basis_get_format_name(transcoder_tex_fmt), layer_index); - if (!write_compressed_texture_file(dds_filename.c_str(), gi, true)) + if (!write_compressed_texture_file(dds_filename.c_str(), gi, actual_ktx2_srgb_transfer_func)) { error_printf("Failed writing DDS file \"%s\"!\n", dds_filename.c_str()); return false; @@ -2153,11 +2565,30 @@ static bool unpack_and_validate_ktx2_file( } printf("Wrote .EXR file \"%s\"\n", rgb_filename.c_str()); } + + // Save .astc + if ((!opts.m_ktx_only) && basist::basis_is_transcoder_texture_format_astc(transcoder_tex_fmt)) + { + std::string astc_filename; + if (gi.size() > 1) + astc_filename = base_filename + string_format("_unpacked_%s_level_%u_face_%u_layer_%04u.astc", basist::basis_get_format_name(transcoder_tex_fmt), level_index, face_index, layer_index); + else + astc_filename = base_filename + string_format("_unpacked_%s_face_%u_layer_%04u.astc", basist::basis_get_format_name(transcoder_tex_fmt), face_index, layer_index); + + const gpu_image& level_g = gi[level_index]; + + if (!write_astc_file(astc_filename.c_str(), level_g.get_ptr(), 
level_g.get_block_width(), level_g.get_block_height(), level_info.m_width, level_info.m_height)) + { + error_printf("Failed writing to .ASTC file \"%s\"\n", astc_filename.c_str()); + return false; + } + printf("Wrote .ASTC file \"%s\"\n", astc_filename.c_str()); + } } else { image u; - if (!gi[level_index].unpack(u)) + if (!gi[level_index].unpack(u, srgb_transfer_func_astc_unpacking)) { printf("Warning: Failed unpacking GPU texture data (%u %u %u %u). Unpacking as much as possible.\n", format_iter, layer_index, level_index, face_index); total_unpack_warnings++; @@ -2182,23 +2613,7 @@ static bool unpack_and_validate_ktx2_file( } printf("Wrote .PNG file \"%s\"\n", rgb_filename.c_str()); } - - // Save .OUT - if ((transcoder_tex_fmt == basist::transcoder_texture_format::cTFFXT1_RGB) && (opts.m_write_out)) - { - std::string out_filename; - if (gi.size() > 1) - out_filename = base_filename + string_format("_unpacked_rgb_%s_level_%u_face_%u_layer_%04u.out", basist::basis_get_format_name(transcoder_tex_fmt), level_index, face_index, layer_index); - else - out_filename = base_filename + string_format("_unpacked_rgb_%s_face_%u_layer_%04u.out", basist::basis_get_format_name(transcoder_tex_fmt), face_index, layer_index); - if (!write_3dfx_out_file(out_filename.c_str(), gi[level_index])) - { - error_printf("Failed writing to OUT file \"%s\"\n", out_filename.c_str()); - return false; - } - printf("Wrote .OUT file \"%s\"\n", out_filename.c_str()); - } - + // Save alpha if (basis_transcoder_format_has_alpha(transcoder_tex_fmt) && (!opts.m_ktx_only) && (write_png)) { @@ -2227,6 +2642,41 @@ static bool unpack_and_validate_ktx2_file( printf("Wrote .PNG file \"%s\"\n", rgba_filename.c_str()); } + // Save .astc + if ((!opts.m_ktx_only) && basist::basis_is_transcoder_texture_format_astc(transcoder_tex_fmt)) + { + std::string astc_filename; + if (gi.size() > 1) + astc_filename = base_filename + string_format("_unpacked_%s_level_%u_face_%u_layer_%04u.astc", 
basist::basis_get_format_name(transcoder_tex_fmt), level_index, face_index, layer_index); + else + astc_filename = base_filename + string_format("_unpacked_%s_face_%u_layer_%04u.astc", basist::basis_get_format_name(transcoder_tex_fmt), face_index, layer_index); + + const gpu_image& level_g = gi[level_index]; + + if (!write_astc_file(astc_filename.c_str(), level_g.get_ptr(), level_g.get_block_width(), level_g.get_block_height(), level_info.m_width, level_info.m_height)) + { + error_printf("Failed writing to .ASTC file \"%s\"\n", astc_filename.c_str()); + return false; + } + printf("Wrote .ASTC file \"%s\"\n", astc_filename.c_str()); + } + + // Save .OUT + if ((transcoder_tex_fmt == basist::transcoder_texture_format::cTFFXT1_RGB) && (opts.m_write_out)) + { + std::string out_filename; + if (gi.size() > 1) + out_filename = base_filename + string_format("_unpacked_rgb_%s_level_%u_face_%u_layer_%04u.out", basist::basis_get_format_name(transcoder_tex_fmt), level_index, face_index, layer_index); + else + out_filename = base_filename + string_format("_unpacked_rgb_%s_face_%u_layer_%04u.out", basist::basis_get_format_name(transcoder_tex_fmt), face_index, layer_index); + if (!write_3dfx_out_file(out_filename.c_str(), gi[level_index])) + { + error_printf("Failed writing to OUT file \"%s\"\n", out_filename.c_str()); + return false; + } + printf("Wrote .OUT file \"%s\"\n", out_filename.c_str()); + } + } // is_hdr } // level_index @@ -2237,7 +2687,7 @@ static bool unpack_and_validate_ktx2_file( } // format_iter - if ((opts.m_format_only == -1) && (!validate_flag)) + if ((opts.m_unpack_format_only == -1) && (!validate_flag)) { if (is_hdr) { @@ -2267,7 +2717,7 @@ static bool unpack_and_validate_ktx2_file( interval_timer tm; tm.start(); - if (!dec.transcode_image_level(level_index, layer_index, face_index, half_img.data(), total_pixels, transcoder_tex_fmt, 0)) + if (!dec.transcode_image_level(level_index, layer_index, face_index, half_img.data(), total_pixels, transcoder_tex_fmt, 
transcode_flags)) { fmt_error_printf("Failed transcoding image level ({} {} {})!\n", layer_index, level_index, face_index); return false; @@ -2275,8 +2725,8 @@ static bool unpack_and_validate_ktx2_file( double total_transcode_time = tm.get_elapsed_ms(); - fmt_printf("Transcode of level {} layer {} face {} res {}x{} format {} succeeded in {} ms\n", - level_index, layer_index, face_index, + fmt_printf("Transcode of level {} layer {} face {} res {}x{} format {} succeeded in {} ms\n", + level_index, layer_index, face_index, level_info.m_orig_width, level_info.m_orig_height, basist::basis_get_format_name(transcoder_tex_fmt), total_transcode_time); if ((!validate_flag) && (!opts.m_ktx_only)) @@ -2299,7 +2749,7 @@ static bool unpack_and_validate_ktx2_file( } } // face_index - } // layer_index + } // layer_index } // level_index // RGB HALF @@ -2328,7 +2778,7 @@ static bool unpack_and_validate_ktx2_file( interval_timer tm; tm.start(); - if (!dec.transcode_image_level(level_index, layer_index, face_index, half_img.data(), total_pixels, transcoder_tex_fmt, 0)) + if (!dec.transcode_image_level(level_index, layer_index, face_index, half_img.data(), total_pixels, transcoder_tex_fmt, transcode_flags)) { fmt_error_printf("Failed transcoding image level ({} {} {})!\n", layer_index, level_index, face_index); return false; @@ -2360,10 +2810,10 @@ static bool unpack_and_validate_ktx2_file( } } // face_index - } // layer_index + } // layer_index } // level_index - // RGB HALF + // RGB_9E5 for (uint32_t level_index = 0; level_index < dec.get_levels(); level_index++) { for (uint32_t layer_index = 0; layer_index < total_layers; layer_index++) @@ -2389,7 +2839,7 @@ static bool unpack_and_validate_ktx2_file( interval_timer tm; tm.start(); - if (!dec.transcode_image_level(level_index, layer_index, face_index, rgb9e5_img.data(), total_pixels, transcoder_tex_fmt, 0)) + if (!dec.transcode_image_level(level_index, layer_index, face_index, rgb9e5_img.data(), total_pixels, transcoder_tex_fmt, 
transcode_flags)) { fmt_error_printf("Failed transcoding image level ({} {} {})!\n", layer_index, level_index, face_index); return false; @@ -2420,140 +2870,424 @@ static bool unpack_and_validate_ktx2_file( } } // face_index - } // layer_index + } // layer_index } // level_index - + } else { - // TODO: Add LDR uncompressed formats - } - } + // RGBA 32bpp + for (uint32_t level_index = 0; level_index < dec.get_levels(); level_index++) + { + for (uint32_t layer_index = 0; layer_index < total_layers; layer_index++) + { + for (uint32_t face_index = 0; face_index < dec.get_faces(); face_index++) + { + const basist::transcoder_texture_format transcoder_tex_fmt = basist::transcoder_texture_format::cTFRGBA32; - return true; -} + basist::ktx2_image_level_info level_info; -static bool unpack_and_validate_basis_file( - uint32_t file_index, - const std::string &base_filename, - uint8_vec &basis_file_data, - command_line_params& opts, - FILE *pCSV_file, - basis_data* pGlobal_codebook_data, - uint32_t &total_unpack_warnings, - uint32_t &total_pvrtc_nonpow2_warnings) -{ - const bool validate_flag = (opts.m_mode == cValidate); + if (!dec.get_image_level_info(level_info, level_index, layer_index, face_index)) + { + fmt_error_printf("Failed retrieving image level information ({} {} {})!\n", layer_index, level_index, face_index); + return false; + } - basist::basisu_transcoder dec; + const uint32_t total_pixels = level_info.m_orig_width * level_info.m_orig_height; - if (pGlobal_codebook_data) - { - dec.set_global_codebooks(&pGlobal_codebook_data->m_transcoder.get_lowlevel_etc1s_decoder()); - } + image img(level_info.m_orig_width, level_info.m_orig_height); - if (!opts.m_fuzz_testing) - { - // Skip the full validation, which CRC16's the entire file. 
+ fill_buffer_with_random_bytes(img.get_ptr(), img.get_total_pixels() * sizeof(color_rgba)); - // Validate the file - note this isn't necessary for transcoding - if (!dec.validate_file_checksums(&basis_file_data[0], (uint32_t)basis_file_data.size(), true)) - { - error_printf("File version is unsupported, or file failed one or more CRC checks!\n"); + interval_timer tm; + tm.start(); - return false; - } - } + if (!dec.transcode_image_level(level_index, layer_index, face_index, img.get_ptr(), total_pixels, transcoder_tex_fmt, transcode_flags)) + { + fmt_error_printf("Failed transcoding image level ({} {} {})!\n", layer_index, level_index, face_index); + return false; + } - printf("File version and CRC checks succeeded\n"); + double total_transcode_time = tm.get_elapsed_ms(); - basist::basisu_file_info fileinfo; - if (!dec.get_file_info(&basis_file_data[0], (uint32_t)basis_file_data.size(), fileinfo)) - { - error_printf("Failed retrieving Basis file information!\n"); - return false; - } + fmt_printf("Transcode of level {} layer {} face {} res {}x{} format {} succeeded in {} ms\n", + level_index, layer_index, face_index, + level_info.m_orig_width, level_info.m_orig_height, basist::basis_get_format_name(transcoder_tex_fmt), total_transcode_time); - assert(fileinfo.m_total_images == fileinfo.m_image_mipmap_levels.size()); - assert(fileinfo.m_total_images == dec.get_total_images(&basis_file_data[0], (uint32_t)basis_file_data.size())); + if ((!validate_flag) && (!opts.m_ktx_only)) + { + std::string rgba_filename(base_filename + fmt_string("_unpacked_rgba_{}_level_{}_face_{}_layer{04}.png", basist::basis_get_format_name(transcoder_tex_fmt), level_index, face_index, layer_index)); + if (!save_png(rgba_filename, img, cImageSaveIgnoreAlpha)) + { + error_printf("Failed writing to .PNG file \"%s\"\n", rgba_filename.c_str()); + return false; + } - printf("File info:\n"); - printf(" Version: %X\n", fileinfo.m_version); - printf(" Total header size: %u\n", 
fileinfo.m_total_header_size); - printf(" Total selectors: %u\n", fileinfo.m_total_selectors); - printf(" Selector codebook size: %u\n", fileinfo.m_selector_codebook_size); - printf(" Total endpoints: %u\n", fileinfo.m_total_endpoints); - printf(" Endpoint codebook size: %u\n", fileinfo.m_endpoint_codebook_size); - printf(" Tables size: %u\n", fileinfo.m_tables_size); - printf(" Slices size: %u\n", fileinfo.m_slices_size); - fmt_printf(" Block Dimensions: {}x{}\n", fileinfo.m_block_width, fileinfo.m_block_height); + std::string rgb_filename(base_filename + fmt_string("_unpacked_rgb_{}_level_{}_face_{}_layer{04}.png", basist::basis_get_format_name(transcoder_tex_fmt), level_index, face_index, layer_index)); + if (!save_png(rgb_filename, img, cImageSaveIgnoreAlpha)) + { + error_printf("Failed writing to .PNG file \"%s\"\n", rgb_filename.c_str()); + return false; + } + printf("Wrote .PNG file \"%s\"\n", rgb_filename.c_str()); - bool is_hdr = false; + std::string a_filename(base_filename + fmt_string("_unpacked_a_{}_{}_{}_{04}.png", basist::basis_get_format_name(transcoder_tex_fmt), level_index, face_index, layer_index)); + if (!save_png(a_filename, img, cImageSaveGrayscale, 3)) + { + error_printf("Failed writing to .PNG file \"%s\"\n", a_filename.c_str()); + return false; + } + printf("Wrote .PNG file \"%s\"\n", a_filename.c_str()); + } - const char* pFmt_str = nullptr; - switch (fileinfo.m_tex_format) - { - case basist::basis_tex_format::cETC1S: - { - pFmt_str = "ETC1S"; - break; - } - case basist::basis_tex_format::cUASTC4x4: - { - pFmt_str = "UASTC_LDR_4x4"; - break; - } - case basist::basis_tex_format::cUASTC_HDR_4x4: - { - is_hdr = true; - pFmt_str = "UASTC_HDR_4x4"; - break; - } - case basist::basis_tex_format::cASTC_HDR_6x6: - { - is_hdr = true; - pFmt_str = "ASTC_HDR_6x6"; - break; - } - case basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE: - { - is_hdr = true; - pFmt_str = "ASTC_HDR_6x6_INTERMEDIATE"; - break; - } - default: - { - assert(0); - return 
false; - } - } + } // face_index + } // layer_index + } // level_index - fmt_printf(" Texture format: {}\n", pFmt_str); + // RGB565 + for (uint32_t level_index = 0; level_index < dec.get_levels(); level_index++) + { + for (uint32_t layer_index = 0; layer_index < total_layers; layer_index++) + { + for (uint32_t face_index = 0; face_index < dec.get_faces(); face_index++) + { + const basist::transcoder_texture_format transcoder_tex_fmt = basist::transcoder_texture_format::cTFRGB565; - printf(" Texture type: %s\n", basist::basis_get_texture_type_name(fileinfo.m_tex_type)); - printf(" us per frame: %u (%f fps)\n", fileinfo.m_us_per_frame, fileinfo.m_us_per_frame ? (1.0f / ((float)fileinfo.m_us_per_frame / 1000000.0f)) : 0.0f); - printf(" Total slices: %u\n", (uint32_t)fileinfo.m_slice_info.size()); - printf(" Total images: %i\n", fileinfo.m_total_images); - printf(" Y Flipped: %u, Has alpha slices: %u\n", fileinfo.m_y_flipped, fileinfo.m_has_alpha_slices); - printf(" userdata0: 0x%X userdata1: 0x%X\n", fileinfo.m_userdata0, fileinfo.m_userdata1); - printf(" Per-image mipmap levels: "); - for (uint32_t i = 0; i < fileinfo.m_total_images; i++) - printf("%u ", fileinfo.m_image_mipmap_levels[i]); - printf("\n"); + basist::ktx2_image_level_info level_info; - uint32_t total_texels = 0; + if (!dec.get_image_level_info(level_info, level_index, layer_index, face_index)) + { + fmt_error_printf("Failed retrieving image level information ({} {} {})!\n", layer_index, level_index, face_index); + return false; + } - printf("\nImage info:\n"); - for (uint32_t i = 0; i < fileinfo.m_total_images; i++) - { - basist::basisu_image_info ii; - if (!dec.get_image_info(&basis_file_data[0], (uint32_t)basis_file_data.size(), ii, i)) - { - error_printf("get_image_info() failed!\n"); - return false; - } + const uint32_t total_pixels = level_info.m_orig_width * level_info.m_orig_height; + + basisu::vector packed_img(total_pixels); + + fill_buffer_with_random_bytes(packed_img.get_ptr(), 
packed_img.size_in_bytes()); - printf("Image %u: MipLevels: %u OrigDim: %ux%u, BlockDim: %ux%u, FirstSlice: %u, HasAlpha: %u\n", i, ii.m_total_levels, ii.m_orig_width, ii.m_orig_height, + interval_timer tm; + tm.start(); + + if (!dec.transcode_image_level(level_index, layer_index, face_index, packed_img.get_ptr(), total_pixels, transcoder_tex_fmt, transcode_flags)) + { + fmt_error_printf("Failed transcoding image level ({} {} {})!\n", layer_index, level_index, face_index); + return false; + } + + double total_transcode_time = tm.get_elapsed_ms(); + + image img(level_info.m_orig_width, level_info.m_orig_height); + + for (uint32_t y = 0; y < level_info.m_orig_height; y++) + { + for (uint32_t x = 0; x < level_info.m_orig_width; x++) + { + const uint16_t p = packed_img[x + y * level_info.m_orig_width]; + uint32_t r = p >> 11, g = (p >> 5) & 63, b = p & 31; + r = (r << 3) | (r >> 2); + g = (g << 2) | (g >> 4); + b = (b << 3) | (b >> 2); + img(x, y).set(r, g, b, 255); + } + } + + fmt_printf("Transcode of level {} layer {} face {} res {}x{} format {} succeeded in {} ms\n", + level_index, layer_index, face_index, + level_info.m_orig_width, level_info.m_orig_height, basist::basis_get_format_name(transcoder_tex_fmt), total_transcode_time); + + if ((!validate_flag) && (!opts.m_ktx_only)) + { + std::string rgb_filename(base_filename + fmt_string("_unpacked_rgb_{}_level_{}_face_{}_layer{04}.png", basist::basis_get_format_name(transcoder_tex_fmt), level_index, face_index, layer_index)); + if (!save_png(rgb_filename, img, cImageSaveIgnoreAlpha)) + { + error_printf("Failed writing to .PNG file \"%s\"\n", rgb_filename.c_str()); + return false; + } + printf("Wrote .PNG file \"%s\"\n", rgb_filename.c_str()); + + } + + } // face_index + } // layer_index + } // level_index + + // RGBA4444 + for (uint32_t level_index = 0; level_index < dec.get_levels(); level_index++) + { + for (uint32_t layer_index = 0; layer_index < total_layers; layer_index++) + { + for (uint32_t face_index = 0; 
face_index < dec.get_faces(); face_index++) + { + const basist::transcoder_texture_format transcoder_tex_fmt = basist::transcoder_texture_format::cTFRGBA4444; + + basist::ktx2_image_level_info level_info; + + if (!dec.get_image_level_info(level_info, level_index, layer_index, face_index)) + { + fmt_error_printf("Failed retrieving image level information ({} {} {})!\n", layer_index, level_index, face_index); + return false; + } + + const uint32_t total_pixels = level_info.m_orig_width * level_info.m_orig_height; + + basisu::vector packed_img(total_pixels); + + fill_buffer_with_random_bytes(packed_img.get_ptr(), packed_img.size_in_bytes()); + + interval_timer tm; + tm.start(); + + if (!dec.transcode_image_level(level_index, layer_index, face_index, packed_img.get_ptr(), total_pixels, transcoder_tex_fmt, transcode_flags)) + { + fmt_error_printf("Failed transcoding image level ({} {} {})!\n", layer_index, level_index, face_index); + return false; + } + + double total_transcode_time = tm.get_elapsed_ms(); + + image img(level_info.m_orig_width, level_info.m_orig_height); + + for (uint32_t y = 0; y < level_info.m_orig_height; y++) + { + for (uint32_t x = 0; x < level_info.m_orig_width; x++) + { + const uint16_t p = packed_img[x + y * level_info.m_orig_width]; + uint32_t r = p >> 12, g = (p >> 8) & 15, b = (p >> 4) & 15, a = p & 15; + r = (r << 4) | r; + g = (g << 4) | g; + b = (b << 4) | b; + a = (a << 4) | a; + img(x, y).set(r, g, b, a); + } + } + + fmt_printf("Transcode of level {} layer {} face {} res {}x{} format {} succeeded in {} ms\n", + level_index, layer_index, face_index, + level_info.m_orig_width, level_info.m_orig_height, basist::basis_get_format_name(transcoder_tex_fmt), total_transcode_time); + + if ((!validate_flag) && (!opts.m_ktx_only)) + { + std::string rgba_filename(base_filename + fmt_string("_unpacked_rgba_{}_level_{}_face_{}_layer{04}.png", basist::basis_get_format_name(transcoder_tex_fmt), level_index, face_index, layer_index)); + if 
(!save_png(rgba_filename, img)) + { + error_printf("Failed writing to .PNG file \"%s\"\n", rgba_filename.c_str()); + return false; + } + + std::string rgb_filename(base_filename + fmt_string("_unpacked_rgb_{}_level_{}_face_{}_layer{04}.png", basist::basis_get_format_name(transcoder_tex_fmt), level_index, face_index, layer_index)); + if (!save_png(rgb_filename, img, cImageSaveIgnoreAlpha)) + { + error_printf("Failed writing to .PNG file \"%s\"\n", rgb_filename.c_str()); + return false; + } + printf("Wrote .PNG file \"%s\"\n", rgb_filename.c_str()); + + std::string a_filename(base_filename + fmt_string("_unpacked_a_{}_{}_{}_{04}.png", basist::basis_get_format_name(transcoder_tex_fmt), level_index, face_index, layer_index)); + if (!save_png(a_filename, img, cImageSaveGrayscale, 3)) + { + error_printf("Failed writing to .PNG file \"%s\"\n", a_filename.c_str()); + return false; + } + printf("Wrote .PNG file \"%s\"\n", a_filename.c_str()); + } + + } // face_index + } // layer_index + } // level_index + } + } + + return true; +} + +static bool unpack_and_validate_basis_file( + uint32_t file_index, + const std::string &base_filename, + uint8_vec &basis_file_data, + command_line_params& opts, + FILE *pCSV_file, + basis_data* pGlobal_codebook_data, + uint32_t &total_unpack_warnings, + uint32_t &total_pvrtc_nonpow2_warnings) +{ + const bool validate_flag = (opts.m_mode == cValidate); + + basist::basisu_transcoder dec; + + if (pGlobal_codebook_data) + { + dec.set_global_codebooks(&pGlobal_codebook_data->m_transcoder.get_lowlevel_etc1s_decoder()); + } + + if (!opts.m_fuzz_testing) + { + // Skip the full validation, which CRC16's the entire file. 
+ + // Validate the file - note this isn't necessary for transcoding + if (!dec.validate_file_checksums(&basis_file_data[0], (uint32_t)basis_file_data.size(), true)) + { + error_printf("File version is unsupported, or file failed one or more CRC checks!\n"); + + return false; + } + } + + printf("File version and CRC checks succeeded\n"); + + basist::basisu_file_info fileinfo; + if (!dec.get_file_info(&basis_file_data[0], (uint32_t)basis_file_data.size(), fileinfo)) + { + error_printf("Failed retrieving Basis file information!\n"); + return false; + } + + assert(fileinfo.m_total_images == fileinfo.m_image_mipmap_levels.size()); + assert(fileinfo.m_total_images == dec.get_total_images(&basis_file_data[0], (uint32_t)basis_file_data.size())); + + printf("File info:\n"); + printf(" Version: %X\n", fileinfo.m_version); + printf(" Total header size: %u\n", fileinfo.m_total_header_size); + printf(" Total selectors: %u\n", fileinfo.m_total_selectors); + printf(" Selector codebook size: %u\n", fileinfo.m_selector_codebook_size); + printf(" Total endpoints: %u\n", fileinfo.m_total_endpoints); + printf(" Endpoint codebook size: %u\n", fileinfo.m_endpoint_codebook_size); + printf(" Tables size: %u\n", fileinfo.m_tables_size); + printf(" Slices size: %u\n", fileinfo.m_slices_size); + fmt_printf(" Block Dimensions: {}x{}\n", fileinfo.m_block_width, fileinfo.m_block_height); + + bool is_hdr = false; + + std::string fmt_str_temp; + + const char* pFmt_str = nullptr; + switch (fileinfo.m_tex_format) + { + case basist::basis_tex_format::cETC1S: + { + pFmt_str = "ETC1S"; + break; + } + case basist::basis_tex_format::cUASTC_LDR_4x4: + { + pFmt_str = "UASTC_LDR_4x4"; + break; + } + case basist::basis_tex_format::cUASTC_HDR_4x4: + { + is_hdr = true; + pFmt_str = "UASTC_HDR_4x4"; + break; + } + case basist::basis_tex_format::cASTC_HDR_6x6: + { + is_hdr = true; + pFmt_str = "ASTC_HDR_6x6"; + break; + } + case basist::basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE: + { + is_hdr = true; + 
pFmt_str = "UASTC_HDR_6x6_INTERMEDIATE"; + break; + } + case basist::basis_tex_format::cXUASTC_LDR_4x4: + case basist::basis_tex_format::cXUASTC_LDR_5x4: + case basist::basis_tex_format::cXUASTC_LDR_5x5: + case basist::basis_tex_format::cXUASTC_LDR_6x5: + case basist::basis_tex_format::cXUASTC_LDR_6x6: + case basist::basis_tex_format::cXUASTC_LDR_8x5: + case basist::basis_tex_format::cXUASTC_LDR_8x6: + case basist::basis_tex_format::cXUASTC_LDR_10x5: + case basist::basis_tex_format::cXUASTC_LDR_10x6: + case basist::basis_tex_format::cXUASTC_LDR_8x8: + case basist::basis_tex_format::cXUASTC_LDR_10x8: + case basist::basis_tex_format::cXUASTC_LDR_10x10: + case basist::basis_tex_format::cXUASTC_LDR_12x10: + case basist::basis_tex_format::cXUASTC_LDR_12x12: + { + uint32_t block_width = 0, block_height = 0; + basist::get_basis_tex_format_block_size(fileinfo.m_tex_format, block_width, block_height); + fmt_str_temp = fmt_string("XUASTC_LDR_{}x{}", block_width, block_height); + pFmt_str = fmt_str_temp.c_str(); + break; + } + case basist::basis_tex_format::cASTC_LDR_4x4: + case basist::basis_tex_format::cASTC_LDR_5x4: + case basist::basis_tex_format::cASTC_LDR_5x5: + case basist::basis_tex_format::cASTC_LDR_6x5: + case basist::basis_tex_format::cASTC_LDR_6x6: + case basist::basis_tex_format::cASTC_LDR_8x5: + case basist::basis_tex_format::cASTC_LDR_8x6: + case basist::basis_tex_format::cASTC_LDR_10x5: + case basist::basis_tex_format::cASTC_LDR_10x6: + case basist::basis_tex_format::cASTC_LDR_8x8: + case basist::basis_tex_format::cASTC_LDR_10x8: + case basist::basis_tex_format::cASTC_LDR_10x10: + case basist::basis_tex_format::cASTC_LDR_12x10: + case basist::basis_tex_format::cASTC_LDR_12x12: + { + uint32_t block_width = 0, block_height = 0; + basist::get_basis_tex_format_block_size(fileinfo.m_tex_format, block_width, block_height); + fmt_str_temp = fmt_string("ASTC_LDR_{}x{}", block_width, block_height); + pFmt_str = fmt_str_temp.c_str(); + break; + } + default: + { + 
assert(0); + return false; + } + } + + fmt_printf(" Texture format: {}\n", pFmt_str); + + printf(" Texture type: %s\n", basist::basis_get_texture_type_name(fileinfo.m_tex_type)); + printf(" us per frame: %u (%f fps)\n", fileinfo.m_us_per_frame, fileinfo.m_us_per_frame ? (1.0f / ((float)fileinfo.m_us_per_frame / 1000000.0f)) : 0.0f); + printf(" Total slices: %u\n", (uint32_t)fileinfo.m_slice_info.size()); + printf(" Total images: %i\n", fileinfo.m_total_images); + printf(" Y Flipped: %u, Has alpha slices: %u, sRGB: %u\n", fileinfo.m_y_flipped, fileinfo.m_has_alpha_slices, fileinfo.m_srgb); + printf(" userdata0: 0x%X userdata1: 0x%X\n", fileinfo.m_userdata0, fileinfo.m_userdata1); + printf(" Per-image mipmap levels: "); + for (uint32_t i = 0; i < fileinfo.m_total_images; i++) + printf("%u ", fileinfo.m_image_mipmap_levels[i]); + printf("\n"); + + // the sRGB transfer function to use while astc unpacking (we want this to ideally match what we used during astc encoding) + bool srgb_transfer_func_astc_unpacking = fileinfo.m_srgb; + + // the sRGB transfer function to use when writing out files (we want to indicate to the caller if the data is sRGB or linear) + bool srgb_transfer_func_astc_writing = fileinfo.m_srgb; + + const bool is_etc1s = (fileinfo.m_tex_format == basist::basis_tex_format::cETC1S); + const bool is_uastc_ldr_4x4 = (fileinfo.m_tex_format == basist::basis_tex_format::cUASTC_LDR_4x4); + if ((is_etc1s) || (is_uastc_ldr_4x4)) + { + // The ETC1S and UASTC LDR 4x4 transcoders supply ASTC LDR 4x4 data assuming the decoder will NOT be using the sRGB read decode profile, which is likely the most common case (in geospatial rendering scenarios). + // Note XUASTC/UASTC LDR 4x4-12x12 supports both linear and sRGB decode profiles throughout the entire pipeline (encoding/transcoding/decoding to raw pixels). + srgb_transfer_func_astc_unpacking = false; + + // This matches the behavior of our original tools. 
It ensures astcenc uses linear by default when reading our transcoded .KTX files. + srgb_transfer_func_astc_writing = false; + + if (fileinfo.m_srgb) + printf("Note: ETC1S/UASTC LDR 4x4 will always be decoded by this tool using the ASTC linear decode profile, regardless of the KTX2/.basis DFD transfer function field.\n"); + } + + uint32_t total_texels = 0; + + printf("\nImage info:\n"); + for (uint32_t i = 0; i < fileinfo.m_total_images; i++) + { + basist::basisu_image_info ii; + if (!dec.get_image_info(&basis_file_data[0], (uint32_t)basis_file_data.size(), ii, i)) + { + error_printf("get_image_info() failed!\n"); + return false; + } + + printf("Image %u: MipLevels: %u OrigDim: %ux%u, BlockDim: %ux%u, FirstSlice: %u, HasAlpha: %u\n", i, ii.m_total_levels, ii.m_orig_width, ii.m_orig_height, ii.m_num_blocks_x, ii.m_num_blocks_y, ii.m_first_slice_index, (uint32_t)ii.m_alpha_flag); total_texels += ii.m_width * ii.m_height; @@ -2623,16 +3357,16 @@ static bool unpack_and_validate_basis_file( printf("start_transcoding time: %3.3f ms\n", start_transcoding_time_ms); basisu::vector< gpu_image_vec > gpu_images[(int)basist::transcoder_texture_format::cTFTotalTextureFormats]; - + double total_format_transcoding_time_ms[(int)basist::transcoder_texture_format::cTFTotalTextureFormats]; clear_obj(total_format_transcoding_time_ms); int first_format = 0; int last_format = (int)basist::transcoder_texture_format::cTFTotalTextureFormats; - if (opts.m_format_only > -1) + if (opts.m_unpack_format_only > -1) { - first_format = opts.m_format_only; + first_format = opts.m_unpack_format_only; last_format = first_format + 1; } @@ -2675,6 +3409,8 @@ static bool unpack_and_validate_basis_file( gpu_images[(int)tex_fmt][image_index].resize(fileinfo.m_image_mipmap_levels[image_index]); } + uint32_t transcode_flags = get_transcode_flags_from_options(opts); + // Now transcode the file to all supported texture formats and save mipmapped KTX files for (int format_iter = first_format; format_iter < 
last_format; format_iter++) { @@ -2701,30 +3437,30 @@ static bool unpack_and_validate_basis_file( if ((transcoder_tex_fmt == basist::transcoder_texture_format::cTFPVRTC1_4_RGB) || (transcoder_tex_fmt == basist::transcoder_texture_format::cTFPVRTC1_4_RGBA)) { - if (!is_pow2(level_info.m_width) || !is_pow2(level_info.m_height)) + if (!is_pow2(level_info.m_orig_width) || !is_pow2(level_info.m_orig_height)) { total_pvrtc_nonpow2_warnings++; printf("Warning: Will not transcode image %u level %u res %ux%u to PVRTC1 (one or more dimension is not a power of 2)\n", image_index, level_index, level_info.m_width, level_info.m_height); - // Can't transcode this image level to PVRTC because it's not a pow2 (we're going to support transcoding non-pow2 to the next larger pow2 soon) + // Can't transcode this image level to PVRTC because it's not a pow2 (we're going to support transcoding non-pow2 to the next "larger" pow2 soon) continue; } } basisu::texture_format tex_fmt = basis_get_basisu_texture_format(transcoder_tex_fmt); + fmt_printf("Transcoding format: {}\n", (uint32_t)tex_fmt); + gpu_image& gi = gpu_images[(int)transcoder_tex_fmt][image_index][level_index]; gi.init(tex_fmt, level_info.m_orig_width, level_info.m_orig_height); // Fill the buffer with psuedo-random bytes, to help more visibly detect cases where the transcoder fails to write to part of the output. 
fill_buffer_with_random_bytes(gi.get_ptr(), gi.get_size_in_bytes()); - - uint32_t decode_flags = basist::cDecodeFlagsHighQuality; - + tm.start(); - if (!dec.transcode_image_level(&basis_file_data[0], (uint32_t)basis_file_data.size(), image_index, level_index, gi.get_ptr(), gi.get_total_blocks(), transcoder_tex_fmt, decode_flags)) + if (!dec.transcode_image_level(&basis_file_data[0], (uint32_t)basis_file_data.size(), image_index, level_index, gi.get_ptr(), gi.get_total_blocks(), transcoder_tex_fmt, transcode_flags)) { error_printf("Failed transcoding image level (%u %u %u)!\n", image_index, level_index, format_iter); return false; @@ -2742,9 +3478,9 @@ static bool unpack_and_validate_basis_file( } // image_info - // Upack UASTC files seperately, to validate we can transcode slices to UASTC and unpack them to pixels. - // This is a special path because UASTC is not yet a valid transcoder_texture_format, but a lower-level block_format. - if (fileinfo.m_tex_format == basist::basis_tex_format::cUASTC4x4) + // Upack UASTC LDR 4x4 files seperately, to validate we can transcode slices to UASTC LDR 4x4 and unpack them to pixels. + // This is a special path because UASTC LDR 4x4 is not yet a valid transcoder_texture_format, but a lower-level block_format. + if (fileinfo.m_tex_format == basist::basis_tex_format::cUASTC_LDR_4x4) { for (uint32_t image_index = 0; image_index < fileinfo.m_total_images; image_index++) { @@ -2763,12 +3499,12 @@ static bool unpack_and_validate_basis_file( // Fill the buffer with psuedo-random bytes, to help more visibly detect cases where the transcoder fails to write to part of the output. 
fill_buffer_with_random_bytes(gi.get_ptr(), gi.get_size_in_bytes()); - + tm.start(); if (!dec.transcode_slice( - &basis_file_data[0], (uint32_t)basis_file_data.size(), - level_info.m_first_slice_index, gi.get_ptr(), gi.get_total_blocks(), basist::block_format::cUASTC_4x4, gi.get_bytes_per_block())) + &basis_file_data[0], (uint32_t)basis_file_data.size(), + level_info.m_first_slice_index, gi.get_ptr(), gi.get_total_blocks(), basist::block_format::cUASTC_4x4, gi.get_bytes_per_block(), transcode_flags)) { error_printf("Failed transcoding image level (%u %u) to UASTC!\n", image_index, level_index); return false; @@ -2781,9 +3517,9 @@ static bool unpack_and_validate_basis_file( if ((!validate_flag) && (!opts.m_ktx_only)) { image u; - if (!gi.unpack(u)) + if (!gi.unpack(u, srgb_transfer_func_astc_unpacking)) { - error_printf("Warning: Failed unpacking GPU texture data (%u %u) to UASTC. \n", image_index, level_index); + error_printf("Warning: Failed unpacking GPU texture data (%u %u). \n", image_index, level_index); return false; } //u.crop(level_info.m_orig_width, level_info.m_orig_height); @@ -2835,6 +3571,8 @@ static bool unpack_and_validate_basis_file( if (transcoder_tex_fmt == basist::transcoder_texture_format::cTFBC7_ALT) continue; + const bool is_fmt_astc = basis_is_transcoder_texture_format_astc(transcoder_tex_fmt); + if ((!opts.m_no_ktx) && (fileinfo.m_tex_type == basist::cBASISTexTypeCubemapArray)) { // No KTX tool that we know of supports cubemap arrays, so write individual cubemap files. 
@@ -2844,9 +3582,10 @@ static bool unpack_and_validate_basis_file( for (uint32_t i = 0; i < 6; i++) cubemap.push_back(gpu_images[format_iter][image_index + i]); + // KTX1 { std::string ktx_filename(base_filename + string_format("_transcoded_cubemap_%s_%u.ktx", basist::basis_get_format_name(transcoder_tex_fmt), image_index / 6)); - if (!write_compressed_texture_file(ktx_filename.c_str(), cubemap, true, true)) + if (!write_compressed_texture_file(ktx_filename.c_str(), cubemap, true, is_fmt_astc ? srgb_transfer_func_astc_writing : fileinfo.m_srgb)) { error_printf("Failed writing KTX file \"%s\"!\n", ktx_filename.c_str()); return false; @@ -2854,10 +3593,11 @@ static bool unpack_and_validate_basis_file( printf("Wrote .KTX file \"%s\"\n", ktx_filename.c_str()); } + // DDS if (does_dds_support_format(cubemap[0][0].get_format())) { std::string dds_filename(base_filename + string_format("_transcoded_cubemap_%s_%u.dds", basist::basis_get_format_name(transcoder_tex_fmt), image_index / 6)); - if (!write_compressed_texture_file(dds_filename.c_str(), cubemap, true, true)) + if (!write_compressed_texture_file(dds_filename.c_str(), cubemap, true, fileinfo.m_srgb)) { error_printf("Failed writing DDS file \"%s\"!\n", dds_filename.c_str()); return false; @@ -2884,9 +3624,10 @@ static bool unpack_and_validate_basis_file( if ((!opts.m_no_ktx) && (fileinfo.m_tex_type != basist::cBASISTexTypeCubemapArray)) { + // KTX1 { std::string ktx_filename(base_filename + string_format("_transcoded_%s_%04u.ktx", basist::basis_get_format_name(transcoder_tex_fmt), image_index)); - if (!write_compressed_texture_file(ktx_filename.c_str(), gi, true)) + if (!write_compressed_texture_file(ktx_filename.c_str(), gi, is_fmt_astc ? 
srgb_transfer_func_astc_writing : fileinfo.m_srgb)) { error_printf("Failed writing KTX file \"%s\"!\n", ktx_filename.c_str()); return false; @@ -2894,10 +3635,11 @@ static bool unpack_and_validate_basis_file( printf("Wrote .KTX file \"%s\"\n", ktx_filename.c_str()); } + // DDS if (does_dds_support_format(gi[0].get_format())) { std::string dds_filename(base_filename + string_format("_transcoded_%s_%04u.dds", basist::basis_get_format_name(transcoder_tex_fmt), image_index)); - if (!write_compressed_texture_file(dds_filename.c_str(), gi, true)) + if (!write_compressed_texture_file(dds_filename.c_str(), gi, fileinfo.m_srgb)) { error_printf("Failed writing DDS file \"%s\"!\n", dds_filename.c_str()); return false; @@ -2946,7 +3688,7 @@ static bool unpack_and_validate_basis_file( else { image u; - if (!gi[level_index].unpack(u)) + if (!gi[level_index].unpack(u, srgb_transfer_func_astc_unpacking)) { printf("Warning: Failed unpacking GPU texture data (%u %u %u). Unpacking as much as possible.\n", format_iter, image_index, level_index); total_unpack_warnings++; @@ -3012,7 +3754,7 @@ static bool unpack_and_validate_basis_file( } printf("Wrote .PNG file \"%s\"\n", rgba_filename.c_str()); } - + } // is_hdr } // level_index @@ -3026,7 +3768,7 @@ static bool unpack_and_validate_basis_file( uint32_t max_mipmap_levels = 0; //if (!opts.m_etc1_only) - if ((opts.m_format_only == -1) && (!validate_flag)) + if ((opts.m_unpack_format_only == -1) && (!validate_flag)) { if (is_hdr) { @@ -3052,8 +3794,8 @@ static bool unpack_and_validate_basis_file( tm.start(); - if (!dec.transcode_image_level(&basis_file_data[0], (uint32_t)basis_file_data.size(), image_index, level_index, - half_img.get_ptr(), total_pixels, transcoder_tex_fmt, 0, level_info.m_orig_width, nullptr, level_info.m_orig_height)) + if (!dec.transcode_image_level(&basis_file_data[0], (uint32_t)basis_file_data.size(), image_index, level_index, + half_img.get_ptr(), total_pixels, transcoder_tex_fmt, transcode_flags, 
level_info.m_orig_width, nullptr, level_info.m_orig_height)) { error_printf("Failed transcoding image level (%u %u %u)!\n", image_index, level_index, transcoder_tex_fmt); return false; @@ -3110,7 +3852,7 @@ static bool unpack_and_validate_basis_file( tm.start(); if (!dec.transcode_image_level(&basis_file_data[0], (uint32_t)basis_file_data.size(), image_index, level_index, - half_img.get_ptr(), total_pixels, transcoder_tex_fmt, 0, level_info.m_orig_width, nullptr, level_info.m_orig_height)) + half_img.get_ptr(), total_pixels, transcoder_tex_fmt, transcode_flags, level_info.m_orig_width, nullptr, level_info.m_orig_height)) { error_printf("Failed transcoding image level (%u %u %u)!\n", image_index, level_index, transcoder_tex_fmt); return false; @@ -3167,7 +3909,7 @@ static bool unpack_and_validate_basis_file( tm.start(); if (!dec.transcode_image_level(&basis_file_data[0], (uint32_t)basis_file_data.size(), image_index, level_index, - rgb9e5_img.get_ptr(), total_pixels, transcoder_tex_fmt, 0, level_info.m_orig_width, nullptr, level_info.m_orig_height)) + rgb9e5_img.get_ptr(), total_pixels, transcoder_tex_fmt, transcode_flags, level_info.m_orig_width, nullptr, level_info.m_orig_height)) { error_printf("Failed transcoding image level (%u %u %u)!\n", image_index, level_index, transcoder_tex_fmt); return false; @@ -3199,8 +3941,6 @@ static bool unpack_and_validate_basis_file( } // level_index } // image_index - - } else { @@ -3225,7 +3965,7 @@ static bool unpack_and_validate_basis_file( tm.start(); - if (!dec.transcode_image_level(&basis_file_data[0], (uint32_t)basis_file_data.size(), image_index, level_index, &img(0, 0).r, img.get_total_pixels(), transcoder_tex_fmt, 0, img.get_pitch(), nullptr, img.get_height())) + if (!dec.transcode_image_level(&basis_file_data[0], (uint32_t)basis_file_data.size(), image_index, level_index, &img(0, 0).r, img.get_total_pixels(), transcoder_tex_fmt, transcode_flags, img.get_pitch(), nullptr, img.get_height())) { error_printf("Failed 
transcoding image level (%u %u %u)!\n", image_index, level_index, transcoder_tex_fmt); return false; @@ -3280,7 +4020,7 @@ static bool unpack_and_validate_basis_file( tm.start(); - if (!dec.transcode_image_level(&basis_file_data[0], (uint32_t)basis_file_data.size(), image_index, level_index, &packed_img[0], (uint32_t)packed_img.size(), transcoder_tex_fmt, 0, level_info.m_orig_width, nullptr, level_info.m_orig_height)) + if (!dec.transcode_image_level(&basis_file_data[0], (uint32_t)basis_file_data.size(), image_index, level_index, &packed_img[0], (uint32_t)packed_img.size(), transcoder_tex_fmt, transcode_flags, level_info.m_orig_width, nullptr, level_info.m_orig_height)) { error_printf("Failed transcoding image level (%u %u %u)!\n", image_index, level_index, transcoder_tex_fmt); return false; @@ -3343,7 +4083,7 @@ static bool unpack_and_validate_basis_file( tm.start(); - if (!dec.transcode_image_level(&basis_file_data[0], (uint32_t)basis_file_data.size(), image_index, level_index, &packed_img[0], (uint32_t)packed_img.size(), transcoder_tex_fmt, 0, level_info.m_orig_width, nullptr, level_info.m_orig_height)) + if (!dec.transcode_image_level(&basis_file_data[0], (uint32_t)basis_file_data.size(), image_index, level_index, &packed_img[0], (uint32_t)packed_img.size(), transcoder_tex_fmt, transcode_flags, level_info.m_orig_width, nullptr, level_info.m_orig_height)) { error_printf("Failed transcoding image level (%u %u %u)!\n", image_index, level_index, transcoder_tex_fmt); return false; @@ -3391,10 +4131,10 @@ static bool unpack_and_validate_basis_file( } // level_index } // image_index - + } // is_hdr - } // if ((opts.m_format_only == -1) && (!validate_flag)) + } // if ((opts.m_unpack_format_only == -1) && (!validate_flag)) if (pCSV_file) { @@ -3432,7 +4172,7 @@ static bool unpack_and_validate_mode(command_line_params &opts) tm.start(); //const bool validate_flag = (opts.m_mode == cValidate); - + basis_data* pGlobal_codebook_data = nullptr; if 
(opts.m_etc1s_use_global_codebooks_file.size()) { @@ -3499,14 +4239,14 @@ static bool unpack_and_validate_mode(command_line_params &opts) delete pGlobal_codebook_data; pGlobal_codebook_data = nullptr; return false; } - + bool is_ktx2 = false; if (file_data.size() >= sizeof(basist::g_ktx2_file_identifier)) { is_ktx2 = (memcmp(file_data.data(), basist::g_ktx2_file_identifier, sizeof(basist::g_ktx2_file_identifier)) == 0); } - printf("Input file \"%s\", KTX2: %u\n", pInput_filename, is_ktx2); + printf("\nInput file \"%s\", KTX2: %u\n", pInput_filename, is_ktx2); bool status; if (is_ktx2) @@ -3536,10 +4276,10 @@ static bool unpack_and_validate_mode(command_line_params &opts) if (!status) { - if (pCSV_file) + if (pCSV_file) fclose(pCSV_file); - delete pGlobal_codebook_data; + delete pGlobal_codebook_data; pGlobal_codebook_data = nullptr; return false; @@ -3562,7 +4302,7 @@ static bool unpack_and_validate_mode(command_line_params &opts) fclose(pCSV_file); pCSV_file = nullptr; } - delete pGlobal_codebook_data; + delete pGlobal_codebook_data; pGlobal_codebook_data = nullptr; return true; @@ -3608,7 +4348,7 @@ static bool hdr_compare_mode(command_line_params& opts) printf("Comparison image res: %ux%u\n", a.get_width(), a.get_height()); image_metrics im; - + im.calc_half(a, b, 0, 1, true); im.print("R "); @@ -3700,7 +4440,7 @@ static bool compare_mode(command_line_params &opts) im.calc(a, b, 0, 0, true, true); im.print("Y 601 " ); - + if (opts.m_compare_ssim) { vec4F s_rgb(compute_ssim(a, b, false, false)); @@ -3743,7 +4483,7 @@ static bool compare_mode(command_line_params &opts) save_png("delta_img_rgb.png", delta_img, cImageSaveIgnoreAlpha); printf("Wrote delta_img_rgb.png\n"); - + save_png("delta_img_a.png", delta_img, cImageSaveGrayscale, 3); printf("Wrote delta_img_a.png\n"); @@ -3886,7 +4626,7 @@ static bool compare_mode(command_line_params &opts) while ((int)strlen(tics2) < x) strcat(tics2, " "); - sprintf(buf, "0"); + snprintf(buf, sizeof(buf), "0"); strcat(tics, 
buf); } else if (((x & 7) == 0) || (x == X_SIZE)) @@ -3898,18 +4638,18 @@ static bool compare_mode(command_line_params &opts) strcat(tics2, " "); int v = (x - (int)X_SIZE / 2); - sprintf(buf, "%i", v / 10); + snprintf(buf, sizeof(buf), "%i", v / 10); strcat(tics, buf); if (v < 0) { if (-v < 10) - sprintf(buf, "%i", v % 10); + snprintf(buf, sizeof(buf), "%i", v % 10); else - sprintf(buf, " %i", -v % 10); + snprintf(buf, sizeof(buf), " %i", -v % 10); } else - sprintf(buf, "%i", v % 10); + snprintf(buf, sizeof(buf), "%i", v % 10); strcat(tics2, buf); } else @@ -3925,7 +4665,7 @@ static bool compare_mode(command_line_params &opts) } } // display_plot - + return true; } @@ -3964,7 +4704,7 @@ static bool split_image_mode(command_line_params& opts) } printf("Wrote file %s\n", buf); } - + return true; } @@ -4009,7 +4749,7 @@ static bool combine_images_mode(command_line_params& opts) const char* pOutput_filename = "combined.png"; if (opts.m_output_filename.size()) pOutput_filename = opts.m_output_filename.c_str(); - + if (!save_png(pOutput_filename, combined_img)) { fprintf(stderr, "Failed writing file %s\n", pOutput_filename); @@ -4051,7 +4791,7 @@ static bool tonemap_image_mode(command_line_params& opts) string_combine_path(output_filename, opts.m_output_path.c_str(), output_filename.c_str()); const char* pBasename = output_filename.c_str(); - + image srgb_img(width, height); image lin_img(width, height); @@ -4064,7 +4804,7 @@ static bool tonemap_image_mode(command_line_params& opts) p[0] = clamp(p[0], 0.0f, 1.0f); p[1] = clamp(p[1], 0.0f, 1.0f); p[2] = clamp(p[2], 0.0f, 1.0f); - + { int rc = (int)std::round(linear_to_srgb(p[0]) * 255.0f); int gc = (int)std::round(linear_to_srgb(p[1]) * 255.0f); @@ -4116,7 +4856,7 @@ static bool tonemap_image_mode(command_line_params& opts) for (int e = -6; e <= 6; e++) { const float scale = powf(2.0f, (float)e); - + tonemap_image_reinhard(tonemapped_img, hdr_img, scale, opts.m_tonemap_dither_flag); std::string 
filename(string_format("%s_reinhard_tonemapped_scale_%f.png", pBasename, scale)); @@ -4177,16 +4917,16 @@ static bool compsize_mode(command_line_params& opts) return true; } -const struct test_file +const struct etc1s_uastc_4x4_ldr_test_file { const char* m_pFilename; uint32_t m_etc1s_size; float m_etc1s_psnr; float m_uastc_psnr; - + uint32_t m_etc1s_128_size; float m_etc1s_128_psnr; -} g_test_files[] = +} g_etc1s_uastc_4x4_ldr_test_files[] = { { "black_1x1.png", 189, 100.0f, 100.0f, 189, 100.0f }, { "kodim01.png", 30993, 27.40f, 44.14f, 58354, 30.356064f }, @@ -4217,7 +4957,6 @@ const struct test_file { "wikipedia.png", 38961, 24.10f, 30.47f, 69558, 27.630802f }, { "alpha0.png", 766, 100.0f, 56.16f, 747, 100.000000f } }; -const uint32_t TOTAL_TEST_FILES = sizeof(g_test_files) / sizeof(g_test_files[0]); static bool test_mode_ldr(command_line_params& opts) { @@ -4236,14 +4975,16 @@ static bool test_mode_ldr(command_line_params& opts) #endif const float ETC1S_FILESIZE_THRESHOLD = .045f; - for (uint32_t i = 0; i < TOTAL_TEST_FILES; i++) + for (uint32_t i = 0; i < std::size(g_etc1s_uastc_4x4_ldr_test_files); i++) { + const auto& test_file = g_etc1s_uastc_4x4_ldr_test_files[i]; + std::string filename(opts.m_test_file_dir); if (filename.size()) { filename.push_back('/'); } - filename += std::string(g_test_files[i].m_pFilename); + filename += std::string(test_file.m_pFilename); basisu::vector source_images(1); @@ -4254,7 +4995,7 @@ static bool test_mode_ldr(command_line_params& opts) return false; } - printf("Loaded file \"%s\", dimemsions %ux%u has alpha: %u\n", filename.c_str(), source_image.get_width(), source_image.get_height(), source_image.has_alpha()); + printf("Loaded file \"%s\", dimensions %ux%u has alpha: %u\n", filename.c_str(), source_image.get_width(), source_image.get_height(), source_image.has_alpha()); image_stats stats; @@ -4264,7 +5005,7 @@ static bool test_mode_ldr(command_line_params& opts) // Test ETC1S flags_and_quality = 
(opts.m_comp_params.m_multithreading ? cFlagThreaded : 0) | cFlagPrintStats | cFlagPrintStatus; - + { printf("**** Testing ETC1S non-OpenCL level 1\n"); @@ -4279,16 +5020,16 @@ static bool test_mode_ldr(command_line_params& opts) printf("ETC1S level 1 Size: %u, PSNR: %f\n", (uint32_t)data_size, stats.m_basis_rgba_avg_psnr); - float file_size_ratio = fabs((data_size / (float)g_test_files[i].m_etc1s_size) - 1.0f); + float file_size_ratio = fabs((data_size / (float)test_file.m_etc1s_size) - 1.0f); if (file_size_ratio > ETC1S_FILESIZE_THRESHOLD) { - error_printf("Expected ETC1S file size was %u, but got %u instead!\n", g_test_files[i].m_etc1s_size, (uint32_t)data_size); + error_printf("Expected ETC1S file size was %u, but got %u instead!\n", test_file.m_etc1s_size, (uint32_t)data_size); total_mismatches++; } - if (fabs(stats.m_basis_rgba_avg_psnr - g_test_files[i].m_etc1s_psnr) > ETC1S_PSNR_THRESHOLD) + if (fabs(stats.m_basis_rgba_avg_psnr - test_file.m_etc1s_psnr) > ETC1S_PSNR_THRESHOLD) { - error_printf("Expected ETC1S RGBA Avg PSNR was %f, but got %f instead!\n", g_test_files[i].m_etc1s_psnr, stats.m_basis_rgba_avg_psnr); + error_printf("Expected ETC1S RGBA Avg PSNR was %f, but got %f instead!\n", test_file.m_etc1s_psnr, stats.m_basis_rgba_avg_psnr); total_mismatches++; } } @@ -4309,16 +5050,16 @@ static bool test_mode_ldr(command_line_params& opts) printf("ETC1S level 128 Size: %u, PSNR: %f\n", (uint32_t)data_size, stats.m_basis_rgba_avg_psnr); - float file_size_ratio = fabs((data_size / (float)g_test_files[i].m_etc1s_128_size) - 1.0f); + float file_size_ratio = fabs((data_size / (float)test_file.m_etc1s_128_size) - 1.0f); if (file_size_ratio > ETC1S_FILESIZE_THRESHOLD) { - error_printf("Expected ETC1S file size was %u, but got %u instead!\n", g_test_files[i].m_etc1s_128_size, (uint32_t)data_size); + error_printf("Expected ETC1S file size was %u, but got %u instead!\n", test_file.m_etc1s_128_size, (uint32_t)data_size); total_mismatches++; } - if 
(fabs(stats.m_basis_rgba_avg_psnr - g_test_files[i].m_etc1s_128_psnr) > ETC1S_PSNR_THRESHOLD) + if (fabs(stats.m_basis_rgba_avg_psnr - test_file.m_etc1s_128_psnr) > ETC1S_PSNR_THRESHOLD) { - error_printf("Expected ETC1S RGBA Avg PSNR was %f, but got %f instead!\n", g_test_files[i].m_etc1s_128_psnr, stats.m_basis_rgba_avg_psnr); + error_printf("Expected ETC1S RGBA Avg PSNR was %f, but got %f instead!\n", test_file.m_etc1s_128_psnr, stats.m_basis_rgba_avg_psnr); total_mismatches++; } } @@ -4340,36 +5081,36 @@ static bool test_mode_ldr(command_line_params& opts) printf("ETC1S+OpenCL Size: %u, PSNR: %f\n", (uint32_t)data_size, stats.m_basis_rgba_avg_psnr); - float file_size_ratio = fabs((data_size / (float)g_test_files[i].m_etc1s_size) - 1.0f); + float file_size_ratio = fabs((data_size / (float)test_file.m_etc1s_size) - 1.0f); if (file_size_ratio > .04f) { - error_printf("Expected ETC1S+OpenCL file size was %u, but got %u instead!\n", g_test_files[i].m_etc1s_size, (uint32_t)data_size); + error_printf("Expected ETC1S+OpenCL file size was %u, but got %u instead!\n", test_file.m_etc1s_size, (uint32_t)data_size); total_mismatches++; } - if (g_test_files[i].m_etc1s_psnr == 100.0f) + if (test_file.m_etc1s_psnr == 100.0f) { // TODO if (stats.m_basis_rgba_avg_psnr < 69.0f) { - error_printf("Expected ETC1S+OpenCL RGBA Avg PSNR was %f, but got %f instead!\n", g_test_files[i].m_etc1s_psnr, stats.m_basis_rgba_avg_psnr); + error_printf("Expected ETC1S+OpenCL RGBA Avg PSNR was %f, but got %f instead!\n", test_file.m_etc1s_psnr, stats.m_basis_rgba_avg_psnr); total_mismatches++; } } - else if (fabs(stats.m_basis_rgba_avg_psnr - g_test_files[i].m_etc1s_psnr) > .2f) + else if (fabs(stats.m_basis_rgba_avg_psnr - test_file.m_etc1s_psnr) > .2f) { - error_printf("Expected ETC1S+OpenCL RGBA Avg PSNR was %f, but got %f instead!\n", g_test_files[i].m_etc1s_psnr, stats.m_basis_rgba_avg_psnr); + error_printf("Expected ETC1S+OpenCL RGBA Avg PSNR was %f, but got %f instead!\n", 
test_file.m_etc1s_psnr, stats.m_basis_rgba_avg_psnr); total_mismatches++; } } // Test UASTC { - printf("**** Testing UASTC\n"); + printf("**** Testing UASTC LDR 4x4\n"); flags_and_quality = (opts.m_comp_params.m_multithreading ? cFlagThreaded : 0) | cFlagPrintStats | cFlagPrintStatus; - void* pData = basis_compress(basist::basis_tex_format::cUASTC4x4, source_images, flags_and_quality, uastc_rdo_quality, &data_size, &stats); + void* pData = basis_compress(basist::basis_tex_format::cUASTC_LDR_4x4, source_images, flags_and_quality, uastc_rdo_quality, &data_size, &stats); if (!pData) { error_printf("basis_compress() failed!\n"); @@ -4379,9 +5120,9 @@ static bool test_mode_ldr(command_line_params& opts) printf("UASTC Size: %u, PSNR: %f\n", (uint32_t)data_size, stats.m_basis_rgba_avg_psnr); - if (fabs(stats.m_basis_rgba_avg_psnr - g_test_files[i].m_uastc_psnr) > UASTC_PSNR_THRESHOLD) + if (fabs(stats.m_basis_rgba_avg_psnr - test_file.m_uastc_psnr) > UASTC_PSNR_THRESHOLD) { - error_printf("Expected UASTC RGBA Avg PSNR was %f, but got %f instead!\n", g_test_files[i].m_etc1s_psnr, stats.m_basis_rgba_avg_psnr); + error_printf("Expected UASTC RGBA Avg PSNR was %f, but got %f instead!\n", test_file.m_etc1s_psnr, stats.m_basis_rgba_avg_psnr); total_mismatches++; } } @@ -4475,7 +5216,7 @@ static bool test_mode_hdr(command_line_params& opts, basist::basis_tex_format te fmt_printf("test_mode_hdr: Testing basis_tex_format {}, lambda {}\n", (uint32_t)tex_fmt, lambda); uint32_t total_mismatches = 0; - + #ifdef USE_TIGHTER_TEST_TOLERANCES // The PSNR's above were created with a MSVC compiled executable, x64. Hopefully this is not too low a threshold. const float PSNR_THRESHOLD = .125f; @@ -4483,7 +5224,7 @@ static bool test_mode_hdr(command_line_params& opts, basist::basis_tex_format te // Minor differences in how floating point code is optimized can result in slightly different generated files. 
const float PSNR_THRESHOLD = .3f; #endif - + double highest_delta = 0.0f; // TODO: This doesn't test all 6x6 levels, but that's fine for now. @@ -4508,8 +5249,8 @@ static bool test_mode_hdr(command_line_params& opts, basist::basis_tex_format te return false; } - printf("Loaded file \"%s\", dimemsions %ux%u\n", filename.c_str(), source_image.get_width(), source_image.get_height()); - + printf("Loaded file \"%s\", dimensions %ux%u\n", filename.c_str(), source_image.get_width(), source_image.get_height()); + for (uint32_t uastc_hdr_level = 0; uastc_hdr_level <= MAX_ASTC_HDR_4x4_TEST_LEVEL; uastc_hdr_level++) { image_stats stats; @@ -4523,7 +5264,7 @@ static bool test_mode_hdr(command_line_params& opts, basist::basis_tex_format te flags_and_quality |= uastc_hdr_level; void* pData = basis_compress(tex_fmt, - source_imagesf, flags_and_quality, lambda, + source_imagesf, flags_and_quality, lambda, &data_size, &stats); if (!pData) { @@ -4534,7 +5275,7 @@ static bool test_mode_hdr(command_line_params& opts, basist::basis_tex_format te double delta1, delta2; - printf("ASTC PSNR: %f (expected %f, delta %f), BC6H PSNR: %f (expected %f, delta %f)\n", + printf("ASTC PSNR: %f (expected %f, delta %f), BC6H PSNR: %f (expected %f, delta %f)\n", stats.m_basis_rgb_avg_log2_psnr, pTest_files[i].m_level_psnr_astc[uastc_hdr_level], delta1 = fabs(stats.m_basis_rgb_avg_log2_psnr - pTest_files[i].m_level_psnr_astc[uastc_hdr_level]), stats.m_basis_rgb_avg_bc6h_log2_psnr, pTest_files[i].m_level_psnr_bc6h[uastc_hdr_level], delta2 = fabs(stats.m_basis_rgb_avg_bc6h_log2_psnr - pTest_files[i].m_level_psnr_bc6h[uastc_hdr_level])); @@ -4605,12 +5346,195 @@ static bool test_mode_hdr(command_line_params& opts, basist::basis_tex_format te return result; } -static bool clbench_mode(command_line_params& opts) +const uint32_t XUASTC_LDR_TEST_FILE_NUM_RUNS = 3; + +struct xuastc_ldr_test_file { - BASISU_NOTE_UNUSED(opts); + const char* m_pFilename; - bool opencl_failed = false; - bool use_cl = 
basis_benchmark_etc1s_opencl(&opencl_failed); + struct test_run + { + float m_dct_q; + uint32_t m_comp_size; + float m_rgba_psnr; + }; + + test_run m_test_runs[XUASTC_LDR_TEST_FILE_NUM_RUNS]; +}; + +xuastc_ldr_test_file g_xuastc_ldr_test_files_6x6[] = +{ + { "black_1x1.png", { { 100.000000f, 111, 100.000000f }, { 75.000000f, 112, 100.000000f }, { 35.000000f, 112, 100.000000f } } }, + { "kodim01.png", { { 100.000000f, 141064, 37.188324f }, { 75.000000f, 115385, 32.893822f }, { 35.000000f, 80001, 30.057878f } } }, + { "kodim02.png", { { 100.000000f, 135146, 40.280567f }, { 75.000000f, 82435, 36.618645f }, { 35.000000f, 57365, 34.556519f } } }, + { "kodim03.png", { { 100.000000f, 133654, 42.754337f }, { 75.000000f, 72161, 38.706654f }, { 35.000000f, 51462, 36.026749f } } }, + { "kodim04.png", { { 100.000000f, 138877, 40.671108f }, { 75.000000f, 84194, 36.773575f }, { 35.000000f, 61363, 34.570110f } } }, + { "kodim05.png", { { 100.000000f, 146600, 35.842682f }, { 75.000000f, 124004, 33.176735f }, { 35.000000f, 94508, 30.148199f } } }, + { "kodim06.png", { { 100.000000f, 134928, 38.721409f }, { 75.000000f, 94356, 34.459309f }, { 35.000000f, 65904, 31.435408f } } }, + { "kodim07.png", { { 100.000000f, 136807, 41.048141f }, { 75.000000f, 85150, 38.172615f }, { 35.000000f, 64387, 35.527702f } } }, + { "kodim08.png", { { 100.000000f, 145326, 35.896526f }, { 75.000000f, 119654, 33.047630f }, { 35.000000f, 92376, 29.980146f } } }, + { "kodim09.png", { { 100.000000f, 135074, 42.271267f }, { 75.000000f, 66568, 38.262554f }, { 35.000000f, 47686, 35.810940f } } }, + { "kodim10.png", { { 100.000000f, 137184, 41.879585f }, { 75.000000f, 73560, 37.980556f }, { 35.000000f, 54453, 35.449261f } } }, + { "kodim11.png", { { 100.000000f, 138275, 38.718960f }, { 75.000000f, 91902, 35.112244f }, { 35.000000f, 66243, 32.391891f } } }, + { "kodim12.png", { { 100.000000f, 132918, 42.822681f }, { 75.000000f, 71330, 38.155998f }, { 35.000000f, 49345, 35.743179f } } }, + { "kodim13.png", { { 
100.000000f, 141033, 33.948277f }, { 75.000000f, 123631, 30.678318f }, { 35.000000f, 88403, 27.592640f } } }, + { "kodim14.png", { { 100.000000f, 141117, 36.902863f }, { 75.000000f, 108060, 33.896935f }, { 35.000000f, 77104, 31.451799f } } }, + { "kodim15.png", { { 100.000000f, 135981, 40.416115f }, { 75.000000f, 76564, 36.855175f }, { 35.000000f, 55002, 34.548985f } } }, + { "kodim16.png", { { 100.000000f, 134349, 42.286755f }, { 75.000000f, 80713, 36.828140f }, { 35.000000f, 55894, 33.982174f } } }, + { "kodim17.png", { { 100.000000f, 138778, 40.653671f }, { 75.000000f, 81391, 37.024017f }, { 35.000000f, 59293, 34.429058f } } }, + { "kodim18.png", { { 100.000000f, 142690, 36.400116f }, { 75.000000f, 104323, 33.398468f }, { 35.000000f, 74051, 30.714231f } } }, + { "kodim19.png", { { 100.000000f, 138584, 39.704021f }, { 75.000000f, 87574, 35.544052f }, { 35.000000f, 63776, 33.032051f } } }, + { "kodim20.png", { { 100.000000f, 121663, 41.099850f }, { 75.000000f, 64552, 37.174721f }, { 35.000000f, 44838, 34.739983f } } }, + { "kodim21.png", { { 100.000000f, 138337, 38.284393f }, { 75.000000f, 85878, 34.727512f }, { 35.000000f, 60879, 32.004494f } } }, + { "kodim22.png", { { 100.000000f, 142142, 38.583397f }, { 75.000000f, 93914, 35.047283f }, { 35.000000f, 65592, 32.702984f } } }, + { "kodim23.png", { { 100.000000f, 140280, 42.489117f }, { 75.000000f, 74579, 39.385365f }, { 35.000000f, 57354, 37.228970f } } }, + { "kodim24.png", { { 100.000000f, 138443, 36.158039f }, { 75.000000f, 101415, 33.512146f }, { 35.000000f, 75311, 30.575174f } } }, + { "white_1x1.png", { { 100.000000f, 111, 100.000000f }, { 75.000000f, 112, 100.000000f }, { 35.000000f, 112, 100.000000f } } }, + { "wikipedia.png", { { 100.000000f, 189589, 32.205330f }, { 75.000000f, 168732, 31.926851f }, { 35.000000f, 160971, 30.209082f } } }, + //{ "alpha0.png", { { 100.000000f, 1389, 49.883366f }, { 75.000000f, 1385, 49.125038f }, { 35.000000f, 1479, 42.865246f } } } // alpha0.png is minor nightmare for 
testing XUASTC LDR because it's very sensitive to tiny FP differences +}; + +static bool test_mode_xuastc_ldr(command_line_params& opts) +{ + uint32_t total_mismatches = 0; + + // Minor differences in how floating point code is optimized can result in slightly different generated files. + + // XUASTC LDR's IDCT is currently float - at low q's and high (>48) dB's tiny differences during decompression are noticeable + const float XUASTC_PSNR_THRESHOLD = 1.0f; + const float XUASTC_FILESIZE_THRESHOLD = .045f; + + struct run_stats + { + size_t m_comp_size; + image_stats m_stats; + }; + + basisu::vector2D< run_stats > run_image_stats((uint32_t)std::size(g_xuastc_ldr_test_files_6x6), XUASTC_LDR_TEST_FILE_NUM_RUNS); + + for (uint32_t i = 0; i < std::size(g_xuastc_ldr_test_files_6x6); i++) + { + const auto& test_file = g_xuastc_ldr_test_files_6x6[i]; + + std::string filename(opts.m_test_file_dir); + if (filename.size()) + { + filename.push_back('/'); + } + filename += std::string(test_file.m_pFilename); + + basisu::vector source_images(1); + + image& source_image = source_images[0]; + if (!load_png(filename.c_str(), source_image)) + { + error_printf("Failed loading test image \"%s\"\n", filename.c_str()); + return false; + } + + printf("Loaded file \"%s\", dimensions %ux%u has alpha: %u\n", filename.c_str(), source_image.get_width(), source_image.get_height(), source_image.has_alpha()); + + image_stats stats; + + uint32_t flags_and_quality; + + // Test XUASTC LDR + flags_and_quality = (opts.m_comp_params.m_multithreading ? 
cFlagThreaded : 0) | cFlagPrintStats | cFlagPrintStatus | cFlagSRGB; + + for (uint32_t run_index = 0; run_index < XUASTC_LDR_TEST_FILE_NUM_RUNS; run_index++) + { + const auto& test_run = test_file.m_test_runs[run_index]; + + float uastc_rdo_quality = 0.0f; + size_t data_size = 0; + + const uint32_t effort_level = 8; + + flags_and_quality &= ~0xFF; + flags_and_quality |= effort_level; + + if (test_run.m_dct_q < 100.0f) + { + uastc_rdo_quality = test_run.m_dct_q; + } + + basist::basis_tex_format tex_fmt = basist::basis_tex_format::cXUASTC_LDR_6x6; + + fmt_printf("**** Testing XUASTC LDR, DCT q {}, effort {}\n", test_run.m_dct_q, effort_level); + + void* pData = basis_compress(tex_fmt, source_images, flags_and_quality, uastc_rdo_quality, &data_size, &stats); + if (!pData) + { + error_printf("basis_compress() failed!\n"); + return false; + } + basis_free_data(pData); + + fmt_printf("XUASTC Size: {} (expected {}), RGBA PSNR: {3.3} dB (expected {3.3} dB)\n", + (uint32_t)data_size, test_run.m_comp_size, + stats.m_basis_rgba_avg_psnr, test_run.m_rgba_psnr); + + float file_size_ratio = fabs((data_size / (float)test_run.m_comp_size) - 1.0f); + + if (file_size_ratio > XUASTC_FILESIZE_THRESHOLD) + { + fmt_error_printf("Mismatch: Expected XUASTC LDR file size was {}, but got {} instead!\n", test_run.m_comp_size, (uint32_t)data_size); + total_mismatches++; + } + + if (fabs(stats.m_basis_rgba_avg_psnr - test_run.m_rgba_psnr) > XUASTC_PSNR_THRESHOLD) + { + fmt_error_printf("Mismatch: Expected XUASTC LDR RGBA Avg PSNR was {}, but got {} instead!\n", test_run.m_rgba_psnr, stats.m_basis_rgba_avg_psnr); + total_mismatches++; + } + + run_image_stats(i, run_index).m_comp_size = data_size; + run_image_stats(i, run_index).m_stats = stats; + } + } + +#if 0 + for (uint32_t i = 0; i < std::size(g_xuastc_ldr_test_files_6x6); i++) + { + fmt_printf("{{ \"{}\", {{", g_xuastc_ldr_test_files_6x6[i].m_pFilename); + + for (uint32_t j = 0; j < XUASTC_LDR_TEST_FILE_NUM_RUNS; j++) + { + fmt_printf(" {{ 
{}f, {}, {}f }", + g_xuastc_ldr_test_files_6x6[i].m_test_runs[j].m_dct_q, + run_image_stats(i, j).m_comp_size, + run_image_stats(i, j).m_stats.m_basis_rgba_avg_psnr); + + if (j != (XUASTC_LDR_TEST_FILE_NUM_RUNS - 1)) + fmt_printf(", "); + } + + fmt_printf(" } },\n"); + } +#endif + + printf("Total XUASTC LDR mismatches: %u\n", total_mismatches); + + bool result = true; + if (total_mismatches) + { + error_printf("XUASTC LDR test FAILED\n"); + result = false; + } + else + { + printf("XUASTC LDR test succeeded\n"); + } + + return result; +} + +static bool clbench_mode(command_line_params& opts) +{ + BASISU_NOTE_UNUSED(opts); + + bool opencl_failed = false; + bool use_cl = basis_benchmark_etc1s_opencl(&opencl_failed); if (use_cl) printf("OpenCL ETC1S encoding is faster on this machine\n"); else @@ -4639,17 +5563,518 @@ static void force_san_failure() } #endif // FORCE_SAN_FAILURE -static int main_internal(int argc, const char **argv) +static bool peek_astc_file(const char* pFilename) { - printf("Basis Universal LDR/HDR GPU Texture Compression and Transcoding System v" BASISU_TOOL_VERSION + fmt_printf("\nExamining .astc file: \"{}\"\n", pFilename); + + vector2D blocks; + uint32_t block_width, block_height, image_width, image_height; + if (!read_astc_file(pFilename, blocks, block_width, block_height, image_width, image_height)) + { + fmt_error_printf("Failed reading .astc file!\n"); + return false; + } + + const uint32_t total_block_pixels = block_width * block_height; + + fmt_printf("Block dimensions in pixels: {}x{}, {} total pixels\n", block_width, block_height, total_block_pixels); + fmt_printf("Image dimensions in pixels: {}x{}\n", image_width, image_height); + + fmt_printf("Extra cols/rows to pad image to ASTC block dimensions: {}x{}\n", + blocks.get_width() * block_width - image_width, + blocks.get_height() * block_height - image_height); + + image dec_image_srgb(image_width, image_height); + image dec_image_linear(image_width, image_height); + imagef 
dec_image_float(image_width, image_height); + + uint32_t cem_hist[16] = { }; + uint32_t cem_dp_hist[16] = { }; + uint32_t cem_used_bc_hist[16] = { }; + uint32_t total_dp = 0; + + uint32_t total_solid_blocks_ldr = 0; + uint32_t total_solid_blocks_hdr = 0; + uint32_t total_normal_blocks = 0; + + uint32_t part_hist[4] = { }; + uint32_t used_endpoint_levels_hist[astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE - astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE + 1] = { }; + uint32_t used_weight_levels_hist[astc_helpers::LAST_VALID_WEIGHT_ISE_RANGE - astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE + 1] = { }; + + uint32_t total_unequal_cem_blocks = 0; + uint32_t total_unequal_cem_blocks_2subsets = 0; + uint32_t total_unequal_cem_blocks_3subsets = 0; + uint32_t total_unequal_cem_blocks_4subsets = 0; + + uint32_t highest_part_seed = 0; + + int min_weight_grid_width = INT_MAX, min_weight_grid_height = INT_MAX; + int max_weight_grid_width = 0, max_weight_grid_height = 0; + + uint32_t total_ldr_blocks = 0, total_hdr_blocks = 0; + + basisu::hash_map weight_grid_histogram; + + struct log_astc_block_config_cmp_t + { + bool operator()(const astc_helpers::log_astc_block& a, + const astc_helpers::log_astc_block& b) const + { + // This only compares the ASTC configuration for equality, NOT the contents. 
+ if (a.m_error_flag != b.m_error_flag) + return false; + if (a.m_error_flag) + return true; + + if (a.m_grid_width != b.m_grid_width) + return false; + if (a.m_grid_height != b.m_grid_height) + return false; + + if (a.m_solid_color_flag_ldr != b.m_solid_color_flag_ldr) + return false; + if (a.m_solid_color_flag_hdr != b.m_solid_color_flag_hdr) + return false; + + if (a.m_solid_color_flag_ldr || a.m_solid_color_flag_hdr) + return true; + + if (a.m_dual_plane != b.m_dual_plane) + return false; + if (a.m_color_component_selector != b.m_color_component_selector) + return false; + + if (a.m_num_partitions != b.m_num_partitions) + return false; + + if (a.m_endpoint_ise_range != b.m_endpoint_ise_range) + return false; + if (a.m_weight_ise_range != b.m_weight_ise_range) + return false; + + for (uint32_t i = 0; i < a.m_num_partitions; i++) + if (a.m_color_endpoint_modes[i] != b.m_color_endpoint_modes[i]) + return false; + + return true; + } + }; + + basisu::hash_map, log_astc_block_config_cmp_t > unique_config_histogram; + + uint32_t total_subsets = 0; + + for (uint32_t by = 0; by < blocks.get_height(); by++) + { + for (uint32_t bx = 0; bx < blocks.get_width(); bx++) + { + astc_helpers::log_astc_block log_blk; + + if (!astc_helpers::unpack_block(&blocks(bx, by), log_blk, block_width, block_height)) + { + fmt_error_printf("astc_helpers::unpack_block() failed on block {}x{}\n", bx, by); + return false; + } + + if (log_blk.m_error_flag) + { + fmt_error_printf("astc_helpers::unpack_block() returned an error flag on block {}x{}\n", bx, by); + return false; + } + + { + astc_helpers::log_astc_block scrubbed_log_blk; + memset(&scrubbed_log_blk, 0, sizeof(scrubbed_log_blk)); + + // just record the config, not the contents, so only the config hashes + scrubbed_log_blk.m_solid_color_flag_ldr = log_blk.m_solid_color_flag_ldr; + scrubbed_log_blk.m_solid_color_flag_hdr = log_blk.m_solid_color_flag_hdr; + scrubbed_log_blk.m_dual_plane = log_blk.m_dual_plane; + 
scrubbed_log_blk.m_color_component_selector = log_blk.m_color_component_selector; + scrubbed_log_blk.m_grid_width = log_blk.m_grid_width; + scrubbed_log_blk.m_grid_height = log_blk.m_grid_height; + scrubbed_log_blk.m_num_partitions = log_blk.m_num_partitions; + scrubbed_log_blk.m_color_endpoint_modes[0] = log_blk.m_color_endpoint_modes[0]; + scrubbed_log_blk.m_color_endpoint_modes[1] = log_blk.m_color_endpoint_modes[1]; + scrubbed_log_blk.m_color_endpoint_modes[2] = log_blk.m_color_endpoint_modes[2]; + scrubbed_log_blk.m_color_endpoint_modes[3] = log_blk.m_color_endpoint_modes[3]; + scrubbed_log_blk.m_weight_ise_range = log_blk.m_weight_ise_range; + scrubbed_log_blk.m_endpoint_ise_range = log_blk.m_endpoint_ise_range; + + auto ins_res(unique_config_histogram.insert(scrubbed_log_blk, 0)); + (ins_res.first)->second = (ins_res.first)->second + 1; + } + + bool is_hdr = log_blk.m_solid_color_flag_hdr; + + if (log_blk.m_solid_color_flag_ldr) + { + total_solid_blocks_ldr++; + total_ldr_blocks++; + } + else if (log_blk.m_solid_color_flag_hdr) + { + total_solid_blocks_hdr++; + total_hdr_blocks++; + } + else + { + total_normal_blocks++; + + min_weight_grid_width = minimum(min_weight_grid_width, log_blk.m_grid_width); + min_weight_grid_height = minimum(min_weight_grid_height, log_blk.m_grid_height); + + max_weight_grid_width = maximum(max_weight_grid_width, log_blk.m_grid_width); + max_weight_grid_height = maximum(max_weight_grid_height, log_blk.m_grid_height); + + { + uint32_t weight_grid_hash_key = log_blk.m_grid_width | (log_blk.m_grid_height << 8); + auto ins_res(weight_grid_histogram.insert(weight_grid_hash_key, 0)); + (ins_res.first)->second = (ins_res.first)->second + 1; + } + + if (log_blk.m_dual_plane) + total_dp++; + + part_hist[log_blk.m_num_partitions - 1]++; + + // For debugging seed packing bugs + highest_part_seed = basisu::maximum(highest_part_seed, log_blk.m_partition_id); + + uint32_t cur_endpoint_ofs = 0; + bool has_unequal_cems = false; + + total_subsets 
+= log_blk.m_num_partitions; + + for (uint32_t p = 0; p < log_blk.m_num_partitions; p++) + { + if (astc_helpers::is_cem_hdr(log_blk.m_color_endpoint_modes[p])) + is_hdr = true; + + cem_hist[log_blk.m_color_endpoint_modes[p]]++; + + if (log_blk.m_dual_plane) + cem_dp_hist[log_blk.m_color_endpoint_modes[p]]++; + + if ((p) && (log_blk.m_color_endpoint_modes[p] != log_blk.m_color_endpoint_modes[0])) + { + has_unequal_cems = true; + } + + if (astc_helpers::is_cem_ldr(log_blk.m_color_endpoint_modes[p])) + { + bool uses_bc = astc_helpers::used_blue_contraction(log_blk.m_color_endpoint_modes[p], log_blk.m_endpoints + cur_endpoint_ofs, log_blk.m_endpoint_ise_range); + + cem_used_bc_hist[log_blk.m_color_endpoint_modes[p]] += uses_bc; + } + + cur_endpoint_ofs += astc_helpers::get_num_cem_values(log_blk.m_color_endpoint_modes[p]); + } + + if (log_blk.m_num_partitions >= 2) + { + total_unequal_cem_blocks += has_unequal_cems; + + if (log_blk.m_num_partitions == 2) + total_unequal_cem_blocks_2subsets += has_unequal_cems; + else if (log_blk.m_num_partitions == 3) + total_unequal_cem_blocks_3subsets += has_unequal_cems; + else if (log_blk.m_num_partitions == 4) + total_unequal_cem_blocks_4subsets += has_unequal_cems; + } + + used_weight_levels_hist[open_range_check(log_blk.m_weight_ise_range - astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE, std::size(used_weight_levels_hist))]++; + used_endpoint_levels_hist[open_range_check(log_blk.m_endpoint_ise_range - astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE, std::size(used_endpoint_levels_hist))]++; + } + + if (is_hdr) + { + total_hdr_blocks++; + } + else + { + total_ldr_blocks++; + + color_rgba block_pixels[astc_helpers::MAX_BLOCK_PIXELS]; + + // sRGB8 decode profile unpack + bool status = astc_helpers::decode_block(log_blk, block_pixels, block_width, block_height, astc_helpers::cDecodeModeSRGB8); + if (!status) + { + fmt_error_printf("astc_helpers::decode_block() failed on block {}x{}\n", bx, by); + return false; + } + + 
dec_image_srgb.set_block_clipped(block_pixels, bx * block_width, by * block_height, block_width, block_height); + + // linear8 decode profile unpack + status = astc_helpers::decode_block(log_blk, block_pixels, block_width, block_height, astc_helpers::cDecodeModeLDR8); + if (!status) + { + fmt_error_printf("astc_helpers::decode_block() failed on block {}x{}\n", bx, by); + return false; + } + + dec_image_linear.set_block_clipped(block_pixels, bx * block_width, by * block_height, block_width, block_height); + } + + // half float unpack + { + basist::half_float block_pixels_half[astc_helpers::MAX_BLOCK_PIXELS][4]; + + bool status = astc_helpers::decode_block(log_blk, block_pixels_half, block_width, block_height, astc_helpers::cDecodeModeHDR16); + if (!status) + { + fmt_error_printf("astc_helpers::decode_block() failed on block {}x{}\n", bx, by); + return false; + } + + vec4F block_pixels_float[astc_helpers::MAX_BLOCK_PIXELS]; + for (uint32_t i = 0; i < total_block_pixels; i++) + for (uint32_t j = 0; j < 4; j++) + block_pixels_float[i][j] = basist::half_to_float(block_pixels_half[i][j]); + + dec_image_float.set_block_clipped(block_pixels_float, bx * block_width, by * block_height, block_width, block_height); + } + + } // bx + + } //by + + fmt_printf("Total LDR blocks: {}, total HDR blocks: {}\n", total_ldr_blocks, total_hdr_blocks); + + save_png("astc_decoded_srgb8_ldr.png", dec_image_srgb); + fmt_printf("Wrote astc_decoded_srgb8_ldr.png\n"); + + save_png("astc_decoded_linear8_ldr.png", dec_image_linear); + fmt_printf("Wrote astc_decoded_linear8_ldr.png\n"); + + write_exr("astc_decoded_half.exr", dec_image_float, 4, 0); + fmt_printf("Wrote astc_decoded_half.exr\n"); + + fmt_printf("\nASTC file statistics:\n"); + + const uint32_t total_blocks = (uint32_t)blocks.size(); + + fmt_printf("Total blocks: {}, total void extent LDR: {}, total void extent HDR: {}, total normal: {}\n", total_blocks, total_solid_blocks_ldr, total_solid_blocks_hdr, total_normal_blocks); + 
fmt_printf("Total dual plane: {} {3.2}%\n", total_dp, total_dp * 100.0f / (float)total_blocks); + fmt_printf("Total subsets across all blocks: {}, Avg. subsets per block: {}\n", total_subsets, (float)total_subsets / (float)total_blocks); + + fmt_printf("Min weight grid dimensions: {}x{}\n", min_weight_grid_width, min_weight_grid_height); + fmt_printf("Max weight grid width: {}, height: {}\n", max_weight_grid_width, max_weight_grid_height); + + fmt_printf("\nPartition usage histogram:\n"); + for (uint32_t i = 0; i < 4; i++) + fmt_printf("{}: {} {3.2}%\n", i + 1, part_hist[i], (float)part_hist[i] * 100.0f / (float)total_blocks); + + fmt_printf("\nCEM usage histogram (percentages relative to total overall subsets used in texture):\n"); + for (uint32_t i = 0; i < 15; i++) + { + fmt_printf("{}: {} {3.2}%, total BC: {} {3.2}%, total DP: {} {3.2}%\n", i, + cem_hist[i], (float)cem_hist[i] * 100.0f / (float)total_subsets, + cem_used_bc_hist[i], (float)cem_used_bc_hist[i] * 100.0f / (float)total_subsets, + cem_dp_hist[i], (float)cem_dp_hist[i] * 100.0f / (float)total_subsets); + } + + fmt_printf("\nUsed endpoint ISE levels:\n"); + for (uint32_t i = 0; i < std::size(used_endpoint_levels_hist); i++) + fmt_printf("{} levels: {}\n", astc_helpers::get_ise_levels(astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE + i), used_endpoint_levels_hist[i]); + + fmt_printf("\nUsed weight ISE levels:\n"); + for (uint32_t i = 0; i < std::size(used_weight_levels_hist); i++) + fmt_printf("{} levels: {}\n", astc_helpers::get_ise_levels(astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE + i), used_weight_levels_hist[i]); + + fmt_printf("\nTotal 2+ subset blocks using unequal CEM's: {} {3.2}%\n", total_unequal_cem_blocks, (float)total_unequal_cem_blocks * 100.0f / (float)total_blocks); + fmt_printf("Total 2 subset blocks using unequal CEM's: {} {3.2}%\n", total_unequal_cem_blocks_2subsets, (float)total_unequal_cem_blocks_2subsets * 100.0f / (float)total_blocks); + fmt_printf("Total 3 subset blocks using 
unequal CEM's: {} {3.2}%\n", total_unequal_cem_blocks_3subsets, (float)total_unequal_cem_blocks_3subsets * 100.0f / (float)total_blocks); + fmt_printf("Total 4 subset blocks using unequal CEM's: {} {3.2}%\n", total_unequal_cem_blocks_4subsets, (float)total_unequal_cem_blocks_4subsets * 100.0f / (float)total_blocks); + + fmt_printf("\nHighest part ID seed: {}, 0x{0x}\n", highest_part_seed, highest_part_seed); + + fmt_printf("\nWeight grid usage histogram:\n"); + + uint64_vec v; + for (auto it = weight_grid_histogram.begin(); it != weight_grid_histogram.end(); ++it) + v.push_back(((uint64_t)it->first << 32) | it->second); + + v.sort(); + + for (uint32_t i = 0; i < v.size(); i++) + fmt_printf(" {}x{}: total blocks {}\n", (v[i] >> 32) & 0xFF, (v[i] >> 40) & 0xFF, v[i] & UINT32_MAX); + + fmt_printf("\nTotal unique ASTC configurations: {}\n", unique_config_histogram.size_u32()); + + uint32_t config_idx = 0; + for (auto it = unique_config_histogram.begin(); it != unique_config_histogram.end(); ++it) + { + const auto& l = it->first; + const uint32_t total = it->second; + + fmt_printf(" {}. 
Used {} {3.2}% times: Solid LDR: {} HDR: {}, Grid: {}x{}, Dual Plane: {}, CCS: {}, NumParts: {}, CEMS: {} {} {} {}, WeightISERange: {}, EndpointISERange: {}\n", + config_idx, total, float(total) * 100.0f / total_blocks, + l.m_solid_color_flag_ldr, l.m_solid_color_flag_hdr, + l.m_grid_width, l.m_grid_height, + l.m_dual_plane, l.m_color_component_selector, + l.m_num_partitions, l.m_color_endpoint_modes[0], l.m_color_endpoint_modes[1], l.m_color_endpoint_modes[2], l.m_color_endpoint_modes[3], + l.m_weight_ise_range, l.m_endpoint_ise_range); + + config_idx++; + } + + fmt_printf("Success\n"); + + return true; +} + +bool xuastc_ldr_decoder_fuzz_test() +{ + basisu::rand rnd; + rnd.seed(1); + + const uint32_t N = 16; + + interval_timer itm; + double total_time_a = 0, total_time_b = 0; + + for (uint32_t blk_size_index = 0; blk_size_index < astc_helpers::NUM_ASTC_BLOCK_SIZES; blk_size_index++) + { + const uint32_t bw = astc_helpers::g_astc_block_sizes[blk_size_index][0]; + const uint32_t bh = astc_helpers::g_astc_block_sizes[blk_size_index][1]; + + fmt_printf("Testing block size {}x{}\n", bw, bh); + + const auto& trial_modes = basist::astc_ldr_t::g_encoder_trial_modes[blk_size_index]; + + if (!trial_modes.size()) + { + assert(0); + return false; + } + + for (uint32_t j = 0; j < trial_modes.size(); j++) + { + const auto& tm = trial_modes[j]; + + astc_helpers::log_astc_block log_blk; + log_blk.clear(); + + const bool test_solid = rnd.irand(0, 63) == 0; + + log_blk.m_grid_width = (uint8_t)tm.m_grid_width; + log_blk.m_grid_height = (uint8_t)tm.m_grid_height; + + log_blk.m_weight_ise_range = (uint8_t)tm.m_weight_ise_range; + log_blk.m_endpoint_ise_range = (uint8_t)tm.m_endpoint_ise_range; + + log_blk.m_dual_plane = tm.m_ccs_index != -1; + if (tm.m_ccs_index != -1) + log_blk.m_color_component_selector = (uint8_t)tm.m_ccs_index; + + log_blk.m_num_partitions = (uint8_t)tm.m_num_parts; + for (uint32_t s = 0; s < tm.m_num_parts; s++) + log_blk.m_color_endpoint_modes[s] = 
(uint8_t)tm.m_cem; + + for (uint32_t k = 0; k < N; k++) + { + if (log_blk.m_num_partitions > 1) + log_blk.m_partition_id = (uint16_t)rnd.irand(0, 1023); + + const uint32_t num_cem_endpoint_vals = astc_helpers::get_num_cem_values(tm.m_cem); + const uint32_t total_cem_endpoint_vals = num_cem_endpoint_vals * log_blk.m_num_partitions; + + for (uint32_t i = 0; i < total_cem_endpoint_vals; i++) + log_blk.m_endpoints[i] = (uint8_t)rnd.irand(0, astc_helpers::get_ise_levels(log_blk.m_endpoint_ise_range) - 1); + + const uint32_t num_weight_vals = (log_blk.m_dual_plane ? 2 : 1) * log_blk.m_grid_width * log_blk.m_grid_height; + for (uint32_t i = 0; i < num_weight_vals; i++) + log_blk.m_weights[i] = (uint8_t)rnd.irand(0, astc_helpers::get_ise_levels(log_blk.m_weight_ise_range) - 1); + + if (test_solid) + { + log_blk.clear(); + log_blk.m_solid_color_flag_ldr = true; + + uint32_t r = rnd.byte(); + uint32_t g = rnd.byte(); + uint32_t b = rnd.byte(); + uint32_t a = rnd.byte(); + + log_blk.m_solid_color[0] = (uint16_t)((r << 8) | r); + log_blk.m_solid_color[1] = (uint16_t)((g << 8) | g); + log_blk.m_solid_color[2] = (uint16_t)((b << 8) | b); + log_blk.m_solid_color[3] = (uint16_t)((a << 8) | a); + } + + const bool srgb = rnd.bit(); + + basist::color32 blk_a[astc_helpers::MAX_BLOCK_PIXELS]; + clear_obj(blk_a); + + itm.start(); + + bool status_a = astc_helpers::decode_block(log_blk, blk_a, bw, bh, srgb ? astc_helpers::cDecodeModeSRGB8 : astc_helpers::cDecodeModeLDR8); + if (!status_a) + { + error_printf("astc_helpers::decode_block() failed\n"); + return false; + } + + total_time_a += itm.get_elapsed_secs(); + + basist::color32 blk_b[astc_helpers::MAX_BLOCK_PIXELS]; + clear_obj(blk_b); + + itm.start(); + + bool status_b = astc_helpers::decode_block_xuastc_ldr(log_blk, blk_b, bw, bh, srgb ? 
astc_helpers::cDecodeModeSRGB8 : astc_helpers::cDecodeModeLDR8); + if (!status_b) + { + error_printf("astc_helpers::decode_block() failed\n"); + return false; + } + + total_time_b += itm.get_elapsed_secs(); + + for (uint32_t i = 0; i < bw * bh; i++) + { + if ((blk_a[i].r != blk_b[i].r) || (blk_a[i].g != blk_b[i].g) || (blk_a[i].b != blk_b[i].b) || (blk_a[i].a != blk_b[i].a)) + { + error_printf("decode block mismatch\n"); + return false; + } + } + + } // k + + } // j + + } // blk_size_index + + printf("ASTC block decoder vs. XUASTC LDR block decoding fuzz test succeeded\n"); + fmt_printf("Total time A: {}, B: {}\n", total_time_a, total_time_b); + + return true; +} + +static int main_internal(int argc, const char** argv) +{ + printf("Basis Universal LDR/HDR GPU Texture Supercompression System v" BASISU_TOOL_VERSION + #if defined(_ARM64EC_) || defined(_ARM64_) - " (ARM64)" + " (ARM64)" #elif defined(_M_IX86) - " (x86)" + " (x86)" #elif defined(_M_X64) || defined(_M_AMD64) - " (x64)" + " (x64)" +#elif defined(__wasi__) + " (WASI" + #if BASISU_WASI_THREADS + " Threaded" + #endif + ")" #endif - "\nCopyright (C) 2019-2025 Binomial LLC, All rights reserved\n"); + + "\nCopyright (C) 2019-2026 Binomial LLC, All rights reserved\n"); #ifdef FORCE_SAN_FAILURE force_san_failure(); @@ -4657,37 +6082,69 @@ static int main_internal(int argc, const char **argv) //interval_timer tm; //tm.start(); - + // See if OpenCL support has been disabled. 
We don't want to parse the command line until the lib is initialized bool use_opencl = false; bool opencl_force_serialization = false; + bool astc_peek_flag = false; + bool astc_fuzz_flag = false; for (int i = 1; i < argc; i++) { if ((strcmp(argv[i], "-opencl") == 0) || (strcmp(argv[i], "-clbench") == 0)) use_opencl = true; + if (strcmp(argv[i], "-opencl_serialize") == 0) opencl_force_serialization = true; + + if ((strcmp(argv[i], "-peek_astc") == 0) || (strcmp(argv[i], "-peek") == 0)) + astc_peek_flag = true; + + if (strcmp(argv[i], "-dev_astc_fuzz") == 0) + astc_fuzz_flag = true; } #if !BASISU_SUPPORT_OPENCL if (use_opencl) { - fprintf(stderr, "WARNING: -opencl specified, but OpenCL support was not enabled at compile time! With cmake, use -D BASISU_OPENCL=1. Falling back to CPU compression.\n"); + fprintf(stderr, "WARNING: -opencl specified, but OpenCL support was not defined or enabled at compile time! With cmake, use -D BASISU_OPENCL=1. Falling back to CPU compression.\n"); } #endif basisu_encoder_init(use_opencl, opencl_force_serialization); + + if (astc_fuzz_flag) + { + bool status = xuastc_ldr_decoder_fuzz_test(); + return status ? EXIT_SUCCESS : EXIT_FAILURE; + } - //printf("Encoder and transcoder libraries initialized in %3.3f ms\n", tm.get_elapsed_ms()); + if (astc_peek_flag) + { + if (argc != 3) + { + fmt_error_printf("Requires filename argument of .astc file\n"); + return EXIT_FAILURE; + } + + bool status = peek_astc_file(argv[2]); + return status ? 
EXIT_SUCCESS : EXIT_FAILURE; + } + //printf("Encoder and transcoder libraries initialized in %3.3f ms\n", tm.get_elapsed_ms()); + if (argc == 1) { print_usage(); return EXIT_FAILURE; } - + command_line_params opts; + +#if defined(__wasi__) && !BASISU_WASI_THREADS + opts.m_comp_params.m_multithreading = false; +#endif + if (!opts.parse(argc, argv)) { //print_usage(); @@ -4699,7 +6156,7 @@ static int main_internal(int argc, const char **argv) #else printf("No SSE, Multithreading: %u, Zstandard support: %u, OpenCL: %u\n", (uint32_t)opts.m_comp_params.m_multithreading, basist::basisu_transcoder_supports_ktx2_zstd(), opencl_is_available()); #endif - + if (!opts.process_listing_files()) return EXIT_FAILURE; @@ -4748,6 +6205,9 @@ static int main_internal(int argc, const char **argv) case cTestLDR: status = test_mode_ldr(opts); break; + case cTestXUASTCLDR: + status = test_mode_xuastc_ldr(opts); + break; case cTestHDR_4x4: status = test_mode_hdr(opts, basist::basis_tex_format::cUASTC_HDR_4x4, std::size(g_hdr_4x4_test_files), g_hdr_4x4_test_files, 0.0f); break; @@ -4755,11 +6215,11 @@ static int main_internal(int argc, const char **argv) status = test_mode_hdr(opts, basist::basis_tex_format::cASTC_HDR_6x6, std::size(g_hdr_6x6_test_files), g_hdr_6x6_test_files, 0.0f); break; case cTestHDR_6x6i: - status = test_mode_hdr(opts, basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE, std::size(g_hdr_6x6i_test_files), g_hdr_6x6i_test_files, 0.0f); - + status = test_mode_hdr(opts, basist::basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE, std::size(g_hdr_6x6i_test_files), g_hdr_6x6i_test_files, 0.0f); + if (status) { - status = test_mode_hdr(opts, basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE, std::size(g_hdr_6x6i_l_test_files), g_hdr_6x6i_l_test_files, 500.0f); + status = test_mode_hdr(opts, basist::basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE, std::size(g_hdr_6x6i_l_test_files), g_hdr_6x6i_l_test_files, 500.0f); } break; @@ -4786,7 +6246,7 @@ static int main_internal(int argc, 
const char **argv) //----------------------------------------------------------------------------------- #if CLEAR_WIN32_CONSOLE -void clear_console() +void clear_console() { //if (!IsDebuggerPresent()) // return; @@ -4810,6 +6270,39 @@ void clear_console() //----------------------------------------------------------------------------------- +// Attempt to detect AddressSanitizer (ASan) across compilers - only used for debug +// output purposes. +#ifndef DETECT_ASAN_H +#define DETECT_ASAN_H + +// Start with ASAN disabled +#undef USING_ASAN + +#if defined(__wasi__) + #define USING_ASAN 0 +#else + // --- Clang / Apple Clang: use __has_feature --- + #if defined(__has_feature) + # if __has_feature(address_sanitizer) + # define USING_ASAN 1 + # endif + #endif + + // --- GCC: __SANITIZE_ADDRESS__ --- + #if defined(__SANITIZE_ADDRESS__) + # define USING_ASAN 1 + #endif +#endif // #if defined(__wasi__) + +// If still undefined, ensure USING_ASAN is cleanly defined to 0 +#ifndef USING_ASAN +# define USING_ASAN 0 +#endif + +#endif // DETECT_ASAN_H + +//----------------------------------------------------------------------------------- + int main(int argc, const char** argv) { #ifdef _WIN32 @@ -4818,15 +6311,19 @@ int main(int argc, const char** argv) #if CLEAR_WIN32_CONSOLE clear_console(); + fmt_printf("{}\n", argv[0]); #endif #if defined(DEBUG) || defined(_DEBUG) - printf("DEBUG build\n"); + printf("DEBUG or _DEBUG defined\n"); +#endif +#if !defined(NDEBUG) + printf("NDEBUG is NOT defined\n"); #endif -#ifdef __SANITIZE_ADDRESS__ +#if USING_ASAN printf("Address sanitizer enabled\n"); #endif - + int status = EXIT_FAILURE; #if BASISU_CATCH_EXCEPTIONS @@ -4848,3 +6345,4 @@ int main(int argc, const char** argv) return status; } + diff --git a/external/basis_universal/basisu_tool_help.h b/external/basis_universal/basisu_tool_help.h new file mode 100644 index 0000000000..486e166465 --- /dev/null +++ b/external/basis_universal/basisu_tool_help.h @@ -0,0 +1,715 @@ +// Don't edit 
directly - see python script in cmd_help directory. +"See project and wiki at: https://github.com/BinomialLLC/basis_universal\n" +"\n" +"The default processing mode is compression of one or more .PNG/.TGA/.JPG/.QOI/\n" +".DDS/.EXR/.HDR files to a LDR or HDR .KTX2 file. Alternate modes:\n" +"\n" +" -unpack: Use transcoder to unpack a .basis/.KTX2 file to one or more .KTX, \n" +" .DDS, .PNG, .ASTC, etc. files.\n" +"\n" +" -info: Display high-level information about a .basis/.KTX2 file\n" +" \n" +" -validate: Validate and display information about a .basis/.KTX2 file\n" +" \n" +" -compare: Compare two LDR PNG/BMP/TGA/JPG/QOI images specified with -file,\n" +" output PSNR and SSIM statistics and RGB/A delta images\n" +"\n" +" -compare_hdr: Compare two HDR .EXR/.HDR images specified with -file, output\n" +" PSNR statistics and RGB delta images\n" +"\n" +" -tonemap: Tonemap an HDR or EXR image to PNG at multiple exposures, use -file\n" +" to specify filename\n" +"\n" +" -peek_astc: Read an .astc file and calculate statistics (for testing/dev)\n" +"\n" +" -version or --version: Print version and exit\n" +"\n" +"--- Intro:\n" +"\n" +"This tool compresses LDR/SDR and HDR images and textures to a Basis Universal\n" +"supercompressed GPU texture, which can be written to supercompressed .basis or\n" +"standard .KTX2 files. It supports a number of SDR and HDR codecs, each with\n" +"different quality, transcoding performance, and bitrate tradeoffs. A SDR/HDR\n" +"mipmap generator is also included. This tool can also examine and\n" +"unpack .KTX2/.basis files to .PNG, .KTX (v1), .ASTC, or .DDS files using its\n" +"single source file transcoder library.\n" +"\n" +"The list of supported texture/supercompressed texture codecs:\n" +" -LDR: ETC1S (SDR default), RDO UASTC LDR 4x4, ASTC or XUASTC LDR 4x4-12x12\n" +" -HDR: UASTC HDR 4x4 (HDR default), RDO ASTC HDR 6x6, UASTC HDR 6x6\n" +" \n" +"RDO=Rate-Distortion Optimization. 
Two key parameters (quality and effort) have\n" +"been unified across all the codecs:\n" +"\n" +"- The -quality X parameter, where X ranges from [0, 100], controls the\n" +"compression quality vs. bitrate (output file size) tradeoff for those codecs\n" +"supporting supercompression or RDO (Rate-distortion optimization). 100=max\n" +"quality, and lower levels produce smaller files with more distortion.\n" +"\n" +"- The -effort X parameter, where X ranges from [0, 10], controls the\n" +"compression speed (and max CPU usage) vs. max achievable quality tradeoff.\n" +"Low efforts result in more distortion/artifacts, but faster compression. Lower\n" +"efforts result in less utilization of the underlying GPU block format's\n" +"capabilities.\n" +"\n" +"Key Definitions: \n" +"\"Quality\" controls the explicit tradeoff between output distortion and output\n" +"file bitrate (in bits per pixel or target). At max quality (100) each\n" +"compressor will output the lowest distortion it's capable of at its currently\n" +"configured effort level.\n" +"\n" +"\"Effort\" controls how intensely a compressor uses the CPU to focus on each\n" +"block format's encoding capabilities. Low effort levels only target a set of\n" +"core or basic capabilities (specific to each output target format), while\n" +"higher effort levels allow each compressor to explore more of each target's\n" +"features (at the cost of higher CPU time). Lower effort levels result in more\n" +"brittle compression (higher distortion on tough image/texture features).\n" +"Effort=0 fastest compression, effort=10=extremely slow.\n" +"\n" +"Unless an explicit mode is specified, if one or more files have the .basis\n" +"or .KTX2 extension this tool defaults to unpack mode.\n" +"\n" +"By default, the compressor assumes the input is in the sRGB colorspace (like\n" +"typical photos/albedo textures). If the input is NOT sRGB (like a normal map),\n" +"be sure to specify -linear for less artifacts. 
Depending on the content type,\n" +"some experimentation may be needed.\n" +"\n" +"The TinyEXR library is used to read .EXR images. Crucially, this small library\n" +"does not support all .EXR compression methods. For unsupported images, you can\n" +"use a tool like ImageMagick to convert them to uncompressed .EXR.\n" +"\n" +"For .DDS source files: Mipmapped or plain 2D textures (but not cubemaps) are\n" +"supported. Only uncompressed 32-bit RGBA/BGRA, half float RGBA, or float\n" +"RGBA .DDS files are supported. In -tex_array mode, if a .DDS file is specified,\n" +"all source files must be in .DDS format.\n" +"\n" +"Filenames prefixed with a @ symbol are read as filename listing files. Listing\n" +"text files specify which actual filenames to process (one filename per line).\n" +"\n" +"--- High-Level Texture Mode (Codec) Selection:\n" +"\n" +" 1. -etc1s: Encode to supercompressed ETC1S LDR (the default for SDR/LDR\n" +" inputs). Roughly .8-2.5 bpp. Supports temporal texture supercompression\n" +" (texture video) with skip blocks (Conditional Replenishment), with global\n" +" codebooks shared across all frames.\n" +"\n" +" 2. -uastc/-uastc_ldr: UASTC LDR 4x4. Encode to UASTC LDR 4x4, a custom high\n" +" quality virtual texture format designed for fast transcoding to numerous\n" +" GPU texture formats. Roughly 5-8 bpp. Supports RDO encoding using -lambda X\n" +" option - see options below. In this mode the multi-target compressor\n" +" optimizes for a balance of transcoded ASTC 4x4 LDR and BC7 quality.\n" +"\n" +" 3. -hdr/-hdr_4x4: UASTC HDR 4x4. Encode input as UASTC HDR 4x4 (the default if\n" +" any input file has the .EXR or .HDR extension, or if any .DDS file is HDR).\n" +" Output is standard, but constrained, ASTC HDR 4x4. Roughly 5-8 bpp. In this\n" +" mode the dual-target compressor optimizes for a balance of transcoded ASTC\n" +" 4x4 HDR and BC6H quality.\n" +"\n" +" 4. -hdr_6x6: ASTC HDR 6x6. 
Encode input as RDO or highest quality standard\n" +" ASTC HDR 6x6. Use -quality (preferred) or -lambda X (low-level, try\n" +" 100-20000 or higher) option to enable RDO ASTC HDR 6x6, where x controls\n" +" the quality vs. size tradeoff. Roughly 1.2-3.2 bpp.\n" +"\n" +" 5. -hdr_6x6i: UASTC HDR 6x6. Encode input as supercompressed UASTC HDR 6x6\n" +" intermediate. Use -quality (preferred) or -lambda X (low-level, try\n" +" 100-20000 or higher) option to enable RDO UASTC HDR 6x6, where x controls\n" +" the quality vs. size tradeoff. Roughly 1-3.2 bpp.\n" +"\n" +" 6. XUASTC LDR 4x4-12x12: -ldr_4x4i, -ldr_5x4i, -ldr_5x5i, -ldr_6x5i,\n" +" -ldr_6x6i, -ldr_8x5i, -ldr_8x6i, -ldr_10x5i, -ldr_10x6i, -ldr_8x8i,\n" +" -ldr_10x8i, -ldr_10x10i, -ldr_12x10i, -ldr_12x12i:\n" +" Compress to supercompressed XUASTC LDR/SDR using the specific\n" +" ASTC block size. See additional ASTC/XUASTC LDR specific options\n" +" (-effort, -quality, -xy, -ts, -tl, etc.) below. Roughly .3-5.7 bpp\n" +"\n" +" 7. ASTC LDR 4x4-12x12: -ldr_4x4, -ldr_5x4, -ldr_5x5, -ldr_6x5, -ldr_6x6,\n" +" -ldr_8x5, -ldr_8x6, -ldr_10x5, -ldr_10x6, -ldr_8x8, -ldr_10x8,\n" +" -ldr_10x10, -ldr_12x10, -ldr_12x12:\n" +" Compress to standard or ZStd supercompressed ASTC LDR/SDR using\n" +" the specific ASTC block size. See additional ASTC LDR specific\n" +" options (-effort, -quality, -xy, -ts, -tl, etc.) below. .89-8 bpp before\n" +" ZStd compression.\n" +"\n" +"--- Tool Options:\n" +"\n" +" -ktx2: Write .KTX2 files (the default). 
By default, UASTC LDR/HDR 4x4 and ASTC\n" +" 6x6 files will be compressed using Zstandard unless -ktx2_no_zstandard is\n" +" specified.\n" +"\n" +" -basis: Write .basis files instead of .KTX2 files.\n" +"\n" +" -file filename.png/tga/jpg/qoi/exr/hdr: Input image filename, multiple images\n" +" are OK, use -file X for each input filename (prefixing input filenames\n" +" with -file is optional)\n" +"\n" +" -alpha_file filename.png/tga/jpg/qoi: Input alpha image filename, multiple\n" +" images are OK, use -file X for each input filename (must be paired\n" +" with -file), images converted to REC709 grayscale and used as input alpha\n" +"\n" +" -quiet or -no_status_output: Disable compressor's status output to stdout\n" +"\n" +" -output_file filename: Output .basis/.KTX2 filename\n" +"\n" +" -output_path: Output .basis/.KTX2 files to specified directory.\n" +"\n" +" -debug or -verbose: Enable codec debug print to stdout (slightly slower).\n" +"\n" +" -debug_images: Enable codec debug images (much slower).\n" +"\n" +" -stats: Compute and display image quality metrics (slightly to much slower).\n" +"\n" +" -individual: Process input images individually and output\n" +" multiple .basis/.KTX2 files (not as a texture array - this is now the default\n" +" as of v1.16)\n" +"\n" +" -parallel: Compress multiple textures simultaneously (one per thread), instead\n" +" of one at a time. Compatible with OpenCL mode. This is much faster, but in\n" +" OpenCL mode the driver is pushed harder, and the CLI output will be jumbled.\n" +"\n" +" -linear: Use linear colorspace metrics (instead of the default sRGB or scaled\n" +" RGB for HDR), write linear transfer function setting to KTX2/basis file, and\n" +" by default linear (not sRGB) mipmap filtering (unless overridden). Same\n" +" as -tl.\n" +"\n" +" -srgb: Use sRGB colorspace metrics, write sRGB transfer function setting to\n" +" KTX2/basis file, and by default use sRGB mipmap filtering (unless\n" +" overridden). 
Same as -ts.\n" +"\n" +" -tex_type <2d, 2darray, 3d, video, cubemap>: Set Basis file header's texture\n" +" type field. Cubemap arrays require multiples of 6 images, in X+, X-, Y+, Y-,\n" +" Z+, Z- order, each image must be the same resolutions. 2d=arbitrary 2D\n" +" images, 2darray=2D array, 3D=volume texture slices, video=video frames,\n" +" cubemap=array of faces. For 2darray/3d/cubemaps/video, each source image's\n" +" dimensions and # of mipmap levels must be the same. For video, the .basis\n" +" file will be written with the first frame being an I-Frame, and subsequent\n" +" frames being P-Frames (using conditional replenishment). Playback must always\n" +" occur in order from first to last image.\n" +"\n" +" -cubemap: same as -tex_type cubemap\n" +"\n" +" -tex_array: Process input images as a single texture array and write a\n" +" single .basis/.KTX2 file (the former default before v1.16)\n" +"\n" +" -fuzz_testing: Use with -validate: Disables CRC16 validation of file contents\n" +" before transcoding\n" +"\n" +" -multifile_printf: printf() format string to use to compose multiple filenames\n" +"\n" +" -multifile_first: The index of the first file to process, default is 0 (must\n" +" specify -multifile_printf and -multifile_num)\n" +"\n" +" -multifile_num: The total number of files to process.\n" +"\n" +" -opencl: Enable OpenCL usage (currently only accelerates ETC1S encoding)\n" +"\n" +" -opencl_serialize: Serialize all calls to the OpenCL driver (to work around\n" +" buggy drivers, only useful with -parallel)\n" +"\n" +"--- ETC1S specific options (-etc1s - the LDR/SDR default):\n" +"\n" +" -quality X and -effort X: Set quality (1-100) and effort (0-10) levels\n" +"\n" +" -q X: Low-level ETC1S quality level, 1-255, default is 128, lower=better\n" +" compression/lower quality/faster, higher=less compression/higher\n" +" quality/slower, default is 128. For even higher quality,\n" +" use -max_endpoints/-max_selectors. 
(-quality is preferred.)\n" +"\n" +" -comp_level X: Low-level ETC1S speed vs. quality tradeoff. Range is 0-6,\n" +" default is 1. Higher values=MUCH slower, but slightly higher quality. Higher\n" +" levels intended for videos. (-effort is preferred.)\n" +"\n" +" -max_endpoints X: ETC1S: Manually set the max number of color endpoint\n" +" clusters from 1-16128, use instead of -q\n" +"\n" +" -max_selectors X: ETC1S: Manually set the max number of color selector\n" +" clusters from 1-16128, use instead of -q\n" +"\n" +"--- UASTC LDR/HDR 4x4 specific options (-uastc or -uastc_ldr):\n" +"\n" +" -quality X and -effort X: Set quality (1-100) and effort (0-10) levels\n" +"\n" +" -uastc, -uastc_ldr or -uastc_ldr_4x4: Enable UASTC LDR 4x4 texture mode,\n" +" instead of the default ETC1S mode. Significantly higher texture quality, but\n" +" much larger (~8bpp) files. (Note that UASTC LDR 4x4 .basis files must be\n" +" losslessly compressed by the user.)\n" +"\n" +" -uastc_level: Set low-level UASTC LDR/HDR 4x4 encoding effort level. LDR Range\n" +" is [0,4], default is 2, higher=slower but higher quality. 0=fastest/lowest\n" +" quality, 3=slowest practical option, 4=impractically slow/highest achievable\n" +" quality. UASTC HDR 4x4 range is [0,4]: higher=slower, but higher quality. HDR\n" +" 4x4 default level=1.\n" +"\n" +" -uastc_rdo_l X: Enable UASTC LDR 4x4 RDO post-processing and set the low-level\n" +" UASTC LDR 4x4 RDO quality scalar (lambda) to X. Lower values=higher\n" +" quality/larger LZ compressed files, higher values=lower quality/smaller LZ\n" +" compressed files. Good range to try is [.25-10]. Note: Previous versions used\n" +" the -uastc_rdo_q option, which was removed because the RDO algorithm was\n" +" changed.\n" +"\n" +" -uastc_rdo_d X: Set UASTC LDR 4x4 RDO dictionary size in bytes. Default is\n" +" 4096, max is 65536. Lower values=faster, but less compression.\n" +"\n" +" -uastc_rdo_b X: Set UASTC LDR 4x4 RDO max smooth block error scale. 
Range is\n" +" [1,300]. Default is 10.0, 1.0=disabled. Larger values suppress more artifacts\n" +" (and allocate more bits) on smooth blocks.\n" +"\n" +" -uastc_rdo_s X: Set UASTC LDR 4x4 RDO max smooth block standard deviation.\n" +" Range is [.01,65536]. Default is 18.0. Larger values expand the range of\n" +" blocks considered smooth.\n" +"\n" +" -uastc_rdo_f: Don't favor simpler UASTC LDR 4x4 modes in RDO mode.\n" +"\n" +" -uastc_rdo_m: Disable RDO multithreading (slightly higher compression,\n" +" deterministic).\n" +"\n" +"--- UASTC HDR 4x4 specific options (-hdr or -hdr_4x4 - the HDR default):\n" +"\n" +" -hdr, -hdr_4x4, or -uastc_hdr_4x4: Enable UASTC HDR 4x4 mode\n" +"\n" +" -quality X and -effort X: Set quality (1-100) and effort (0-10) levels\n" +"\n" +" -uastc_level X: Sets the low-level UASTC HDR 4x4 compressor's effort level.\n" +" Valid range is [0,4]: higher=slower but higher quality. HDR\n" +" default=1. Level 0=fastest/lowest quality, 3=highest practical\n" +" setting, 4=exhaustive\n" +"\n" +" -hdr_uber_mode: Allow the UASTC HDR 4x4 encoder to try varying the CEM 11\n" +" selectors more for slightly higher quality (slower). This may negatively\n" +" impact BC6H quality, however.\n" +"\n" +" -hdr_ultra_quant: UASTC HDR 4x4: Try to find better quantized CEM 7/11\n" +" endpoint values (slower).\n" +"\n" +" -hdr_favor_astc: UASTC HDR 4x4: By default the dual-target UASTC HDR 4x4\n" +" encoder tries to strike a balance or even slightly favor BC6H quality. 
If\n" +" this option is specified, ASTC HDR 4x4 quality is favored instead.\n" +"\n" +"--- ASTC/UASTC HDR 6x6 specific options (-hdr_6x6 or -hdr_6x6i):\n" +"\n" +"Internally both modes use the same compressor which can generate either\n" +"standard ASTC HDR 6x6 (with optional RDO) or UASTC HDR 6x6 (supercompressed\n" +"with a custom format).\n" +"\n" +" -hdr_6x6 or -astc_hdr_6x6: Enable RDO ASTC HDR 6x6 mode\n" +"\n" +" -hdr_6x6i or -uastc_hdr_6x6: Enable UASTC HDR 6x6 mode\n" +"\n" +" -quality X and -effort X: Set quality (1-100) and effort (0-10) levels\n" +"\n" +" -lambda X: Low-level option to enable rate distortion optimization (RDO) and\n" +" directly control the HDR 6x6 compressor's lambda setting. The\n" +" higher this value, the lower the quality, but the smaller the file\n" +" size. Try 100-20000, or higher values on some images. Upconverted\n" +" SDR images can generally tolerate much higher lambda settings vs.\n" +" true HDR images.\n" +"\n" +" -hdr_6x6_level X: Low-level option to set the codec to 6x6 HDR mode (same\n" +" as -hdr_6x6) and controls encoder performance vs. max quality\n" +" tradeoff. X may range from [0,12]. Default level is 2. Higher\n" +" values result in better quality but slower encoding. Values above\n" +" 10 are extremely slow.\n" +"\n" +" -hdr_6x6i_level X: Low-level option to set the codec to 6x6 HDR intermediate\n" +" mode (same as -hdr_6x6i) and controls encoder performance vs. max\n" +" quality tradeoff. X may range from [0,12]. Default level is 2.\n" +"\n" +" -rec_2020: The input image's gamut is Rec. 2020 vs. the default Rec. 709 - for\n" +" accurate colorspace error calculations. 
This value will also be\n" +" written to the KTX2 file's header in the DFD.\n" +"\n" +" -hdr_6x6_jnd X, -hdr_6x6_extra_pats, -hdr_6x6_brute_force_pats,\n" +" -hdr_6x6_comp_levels X Y, or -hdr_6x6i_comp_levels X Y: Low-level\n" +" control over the encoder's configuration.\n" +"\n" +" -hdr_6x6i_16_compatibility: Write basisu v1.6 UASTC HDR 6x6i files (default)\n" +"\n" +" -hdr_6x6i_20_compatibility: Write basisu v2.0 UASTC HDR 6x6i files (slightly\n" +" lower bitrate, only compatible with v2.0+ transcoders however.)\n" +"\n" +"--- SDR/LDR->HDR upconversion options (only used when encoding to HDR formats\n" +"from an LDR/SDR source image):\n" +"\n" +" -hdr_ldr_no_srgb_to_linear: If specified, LDR images will NOT be converted to\n" +" normalized linear light (via a sRGB->Linear conversion) during SDR->HDR\n" +" upconversion before compressing as HDR.\n" +"\n" +" -hdr_ldr_upconversion_nit_multiplier X: Specify how many nits (candelas per\n" +" sq. meter) LDR/SDR images are converted to after converting to linear\n" +" light. Default is 100 nits. Note: Previous builds used 1 nit. Common\n" +" values are 80-100 nits.\n" +"\n" +"--- ASTC LDR/XUASTC LDR specific options (-ldr_4x4 or -ldr_4x4i, up to 12x12):\n" +"\n" +"Internally both modes (ASTC 4x4-12x12 and XUASTC 4x4-12x12) use the same\n" +"core compressor but with different outputs. All 14 standard ASTC block \n" +"sizes are supported (see the list below).\n" +"\n" +" -ldr_4x4-12x12 or -astc_ldr_4x4-12x12: Enable ASTC LDR 4x4-12x12 mode\n" +"\n" +" -ldr_4x4i-12x12 or -xuastc_ldr_4x4-12x12: Enable XUASTC LDR 4x4-12x12 mode\n" +"\n" +" -quality X: Enables lossy weight grid DCT and sets DCT quality level [1,100]\n" +" (defaults to no DCT). Higher=better quality, but higher bitrate. Good values\n" +" to try are 30-90. Default is no weight grid DCT.\n" +"\n" +" -effort X: Set encoder effort level [0,10]: Encoding speed tradeoff, \n" +" higher=slower but potentially higher overall quality. 
Default=3, 10=Insane.\n" +" \n" +" -xuastc_arith, -xuastc_hybrid, -xuastc_zstd: Set transcoding speed vs.\n" +" compression ratio tradeoff by selecting the output profile/syntax. Default\n" +" is -xuastc_zstd (fastest, lowest ratio). ZStd is fastest/lowest ratio, arith\n" +" is slowest/highest ratio (3-15% better vs. ZStd).\n" +"\n" +" -xy: Enables lossy supercompression using windowed/bounded RDO for extra\n" +" compression (default is lossless supercompression of the XUASTC texture data\n" +" unless DCT is enabled)\n" +"\n" +" -xyd: Disables lossy supercompression (default, but automatically enabled\n" +" if -quality less than 100 is specified )\n" +"\n" +" -xs: Force disable 2-3 subset usage in all effort levels (lower quality but\n" +" faster compression and faster transcoding to BC7 at certain block sizes)\n" +"\n" +" -xp: Force disable RGB dual plane usage in all effort levels (lower quality\n" +" but faster compression and faster transcoding to BC7 at certain block sizes)\n" +"\n" +" -ts: Use LDR sRGB ASTC decoding profile - the default. This parameter should\n" +" match how the developer will decode or sample the ASTC texture data. Inverse\n" +" of -tl. Same as -srgb.\n" +"\n" +" -tl: Use LDR Linear ASTC decoding profile. Inverse of -ts. Same as -linear.\n" +"\n" +" -weights X Y Z W: Set unsigned integer channel error weights. Defaults are\n" +" 1,1,1,1. Useful to favor certain channels during compression.\n" +"\n" +" -ls_min_psnr X, -ls_min_alpha_psnr X, -ls_thresh_psnr X, -ls_thresh_alpha_psnr\n" +" X, -ls_thresh_edge_psnr X, -ls_thresh_edge_alpha_psnr X: Windowed/bounded RDO\n" +" settings (Lossy supercompression must be enabled, or -xy.)\n" +"\n" +" -xuastc_blurring: Experimental - enable blurred block candidates (MUCH slower\n" +" compression, higher quality).\n" +"\n" +"These modes support all the standard ASTC block sizes. The larger the block\n" +"size, the lower the bitrate on disk and in memory, but the more noticeable the\n" +"artifacts. 
Some block sizes (4x4, 6x6, and 8x6) have specially optimized direct\n" +"transcoding paths to BC7. For faster direct BC7 transcoding at these block\n" +"sizes, disable RGB dual plane (-xp) and subset usage (-xs).\n" +"\n" +" Block Size Base/Memory Size\n" +" 1. 4x4 8.00 bpp\n" +" 2. 5x4 6.40 bpp\n" +" 3. 5x5 5.12 bpp\n" +" 4. 6x5 4.27 bpp\n" +" 5. 6x6 3.56 bpp\n" +" 6. 8x5 3.20 bpp\n" +" 7. 8x6 2.67 bpp\n" +" 8. 10x5 2.56 bpp\n" +" 9. 10x6 2.13 bpp\n" +" 10. 8x8 2.00 bpp\n" +" 11. 10x8 1.60 bpp\n" +" 12. 10x10 1.28 bpp\n" +" 13. 12x10 1.07 bpp\n" +" 14. 12x12 0.89 bpp\n" +"\n" +"--- More options:\n" +"\n" +" -test: Run an automated LDR ETC1S/UASTC LDR 4x4 encoding and transcoding test.\n" +" Returns EXIT_FAILURE on any failures.\n" +"\n" +" -test_hdr_4x4/-test_hdr_6x6/-test_hdr_6x6i: Run automated UASTC HDR encoding\n" +" and transcoding tests. Returns EXIT_FAILURE on any failures.\n" +"\n" +" -test_xuastc: Run an automated XUASTC LDR encoding and transcoding test.\n" +" Returns EXIT_FAILURE on any failures.\n" +"\n" +" -test_dir: Optional directory of test files. 
Defaults to \"../test_files\".\n" +"\n" +" -y_flip: Flip input images vertically before compression\n" +"\n" +" -normal_map: Tunes codec parameters for better quality on normal maps (linear\n" +" colorspace metrics, linear mipmap filtering, no selector RDO, no sRGB)\n" +"\n" +" -no_alpha: Always output non-alpha basis files, even if one or more inputs has\n" +" alpha\n" +"\n" +" -force_alpha: Always output alpha basis files, even if no inputs has alpha\n" +"\n" +" -separate_rg_to_color_alpha: Separate input R and G channels to RGB and A (for\n" +" tangent space XY normal maps)\n" +"\n" +" -swizzle rgba: Specify swizzle for the 4 input color channels using r, g, b\n" +" and a (the -separate_rg_to_color_alpha flag is equivalent to rrrg)\n" +"\n" +" -renorm: Renormalize each input image before any further\n" +" processing/compression\n" +"\n" +" -no_multithreading: Disable multithreading\n" +"\n" +" -max_threads X: Use at most X threads total when multithreading is enabled\n" +" (this includes the main thread)\n" +"\n" +" -wasi_threads: Set number of threads to use in WASI threading builds\n" +" (default=8, only used in WASI threading builds)\n" +"\n" +" -no_ktx: Disable KTX writing when unpacking (faster, less output files)\n" +"\n" +" -ktx_only: Only write KTX files when unpacking (faster, less output files)\n" +"\n" +" -write_out: Write 3dfx OUT files when unpacking FXT1 textures\n" +"\n" +" -format_only: Only unpack the specified format, by its numeric code.\n" +"\n" +" -etc1_only: Only unpack to ETC1, skipping the other texture formats\n" +" during -unpack\n" +"\n" +" -disable_hierarchical_endpoint_codebooks: Disable hierarchical endpoint\n" +" codebook usage, slower but higher quality on some compression levels\n" +"\n" +" -compare_ssim: Compute and display SSIM of image comparison (slow)\n" +"\n" +" -compare_plot: Display histogram plots in -compare mode\n" +"\n" +" -bench: UASTC benchmark mode, for development only\n" +"\n" +" -resample X Y: Resample all input 
textures to XxY pixels using a box filter\n" +"\n" +" -resample_factor X: Resample all input textures by scale factor X using a box\n" +" filter\n" +"\n" +" -no_sse: Forbid all SSE instruction set usage\n" +"\n" +" -validate_etc1s: Validate internal ETC1S compressor's data structures during\n" +" compression (slower, intended for development).\n" +"\n" +" -ktx2_animdata_duration X: Set KTX2animData duration field to integer value X\n" +" (only valid/useful for -tex_type video, default is 1)\n" +"\n" +" -ktx2_animdata_timescale X: Set KTX2animData timescale field to integer value\n" +" X (only valid/useful for -tex_type video, default is 15)\n" +"\n" +" -ktx2_animdata_loopcount X: Set KTX2animData loopcount field to integer value\n" +" X (only valid/useful for -tex_type video, default is 0)\n" +"\n" +" -framerate X: Set framerate in .basis header to X frames/sec.\n" +"\n" +" -ktx2_no_zstandard: Don't compress UASTC texture data using Zstandard -- store\n" +" it uncompressed instead.\n" +"\n" +" -ktx2_zstandard_level X: Set ZStandard compression level to X (see Zstandard\n" +" documentation, default level is 6)\n" +"\n" +" -tonemap_dither: Dither tonemapper's 8-bit/component output by adding a small\n" +" amount of white noise, only used with -tonemap mode\n" +"\n" +"--- Mipmap Generator Options:\n" +"\n" +"By default, SDR textures will be converted from sRGB to linear light before\n" +"mipmap filtering, then back to sRGB (for the RGB color channels) unless -linear\n" +"is specified. You can override this behavior with -mip_srgb/-mip_linear.\n" +"\n" +" -mipmap: Generate mipmaps for each source image\n" +"\n" +" -mip_srgb: Convert image to linear before filtering, then back to sRGB.\n" +" (This is set automatically by default, unless you override it.)\n" +"\n" +" -mip_linear: Keep image in linear light during mipmap filtering (i.e. do not\n" +" convert to/from sRGB for filtering purposes). 
(This is set automatically by\n" +" default, unless you override it.)\n" +"\n" +" -mip_scale X: Set mipmap filter kernel's scale, lower=sharper, higher=more\n" +" blurry, default is 1.0 (quite conservative).\n" +"\n" +" -mip_filter X: Set mipmap filter kernel, default is kaiser. Supported filters:\n" +" box, tent, bell, b-spline, mitchell, blackman, lanczos3, lanczos4, lanczos6,\n" +" lanczos12, kaiser, gaussian, catmullrom, quadratic_interp, quadratic_approx,\n" +" quadratic_mix\n" +"\n" +" -mip_renorm: Renormalize normal map to unit length vectors after filtering\n" +"\n" +" -mip_clamp: Use clamp addressing on borders, instead of wrapping\n" +"\n" +" -mip_fast: Use faster mipmap generation (resample from previous mip, not\n" +" always first/largest mip level). The default.\n" +"\n" +" -mip_slow: Always resample each mipmap level starting from the largest mipmap.\n" +" Higher quality, but slower. Opposite of -mip_fast. \n" +"\n" +" -mip_smallest X: Set smallest pixel dimension for generated mipmaps, default\n" +" is 1 pixel \n" +"\n" +"--- Transcoding Options (used while unpacking, validating after compression):\n" +"\n" +"These settings control the \"decode flags\" used while transcoding:\n" +"\n" +" -higher_quality_transcoding: Enable higher quality, but slower, transcoding\n" +"\n" +" -no_deblocking: Always disable adaptive deblocking filter on all block sizes\n" +" (XUASTC/ASTC LDR 4x4-12x12 only). By default only block sizes >8x6 are\n" +" deblocked while transcoding. 
(No deblocking ever occurs when transcoding to\n" +" ASTC: only when re-encoding ASTC to another format, to lower artifacts.)\n" +"\n" +" -force_deblocking: Always use adaptive deblocking filter, even for block sizes\n" +" <= 8x6 (XUASTC/ASTC LDR 4x4-12x12 only)\n" +"\n" +" -stronger_deblocking: Use stronger adaptive deblocking filtering (XUASTC/ASTC\n" +" LDR 4x4-12x12 only)\n" +"\n" +" -no_etc1s_chroma_filtering: Disable adaptive ETC1S transcode chroma filter,\n" +" for faster transcoding to BC7.\n" +"\n" +" -fast_xuastc_ldr_bc7_transcoding: Use much faster, but lower quality, XUASTC\n" +" LDR 4x4/6x6/8x6 direct BC7 transcoders (the default)\n" +"\n" +" -no_fast_xuastc_ldr_bc7_transcoding: Disable much faster, but slightly lower\n" +" quality, XUASTC LDR 4x4/6x6/8x6 direct BC7 transcoders\n" +"\n" +"--- Low-Level ETC1S backend endpoint/selector RDO codec options:\n" +"\n" +" -no_selector_rdo: Disable backend's selector rate distortion optimizations\n" +" (slightly faster, less noisy output, but lower quality per output bit)\n" +"\n" +" -selector_rdo_thresh X: Set selector RDO quality threshold, default is 1.25,\n" +" lower is higher quality but less quality per output bit (try 1.0-3.0)\n" +"\n" +" -no_endpoint_rdo: Disable backend's endpoint rate distortion optimizations\n" +" (slightly faster, less noisy output, but lower quality per output bit)\n" +"\n" +" -endpoint_rdo_thresh X: Set endpoint RDO quality threshold, default is 1.5,\n" +" lower is higher quality but less quality per output bit (try 1.0-3.0)\n" +"\n" +"--- Set various low-level fields in the Basis file header:\n" +"\n" +" -userdata0 X: Set 32-bit userdata0 field in Basis file header to X (X is a\n" +" signed 32-bit int)\n" +"\n" +" -userdata1 X: Set 32-bit userdata1 field in Basis file header to X (X is a\n" +" signed 32-bit int)\n" +"\n" +"--- Example LDR ETC1S/UASTC LDR 4x4 command lines:\n" +"\n" +" - basisu x.png : Compress sRGB image x.png to x.ktx2 using default settings\n" +" (multiple 
filenames OK, use -tex_array if you want a tex array vs. multiple\n" +" output files)\n" +"\n" +" - basisu -basis x.qoi : Compress sRGB image x.qoi to x.basis (supports 24-bit\n" +" or 32-bit .QOI files)\n" +"\n" +" - basisu x.ktx2 : Unpack x.ktx2 to PNG/KTX files (multiple filenames OK)\n" +"\n" +" - basisu x.basis : Unpack x.basis to PNG/KTX files (multiple filenames OK)\n" +"\n" +" - basisu -uastc x.png -uastc_rdo_l 2.0 -ktx2 -stats : Compress to a\n" +" UASTC .KTX2 file with RDO (rate distortion optimization) to reduce .KTX2\n" +" compressed file size\n" +"\n" +" - basisu -file x.png -mipmap -y_flip : Compress a mipmapped x.ktx2 file from\n" +" an sRGB image named x.png, Y flip each source image\n" +"\n" +" - basisu -validate -file x.basis : Validate x.basis (check header, check file\n" +" CRC's, attempt to transcode all slices)\n" +"\n" +" - basisu -unpack -file x.basis : Validates, transcodes and unpacks x.basis to\n" +" mipmapped .KTX and RGB/A .PNG files (transcodes to all supported GPU texture\n" +" formats)\n" +"\n" +" - basisu -q 255 -file x.png -mipmap -debug -stats : Compress sRGB x.png to\n" +" x.ktx2 at quality level 255 with compressor debug output/statistics\n" +"\n" +" - basisu -linear -max_endpoints 16128 -max_selectors 16128 -file x.png :\n" +" Compress non-sRGB x.png to x.ktx2 using the largest supported manually\n" +" specified codebook sizes\n" +"\n" +" - basisu -basis -comp_level 2 -max_selectors 8192 -max_endpoints\n" +" 8192 -tex_type video -framerate 20 -multifile_printf\n" +" \"x%02u.png\" -multifile_first 1 -multifile_num 20 : Compress a 20 sRGB source\n" +" image video sequence (x01.png, x02.png, x03.png, etc.) to x01.basis\n" +"\n" +"--- Example UASTC HDR 4x4 command lines:\n" +"\n" +" - basisu x.exr : Compress a HDR .EXR (or .HDR) image to a UASTC HDR 4x4 .KTX2\n" +" file. LDR/SDR images will be upconverted to linear light HDR before\n" +" compression. 
See HDR upconversion options, above.\n" +"\n" +" - basisu -hdr_4x4 x.exr : Compress a HDR .EXR image to a UASTC HDR 4x4 .KTX2\n" +" file.\n" +"\n" +" - basisu x.hdr -uastc_level 0 : Compress a HDR .hdr image to a UASTC HDR\n" +" 4x4 .KTX2 file, fastest encoding but lowest quality\n" +"\n" +" - basisu -hdr x.png : Compress a LDR .PNG image to UASTC HDR 4x4 (image is\n" +" converted from sRGB to linear light first, use -hdr_ldr_no_srgb_to_linear to\n" +" disable)\n" +"\n" +" - basisu x.hdr -uastc_level 3 : Compress a HDR .hdr image to UASTC HDR 4x4 at\n" +" higher quality (-uastc_level 4 is highest quality, but very slow encoding)\n" +"\n" +" - basisu x.hdr -uastc_level 3 -mipmap -basis -stats -debug -debug_images :\n" +" Compress a HDR .hdr image to UASTC HDR 4x4, .basis output file, at higher\n" +" quality, generate mipmaps, output statistics and debug information, and write\n" +" tone mapped debug images\n" +"\n" +" - basisu x.hdr -stats -hdr_favor_astc -hdr_uber_mode -uastc_level 4 : Highest\n" +" achievable ASTC HDR 4x4 quality (very slow encoding, BC6H quality is traded\n" +" off)\n" +"\n" +"--- Example RDO ASTC/UASTC HDR 6x6 command lines:\n" +"\n" +" - basisu -hdr_6x6 x.exr : Compress a HDR .EXR (or .HDR) image to a UASTC HDR\n" +" 6x6 .KTX2 file. LDR/SDR images will be upconverted to linear light HDR before\n" +" compression. 
See HDR upconversion options, above.\n" +"\n" +" - basisu -lambda 1000 -hdr_6x6 x.exr : Compress a HDR .EXR (or .HDR) image to\n" +" a UASTC HDR 6x6 .KTX2 file with rate-distortion optimization (RDO), at lambda\n" +" level 1000.\n" +"\n" +" - basisu -hdr_6x6i x.exr : Compress a HDR .EXR image to a compressed\n" +" intermediate format UASTC HDR 6x6 .KTX2 file.\n" +"\n" +" - basisu -lambda 1000 -hdr_6x6i x.exr : Compress a HDR .EXR image to a\n" +" compressed intermediate format UASTC HDR 6x6 .KTX2 file with rate-distortion\n" +" optimization (RDO), at lambda level 1000.\n" +"\n" +"--- Example ASTC/XUASTC LDR 4x4-12x12 command lines:\n" +"\n" +" - basisu -ldr_6x6i -quality 75 -xuastc_arith test.png : Compress test.png to XUASTC\n" +" LDR 6x6 using weight grid DCT with setting 75 and the arith profile for\n" +" higher compression.\n" +"\n" +" - basisu -ldr_4x4 -effort 5 test.png : Compress test.png to ASTC LDR 4x4\n" +"\n" +" - basisu -mipmap -ldr_10x5i test.png : Compress test.png to XUASTC LDR 10x5,\n" +" using lossless ZStd supercompression, with mipmaps\n" +"\n" +"--- ETC1S Texture Video Notes: Use -comp_level 2 or higher for better codebook\n" +"generation, specify very large codebooks using -max_endpoints\n" +"and -max_selectors, and reduce the default endpoint RDO threshold\n" +"(-endpoint_rdo_thresh) to around 1.25. Videos may have mipmaps and alpha\n" +"channels. Videos must always be played back by the transcoder in first to last\n" +"image order. Video files currently use I-Frames on the first image, and\n" +"P-Frames using conditional replenishment on subsequent frames.\n" +"\n" +"--- Low-level ETC1S compression (Effort) Level (-comp_level X) Details \n" +"\n" +"This setting controls the ETC1S speed vs. quality tradeoff. (Use -q to control\n" +"the quality vs. compressed size tradeoff.):\n" +"\n" +" - Level 0: Fastest, but has marginal quality and can be brittle on complex\n" +"images. Avg. 
Y dB: 35.45\n" +"\n" +" - Level 1: Hierarchical codebook searching, faster ETC1S encoding. 36.87 dB,\n" +"~1.4x slower vs. level 0. (This is the default setting.)\n" +"\n" +" - Level 2: Use this or higher for video. Hierarchical codebook searching.\n" +"36.87 dB, ~1.4x slower vs. level 0. (This is the v1.12's default setting.)\n" +"\n" +" - Level 3: Full codebook searching. 37.13 dB, ~1.8x slower vs. level 0.\n" +"(Equivalent to the initial release's default settings.)\n" +"\n" +" - Level 4: Hierarchical codebook searching, codebook k-means iterations. 37.15\n" +"dB, ~4x slower vs. level 0\n" +"\n" +" - Level 5: Full codebook searching, codebook k-means iterations. 37.41 dB,\n" +"~5.5x slower vs. level 0.\n" +"\n" +" - Level 6: Full codebook searching, twice as many codebook k-means iterations,\n" +"best ETC1 endpoint opt. 37.43 dB, ~12x slower vs. level 0" diff --git a/external/basis_universal/bin/basisu_mt.wasm b/external/basis_universal/bin/basisu_mt.wasm new file mode 100644 index 0000000000..983f8a7b4f Binary files /dev/null and b/external/basis_universal/bin/basisu_mt.wasm differ diff --git a/external/basis_universal/bin/basisu_st.wasm b/external/basis_universal/bin/basisu_st.wasm new file mode 100644 index 0000000000..e21942b390 Binary files /dev/null and b/external/basis_universal/bin/basisu_st.wasm differ diff --git a/external/basis_universal/bin/clean.bat b/external/basis_universal/bin/clean.bat index d31658fc0a..6952615b57 100644 --- a/external/basis_universal/bin/clean.bat +++ b/external/basis_universal/bin/clean.bat @@ -1,7 +1,13 @@ +@echo off del *.exr del *.png -del *.ktx -del *.ktx2 del *.dds del *.astc -del *.basis +del *.tga + +for %%F in (*.ktx) do ( + if /I "%%~xF"==".ktx" ( + echo Deleting "%%F" + del "%%F" + ) +) diff --git a/external/basis_universal/bin/clean.sh b/external/basis_universal/bin/clean.sh new file mode 100644 index 0000000000..e8b0b96b72 --- /dev/null +++ b/external/basis_universal/bin/clean.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env 
bash + +rm -f *.exr +rm -f *.png +rm -f *.dds +rm -f *.astc +rm -f *.ktx +rm -f *.tga + diff --git a/external/basis_universal/bin/example_capi_mt.wasm b/external/basis_universal/bin/example_capi_mt.wasm new file mode 100644 index 0000000000..dcf67f855e Binary files /dev/null and b/external/basis_universal/bin/example_capi_mt.wasm differ diff --git a/external/basis_universal/bin/example_capi_st.wasm b/external/basis_universal/bin/example_capi_st.wasm new file mode 100644 index 0000000000..e100f0da07 Binary files /dev/null and b/external/basis_universal/bin/example_capi_st.wasm differ diff --git a/external/basis_universal/bin/example_mt.wasm b/external/basis_universal/bin/example_mt.wasm new file mode 100644 index 0000000000..807adea3aa Binary files /dev/null and b/external/basis_universal/bin/example_mt.wasm differ diff --git a/external/basis_universal/bin/example_st.wasm b/external/basis_universal/bin/example_st.wasm new file mode 100644 index 0000000000..86b5764d2e Binary files /dev/null and b/external/basis_universal/bin/example_st.wasm differ diff --git a/external/basis_universal/bin/example_transcoding_mt.wasm b/external/basis_universal/bin/example_transcoding_mt.wasm new file mode 100644 index 0000000000..ad9c6f3bd6 Binary files /dev/null and b/external/basis_universal/bin/example_transcoding_mt.wasm differ diff --git a/external/basis_universal/bin/example_transcoding_st.wasm b/external/basis_universal/bin/example_transcoding_st.wasm new file mode 100644 index 0000000000..dcc76967a2 Binary files /dev/null and b/external/basis_universal/bin/example_transcoding_st.wasm differ diff --git a/external/basis_universal/bin/runw.bat b/external/basis_universal/bin/runw.bat new file mode 100644 index 0000000000..016441957f --- /dev/null +++ b/external/basis_universal/bin/runw.bat @@ -0,0 +1,6 @@ +@ECHO OFF +REM Example: "runw.bat test_images/xmen.png" +REM Example: "runw.bat /bik/bik1.png" + +REM wasmtime --dir=. --dir=.. 
--dir=..\test_files --dir=d:/dev/test_images::/test_images --dir=d:/dev/test_images/bik::/bik basisu_st.wasm %* +wasmtime --dir=. --dir=.. --dir=..\test_files basisu_st.wasm %* diff --git a/external/basis_universal/bin/runw.sh b/external/basis_universal/bin/runw.sh new file mode 100644 index 0000000000..6b7f3952fd --- /dev/null +++ b/external/basis_universal/bin/runw.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +wasmtime run --dir=. --dir=../test_files ./basisu_st.wasm "$@" diff --git a/external/basis_universal/bin/runwt.bat b/external/basis_universal/bin/runwt.bat new file mode 100644 index 0000000000..d397419072 --- /dev/null +++ b/external/basis_universal/bin/runwt.bat @@ -0,0 +1,7 @@ +@ECHO OFF +REM Example: "runwt.bat test_images/xmen.png" +REM Example: "runwt.bat /bik/bik1.png" + +REM wasmtime --wasm threads=yes --wasi threads=yes --dir=. --dir=.. --dir=..\test_files::/test_files --dir=d:/dev/test_images::/test_images --dir=d:/dev/test_images/bik::/bik basisu_mt.wasm %* + +wasmtime --wasm threads=yes --wasi threads=yes --dir=. --dir=.. --dir=..\test_files::/test_files basisu_mt.wasm %* diff --git a/external/basis_universal/bin/runwt.sh b/external/basis_universal/bin/runwt.sh new file mode 100644 index 0000000000..3009600e42 --- /dev/null +++ b/external/basis_universal/bin/runwt.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +wasmtime run --dir=. --dir=../test_files --wasm threads=yes --wasi threads=yes ./basisu_mt.wasm "$@" diff --git a/external/basis_universal/bin/test_wasm.bat b/external/basis_universal/bin/test_wasm.bat new file mode 100644 index 0000000000..0091bbc5af --- /dev/null +++ b/external/basis_universal/bin/test_wasm.bat @@ -0,0 +1,2 @@ +wasmtime --dir=. --dir=.. 
--dir=..\test_files basisu.wasm -test + diff --git a/external/basis_universal/build_python_win.bat b/external/basis_universal/build_python_win.bat new file mode 100644 index 0000000000..3e5401be27 --- /dev/null +++ b/external/basis_universal/build_python_win.bat @@ -0,0 +1,59 @@ +@echo off + +echo =========================================== +echo Building Python extensions (Windows) +echo =========================================== + +REM Set the Python executable path (edit if needed) +set PY_EXE=C:\Users\richg\AppData\Local\Programs\Python\Python312\python.exe + +REM Ensure Python exists +if not exist "%PY_EXE%" ( + echo ERROR: Python 3.12 executable not found: + echo %PY_EXE% + echo Please install Python 3.12 or update PY_EXE in this script. + exit /b 1 +) + +REM Create build directory if missing +if not exist build_python_win ( + echo Creating build_python_win directory... + mkdir build_python_win +) + +cd build_python_win + +echo Running CMake configure... +cmake -G "Visual Studio 17 2022" -A x64 ^ + -DBUILD_PYTHON=ON ^ + -DBUILD_WASM=OFF ^ + -DPYTHON_EXECUTABLE="%PY_EXE%" ^ + .. + +IF ERRORLEVEL 1 ( + echo. + echo *** CMake configure FAILED *** + exit /b 1 +) + +echo. +echo CMake configure OK. +echo Starting build... + +cmake --build . --config Release + +IF ERRORLEVEL 1 ( + echo. + echo *** Build FAILED *** + exit /b 1 +) + +echo. +echo =========================================== +echo Build SUCCESSFUL! +echo Output in: python/basisu_py/ +echo =========================================== + +cd .. + +exit /b 0 diff --git a/external/basis_universal/cmd_help/cmd_help.txt b/external/basis_universal/cmd_help/cmd_help.txt new file mode 100644 index 0000000000..231147db58 --- /dev/null +++ b/external/basis_universal/cmd_help/cmd_help.txt @@ -0,0 +1,713 @@ +Basis Universal LDR/HDR GPU Texture Supercompression System v1.65.0 (x64) +Copyright (C) 2019-2026 Binomial LLC, All rights reserved + +Usage: basisu filename [filename ...] 
+See project and wiki at: https://github.com/BinomialLLC/basis_universal + +The default processing mode is compression of one or more .PNG/.TGA/.JPG/.QOI/ +.DDS/.EXR/.HDR files to a LDR or HDR .KTX2 file. Alternate modes: + + -unpack: Use transcoder to unpack a .basis/.KTX2 file to one or more .KTX, + .DDS, .PNG, .ASTC, etc. files. + + -info: Display high-level information about a .basis/.KTX2 file + + -validate: Validate and display information about a .basis/.KTX2 file + + -compare: Compare two LDR PNG/BMP/TGA/JPG/QOI images specified with -file, + output PSNR and SSIM statistics and RGB/A delta images + + -compare_hdr: Compare two HDR .EXR/.HDR images specified with -file, output + PSNR statistics and RGB delta images + + -tonemap: Tonemap an HDR or EXR image to PNG at multiple exposures, use -file + to specify filename + + -peek_astc: Read an .astc file and calculate statistics (for testing/dev) + + -version or --version: Print version and exit + +--- Intro: + +This tool compresses LDR/SDR and HDR images and textures to a Basis Universal +supercompressed GPU texture, which can be written to supercompressed .basis or +standard .KTX2 files. It supports a number of SDR and HDR codecs, each with +different quality, transcoding performance, and bitrate tradeoffs. A SDR/HDR +mipmap generator is also included. This tool can also examine and +unpack .KTX2/.basis files to .PNG, .KTX (v1), .ASTC, or .DDS files using its +single source file transcoder library. + +The list of supported texture/supercompressed texture codecs: + -LDR: ETC1S (SDR default), RDO UASTC LDR 4x4, ASTC or XUASTC LDR 4x4-12x12 + -HDR: UASTC HDR 4x4 (HDR default), RDO ASTC HDR 6x6, UASTC HDR 6x6 + +RDO=Rate-Distortion Optimization. Two key parameters (quality and effort) have +been unified across all the codecs: + +- The -quality X parameter, where X ranges from [0, 100], controls the +compression quality vs. 
bitrate (output file size) tradeoff for those codecs +supporting supercompression or RDO (Rate-distortion optimization). 100=max +quality, and lower levels produce smaller files with more distortion. + +- The -effort X parameter, where X ranges from [0, 10], controls the +compression speed (and max CPU usage) vs. max achievable quality tradeoff. +Low efforts result in more distortion/artifacts, but faster compression. Lower +efforts result in less utilization of the underlying GPU block format's +capabilities. + +Key Definitions: +"Quality" controls the explicit tradeoff between output distortion and output +file bitrate (in bits per pixel or target). At max quality (100) each +compressor will output the lowest distortion it's capable of at its currently +configured effort level. + +"Effort" controls how intensely a compressor uses the CPU to focus on each +block format's encoding capabilities. Low effort levels only target a set of +core or basic capabilities (specific to each output target format), while +higher effort levels allow each compressor to explore more of each target's +features (at the cost of higher CPU time). Lower effort levels result in more +brittle compression (higher distortion on tough image/texture features). +Effort=0 fastest compression, effort=10=extremely slow. + +Unless an explicit mode is specified, if one or more files have the .basis +or .KTX2 extension this tool defaults to unpack mode. + +By default, the compressor assumes the input is in the sRGB colorspace (like +typical photos/albedo textures). If the input is NOT sRGB (like a normal map), +be sure to specify -linear for less artifacts. Depending on the content type, +some experimentation may be needed. + +The TinyEXR library is used to read .EXR images. Crucially, this small library +does not support all .EXR compression methods. For unsupported images, you can +use a tool like ImageMagick to convert them to uncompressed .EXR. 
+ +For .DDS source files: Mipmapped or plain 2D textures (but not cubemaps) are +supported. Only uncompressed 32-bit RGBA/BGRA, half float RGBA, or float +RGBA .DDS files are supported. In -tex_array mode, if a .DDS file is specified, +all source files must be in .DDS format. + +Filenames prefixed with a @ symbol are read as filename listing files. Listing +text files specify which actual filenames to process (one filename per line). + +--- High-Level Texture Mode (Codec) Selection: + + 1. -etc1s: Encode to supercompressed ETC1S LDR (the default for SDR/LDR + inputs). Roughly .8-2.5 bpp. Supports temporal texture supercompression + (texture video) with skip blocks (Conditional Replenishment), with global + codebooks shared across all frames. + + 2. -uastc/-uastc_ldr: UASTC LDR 4x4. Encode to UASTC LDR 4x4, a custom high + quality virtual texture format designed for fast transcoding to numerous + GPU texture formats. Roughly 5-8 bpp. Supports RDO encoding using -lambda X + option - see options below. In this mode the multi-target compressor + optimizes for a balance of transcoded ASTC 4x4 LDR and BC7 quality. + + 3. -hdr/-hdr_4x4: UASTC HDR 4x4. Encode input as UASTC HDR 4x4 (the default if + any input file has the .EXR or .HDR extension, or if any .DDS file is HDR). + Output is standard, but constrained, ASTC HDR 4x4. Roughly 5-8 bpp. In this + mode the dual-target compressor optimizes for a balance of transcoded ASTC + 4x4 HDR and BC6H quality. + + 4. -hdr_6x6: ASTC HDR 6x6. Encode input as RDO or highest quality standard + ASTC HDR 6x6. Use -quality (preferred) or -lambda X (low-level, try + 100-20000 or higher) option to enable RDO ASTC HDR 6x6, where x controls + the quality vs. size tradeoff. Roughly 1.2-3.2 bpp. + + 5. -hdr_6x6i: UASTC HDR 6x6. Encode input as supercompressed UASTC HDR 6x6 + intermediate. Use -quality (preferred) or -lambda X (low-level, try + 100-20000 or higher) option to enable RDO UASTC HDR 6x6, where x controls + the quality vs. 
size tradeoff. Roughly 1-3.2 bpp. + + 6. XUASTC LDR 4x4-12x12: -ldr_4x4i, -ldr_5x4i, -ldr_5x5i, -ldr_6x5i, + -ldr_6x6i, -ldr_8x5i, -ldr_8x6i, -ldr_10x5i, -ldr_10x6i, -ldr_8x8i, + -ldr_10x8i, -ldr_10x10i, -ldr_12x10i, -ldr_12x12i: + Compress to supercompressed XUASTC LDR/SDR using the specific + ASTC block size. See additional ASTC/XUASTC LDR specific options + (-effort, -quality, -xy, -ts, -tl, etc.) below. Roughly .3-5.7 bpp + + 7. ASTC LDR 4x4-12x12: -ldr_4x4, -ldr_5x4, -ldr_5x5, -ldr_6x5, -ldr_6x6, + -ldr_8x5, -ldr_8x6, -ldr_10x5, -ldr_10x6, -ldr_8x8, -ldr_10x8, + -ldr_10x10, -ldr_12x10, -ldr_12x12: + Compress to standard or ZStd supercompressed ASTC LDR/SDR using + the specific ASTC block size. See additional ASTC LDR specific + options (-effort, -quality, -xy, -ts, -tl, etc.) below. .89-8 bpp before + ZStd compression. + +--- Tool Options: + + -ktx2: Write .KTX2 files (the default). By default, UASTC LDR/HDR 4x4 and ASTC + 6x6 files will be compressed using Zstandard unless -ktx2_no_zstandard is + specified. + + -basis: Write .basis files instead of .KTX2 files. + + -file filename.png/tga/jpg/qoi/exr/hdr: Input image filename, multiple images + are OK, use -file X for each input filename (prefixing input filenames + with -file is optional) + + -alpha_file filename.png/tga/jpg/qoi: Input alpha image filename, multiple + images are OK, use -file X for each input filename (must be paired + with -file), images converted to REC709 grayscale and used as input alpha + + -output_file filename: Output .basis/.KTX2 filename + + -output_path: Output .basis/.KTX2 files to specified directory. + + -debug or -verbose: Enable codec debug print to stdout (slightly slower). + + -debug_images: Enable codec debug images (much slower). + + -stats: Compute and display image quality metrics (slightly to much slower). 
+ + -individual: Process input images individually and output + multiple .basis/.KTX2 files (not as a texture array - this is now the default + as of v1.16) + + -parallel: Compress multiple textures simultaneously (one per thread), instead + of one at a time. Compatible with OpenCL mode. This is much faster, but in + OpenCL mode the driver is pushed harder, and the CLI output will be jumbled. + + -linear: Use linear colorspace metrics (instead of the default sRGB or scaled + RGB for HDR), write linear transfer function setting to KTX2/basis file, and + by default linear (not sRGB) mipmap filtering (unless overridden). Same + as -tl. + + -srgb: Use sRGB colorspace metrics, write sRGB transfer function setting to + KTX2/basis file, and by default use sRGB mipmap filtering (unless + overridden). Same as -ts. + + -tex_type <2d, 2darray, 3d, video, cubemap>: Set Basis file header's texture + type field. Cubemap arrays require multiples of 6 images, in X+, X-, Y+, Y-, + Z+, Z- order, each image must be the same resolutions. 2d=arbitrary 2D + images, 2darray=2D array, 3D=volume texture slices, video=video frames, + cubemap=array of faces. For 2darray/3d/cubemaps/video, each source image's + dimensions and # of mipmap levels must be the same. For video, the .basis + file will be written with the first frame being an I-Frame, and subsequent + frames being P-Frames (using conditional replenishment). Playback must always + occur in order from first to last image. 
+ + -cubemap: same as -tex_type cubemap + + -tex_array: Process input images as a single texture array and write a + single .basis/.KTX2 file (the former default before v1.16) + + -fuzz_testing: Use with -validate: Disables CRC16 validation of file contents + before transcoding + + -multifile_printf: printf() format string to use to compose multiple filenames + + -multifile_first: The index of the first file to process, default is 0 (must + specify -multifile_printf and -multifile_num) + + -multifile_num: The total number of files to process. + + -opencl: Enable OpenCL usage (currently only accelerates ETC1S encoding) + + -opencl_serialize: Serialize all calls to the OpenCL driver (to work around + buggy drivers, only useful with -parallel) + +--- ETC1S specific options (-etc1s - the LDR/SDR default): + + -quality X and -effort X: Set quality (1-100) and effort (0-10) levels + + -q X: Low-level ETC1S quality level, 1-255, default is 128, lower=better + compression/lower quality/faster, higher=less compression/higher + quality/slower, default is 128. For even higher quality, + use -max_endpoints/-max_selectors. (-quality is preferred.) + + -comp_level X: Low-level ETC1S speed vs. quality tradeoff. Range is 0-6, + default is 1. Higher values=MUCH slower, but slightly higher quality. Higher + levels intended for videos. (-effort is preferred.) + + -max_endpoints X: ETC1S: Manually set the max number of color endpoint + clusters from 1-16128, use instead of -q + + -max_selectors X: ETC1S: Manually set the max number of color selector + clusters from 1-16128, use instead of -q + +--- UASTC LDR/HDR 4x4 specific options (-uastc or -uastc_ldr): + + -quality X and -effort X: Set quality (1-100) and effort (0-10) levels + + -uastc, -uastc_ldr or -uastc_ldr_4x4: Enable UASTC LDR 4x4 texture mode, + instead of the default ETC1S mode. Significantly higher texture quality, but + much larger (~8bpp) files. 
(Note that UASTC LDR 4x4 .basis files must be + losslessly compressed by the user.) + + -uastc_level: Set low-level UASTC LDR/HDR 4x4 encoding effort level. LDR Range + is [0,4], default is 2, higher=slower but higher quality. 0=fastest/lowest + quality, 3=slowest practical option, 4=impractically slow/highest achievable + quality. UASTC HDR 4x4 range is [0,4]: higher=slower, but higher quality. HDR + 4x4 default level=1. + + -uastc_rdo_l X: Enable UASTC LDR 4x4 RDO post-processing and set the low-level + UASTC LDR 4x4 RDO quality scalar (lambda) to X. Lower values=higher + quality/larger LZ compressed files, higher values=lower quality/smaller LZ + compressed files. Good range to try is [.25-10]. Note: Previous versions used + the -uastc_rdo_q option, which was removed because the RDO algorithm was + changed. + + -uastc_rdo_d X: Set UASTC LDR 4x4 RDO dictionary size in bytes. Default is + 4096, max is 65536. Lower values=faster, but less compression. + + -uastc_rdo_b X: Set UASTC LDR 4x4 RDO max smooth block error scale. Range is + [1,300]. Default is 10.0, 1.0=disabled. Larger values suppress more artifacts + (and allocate more bits) on smooth blocks. + + -uastc_rdo_s X: Set UASTC LDR 4x4 RDO max smooth block standard deviation. + Range is [.01,65536]. Default is 18.0. Larger values expand the range of + blocks considered smooth. + + -uastc_rdo_f: Don't favor simpler UASTC LDR 4x4 modes in RDO mode. + + -uastc_rdo_m: Disable RDO multithreading (slightly higher compression, + deterministic). + +--- UASTC HDR 4x4 specific options (-hdr or -hdr_4x4 - the HDR default): + + -hdr, -hdr_4x4, or -uastc_hdr_4x4: Enable UASTC HDR 4x4 mode + + -quality X and -effort X: Set quality (1-100) and effort (0-10) levels + + -uastc_level X: Sets the low-level UASTC HDR 4x4 compressor's effort level. + Valid range is [0,4]: higher=slower but higher quality. HDR + default=1. 
Level 0=fastest/lowest quality, 3=highest practical + setting, 4=exhaustive + + -hdr_uber_mode: Allow the UASTC HDR 4x4 encoder to try varying the CEM 11 + selectors more for slightly higher quality (slower). This may negatively + impact BC6H quality, however. + + -hdr_ultra_quant: UASTC HDR 4x4: Try to find better quantized CEM 7/11 + endpoint values (slower). + + -hdr_favor_astc: UASTC HDR 4x4: By default the dual-target UASTC HDR 4x4 + encoder tries to strike a balance or even slightly favor BC6H quality. If + this option is specified, ASTC HDR 4x4 quality is favored instead. + +--- ASTC/UASTC HDR 6x6 specific options (-hdr_6x6 or -hdr_6x6i): + +Internally both modes use the same compressor which can generate either +standard ASTC HDR 6x6 (with optional RDO) or UASTC HDR 6x6 (supercompressed +with a custom format). + + -hdr_6x6 or -astc_hdr_6x6: Enable RDO ASTC HDR 6x6 mode + + -hdr_6x6i or -uastc_hdr_6x6: Enable UASTC HDR 6x6 mode + + -quality X and -effort X: Set quality (1-100) and effort (0-10) levels + + -lambda X: Low-level option to enable rate distortion optimization (RDO) and + directly control the HDR 6x6 compressor's lambda setting. The + higher this value, the lower the quality, but the smaller the file + size. Try 100-20000, or higher values on some images. Upconverted + SDR images can generally tolerate much higher lambda settings vs. + true HDR images. + + -hdr_6x6_level X: Low-level option to set the codec to 6x6 HDR mode (same + as -hdr_6x6) and controls encoder performance vs. max quality + tradeoff. X may range from [0,12]. Default level is 2. Higher + values result in better quality but slower encoding. Values above + 10 are extremely slow. + + -hdr_6x6i_level X: Low-level option to set the codec to 6x6 HDR intermediate + mode (same as -hdr_6x6i) and controls encoder performance vs. max + quality tradeoff. X may range from [0,12]. Default level is 2. + + -rec_2020: The input image's gamut is Rec. 2020 vs. the default Rec. 
709 - for + accurate colorspace error calculations. This value will also be + written to the KTX2 file's header in the DFD. + + -hdr_6x6_jnd + X, -hdr_6x6_extra_pats, -hdr_6x6_brute_force_pats, + -hdr_6x6_comp_levels X Y, or -hdr_6x6i_comp_levels X Y: Low-level + control over the encoder's configuration. + +--- SDR/LDR->HDR upconversion options (only used when encoding to HDR formats +from an LDR/SDR source image): + + -hdr_ldr_no_srgb_to_linear: If specified, LDR images will NOT be converted to + normalized linear light (via a sRGB->Linear conversion) during SDR->HDR + upconversion before compressing as HDR. + + -hdr_ldr_upconversion_nit_multiplier X: Specify how many nits (candelas per + sq. meter) LDR/SDR images are converted to after converting to linear + light. Default is 100 nits. Note: Previous builds used 1 nit. Common + values are 80-100 nits. + +--- ASTC LDR/XUASTC LDR specific options (-ldr_4x4 or -ldr_4x4i, up to 12x12): + +Internally both modes (ASTC 4x4-12x12 and XUASTC 4x4-12x12) use the same +core compressor but with different outputs. All 14 standard ASTC block +sizes are supported (see the list below). + + -ldr_4x4-12x12 or -astc_ldr_4x4-12x12: Enable ASTC LDR 4x4-12x12 mode + + -ldr_4x4i-12x12 or -xuastc_ldr_4x4-12x12: Enable XUASTC LDR 4x4-12x12 mode + + -quality X: Enables lossy weight grid DCT and sets DCT quality level [1,100] + (defaults to no DCT). Higher=better quality, but higher bitrate. Good values + to try are 30-90. Default is no weight grid DCT. + + -effort X: Set encoder effort level [1,10]: Encoding speed tradeoff, + higher=slower but potentially higher overall quality. Default=3, 10=Insane. + + -xuastc_arith, -xuastc_hybrid, -xuastc_zstd: Set transcoding speed vs. + compression ratio tradeoff by selecting the output profile/syntax. Default + is -xuastc_zstd (fastest, lowest ratio). ZStd is fastest/lowest ratio, arith + is slowest/highest ratio (3-15% better vs. ZStd). 
+ + -xy: Enables lossy supercompression using windowed/bounded RDO for extra + compression (default is lossless supercompression of the XUASTC texture data + unless DCT is enabled) + + -xyd: Disables lossy supercompression (default, but automatically enabled + if -quality less than 100 is specified ) + + -xs: Force disable 2-3 subset usage in all effort levels (lower quality but + faster compression and faster transcoding to BC7 at certain block sizes) + + -xp: Force disable RGB dual plane usage in all effort levels (lower quality + but faster compression and faster transcoding to BC7 at certain block sizes) + + -ts: Use LDR sRGB ASTC decoding profile - the default. This parameter should + match how the developer will decode or sample the ASTC texture data. Inverse + of -tl. Same as -srgb. + + -tl: Use LDR Linear ASTC decoding profile. Inverse of -ts. Same as -linear. + + -weights X Y Z W: Set unsigned integer channel error weights. Defaults are + 1,1,1,1. Useful to favor certain channels during compression. + + -ls_min_psnr X, -ls_min_alpha_psnr X, -ls_thresh_psnr X, -ls_thresh_alpha_psnr + X, -ls_thresh_edge_psnr X, -ls_thresh_edge_alpha_psnr X: Windowed/bounded RDO + settings (Lossy supercompression must be enabled, or -xy.) + + -xuastc_blurring: Experimental - enable blurred block candidates (MUCH slower + compression, higher quality). + +These modes support all the standard ASTC block sizes. The larger the block +size, the lower the bitrate on disk and in memory, but the more noticeable the +artifacts. Some block sizes (4x4, 6x6, and 8x6) have specially optimized direct +transcoding paths to BC7. For faster direct BC7 transcoding at these block +sizes, disable RGB dual plane (-xp) and subset usage (-xs). + + Block Size Base/Memory Size + 1. 4x4 8.00 bpp + 2. 5x4 6.40 bpp + 3. 5x5 5.12 bpp + 4. 6x5 4.27 bpp + 5. 6x6 3.56 bpp + 6. 8x5 3.20 bpp + 7. 8x6 2.67 bpp + 8. 10x5 2.56 bpp + 9. 10x6 2.13 bpp + 10. 8x8 2.00 bpp + 11. 10x8 1.60 bpp + 12. 
10x10 1.28 bpp + 13. 12x10 1.07 bpp + 14. 12x12 0.89 bpp + +--- More options: + + -test: Run an automated LDR ETC1S/UASTC LDR 4x4 encoding and transcoding test. + Returns EXIT_FAILURE on any failures. + + -test_hdr_4x4/-test_hdr_6x6/-test_hdr_6x6i: Run automated UASTC HDR encoding + and transcoding tests. Returns EXIT_FAILURE on any failures. + + -test_xuastc: Run an automated XUASTC LDR encoding and transcoding test. + Returns EXIT_FAILURE on any failures. + + -test_dir: Optional directory of test files. Defaults to "../test_files". + + -y_flip: Flip input images vertically before compression + + -normal_map: Tunes codec parameters for better quality on normal maps (linear + colorspace metrics, linear mipmap filtering, no selector RDO, no sRGB) + + -no_alpha: Always output non-alpha basis files, even if one or more inputs has + alpha + + -force_alpha: Always output alpha basis files, even if no inputs has alpha + + -separate_rg_to_color_alpha: Separate input R and G channels to RGB and A (for + tangent space XY normal maps) + + -swizzle rgba: Specify swizzle for the 4 input color channels using r, g, b + and a (the -separate_rg_to_color_alpha flag is equivalent to rrrg) + + -renorm: Renormalize each input image before any further + processing/compression + + -no_multithreading: Disable multithreading + + -max_threads X: Use at most X threads total when multithreading is enabled + (this includes the main thread) + + -wasi_threads: Set number of threads to use in WASI threading builds + (default=8, only used in WASI threading builds) + + -no_ktx: Disable KTX writing when unpacking (faster, less output files) + + -ktx_only: Only write KTX files when unpacking (faster, less output files) + + -write_out: Write 3dfx OUT files when unpacking FXT1 textures + + -format_only: Only unpack the specified format, by its numeric code. 
+ + -etc1_only: Only unpack to ETC1, skipping the other texture formats + during -unpack + + -disable_hierarchical_endpoint_codebooks: Disable hierarchical endpoint + codebook usage, slower but higher quality on some compression levels + + -compare_ssim: Compute and display SSIM of image comparison (slow) + + -compare_plot: Display histogram plots in -compare mode + + -bench: UASTC benchmark mode, for development only + + -resample X Y: Resample all input textures to XxY pixels using a box filter + + -resample_factor X: Resample all input textures by scale factor X using a box + filter + + -no_sse: Forbid all SSE instruction set usage + + -validate_etc1s: Validate internal ETC1S compressor's data structures during + compression (slower, intended for development). + + -ktx2_animdata_duration X: Set KTX2animData duration field to integer value X + (only valid/useful for -tex_type video, default is 1) + + -ktx2_animdata_timescale X: Set KTX2animData timescale field to integer value + X (only valid/useful for -tex_type video, default is 15) + + -ktx2_animdata_loopcount X: Set KTX2animData loopcount field to integer value + X (only valid/useful for -tex_type video, default is 0) + + -framerate X: Set framerate in .basis header to X/frames sec. + + -ktx2_no_zstandard: Don't compress UASTC texture data using Zstandard -- store + it uncompressed instead. + + -ktx2_zstandard_level X: Set ZStandard compression level to X (see Zstandard + documentation, default level is 6) + + -tonemap_dither: Dither tonemapper's 8-bit/component output by adding a small + amount of white noise, only used with -tonemap mode + +--- Mipmap Generator Options: + +By default, SDR textures will be converted from sRGB to linear light before +mipmap filtering, then back to sRGB (for the RGB color channels) unless -linear +is specified. You can override this behavior with -mip_srgb/-mip_linear. 
+ + -mipmap: Generate mipmaps for each source image + + -mip_srgb: Convert image to linear before filtering, then back to sRGB. + (This is set automatically by default, unless you override it.) + + -mip_linear: Keep image in linear light during mipmap filtering (i.e. do not + convert to/from sRGB for filtering purposes). (This is set automatically by + default, unless you override it.) + + -mip_scale X: Set mipmap filter kernel's scale, lower=sharper, higher=more + blurry, default is 1.0 (quite conservative). + + -mip_filter X: Set mipmap filter kernel, default is kaiser. Supported filters: + box, tent, bell, b-spline, mitchell, blackman, lanczos3, lanczos4, lanczos6, + lanczos12, kaiser, gaussian, catmullrom, quadratic_interp, quadratic_approx, + quadratic_mix + + -mip_renorm: Renormalize normal map to unit length vectors after filtering + + -mip_clamp: Use clamp addressing on borders, instead of wrapping + + -mip_fast: Use faster mipmap generation (resample from previous mip, not + always first/largest mip level). The default. + + -mip_slow: Always resample each mipmap level starting from the largest mipmap. + Higher quality, but slower. Opposite of -mip_fast. + + -mip_smallest X: Set smallest pixel dimension for generated mipmaps, default + is 1 pixel + +--- Transcoding Options (used while unpacking, validating after compression): + +These settings control the "decode flags" used while transcoding: + + -higher_quality_transcoding: Enable higher quality, but slower, transcoding + + -no_deblocking: Always disable adaptive deblocking filter on all block sizes + (XUASTC/ASTC LDR 4x4-12x12 only). By default only block sizes >8x6 are + deblocked while transcoding. (No deblocking ever occurs when transcoding to + ASTC: only when re-encoding ASTC to another format, to lower artifacts.) 
+ + -force_deblocking: Always use adaptive deblocking filter, even for block sizes + <= 8x6 (XUASTC/ASTC LDR 4x4-12x12 only) + + -stronger_deblocking: Use stronger adaptive deblocking filtering (XUASTC/ASTC + LDR 4x4-12x12 only) + + -no_etc1s_chroma_filtering: Disable adaptive ETC1S transcode chroma filter, + for faster transcoding to BC7. + + -fast_xuastc_ldr_bc7_transcoding: Use much faster, but lower quality, XUASTC + LDR 4x4/6x6/8x6 direct BC7 transcoders (the default) + + -no_fast_xuastc_ldr_bc7_transcoding: Disable much faster, but slightly lower + quality, XUASTC LDR 4x4/6x6/8x6 direct BC7 transcoders + +--- Low-Level ETC1S backend endpoint/selector RDO codec options: + + -no_selector_rdo: Disable backend's selector rate distortion optimizations + (slightly faster, less noisy output, but lower quality per output bit) + + -selector_rdo_thresh X: Set selector RDO quality threshold, default is 1.25, + lower is higher quality but less quality per output bit (try 1.0-3.0) + + -no_endpoint_rdo: Disable backend's endpoint rate distortion optimizations + (slightly faster, less noisy output, but lower quality per output bit) + + -endpoint_rdo_thresh X: Set endpoint RDO quality threshold, default is 1.5, + lower is higher quality but less quality per output bit (try 1.0-3.0) + +--- Set various low-level fields in the Basis file header: + + -userdata0 X: Set 32-bit userdata0 field in Basis file header to X (X is a + signed 32-bit int) + + -userdata1 X: Set 32-bit userdata1 field in Basis file header to X (X is a + signed 32-bit int) + +--- Example LDR ETC1S/UASTC LDR 4x4 command lines: + + - basisu x.png : Compress sRGB image x.png to x.ktx2 using default settings + (multiple filenames OK, use -tex_array if you want a tex array vs. 
multiple + output files) + + - basisu -basis x.qoi : Compress sRGB image x.qoi to x.basis (supports 24-bit + or 32-bit .QOI files) + + - basisu x.ktx2 : Unpack x.basis to PNG/KTX files (multiple filenames OK) + + - basisu x.basis : Unpack x.basis to PNG/KTX files (multiple filenames OK) + + - basisu -uastc x.png -uastc_rdo_l 2.0 -ktx2 -stats : Compress to a + UASTC .KTX2 file with RDO (rate distortion optimization) to reduce .KTX2 + compressed file size + + - basisu -file x.png -mipmap -y_flip : Compress a mipmapped x.ktx2 file from + an sRGB image named x.png, Y flip each source image + + - basisu -validate -file x.basis : Validate x.basis (check header, check file + CRC's, attempt to transcode all slices) + + - basisu -unpack -file x.basis : Validates, transcodes and unpacks x.basis to + mipmapped .KTX and RGB/A .PNG files (transcodes to all supported GPU texture + formats) + + - basisu -q 255 -file x.png -mipmap -debug -stats : Compress sRGB x.png to + x.ktx2 at quality level 255 with compressor debug output/statistics + + - basisu -linear -max_endpoints 16128 -max_selectors 16128 -file x.png : + Compress non-sRGB x.png to x.ktx2 using the largest supported manually + specified codebook sizes + + - basisu -basis -comp_level 2 -max_selectors 8192 -max_endpoints + 8192 -tex_type video -framerate 20 -multifile_printf + "x%02u.png" -multifile_first 1 -multifile_num 20 : Compress a 20 sRGB source + image video sequence (x01.png, x02.png, x03.png, etc.) to x01.basis + +--- Example UASTC HDR 4x4 command lines: + + - basisu x.exr : Compress a HDR .EXR (or .HDR) image to a UASTC HDR 4x4 .KTX2 + file. LDR/SDR images will be upconverted to linear light HDR before + compression. See HDR upconversion options, above. + + - basisu -hdr_4x4 x.exr : Compress a HDR .EXR image to a UASTC HDR 4x4 .KTX2 + file. 
+ + - basisu x.hdr -uastc_level 0 : Compress a HDR .hdr image to a UASTC HDR + 4x4 .KTX2 file, fastest encoding but lowest quality + + - basisu -hdr x.png : Compress a LDR .PNG image to UASTC HDR 4x4 (image is + converted from sRGB to linear light first, use -hdr_ldr_no_srgb_to_linear to + disable) + + - basisu x.hdr -uastc_level 3 : Compress a HDR .hdr image to UASTC HDR 4x4 at + higher quality (-uastc_level 4 is highest quality, but very slow encoding) + + - basisu x.hdr -uastc_level 3 -mipmap -basis -stats -debug -debug_images : + Compress a HDR .hdr image to UASTC HDR 4x4, .basis output file, at higher + quality, generate mipmaps, output statistics and debug information, and write + tone mapped debug images + + - basisu x.hdr -stats -hdr_favor_astc -hdr_uber_mode -uastc_level 4 : Highest + achievable ASTC HDR 4x4 quality (very slow encoding, BC6H quality is traded + off) + +--- Example RDO ASTC/UASTC HDR 6x6 command lines: + + - basisu -hdr_6x6 x.exr : Compress a HDR .EXR (or .HDR) image to a UASTC HDR + 6x6 .KTX2 file. LDR/SDR images will be upconverted to linear light HDR before + compression. See HDR upconversion options, above. + + - basisu -lambda 1000 -hdr_6x6 x.exr : Compress a HDR .EXR (or .HDR) image to + a UASTC HDR 6x6 .KTX2 file with rate-distortion optimization (RDO), at lambda + level 1000. + + - basisu -hdr_6x6i x.exr : Compress a HDR .EXR image to a compressed + intermediate format UASTC HDR 6x6 .KTX2 file. + + - basisu -lambda 1000 -hdr_6x6i x.exr : Compress a HDR .EXR image to a + compressed intermediate format UASTC HDR 6x6 .KTX2 file with rate-distortion + optimization (RDO), at lambda level 1000. + +--- Example ASTC/XUASTC LDR 4x4-12x12 command lines: + + - basisu -ldr_6x6i -q 75 -xuastc_arith test.png : Compress test.png to XUASTC + LDR 6x6 using weight grid DCT with setting 75 and the arith profile for + higher compression. 
+ + - basisu -ldr_4x4 test.png : Compress test.png to ASTC LDR 4x4 + + - basisu -mipmap -ldr_10x5i test.png : Compress test.png to XUASTC LDR 10x5, + using lossless ZStd supercompression, with mipmaps + +--- ETC1S Texture Video Notes: Use -comp_level 2 or higher for better codebook +generation, specify very large codebooks using -max_endpoints +and -max_selectors, and reduce the default endpoint RDO threshold +(-endpoint_rdo_thresh) to around 1.25. Videos may have mipmaps and alpha +channels. Videos must always be played back by the transcoder in first to last +image order. Video files currently use I-Frames on the first image, and +P-Frames using conditional replenishment on subsequent frames. + +--- Low-level ETC1S compression (Effort) Level (-comp_level X) Details + +This setting controls the ETC1S speed vs. quality tradeoff. (Use -q to control +the quality vs. compressed size tradeoff.): + + - Level 0: Fastest, but has marginal quality and can be brittle on complex +images. Avg. Y dB: 35.45 + + - Level 1: Hierarchical codebook searching, faster ETC1S encoding. 36.87 dB, +~1.4x slower vs. level 0. (This is the default setting.) + + - Level 2: Use this or higher for video. Hierarchical codebook searching. +36.87 dB, ~1.4x slower vs. level 0. (This is the v1.12's default setting.) + + - Level 3: Full codebook searching. 37.13 dB, ~1.8x slower vs. level 0. +(Equivalent to the initial release's default settings.) + + - Level 4: Hierarchical codebook searching, codebook k-means iterations. 37.15 +dB, ~4x slower vs. level 0 + + - Level 5: Full codebook searching, codebook k-means iterations. 37.41 dB, +~5.5x slower vs. level 0. + + - Level 6: Full codebook searching, twice as many codebook k-means iterations, +best ETC1 endpoint opt. 37.43 dB, ~12x slower vs. 
level 0 + diff --git a/external/basis_universal/cmd_help/process.py b/external/basis_universal/cmd_help/process.py new file mode 100644 index 0000000000..57b933a096 --- /dev/null +++ b/external/basis_universal/cmd_help/process.py @@ -0,0 +1,12 @@ +# convert_to_c_string.py +def to_c_string(path): + with open(path, "r", encoding="utf-8") as f: + text = f.read() + # escape backslashes and quotes + text = text.replace("\\", "\\\\").replace("\"", "\\\"") + # replace newlines with \n + text = text.replace("\n", "\\n\"\n\"") + return "\"" + text + "\"" + +if __name__ == "__main__": + print(to_c_string("cmd_help.txt")) diff --git a/external/basis_universal/contrib/single_file_transcoder/basisu_transcoder-in.cpp b/external/basis_universal/contrib/single_file_transcoder/basisu_transcoder-in.cpp index b073a37879..63d64b6395 100644 --- a/external/basis_universal/contrib/single_file_transcoder/basisu_transcoder-in.cpp +++ b/external/basis_universal/contrib/single_file_transcoder/basisu_transcoder-in.cpp @@ -29,9 +29,11 @@ #define BASISD_SUPPORT_FXT1 0 /* - * KTX2 support disabled. + * KTX2 support enabled. 
*/ -#define BASISD_SUPPORT_KTX2 0 +#define BASISD_SUPPORT_KTX2 1 + +#define BASISU_ASTC_HELPERS_IMPLEMENTATION #include "basisu_transcoder.cpp" diff --git a/external/basis_universal/contrib/single_file_transcoder/combine.py b/external/basis_universal/contrib/single_file_transcoder/combine.py index 829d433116..8eac17e232 100755 --- a/external/basis_universal/contrib/single_file_transcoder/combine.py +++ b/external/basis_universal/contrib/single_file_transcoder/combine.py @@ -160,7 +160,7 @@ def add_file(file: Path, file_name: str = None) -> None: if (not file_name): file_name = file.name error_line(f'Processing: {file_name}') - with file.open('r', errors='replace') as opened: + with file.open('r', encoding='utf-8-sig', errors='replace') as opened: for line in opened: line = line.rstrip('\n') match_include = include_regex.match(line); diff --git a/external/basis_universal/contrib/single_file_transcoder/create_transcoder.sh b/external/basis_universal/contrib/single_file_transcoder/create_transcoder.sh index 0c7a435bfe..39616d0044 100755 --- a/external/basis_universal/contrib/single_file_transcoder/create_transcoder.sh +++ b/external/basis_universal/contrib/single_file_transcoder/create_transcoder.sh @@ -22,7 +22,7 @@ which cc > /dev/null if [ $? -ne 0 ]; then echo "(Skipping compile test)" else - cc -std=c++11 -Wall -Wextra -Wno-unused-value -Os -g0 -fno-exceptions -fno-rtti -fno-strict-aliasing -o $OUT_FILE examples/simple.cpp -lstdc++ -lm + cc -std=c++17 -Wall -Wextra -Wno-unused-value -Os -g0 -fno-exceptions -fno-rtti -fno-strict-aliasing -o $OUT_FILE examples/simple.cpp ../../zstd/zstd.c -lstdc++ -lm # Did compilation work? if [ $? 
-ne 0 ]; then echo "Compiling simple.cpp: FAILED" diff --git a/external/basis_universal/encoder/3rdparty/android_astc_decomp.cpp b/external/basis_universal/encoder/3rdparty/android_astc_decomp.cpp index f850d03d0c..b0e99e4b52 100644 --- a/external/basis_universal/encoder/3rdparty/android_astc_decomp.cpp +++ b/external/basis_universal/encoder/3rdparty/android_astc_decomp.cpp @@ -21,6 +21,7 @@ * rg: Removed external dependencies, minor fix to decompress() so it converts non-sRGB * output to 8-bits correctly. I've compared this decoder's output * vs. astc-codec with random inputs. + * See https://raw.githubusercontent.com/KhronosGroup/DataFormat/refs/heads/main/astc.txt * *//*! * \file @@ -836,11 +837,13 @@ void decodeISETritBlock (ISEDecodedResult* dst, int numValues, BitAccessStream& m[4] = data.getNext(numBits); deUint32 T7 = data.getNext(1); +#ifndef __clang__ #ifndef __EMSCRIPTEN__ #ifdef __GNUC__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wimplicit-fallthrough=" #endif +#endif #endif switch (numValues) { @@ -853,10 +856,12 @@ void decodeISETritBlock (ISEDecodedResult* dst, int numValues, BitAccessStream& default: DE_ASSERT(false); } +#ifndef __clang__ #ifndef __EMSCRIPTEN__ #ifdef __GNUC__ #pragma GCC diagnostic pop #endif +#endif #endif const deUint32 T = (T7 << 7) | (T56 << 5) | (T4 << 4) | (T23 << 2) | (T01 << 0); @@ -902,11 +907,13 @@ void decodeISEQuintBlock (ISEDecodedResult* dst, int numValues, BitAccessStream& m[2] = data.getNext(numBits); deUint32 Q56 = data.getNext(2); +#ifndef __clang__ #ifndef __EMSCRIPTEN__ #ifdef __GNUC__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wimplicit-fallthrough=" #endif +#endif #endif switch (numValues) { @@ -917,10 +924,13 @@ void decodeISEQuintBlock (ISEDecodedResult* dst, int numValues, BitAccessStream& default: DE_ASSERT(false); } + +#ifndef __clang__ #ifndef __EMSCRIPTEN__ #ifdef __GNUC__ #pragma GCC diagnostic pop #endif +#endif #endif const deUint32 Q = (Q56 << 5) | (Q34 << 3) | 
(Q012 << 0); @@ -1966,7 +1976,7 @@ float half_to_float(half_float hval) } // anonymous // See https://registry.khronos.org/DataFormat/specs/1.3/dataformat.1.3.inline.html#_hdr_endpoint_decoding -static void convert_to_half_prec(uint32_t n, float* pVals) +static void convert_from_half_to_float_prec(uint32_t n, float* pVals) { #if 0 const int prev_dir = fesetround(FE_TOWARDZERO); @@ -1989,6 +1999,7 @@ static void convert_to_half_prec(uint32_t n, float* pVals) #endif } +// Assumes the decode_unorm8 extension is active (only upper 8 bits used). bool decompress_ldr(uint8_t *pDst, const uint8_t * data, bool isSRGB, int blockWidth, int blockHeight) { float linear[MAX_BLOCK_WIDTH * MAX_BLOCK_HEIGHT * 4]; @@ -2029,7 +2040,7 @@ bool decompress_hdr(float* pDstRGBA, const uint8_t* data, int blockWidth, int bl return false; } - convert_to_half_prec(blockWidth * blockHeight * 4, pDstRGBA); + convert_from_half_to_float_prec(blockWidth * blockHeight * 4, pDstRGBA); return true; } diff --git a/external/basis_universal/encoder/3rdparty/tinydds.h b/external/basis_universal/encoder/3rdparty/tinydds.h index 41e1d6f131..b1dda65231 100644 --- a/external/basis_universal/encoder/3rdparty/tinydds.h +++ b/external/basis_universal/encoder/3rdparty/tinydds.h @@ -828,7 +828,7 @@ typedef struct TinyDDS_Context { #define TINYDDS_MAKE_RIFFCODE(a, b, c, d) (a | (b << 8) | (c << 16) | (d << 24)) -static uint32_t TinyDDS_fileIdentifier = TINYDDS_MAKE_RIFFCODE('D', 'D', 'S', ' '); +//static uint32_t TinyDDS_fileIdentifier = TINYDDS_MAKE_RIFFCODE('D', 'D', 'S', ' '); static void TinyDDS_NullErrorFunc(void *user, char const *msg) { BASISU_NOTE_UNUSED(user); BASISU_NOTE_UNUSED(msg); } diff --git a/external/basis_universal/encoder/basisu_astc_hdr_6x6_enc.cpp b/external/basis_universal/encoder/basisu_astc_hdr_6x6_enc.cpp index df99f001db..42e8a7f244 100644 --- a/external/basis_universal/encoder/basisu_astc_hdr_6x6_enc.cpp +++ b/external/basis_universal/encoder/basisu_astc_hdr_6x6_enc.cpp @@ -1,4 +1,16 @@ 
// File: basisu_astc_hdr_6x6_enc.cpp +// Copyright (C) 2019-2026 Binomial LLC. All Rights Reserved. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. #include "basisu_astc_hdr_6x6_enc.h" #include "basisu_enc.h" #include "basisu_astc_hdr_common.h" @@ -22,13 +34,13 @@ using namespace basist::astc_6x6_hdr; namespace astc_6x6_hdr { -static void atomic_max(std::atomic& atomic_var, uint32_t new_value) +static void atomic_max(std::atomic& atomic_var, uint32_t new_value) { uint32_t current = atomic_var.load(std::memory_order_relaxed); for ( ; ; ) { uint32_t new_max = std::max(current, new_value); - if (atomic_var.compare_exchange_weak(current, new_max, std::memory_order_relaxed, std::memory_order_relaxed)) + if (atomic_var.compare_exchange_weak(current, new_max, std::memory_order_relaxed, std::memory_order_relaxed)) break; } } @@ -164,7 +176,7 @@ static float inversePQ(float E) // Highest error is for values less than SMALLEST_PQ_VAL_IN. 
// // Approximation is round trip lossless for 10-12 bits at [0,10000] nits: -// for x [0,1024] (SCALE=1023) or for x [0,4095] (SCALE=4096): +// for x [0,1024] (SCALE=1023) or for x [0,4095] (SCALE=4096): // round(forwardPQTab(inversePQ(x / SCALE)) * SCALE) == x // // bfloat16 has enough precision to handle 8-bit sRGB to linear conversions: @@ -176,7 +188,7 @@ const int PQ_APPROX_EXP_RANGE = (PQ_APPROX_MAX_EXP - PQ_APPROX_MIN_EXP + 1); const float SMALLEST_PQ_VAL_IN = 0.000015258829080f; const float SMALLEST_PQ_VAL = 0.000551903737f; // forwardPQ(SMALLEST_PQ_VAL_IN) -const float LARGEST_PQ_VAL = 1.251312f; +const float LARGEST_PQ_VAL = 1.251312f; float g_pq_approx_tabs[PQ_APPROX_EXP_RANGE][128]; @@ -248,7 +260,7 @@ static inline float forwardPQTab(float v) } // 100 nits = ~.5 i -// This converts absolute linear RGB light in either REC 709 or REC2020/BT2100 color gamut to ICtCp, a coding space where Ct is scaled by 2. +// This converts absolute linear RGB light in either REC 709 or REC2020/BT2100 color gamut to ICtCp, a coding space where Ct is scaled by 2. // To convert to perceptual ITP for error/distance calculations, multiply the result Ct by .5 (or set itp_flag to true). // Assumes REC 709 input, or REC 2020/BT.2100 RGB input if rec2020_bt2100_color_gamut is true. 
// @@ -268,7 +280,7 @@ static inline float forwardPQTab(float v) static void linear_rgb_to_ictcp(const vec3F& rgb_in, vec3F& ictcp, bool itp_flag = false, bool rec2020_bt2100_color_gamut = false) { vec3F rgb_2100(rgb_in); - + float l, m, s; if (!rec2020_bt2100_color_gamut) { @@ -613,7 +625,7 @@ struct partition_pattern_vec operator size_t() const { - return basisu::hash_hsieh(m_parts, sizeof(m_parts)); + return basist::hash_hsieh(m_parts, sizeof(m_parts)); } }; @@ -662,7 +674,7 @@ class vp_tree m_nodes[0].m_outer_node = -1; uint_vec inner_list, outer_list; - + inner_list.reserve(n / 2); outer_list.reserve(n / 2); @@ -711,8 +723,8 @@ class vp_tree enum { MaxSupportedSize = 256 + 1 }; public: - result_queue() : - m_cur_size(0) + result_queue() : + m_cur_size(0) { } @@ -765,14 +777,14 @@ class vp_tree bool pop() { - if (m_cur_size == 0) + if (m_cur_size == 0) return false; m_elements[1] = m_elements[m_cur_size--]; down_heap(1); return true; } - + float get_highest_dist() const { if (!m_cur_size) @@ -780,7 +792,7 @@ class vp_tree return top().m_dist; } - + private: result_array_type m_elements; size_t m_cur_size; @@ -814,7 +826,7 @@ class vp_tree } } }; - + void find_nearest(uint32_t num_subsets, const partition_pattern_vec& desired_pat, result_queue& results, uint32_t max_results) { assert((num_subsets >= 2) && (num_subsets <= 3)); @@ -879,7 +891,7 @@ class vp_tree if (m_nodes[node_index].m_outer_node >= 0) { - if ( (results.get_size() < max_results) || + if ( (results.get_size() < max_results) || ((m_nodes[node_index].m_dist - best_dist_to_vantage) <= results.get_highest_dist()) ) { @@ -895,7 +907,7 @@ class vp_tree if (m_nodes[node_index].m_inner_node >= 0) { - if ( (results.get_size() < max_results) || + if ( (results.get_size() < max_results) || ((best_dist_to_vantage - m_nodes[node_index].m_dist) <= results.get_highest_dist()) ) { @@ -904,13 +916,13 @@ class vp_tree } } } - + void find_nearest_at_node_non_recursive(int init_node_index, uint32_t num_desired_pats, 
const partition_pattern_vec* pDesired_pats, result_queue& results, uint32_t max_results) { uint_vec node_stack; node_stack.reserve(16); node_stack.push_back(init_node_index); - + do { const uint32_t node_index = node_stack.back(); @@ -985,7 +997,7 @@ class vp_tree m_nodes.resize(m_nodes.size() + 1); const uint32_t new_node_index = m_nodes.size_u32() - 1; - + m_nodes[new_node_index].m_vantage_point = pUnique_pats[root_idx.first]; m_nodes[new_node_index].m_point_index = root_idx.first; m_nodes[new_node_index].m_dist = root_idx.second; @@ -1039,17 +1051,17 @@ class vp_tree basisu::vector< std::pair > dists; dists.reserve(n); - + float_vec float_dists; float_dists.reserve(n); - + for (uint32_t pat_indices_iter = 0; pat_indices_iter < n; pat_indices_iter++) { const uint32_t split_pat_index = pat_indices[pat_indices_iter]; assert(split_pat_index < num_unique_pats); const partition_pattern_vec& trial_vantage = pUnique_pats[split_pat_index]; - + dists.resize(0); float_dists.resize(0); @@ -1060,7 +1072,7 @@ class vp_tree if (pat_index == split_pat_index) continue; - + float dist = trial_vantage.get_distance(pUnique_pats[pat_index]); dists.emplace_back(std::pair(dist, pat_index)); @@ -1080,13 +1092,13 @@ class vp_tree split_dist = (split_dist + dists[(num_dists / 2) - 1].first) * .5f; uint32_t total_inner = 0, total_outer = 0; - + for (uint32_t j = 0; j < n; j++) { const uint32_t pat_index = pat_indices[j]; if (pat_index == split_pat_index) continue; - + float dist = trial_vantage.get_distance(pUnique_pats[pat_index]); if (dist <= split_dist) @@ -1096,7 +1108,7 @@ class vp_tree } float split_metric = (float)minimum(total_inner, total_outer) / (float)maximum(total_inner, total_outer); - + if ( (split_metric > best_split_metric) || ((split_metric == best_split_metric) && (s.m_var > best_split_var)) ) { @@ -1115,7 +1127,7 @@ struct partition { uint64_t m_p; - inline partition() : + inline partition() : m_p(0) { } @@ -1145,7 +1157,7 @@ struct partition inline operator size_t() 
const { - return hash_hsieh((const uint8_t *)&m_p, sizeof(m_p)); + return basist::hash_hsieh((const uint8_t *)&m_p, sizeof(m_p)); } }; @@ -1172,19 +1184,19 @@ static void init_partitions2_6x6() { uint64_t p_bits = 0; uint64_t p_bits_inv = 0; - + for (uint32_t y = 0; y < 6; y++) { for (uint32_t x = 0; x < 6; x++) { uint64_t p = astc_helpers::compute_texel_partition(i, x, y, 0, 2, false); assert(p < 2); - + p_bits |= (p << (x + y * 6)); p_bits_inv |= ((1 - p) << (x + y * 6)); } } - + if (!p_bits) continue; if (p_bits == ((1ULL << 36) - 1)) @@ -1206,13 +1218,13 @@ static void init_partitions2_6x6() BASISU_NOTE_UNUSED(res); } } - + uint32_t num_unique_partitions2 = 0; - + for (const auto& r : phash) { assert(r.second < 1024); - + const uint32_t unique_index = num_unique_partitions2; assert(unique_index < NUM_UNIQUE_PARTITIONS2); @@ -1221,7 +1233,7 @@ static void init_partitions2_6x6() pat_vec[i] = (uint8_t)((r.first >> i) & 1); g_partitions2[unique_index] = pat_vec; - + assert(g_part2_unique_index_to_seed[unique_index] == r.second); g_part2_seed_to_unique_index[r.second] = unique_index; @@ -1260,7 +1272,7 @@ static bool estimate_partition2_6x6( int* pBest_parts, uint32_t num_best_parts) { const uint32_t BLOCK_W = 6, BLOCK_H = 6, BLOCK_T = BLOCK_W * BLOCK_H; - + vec3F training_vecs[BLOCK_T], mean(0.0f); for (uint32_t i = 0; i < BLOCK_T; i++) @@ -1321,10 +1333,10 @@ static bool estimate_partition2_6x6( desired_part.m_parts[i] = proj < 0.0f; } #endif - + //interval_timer tm; //tm.start(); - + #if BRUTE_FORCE_PART_SEARCH uint32_t part_similarity[NUM_UNIQUE_PARTITIONS2]; @@ -1432,7 +1444,7 @@ static bool encode_block_2_subsets( part_pixel_index[part_index][l] = (uint8_t)(x + y * BLOCK_W); part_total_pixels[part_index] = l + 1; - } // x + } // x } // y uint8_t blk_endpoints[2][basist::NUM_MODE11_ENDPOINTS]; @@ -1529,7 +1541,7 @@ static bool encode_block_2_subsets( grid_w, grid_h, // dest/to dimension (grid size) desired_weights, // these are dequantized weights, NOT ISE 
symbols, [by][bx] downsampled_weights); // [wy][wx] - + best_log_blk.m_partition_id = (uint16_t)p_seed; memcpy(best_log_blk.m_endpoints, blk_endpoints[0], num_endpoint_vals); memcpy(best_log_blk.m_endpoints + num_endpoint_vals, blk_endpoints[1], num_endpoint_vals); @@ -1589,7 +1601,7 @@ static void init_partitions3_6x6() partition3_hash_map part3_hash; part3_hash.reserve(512); - + for (uint32_t seed_index = 0; seed_index < 1024; seed_index++) { partition_pattern_vec p3; @@ -1693,13 +1705,13 @@ static bool estimate_partition3_6x6( if ((cluster_centroids[0] == cluster_centroids[2]) || (cluster_centroids[1] == cluster_centroids[2])) return false; - + uint32_t cluster_pixels[NUM_SUBSETS][BLOCK_T]; uint32_t num_cluster_pixels[NUM_SUBSETS]; vec3F new_cluster_means[NUM_SUBSETS]; const uint32_t NUM_ITERS = 4; - + for (uint32_t s = 0; s < NUM_ITERS; s++) { memset(num_cluster_pixels, 0, sizeof(num_cluster_pixels)); @@ -1707,9 +1719,9 @@ static bool estimate_partition3_6x6( for (uint32_t i = 0; i < BLOCK_T; i++) { - float d[NUM_SUBSETS] = { - training_vecs[i].squared_distance(cluster_centroids[0]), - training_vecs[i].squared_distance(cluster_centroids[1]), + float d[NUM_SUBSETS] = { + training_vecs[i].squared_distance(cluster_centroids[0]), + training_vecs[i].squared_distance(cluster_centroids[1]), training_vecs[i].squared_distance(cluster_centroids[2]) }; float min_d = d[0]; @@ -1736,7 +1748,7 @@ static bool estimate_partition3_6x6( cluster_centroids[j] = new_cluster_means[j] / (float)num_cluster_pixels[j]; } } // s - + partition_pattern_vec desired_part; for (uint32_t p = 0; p < NUM_SUBSETS; p++) { @@ -1771,7 +1783,7 @@ static bool estimate_partition3_6x6( } // part_index; std::sort(part_similarity, part_similarity + NUM_UNIQUE_PARTITIONS3); - + for (uint32_t i = 0; i < num_best_parts; i++) pBest_parts[i] = part_similarity[i] & 0xFFFF; #else @@ -1799,15 +1811,15 @@ static bool encode_block_3_subsets( astc_hdr_codec_base_options& coptions, bool uber_mode_flag, const int* 
pEst_patterns, int num_est_patterns, - uint32_t comp_level, + uint32_t comp_level, opt_mode_t mode11_opt_mode) { BASISU_NOTE_UNUSED(uber_mode_flag); const uint32_t BLOCK_W = 6, BLOCK_H = 6, NUM_SUBSETS = 3; const uint32_t num_endpoint_vals = astc_helpers::get_num_cem_values(cem); - + res.m_valid = false; - + double best_e = BIG_FLOAT_VAL; astc_helpers::log_astc_block best_log_blk; @@ -1849,7 +1861,7 @@ static bool encode_block_3_subsets( part_pixel_index[part_index][l] = (uint8_t)(x + y * BLOCK_W); part_total_pixels[part_index] = l + 1; - } // x + } // x } // y uint8_t blk_endpoints[NUM_SUBSETS][basist::NUM_MODE11_ENDPOINTS]; @@ -1889,7 +1901,7 @@ static bool encode_block_3_subsets( blk_endpoints[part_iter], blk_weights[part_iter], coptions, - false, best_log_blk.m_endpoint_ise_range, uber_mode_flag, false, + false, best_log_blk.m_endpoint_ise_range, uber_mode_flag, false, FIRST_MODE11_SUBMODE_INDEX, MAX_MODE11_SUBMODE_INDEX, false, mode11_opt_mode); } @@ -1903,6 +1915,7 @@ static bool encode_block_3_subsets( if (failed_flag) continue; + uint8_t ise_weights[BLOCK_W * BLOCK_H]; uint32_t src_pixel_index[NUM_SUBSETS] = { 0 }; @@ -1926,7 +1939,7 @@ static bool encode_block_3_subsets( for (uint32_t p = 0; p < NUM_SUBSETS; p++) memcpy(best_log_blk.m_endpoints + num_endpoint_vals * p, blk_endpoints[p], num_endpoint_vals); - + memcpy(best_log_blk.m_weights, ise_weights, BLOCK_W * BLOCK_H); } } @@ -1959,7 +1972,7 @@ static bool encode_block_3_subsets( astc_helpers::log_astc_block trial_blk(best_log_blk); trial_blk.m_partition_id = (uint16_t)g_part3_unique_index_to_seed[unique_part_index]; - + for (uint32_t p = 0; p < NUM_SUBSETS; p++) memcpy(trial_blk.m_endpoints + num_endpoint_vals * p, blk_endpoints[p], num_endpoint_vals); @@ -2029,7 +2042,7 @@ static uint32_t encode_values(bitwise_coder &coder, uint32_t total_values, const uint32_t total_tq_values = 0, tq_accum = 0, tq_mul = 1; assert((total_values) && (total_values <= MAX_VALS)); - + const uint32_t ep_bits = 
astc_helpers::g_ise_range_table[endpoint_range][0]; const uint32_t ep_trits = astc_helpers::g_ise_range_table[endpoint_range][1]; const uint32_t ep_quints = astc_helpers::g_ise_range_table[endpoint_range][2]; @@ -2072,7 +2085,7 @@ static uint32_t encode_values(bitwise_coder &coder, uint32_t total_values, const } uint32_t total_bits_output = 0; - + for (uint32_t i = 0; i < total_tq_values; i++) { const uint32_t num_bits = ep_trits ? 8 : 7; @@ -2143,7 +2156,7 @@ static void code_block(bitwise_coder& coder, { const int unique_partition_index = g_part2_seed_to_unique_index[log_blk.m_partition_id]; assert(unique_partition_index != -1); - + coder.put_truncated_binary(unique_partition_index, NUM_UNIQUE_PARTITIONS2); } else if (log_blk.m_num_partitions == 3) @@ -2153,7 +2166,7 @@ static void code_block(bitwise_coder& coder, coder.put_truncated_binary(unique_partition_index, NUM_UNIQUE_PARTITIONS3); } - + encode_values(coder, num_endpoint_vals * log_blk.m_num_partitions, log_blk.m_endpoints, log_blk.m_endpoint_ise_range); } @@ -2187,7 +2200,7 @@ struct smooth_map_params // 3x3 region m_max_smooth_std_dev = 100.0f; m_smooth_max_mse_scale = 13000.0f; - + // 7x7 region m_max_med_smooth_std_dev = 9.0f; m_med_smooth_max_mse_scale = 15000.0f; @@ -2255,7 +2268,7 @@ static void filter_block(uint32_t grid_x, uint32_t grid_y, const vec3F* pSrc_blo for (uint32_t c = 0; c < 3; c++) { const basist::half_float h = basist::float_to_half(temp_block[y][x][c]); - + pDst_block_half3[x + y * 6][c] = h; pDst_block_q16[x + y * 6][c] = (float)half_to_qlog16(h); } @@ -2276,9 +2289,9 @@ static void filter_block(uint32_t grid_x, uint32_t grid_y, const vec3F* pSrc_blo for (uint32_t i = 0; i < pCol_lists[y].n; i++) p += temp_block[pCol_lists[y].p[i].pixel][x] * pCol_lists[y].p[i].weight; - + p.clamp(0.0f, basist::ASTC_HDR_MAX_VAL); - + for (uint32_t c = 0; c < 3; c++) { const basist::half_float h = basist::float_to_half(p[c]); @@ -2288,7 +2301,7 @@ static void filter_block(uint32_t grid_x, uint32_t 
grid_y, const vec3F* pSrc_blo } pDst_block_q16[x + y * 6][3] = 0.0f; - + } // x } // y } @@ -2379,7 +2392,7 @@ static void filter_block(uint32_t grid_x, uint32_t grid_y, const vec3F* pSrc_blo for (uint32_t i = 0; i < pRow_lists[x].n; i++) p += vec3F(pSrc_block[y * 6 + pRow_lists[x].p[i].pixel]) * pRow_lists[x].p[i].weight; - + temp_block[y][x] = p; } // x } // y @@ -2415,7 +2428,7 @@ static float diff_blocks(const vec4F* pA, const vec4F* pB) float diff = 0.0f; for (uint32_t i = 0; i < BLOCK_T; i++) diff += square(pA[i][0] - pB[i][0]) + square(pA[i][1] - pB[i][1]) + square(pA[i][2] - pB[i][2]); - + return diff * (1.0f / (float)BLOCK_T); } @@ -2457,13 +2470,14 @@ static void create_smooth_maps2( const uint32_t height = orig_img.get_height(); //const uint32_t total_pixels = orig_img.get_total_pixels(); const uint32_t num_comps = 3; - + if (params.m_no_mse_scaling) { smooth_block_mse_scales.set_all(1.0f); return; } - + + // TODO: - move up before the no mse scaling check (harmless as that is only a debug aid) smooth_block_mse_scales.resize(width, height); image smooth_vis, med_smooth_vis, ultra_smooth_vis; @@ -2563,7 +2577,7 @@ static void create_smooth_maps2( float yl = clampf(max_std_dev / params.m_max_ultra_smooth_std_dev, 0.0f, 1.0f); yl = powf(yl, 2.0f); - + smooth_block_mse_scales(x, y) = lerp(params.m_ultra_smooth_max_mse_scale, smooth_block_mse_scales(x, y), yl); if (params.m_debug_images) @@ -2606,12 +2620,12 @@ static float compute_pixel_mse_itp(const vec3F& orig_pixel_itp, const vec3F& com float delta_i = orig_pixel_itp[0] - comp_pixel_itp[0]; float delta_t = orig_pixel_itp[1] - comp_pixel_itp[1]; float delta_p = orig_pixel_itp[2] - comp_pixel_itp[2]; - + float err = (delta_i * delta_i) + (delta_t * delta_t) + (delta_p * delta_p); if (delta_itp_dark_adjustment) { - // We have to process a large range of inputs, including extremely dark inputs. + // We have to process a large range of inputs, including extremely dark inputs. 
// Artifically amplify MSE on very dark pixels - otherwise they'll be overly compressed at higher lambdas. // This is to better handle very dark signals which could be explictly overexposed. float s = bu_math::smoothstep(0.0f, REALLY_DARK_I_THRESHOLD, orig_pixel_itp[0]); @@ -2699,7 +2713,7 @@ static float compute_pixel_delta_itp(const vec3F& a, const vec3F& b, const vec3F float err = 720.0f * sqrtf((delta_i * delta_i) + (delta_t * delta_t) + (delta_p * delta_p)); float s = bu_math::smoothstep(0.0f, REALLY_DARK_I_THRESHOLD, orig[0]); - + if (delta_itp_dark_adjustment) { // This is to better handle very dark signals which could be explictly overexposed. @@ -2713,22 +2727,22 @@ static float compute_pixel_delta_itp(const vec3F& a, const vec3F& b, const vec3F struct candidate_encoding { encoding_type m_encoding_type; - + basist::half_float m_solid_color[3]; uint32_t m_run_len; vec3F m_comp_pixels[MAX_BLOCK_H][MAX_BLOCK_W]; // [y][x] vec3F m_comp_pixels_itp[MAX_BLOCK_H][MAX_BLOCK_W]; // [y][x] - + endpoint_mode m_endpoint_mode; block_mode m_block_mode; bitwise_coder m_coder; - + // The block to code, which may not be valid ASTC. This may have to be transcoded (by requantizing the weights/endpoints) before it's valid ASTC. // Note the endpoints may be coded endpoints OR transcoded endpoints, depending on the encoding type. - astc_helpers::log_astc_block m_coded_log_blk; + astc_helpers::log_astc_block m_coded_log_blk; // The block the decoder outputs. 
astc_helpers::log_astc_block m_decomp_log_blk; @@ -2736,9 +2750,9 @@ struct candidate_encoding int m_reuse_delta_index; // m_t can get VERY large - double m_t, m_d; + double m_t, m_d; float m_bits; - + candidate_encoding() { clear(); @@ -2769,7 +2783,7 @@ struct candidate_encoding m_coded_log_blk = rhs.m_coded_log_blk; m_decomp_log_blk = rhs.m_decomp_log_blk; m_reuse_delta_index = rhs.m_reuse_delta_index; - + return *this; } @@ -2801,19 +2815,19 @@ struct candidate_encoding m_run_len = 0; clear_obj(m_comp_pixels); - + m_endpoint_mode = endpoint_mode::cInvalid; m_block_mode = block_mode::cInvalid; m_coder.restart(); - + m_coded_log_blk.clear(); m_decomp_log_blk.clear(); m_t = 0; m_d = 0; m_bits = 0; - + m_reuse_delta_index = 0; } }; @@ -2837,7 +2851,7 @@ bool decode_astc_block(uint32_t block_w, uint32_t block_h, astc_helpers::log_ast basist::half_to_float(decoded_pixels_half4[x + y * block_w][0]), basist::half_to_float(decoded_pixels_half4[x + y * block_w][1]), basist::half_to_float(decoded_pixels_half4[x + y * block_w][2])); - } // x + } // x } //y return true; @@ -2868,12 +2882,19 @@ static bool decode_file(const uint8_vec& comp_data, vector2D MAX_ASTC_HDR_6X6_DIM) || (height > MAX_ASTC_HDR_6X6_DIM)) return false; @@ -2890,11 +2911,11 @@ static bool decode_file(const uint8_vec& comp_data, vector2D num_blocks_remaining) return false; - + uint32_t prev_bx = cur_bx, prev_by = cur_by; if (cur_bx) @@ -2994,7 +3015,7 @@ static bool decode_file(const uint8_vec& comp_data, vector2D { const uint32_t width = src_img.get_width(); const uint32_t height = src_img.get_height(); - + if (pPacked_bc6h_img) pPacked_bc6h_img->resize(width, height); @@ -3380,7 +3401,7 @@ static bool pack_bc6h_image(const imagef &src_img, vector2D const uint32_t num_blocks_y = src_img.get_block_height(4); bc6h_blocks.resize(num_blocks_x, num_blocks_y); - + for (uint32_t by = 0; by < num_blocks_y; by++) { for (uint32_t bx = 0; bx < num_blocks_x; bx++) @@ -3426,7 +3447,7 @@ static bool 
pack_bc6h_image(const imagef &src_img, vector2D fmt_error_printf("unpack_bc6h() failed\n"); return false; } - + for (uint32_t y = 0; y < 4; y++) { for (uint32_t x = 0; x < 4; x++) @@ -3468,7 +3489,7 @@ static void estimate_partitions_mode7_and_11( uint32_t num_pats_to_examine, const uint32_t* pUnique_pat_indices_to_examine, // indices of pats to examine const vec3F *pHalf_pixels_as_floats, // block's half pixel values casted to floats const astc_hdr_codec_base_options& coptions, // options - uint32_t num_desired_pats, + uint32_t num_desired_pats, int *pDesired_pat_indices_mode11, int *pDesired_pat_indices_mode7) // output indices { BASISU_NOTE_UNUSED(coptions); @@ -3491,7 +3512,7 @@ static void estimate_partitions_mode7_and_11( candidate_res mode7_candidates[MAX_CANDIDATES]; const vec3F grayscale_axis(0.5773502691f); - + for (uint32_t examine_iter = 0; examine_iter < num_pats_to_examine; examine_iter++) { const uint32_t unique_part_index = pUnique_pat_indices_to_examine[examine_iter]; @@ -3517,7 +3538,7 @@ static void estimate_partitions_mode7_and_11( } // x } // y - + for (uint32_t i = 0; i < num_parts; i++) { assert(part_total_texels[i]); @@ -3553,7 +3574,7 @@ static void estimate_partitions_mode7_and_11( for (uint32_t part_index = 0; part_index < num_parts; part_index++) total_variance[part_index] = part_cov[part_index][0] + part_cov[part_index][3] + part_cov[part_index][5]; - vec3F part_axis[MAX_PARTS]; + //vec3F part_axis[MAX_PARTS]; float mode11_eigenvalue_est[MAX_PARTS]; // For each partition, compute the variance along the principle axis float mode7_eigenvalue_est[MAX_PARTS]; // For each partition, compute the variance along the principle axis @@ -3562,7 +3583,7 @@ static void estimate_partitions_mode7_and_11( float* pCov = &part_cov[part_index][0]; float xr = .9f, xg = 1.0f, xb = .7f; - + const uint32_t NUM_POWER_ITERS = 4; for (uint32_t iter = 0; iter < NUM_POWER_ITERS; iter++) { @@ -3575,7 +3596,7 @@ static void estimate_partitions_mode7_and_11( if (m >= 
1e-10f) { m = 1.0f / m; - + r *= m; g *= m; b *= m; @@ -3587,7 +3608,7 @@ static void estimate_partitions_mode7_and_11( } float len_sq = xr * xr + xg * xg + xb * xb; - + if (len_sq < 1e-10f) { xr = grayscale_axis[0]; @@ -3602,7 +3623,7 @@ static void estimate_partitions_mode7_and_11( xg *= len_sq; xb *= len_sq; } - + { // Transform the principle axis by the covariance matrix, which will scale the vector by its eigenvalue (the variance of the dataset projected onto the principle axis). float r = xr * pCov[0] + xg * pCov[1] + xb * pCov[2]; @@ -3613,13 +3634,13 @@ static void estimate_partitions_mode7_and_11( // The result is the variance along the principle axis. //float z1 = sqrtf(r * r + g * g + b * b); // this works with the principle axis //float z2 = r * xr + g * xg + b * xb; // compute length projected along xr,xg,xb - + mode11_eigenvalue_est[part_index] = r * xr + g * xg + b * xb; } { const float yrgb = grayscale_axis[0]; - + // Transform the grayscale axis by the covariance matrix, which will scale the vector by the eigenvalue (which is the variance of the dataset projected onto this vector). float r = yrgb * pCov[0] + yrgb * pCov[1] + yrgb * pCov[2]; float g = yrgb * pCov[1] + yrgb * pCov[3] + yrgb * pCov[4]; @@ -3629,7 +3650,7 @@ static void estimate_partitions_mode7_and_11( } } // part_index - + // Compute the total variance (squared error) of the other 2 axes by subtracting the total variance of all channels by the variance of the principle axis. // TODO: Could also compute the ratio of the principle axis's variance vs. the total variance. float mode11_total_sq_dist_to_line_alt = 0.0f; @@ -3742,7 +3763,7 @@ static void estimate_partitions_mode7( } vec3F part_axis(0.5773502691f); - + // TODO: This total distance can be computed rapidly. First compute the total variance of each channel (sum the diag entries of the covar matrix), // then compute the principle eigenvalue, and subtract. The result is the variance of the projection distances. 
float total_sq_dist_to_line = 0.0f; @@ -3808,7 +3829,7 @@ static float calc_deblocking_penalty_itp( const vec3F& q_pixel_itp = pass_src_img_itp(qx, qy); const vec3F &d_pixel_itp = candidate.m_comp_pixels_itp[qy - by * 6][qx - bx * 6]; // compressed block - + vec3F orig_delta_v(o_pixel_itp - q_pixel_itp); total_orig_mse += square(orig_delta_v[0]) + square(orig_delta_v[1]) + square(orig_delta_v[2]); @@ -3872,15 +3893,15 @@ static bool calc_strip_size( else { rows_per_strip = (num_blocks_y / total_strips) & ~1; - + if (rows_per_strip < 2) rows_per_strip = 2;// num_blocks_y; } - + assert((rows_per_strip == num_blocks_y) || ((rows_per_strip & 1) == 0)); total_strips = (num_blocks_y + rows_per_strip - 1) / rows_per_strip; - + if (global_cfg.m_debug_output) { fmt_printf("num_blocks_y: {}, total_threads : {}, Total strips : {}\n", num_blocks_y, total_threads, total_strips); @@ -3990,7 +4011,7 @@ struct uastc_hdr_6x6_debug_state std::mutex m_vis_image_mutex; std::atomic m_comp_level_hist[ASTC_HDR_6X6_MAX_COMP_LEVEL + 1]; - + std::atomic m_total_jnd_replacements; std::mutex m_stats_mutex; @@ -4006,7 +4027,7 @@ struct uastc_hdr_6x6_debug_state } } } - + void init(uint32_t width, uint32_t height) { m_stat_vis.resize(width, height); @@ -4020,7 +4041,7 @@ struct uastc_hdr_6x6_debug_state basisu::clear_obj(m_endpoint_mode_hist); basisu::clear_obj(m_block_mode_hist); basisu::clear_obj(m_block_mode_total_bits); - + for (uint32_t i = 0; i < (uint32_t)block_mode::cBMTotalModes; i++) { for (uint32_t j = 0; j < 3; j++) @@ -4047,7 +4068,7 @@ struct uastc_hdr_6x6_debug_state for (uint32_t i = 0; i < std::size(m_total_part2_stats); i++) m_total_part2_stats[i].store(0); - + for (uint32_t i = 0; i < std::size(m_dp_stats); i++) m_dp_stats[i].store(0); @@ -4171,9 +4192,9 @@ struct uastc_hdr_6x6_debug_state struct uastc_hdr_6x6_encode_state { astc_hdr_codec_base_options master_coptions; - + imagef src_img; - + imagef src_img_filtered1; imagef src_img_filtered2; @@ -4199,7 +4220,7 @@ static 
bool compress_strip_task( { BASISU_NOTE_UNUSED(num_blocks_y); BASISU_NOTE_UNUSED(total_strips); - + vec3F prev_comp_pixels[BLOCK_H][BLOCK_W]; // [y][x] basisu::clear_obj(prev_comp_pixels); @@ -4216,6 +4237,8 @@ static bool compress_strip_task( basisu::vector candidates; candidates.reserve(CANDIDATES_TO_RESERVE); + const bool use_orig_behavior = global_cfg.m_write_basisu_1_6_compatible_files; + for (uint32_t by = strip_first_by; by <= strip_last_by; by++) { const bool has_upper_neighbor = by > strip_first_by; @@ -4438,15 +4461,15 @@ static bool compress_strip_task( for (uint32_t i = 0; i < 3; i++) { #if 0 - // 9/5/2025, wrong metric, we're iterating channels pairs here, not individual channels. + // 9/5/2025, wrong metric, we're iterating channels pairs here, not individual channels. // On 3 active channel blocks this causes no difference. - if (half_comp_stats[i].m_range > 0.0f) + if (half_comp_stats[i].m_range > 0.0f) #else static const uint8_t s_chan_pairs[3][2] = { {0, 1}, {0, 2}, {1, 2} }; - + const uint32_t chanA = s_chan_pairs[i][0]; const uint32_t chanB = s_chan_pairs[i][1]; - + if ((half_comp_stats[chanA].m_range > 0.0f) && (half_comp_stats[chanB].m_range > 0.0f)) #endif { @@ -4681,7 +4704,7 @@ static bool compress_strip_task( } // Create the block the decoder would transcode into. 
- copy_weight_grid(dual_plane, grid_x, grid_y, transcode_weights, decomp_log_blk); + copy_weight_grid(dual_plane, grid_x, grid_y, transcode_weights, decomp_log_blk, use_orig_behavior); } else if (prev_coded_log_blk.m_num_partitions == 2) { @@ -4708,7 +4731,7 @@ static bool compress_strip_task( part_half_pixels[part_index][l] = half_pixels[y][x]; part_total_pixels[part_index] = l + 1; - } // x + } // x } // y uint8_t blk_weights[2][BLOCK_W * BLOCK_H]; @@ -4759,7 +4782,7 @@ static bool compress_strip_task( basist::astc_6x6_hdr::requantize_astc_weights(num_grid_samples, coded_log_blk.m_weights, coded_log_blk.m_weight_ise_range, transcode_weights, decomp_log_blk.m_weight_ise_range); // Create the block the decoder would transcode into. - copy_weight_grid(dual_plane, grid_x, grid_y, transcode_weights, decomp_log_blk); + copy_weight_grid(dual_plane, grid_x, grid_y, transcode_weights, decomp_log_blk, use_orig_behavior); } else if (prev_coded_log_blk.m_num_partitions == 3) { @@ -4786,7 +4809,7 @@ static bool compress_strip_task( part_half_pixels[part_index][l] = half_pixels[y][x]; part_total_pixels[part_index] = l + 1; - } // x + } // x } // y uint8_t blk_weights[3][BLOCK_W * BLOCK_H]; @@ -4829,7 +4852,7 @@ static bool compress_strip_task( basist::astc_6x6_hdr::requantize_astc_weights(num_grid_samples, coded_log_blk.m_weights, coded_log_blk.m_weight_ise_range, transcode_weights, decomp_log_blk.m_weight_ise_range); // Create the block the decoder would transcode into. 
- copy_weight_grid(dual_plane, grid_x, grid_y, transcode_weights, decomp_log_blk); + copy_weight_grid(dual_plane, grid_x, grid_y, transcode_weights, decomp_log_blk, use_orig_behavior); } if (!validate_log_blk(decomp_log_blk)) @@ -5372,7 +5395,7 @@ static bool compress_strip_task( memcpy(decomp_blk.m_endpoints, transcode_endpoints, num_endpoint_vals); - copy_weight_grid(dual_plane, grid_x, grid_y, transcode_weights, decomp_blk); + copy_weight_grid(dual_plane, grid_x, grid_y, transcode_weights, decomp_blk, use_orig_behavior); if (!validate_log_blk(decomp_blk)) { @@ -5604,7 +5627,7 @@ static bool compress_strip_task( memcpy(decomp_blk.m_endpoints, transcode_endpoints, num_endpoint_vals); - copy_weight_grid(dual_plane, grid_x, grid_y, transcode_weights, decomp_blk); + copy_weight_grid(dual_plane, grid_x, grid_y, transcode_weights, decomp_blk, use_orig_behavior); if (!validate_log_blk(decomp_blk)) { @@ -5926,7 +5949,7 @@ static bool compress_strip_task( basist::astc_6x6_hdr::requantize_ise_endpoints(mode_desc.m_cem, mode_desc.m_endpoint_ise_range, coded_log_blk.m_endpoints, mode_desc.m_transcode_endpoint_ise_range, decomp_blk.m_endpoints); - copy_weight_grid(dual_plane, grid_x, grid_y, transcode_weights, decomp_blk); + copy_weight_grid(dual_plane, grid_x, grid_y, transcode_weights, decomp_blk, use_orig_behavior); if (!validate_log_blk(decomp_blk)) { @@ -6128,13 +6151,13 @@ static bool compress_strip_task( mode_penalty *= (complex_block ? 
RUN_PENALTY * 2.0f : RUN_PENALTY); float candidate_bits = (float)candidate.m_coder.get_total_bits(); - + double candidate_d = (double)candidate_mse * mode_penalty; const float D_POWER = 2.0f; - + // this value can get VERY large after squaring on random (fuzzed) HDR inputs - double candidate_t = perceptual_scale * pow(candidate_d, D_POWER) + candidate_bits * (global_cfg.m_lambda * 1000.0f); + double candidate_t = perceptual_scale * pow(candidate_d, D_POWER) + candidate_bits * (global_cfg.m_lambda * 1000.0f); candidate.m_t = candidate_t; candidate.m_d = candidate_d; @@ -6151,7 +6174,7 @@ static bool compress_strip_task( if (best_candidate_index < 0) { assert(0); - + // Should never happen best_candidate_index = 0; } @@ -6171,7 +6194,7 @@ static bool compress_strip_task( debug_state.m_total_gaussian2_blocks.fetch_add(1, std::memory_order_relaxed); continue; } - + if (global_cfg.m_rdo_candidate_diversity_boost) { // candidate diversity boosting - consider candidates along/near the Pareto front @@ -6401,7 +6424,7 @@ static bool compress_strip_task( const uint32_t p = pat[x + y * 6]; debug_state.m_part_vis.set_clipped(bx * 6 + x, by * 6 + y, color_rgba(p ? 100 : 0, 128, p ? 
100 : 0, 255)); } // x - } // y + } // y } else if (best_candidate.m_decomp_log_blk.m_num_partitions == 3) { @@ -6424,7 +6447,7 @@ static bool compress_strip_task( c.set(0, 100, 150, 255); debug_state.m_part_vis.set_clipped(bx * 6 + x, by * 6 + y, c); } // x - } // y + } // y } else if (best_candidate.m_decomp_log_blk.m_dual_plane) { @@ -6537,7 +6560,7 @@ void global_init() tm.start(); init_pq_tables(); - + init_partitions2_6x6(); init_partitions3_6x6(); @@ -6554,7 +6577,7 @@ bool compress_photo(const basisu::imagef &orig_src_img, const astc_hdr_6x6_globa assert(g_initialized); if (!g_initialized) return false; - + assert(pJob_pool); if (orig_global_cfg.m_debug_output) @@ -6600,17 +6623,17 @@ bool compress_photo(const basisu::imagef &orig_src_img, const astc_hdr_6x6_globa f = basist::ASTC_HDR_MAX_VAL; enc_state.src_img(x, y)[c] = f; - + } // c - + } // x } // y - + if (global_cfg.m_debug_images) { write_exr((global_cfg.m_debug_image_prefix + "orig.exr").c_str(), enc_state.src_img, 3, 0); } - + image src_img_compressed; tonemap_image_compressive2(src_img_compressed, enc_state.src_img); @@ -6635,10 +6658,10 @@ bool compress_photo(const basisu::imagef &orig_src_img, const astc_hdr_6x6_globa enc_state.src_img_filtered1.resize(width, height); image_resample(enc_state.src_img, enc_state.src_img_filtered1, "gaussian", global_cfg.m_gaussian1_strength); //1.45f); - + enc_state.src_img_filtered2.resize(width, height); image_resample(enc_state.src_img, enc_state.src_img_filtered2, "gaussian", global_cfg.m_gaussian2_strength); //1.83f); - + if (global_cfg.m_debug_images) { write_exr((global_cfg.m_debug_image_prefix + "blurred1.exr").c_str(), enc_state.src_img_filtered1, 3, 0); @@ -6650,10 +6673,10 @@ bool compress_photo(const basisu::imagef &orig_src_img, const astc_hdr_6x6_globa enc_state.src_img_itp.resize(width, height); convet_rgb_image_to_itp(enc_state.src_img, enc_state.src_img_itp, global_cfg); - + enc_state.src_img_filtered1_itp.resize(width, height); 
convet_rgb_image_to_itp(enc_state.src_img_filtered1, enc_state.src_img_filtered1_itp, global_cfg); - + enc_state.src_img_filtered2_itp.resize(width, height); convet_rgb_image_to_itp(enc_state.src_img_filtered2, enc_state.src_img_filtered2_itp, global_cfg); @@ -6666,20 +6689,22 @@ bool compress_photo(const basisu::imagef &orig_src_img, const astc_hdr_6x6_globa fmt_error_printf("compress_photo: Failed computing strip sizes\n"); return false; } - + if (global_cfg.m_debug_output) fmt_printf("lambda: {}, comp_level: {}, highest_comp_level: {}, extra patterns: {}\n", global_cfg.m_lambda, global_cfg.m_master_comp_level, global_cfg.m_highest_comp_level, global_cfg.m_extra_patterns_flag); - + enc_state.coded_blocks.resize(num_blocks_x, num_blocks_y); - + bitwise_coder coded_bits; - coded_bits.put_bits(0xABCD, 16); + // For Basis v1.60 files write the original marker, otherwise write the new marker. + coded_bits.put_bits(global_cfg.m_write_basisu_1_6_compatible_files ? UASTC_6x6_HDR_SIG0 : UASTC_6x6_HDR_SIG1, 16); + coded_bits.put_bits(width, 16); coded_bits.put_bits(height, 16); - + enc_state.packed_img.resize(width, height); - + enc_state.strip_bits.resize(total_strips); enc_state.final_astc_blocks.resize(num_blocks_x, num_blocks_y); @@ -6690,7 +6715,7 @@ bool compress_photo(const basisu::imagef &orig_src_img, const astc_hdr_6x6_globa debug_state.init(width, height); else debug_state.init(0, 0); - + interval_timer tm; tm.start(); @@ -6700,7 +6725,7 @@ bool compress_photo(const basisu::imagef &orig_src_img, const astc_hdr_6x6_globa for (uint32_t strip_index = 0; strip_index < total_strips; strip_index++) { const uint32_t strip_first_by = strip_index * rows_per_strip; - + uint32_t strip_last_by = minimum(strip_first_by + rows_per_strip - 1, num_blocks_y); if (strip_index == (total_strips - 1)) strip_last_by = num_blocks_y - 1; @@ -6726,7 +6751,7 @@ bool compress_photo(const basisu::imagef &orig_src_img, const astc_hdr_6x6_globa if (any_failed_flag) break; - + } // 
strip_index pJob_pool->wait_for_all(); @@ -6736,7 +6761,7 @@ bool compress_photo(const basisu::imagef &orig_src_img, const astc_hdr_6x6_globa fmt_error_printf("One or more strips failed during compression\n"); return false; } - + if (global_cfg.m_debug_output) fmt_printf("Encoding time: {} secs\n", tm.get_elapsed_secs()); @@ -6755,7 +6780,7 @@ bool compress_photo(const basisu::imagef &orig_src_img, const astc_hdr_6x6_globa for (uint32_t i = 0; i < total_strips; i++) coded_bits.append(enc_state.strip_bits[i]); - + coded_bits.put_bits(0xA742, 16); coded_bits.flush(); @@ -6764,13 +6789,13 @@ bool compress_photo(const basisu::imagef &orig_src_img, const astc_hdr_6x6_globa { write_exr((global_cfg.m_output_image_prefix + "comp.exr").c_str(), enc_state.packed_img, 3, 0); } - + if (global_cfg.m_debug_output) fmt_printf("\nTotal intermediate output bits/pixel: {3.4}\n", (float)coded_bits.get_total_bits() / (float)(width * height)); vector2D decoded_blocks1; vector2D decoded_blocks2; - + if (global_cfg.m_debug_output) fmt_printf("decode_file\n"); @@ -6867,7 +6892,7 @@ bool compress_photo(const basisu::imagef &orig_src_img, const astc_hdr_6x6_globa fmt_error_printf("unpack_physical_astc_block() failed\n"); return false; } - + unpacked_astc_img.set_block_clipped(pixels, x * BLOCK_W, y * BLOCK_H, BLOCK_W, BLOCK_H); vec4F pixels_google[MAX_BLOCK_W * MAX_BLOCK_H]; @@ -6890,7 +6915,7 @@ bool compress_photo(const basisu::imagef &orig_src_img, const astc_hdr_6x6_globa } } } - + if (global_cfg.m_debug_output) fmt_printf("\nUnpack succeeded\n"); @@ -6898,9 +6923,9 @@ bool compress_photo(const basisu::imagef &orig_src_img, const astc_hdr_6x6_globa { vector2D bc6h_blocks; - + fast_bc6h_params enc_params; - + bool pack_status = pack_bc6h_image(unpacked_astc_img, bc6h_blocks, &unpacked_bc6h_img, enc_params); if (!pack_status) { @@ -6909,7 +6934,7 @@ bool compress_photo(const basisu::imagef &orig_src_img, const astc_hdr_6x6_globa } unpacked_bc6h_img.crop(width, height); - + if 
(global_cfg.m_output_images) { write_exr((global_cfg.m_output_image_prefix + "unpacked_bc6h.exr").c_str(), unpacked_bc6h_img, 3, 0); @@ -6918,7 +6943,7 @@ bool compress_photo(const basisu::imagef &orig_src_img, const astc_hdr_6x6_globa unpacked_astc_img.crop(width, height); unpacked_astc_google_img.crop(width, height); - + if (global_cfg.m_output_images) { write_exr((global_cfg.m_output_image_prefix + "unpacked_astc.exr").c_str(), unpacked_astc_img, 3, 0); @@ -6943,7 +6968,7 @@ bool compress_photo(const basisu::imagef &orig_src_img, const astc_hdr_6x6_globa im.print_hp(); } } - + metrics.m_im_astc_log2.calc(enc_state.src_img, unpacked_astc_img, 0, 3, true, true); if (global_cfg.m_debug_output) @@ -6993,7 +7018,7 @@ bool compress_photo(const basisu::imagef &orig_src_img, const astc_hdr_6x6_globa for (uint32_t i = 0; i < 3; i++) { im.calc(enc_state.src_img, unpacked_bc6h_img, i, 1, true, true); - + if (global_cfg.m_debug_output) { printf("%c: ", "RGBA"[i]); @@ -7015,14 +7040,14 @@ bool compress_photo(const basisu::imagef &orig_src_img, const astc_hdr_6x6_globa if (global_cfg.m_image_stats) { image_metrics im; - + if (global_cfg.m_debug_output) printf("BC6H half float space error metrics (a piecewise linear approximation of log2 error):\n"); for (uint32_t i = 0; i < 3; i++) { im.calc_half(enc_state.src_img, unpacked_bc6h_img, i, 1, true); - + if (global_cfg.m_debug_output) { printf("%c: ", "RGBA"[i]); @@ -7031,7 +7056,7 @@ bool compress_photo(const basisu::imagef &orig_src_img, const astc_hdr_6x6_globa } metrics.m_im_bc6h_half.calc_half(enc_state.src_img, unpacked_bc6h_img, 0, 3, true); - + if (global_cfg.m_debug_output) { printf("RGB: "); diff --git a/external/basis_universal/encoder/basisu_astc_hdr_6x6_enc.h b/external/basis_universal/encoder/basisu_astc_hdr_6x6_enc.h index fe89c5b703..4eac0ed7e6 100644 --- a/external/basis_universal/encoder/basisu_astc_hdr_6x6_enc.h +++ b/external/basis_universal/encoder/basisu_astc_hdr_6x6_enc.h @@ -5,22 +5,25 @@ namespace 
astc_6x6_hdr { + const uint32_t ASTC_HDR_6X6_DEF_USER_COMP_LEVEL = 2; const uint32_t ASTC_HDR_6X6_MAX_USER_COMP_LEVEL = 12; - + const uint32_t ASTC_HDR_6X6_MAX_COMP_LEVEL = 4; - + const float LDR_BLACK_BIAS = 0.0f;// .49f; - + // Note: This struct is copied several times, so do not place any heavyweight objects in here. struct astc_hdr_6x6_global_config { // Important: The Delta ITP colorspace error metric we use internally makes several assumptions about the nature of the HDR RGB inputs supplied to the encoder. - // This encoder computes colorspace error in the ICtCp (or more accurately the delta ITP, where CT is scaled by .5 vs. ICtCp to become T) colorspace, so getting this correct is important. + // This encoder computes colorspace error in the ICtCp (or more accurately the delta E ITP, where CT is scaled by .5 vs. ICtCp to become T) colorspace, so getting this correct is important. // By default the encoder assumes the input is in absolute luminance (in nits or candela per square meter, cd/m^2), specified as positive-only linear light RGB, using the REC 709 colorspace gamut (but NOT the sRGB transfer function, i.e. linear light). // If the m_rec2020_bt2100_color_gamut flag is true, the input colorspace is treated as REC 2020/BT.2100 (which is wider than 709). - // For SDR/LDR->HDR upconversion, the REC 709 sRGB input should be converted to linear light (sRGB->linear) and the resulting normalized linear RGB values scaled by either 80 or 100 nits (the luminance of a typical SDR monitor). + // For SDR/LDR->HDR upconversion, the REC 709 sRGB input should be converted to linear light (sRGB->linear) and the resulting normalized linear RGB values scaled by either 80 or 100 nits (the luminance of a typical SDR monitor). // SDR upconversion to normalized [0,1] (i.e. non-absolute) luminances may work but is not supported because ITP errors will not be predicted correctly. 
- bool m_rec2020_bt2100_color_gamut = false; + // 11/3/2025: This flag is always copied straight into the output KTX2 DFD colorspace, even for non-HDR formats. + // TODO: Move this parameter to reflect this. + bool m_rec2020_bt2100_color_gamut = false; // levels 0-3 normal levels, 4=exhaustive uint32_t m_master_comp_level = 0; @@ -35,13 +38,13 @@ namespace astc_6x6_hdr float m_jnd_delta_itp_thresh = .75f; bool m_force_one_strip = false; - + bool m_gaussian1_fallback = true; // def to true, if this is disabled m_gaussian2_fallback should be disabled too float m_gaussian1_strength = 1.45f; bool m_gaussian2_fallback = true; // def to true, hopefully rarely kicks in float m_gaussian2_strength = 1.83f; - + // m_disable_delta_endpoint_usage may give a slight increase in RDO ASTC encoding efficiency. It's also faster. bool m_disable_delta_endpoint_usage = false; @@ -67,7 +70,7 @@ namespace astc_6x6_hdr bool m_disable_twothree_subsets = false; // def to false bool m_use_solid_blocks = true; // def to true bool m_use_runs = true; // def to true - bool m_block_stat_optimizations_flag = true; // def to true + bool m_block_stat_optimizations_flag = true; // def to true bool m_rdo_candidate_diversity_boost = true; // def to true float m_rdo_candidate_diversity_boost_bit_window_weight = 1.2f; @@ -75,28 +78,35 @@ namespace astc_6x6_hdr bool m_favor_higher_compression = true; // utilize all modes uint32_t m_num_reuse_xy_deltas = basist::astc_6x6_hdr::NUM_REUSE_XY_DELTAS; + // By default, for compatibility with KTX-Software (which uses v1.60), we write v1.6 compatible UASTC HDR 6x6i files. + // The transcoder is compatible with both variants. This setting impacts how 2x2 blocks are upsampled and the initial marker version. + // Eventually once KTX-Software upgrades to the latest version of basisu this will be defaulted to false. + // If this is false a v2.0 or later transcoder is required for UASTC HDR 6x6i. 
+ bool m_write_basisu_1_6_compatible_files = true; + void print() const { - basisu::fmt_debug_printf("m_master_comp_level: {}, m_highest_comp_level: {}\n", m_master_comp_level, m_highest_comp_level); - basisu::fmt_debug_printf("m_lambda: {}\n", m_lambda); - basisu::fmt_debug_printf("m_rec2020_bt2100_color_gamut: {}\n", m_rec2020_bt2100_color_gamut); - basisu::fmt_debug_printf("m_extra_patterns_flag: {}, m_brute_force_partition_matching: {}\n", m_extra_patterns_flag, m_brute_force_partition_matching); - basisu::fmt_debug_printf("m_jnd_optimization: {}, m_jnd_delta_itp_thresh: {}\n", m_jnd_optimization, m_jnd_delta_itp_thresh); - basisu::fmt_debug_printf("m_force_one_strip: {}\n", m_force_one_strip); - basisu::fmt_debug_printf("m_gaussian1_fallback: {}, m_gaussian1_strength: {}\n", m_gaussian1_fallback, m_gaussian1_strength); - basisu::fmt_debug_printf("m_gaussian2_fallback: {}, m_gaussian2_strength: {}\n", m_gaussian2_fallback, m_gaussian2_strength); - basisu::fmt_debug_printf("m_disable_delta_endpoint_usage: {}\n", m_disable_delta_endpoint_usage); - basisu::fmt_debug_printf("m_delta_itp_dark_adjustment: {}\n", m_delta_itp_dark_adjustment); - basisu::fmt_debug_printf("m_debug_images: {}, m_debug_image_prefix: {}\n", m_debug_images, m_debug_image_prefix); - basisu::fmt_debug_printf("m_output_images: {}, m_output_image_prefix: {}\n", m_output_images, m_output_image_prefix); - basisu::fmt_debug_printf("m_image_stats: {}, m_status_output: {}\n", m_image_stats, m_status_output); - basisu::fmt_debug_printf("m_deblocking_flag: {}, m_deblock_penalty_weight: {}\n", m_deblocking_flag, m_deblock_penalty_weight); - basisu::fmt_debug_printf("m_disable_twothree_subsets: {}, m_use_solid_blocks: {}\n", m_disable_twothree_subsets, m_use_solid_blocks); - basisu::fmt_debug_printf("m_use_runs: {}, m_block_stat_optimizations_flag: {}\n", m_use_runs, m_block_stat_optimizations_flag); - basisu::fmt_debug_printf("m_rdo_candidate_diversity_boost: {}, 
m_rdo_candidate_diversity_boost_bit_window_weight: {}\n", m_rdo_candidate_diversity_boost, m_rdo_candidate_diversity_boost_bit_window_weight); - basisu::fmt_debug_printf("m_favor_higher_compression: {}, m_num_reuse_xy_deltas: {}\n", m_favor_higher_compression, m_num_reuse_xy_deltas); + basisu::fmt_debug_printf(" m_master_comp_level: {}, m_highest_comp_level: {}\n", m_master_comp_level, m_highest_comp_level); + basisu::fmt_debug_printf(" m_lambda: {}\n", m_lambda); + basisu::fmt_debug_printf(" m_rec2020_bt2100_color_gamut: {}\n", m_rec2020_bt2100_color_gamut); + basisu::fmt_debug_printf(" m_extra_patterns_flag: {}, m_brute_force_partition_matching: {}\n", m_extra_patterns_flag, m_brute_force_partition_matching); + basisu::fmt_debug_printf(" m_jnd_optimization: {}, m_jnd_delta_itp_thresh: {}\n", m_jnd_optimization, m_jnd_delta_itp_thresh); + basisu::fmt_debug_printf(" m_force_one_strip: {}\n", m_force_one_strip); + basisu::fmt_debug_printf(" m_gaussian1_fallback: {}, m_gaussian1_strength: {}\n", m_gaussian1_fallback, m_gaussian1_strength); + basisu::fmt_debug_printf(" m_gaussian2_fallback: {}, m_gaussian2_strength: {}\n", m_gaussian2_fallback, m_gaussian2_strength); + basisu::fmt_debug_printf(" m_disable_delta_endpoint_usage: {}\n", m_disable_delta_endpoint_usage); + basisu::fmt_debug_printf(" m_delta_itp_dark_adjustment: {}\n", m_delta_itp_dark_adjustment); + basisu::fmt_debug_printf(" m_debug_images: {}, m_debug_image_prefix: {}\n", m_debug_images, m_debug_image_prefix); + basisu::fmt_debug_printf(" m_output_images: {}, m_output_image_prefix: {}\n", m_output_images, m_output_image_prefix); + basisu::fmt_debug_printf(" m_image_stats: {}, m_status_output: {}\n", m_image_stats, m_status_output); + basisu::fmt_debug_printf(" m_deblocking_flag: {}, m_deblock_penalty_weight: {}\n", m_deblocking_flag, m_deblock_penalty_weight); + basisu::fmt_debug_printf(" m_disable_twothree_subsets: {}, m_use_solid_blocks: {}\n", m_disable_twothree_subsets, m_use_solid_blocks); + 
basisu::fmt_debug_printf(" m_use_runs: {}, m_block_stat_optimizations_flag: {}\n", m_use_runs, m_block_stat_optimizations_flag); + basisu::fmt_debug_printf(" m_rdo_candidate_diversity_boost: {}, m_rdo_candidate_diversity_boost_bit_window_weight: {}\n", m_rdo_candidate_diversity_boost, m_rdo_candidate_diversity_boost_bit_window_weight); + basisu::fmt_debug_printf(" m_favor_higher_compression: {}, m_num_reuse_xy_deltas: {}\n", m_favor_higher_compression, m_num_reuse_xy_deltas); + basisu::fmt_debug_printf(" m_write_basisu_1_6_compatible_files: {}\n", m_write_basisu_1_6_compatible_files); } - + astc_hdr_6x6_global_config() { } @@ -121,7 +131,7 @@ namespace astc_6x6_hdr basisu::image_metrics m_im_bc6h_log2; basisu::image_metrics m_im_bc6h_half; }; - + // The input image should be unpadded to 6x6 boundaries, i.e. the original unexpanded image. bool compress_photo(const basisu::imagef& orig_src_img, const astc_hdr_6x6_global_config& global_cfg, basisu::job_pool* pJob_pool, basisu::uint8_vec& intermediate_tex_data, basisu::uint8_vec& astc_tex_data, result_metrics& metrics); diff --git a/external/basis_universal/encoder/basisu_astc_hdr_common.cpp b/external/basis_universal/encoder/basisu_astc_hdr_common.cpp index a66ab5837c..d4df682861 100644 --- a/external/basis_universal/encoder/basisu_astc_hdr_common.cpp +++ b/external/basis_universal/encoder/basisu_astc_hdr_common.cpp @@ -1,4 +1,16 @@ // File: basisu_astc_hdr_common.cpp +// Copyright (C) 2019-2026 Binomial LLC. All Rights Reserved. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. #include "basisu_enc.h" #include "basisu_gpu_texture.h" #include "../transcoder/basisu_astc_helpers.h" @@ -14,6 +26,7 @@ using namespace basist; namespace basisu { +// Beware: the first entry is the # of weight levels for that BISE range. const uint8_t g_ise_weight_lerps[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][33] = { { 2, 0, 64 }, // 0, note ise range=0 is invalid for 4x4 block sizes (<24 weight bits in the block) @@ -117,7 +130,7 @@ static void compute_half_to_qlog_table(uint32_t bits, uint16_t* pTable, const ba float best_err = BIG_FLOAT_VAL; uint32_t best_qlog = 0; - + double prev_err = BIG_FLOAT_VAL; // For all possible qlog's @@ -141,13 +154,13 @@ static void compute_half_to_qlog_table(uint32_t bits, uint16_t* pTable, const ba } prev_err = err; - + // Find best if (err < best_err) { best_err = err; best_qlog = i; - + if (best_err == 0.0f) break; } @@ -171,7 +184,7 @@ static void init_qlog_tables() #if BASISU_MULTITHREADED_INIT job_pool jp(3); - + for (uint32_t bits = HALF_TO_QLOG_TABS_MIN_BITS; bits <= HALF_TO_QLOG_TABS_MAX_BITS; bits++) { jp.add_job( [bits, &qlog16_to_float]() { compute_half_to_qlog_table(bits, g_pHalf_to_qlog_tabs[bits - HALF_TO_QLOG_TABS_MIN_BITS], qlog16_to_float); }); @@ -328,7 +341,7 @@ static bool compute_least_squares_endpoints_rgb( uint32_t N, const uint8_t* pSelectors, const vec4F* pSelector_weights, vec3F* pXl, vec3F* pXh, const vec4F* pColors, const aabb3F& input_box) { - // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf + // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf // https://web.archive.org/web/20150319232457/http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf // I did this in matrix form first, expanded out all the ops, then optimized it a bit. 
float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f; @@ -339,7 +352,7 @@ static bool compute_least_squares_endpoints_rgb( for (uint32_t i = 0; i < N; i++) { const uint32_t sel = pSelectors[i]; - + z00 += pSelector_weights[sel][0]; z10 += pSelector_weights[sel][1]; z11 += pSelector_weights[sel][2]; @@ -373,7 +386,7 @@ static bool compute_least_squares_endpoints_rgb( iz01 = -z01 * det; iz10 = -z10 * det; iz11 = z00 * det; - + (*pXl)[0] = (float)(iz00 * q00_r + iz01 * q10_r); (*pXh)[0] = (float)(iz10 * q00_r + iz11 * q10_r); @@ -392,7 +405,7 @@ static bool compute_least_squares_endpoints_rgb( l = input_box[0][c]; h = input_box[1][c]; } - + (*pXl)[c] = l; (*pXh)[c] = h; } @@ -429,17 +442,17 @@ static bool compute_least_squares_endpoints_rgb( } static bool compute_least_squares_endpoints_rgb_raw_weights( - uint32_t N, const uint8_t* pRaw_weights, + uint32_t N, const uint8_t* pRaw_weights, vec3F* pXl, vec3F* pXh, const vec4F* pColors, const aabb3F& input_box) { - // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf + // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf // https://web.archive.org/web/20150319232457/http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf // I did this in matrix form first, expanded out all the ops, then optimized it a bit. 
float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f; float q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f; float q00_g = 0.0f, q10_g = 0.0f, t_g = 0.0f; float q00_b = 0.0f, q10_b = 0.0f, t_b = 0.0f; - + for (uint32_t i = 0; i < N; i++) { const float wt = (float)pRaw_weights[i] * (1.0f / 64.0f); @@ -541,13 +554,13 @@ static bool compute_least_squares_endpoints_2D( uint32_t N, const uint8_t* pSelectors, const vec4F* pSelector_weights, vec2F* pXl, vec2F* pXh, const vec2F* pColors, const aabb2F& input_box) { - // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf + // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf // https://web.archive.org/web/20150319232457/http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf // I did this in matrix form first, expanded out all the ops, then optimized it a bit. float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f; float q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f; float q00_g = 0.0f, q10_g = 0.0f, t_g = 0.0f; - + for (uint32_t i = 0; i < N; i++) { const uint32_t sel = pSelectors[i]; @@ -599,7 +612,7 @@ static bool compute_least_squares_endpoints_2D( (*pXl)[c] = l; (*pXh)[c] = h; } - + pXl->clamp(0.0f, MAX_QLOG16_VAL); pXh->clamp(0.0f, MAX_QLOG16_VAL); @@ -610,7 +623,7 @@ static bool compute_least_squares_endpoints_1D( uint32_t N, const uint8_t* pSelectors, const vec4F* pSelector_weights, vec1F* pXl, vec1F* pXh, const vec1F* pColors, const aabb1F& input_box) { - // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf + // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf // https://web.archive.org/web/20150319232457/http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf // I did this in matrix form first, expanded out all the ops, then optimized it a bit. 
float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f; @@ -668,10 +681,10 @@ static bool compute_least_squares_endpoints_1D( } static bool compute_weighted_least_squares_endpoints_rgb( - uint32_t N, + uint32_t N, const uint8_t* pSelectors, const vec4F* pSelector_weights, const float* pRaw_weights, /* ti */ const float* pEmphasis_weights /* wi */, - vec3F* pXl, vec3F* pXh, + vec3F* pXl, vec3F* pXh, const vec4F* pColors, /* pi */ const aabb3F& input_box) { @@ -702,7 +715,7 @@ static bool compute_weighted_least_squares_endpoints_rgb( const float pi_r = pColors[i][0], pi_g = pColors[i][1], pi_b = pColors[i][2]; weighted_mean_tw += wi * ti; - + weighted_mean_pw[0] += wi * pi_r; weighted_mean_pw[1] += wi * pi_g; weighted_mean_pw[2] += wi * pi_b; @@ -722,7 +735,7 @@ static bool compute_weighted_least_squares_endpoints_rgb( const float wi = pEmphasis_weights[i]; const float ti = pSelectors ? pSelector_weights[pSelectors[i]][3] : pRaw_weights[i]; const float pi_r = pColors[i][0], pi_g = pColors[i][1], pi_b = pColors[i][2]; - + spt[0] += wi * (pi_r - weighted_mean_pw[0]) * (ti - weighted_mean_tw); spt[1] += wi * (pi_g - weighted_mean_pw[1]) * (ti - weighted_mean_tw); spt[2] += wi * (pi_b - weighted_mean_pw[2]) * (ti - weighted_mean_tw); @@ -737,7 +750,7 @@ static bool compute_weighted_least_squares_endpoints_rgb( { float h = weighted_mean_pw[i] + (spt[i] / stt) * (1.0f - weighted_mean_tw); float l = weighted_mean_pw[i] - (spt[i] / stt) * weighted_mean_tw; - + (*pXh)[i] = h; (*pXl)[i] = l; } @@ -748,10 +761,10 @@ static bool compute_weighted_least_squares_endpoints_rgb( return true; } -static vec4F g_astc_ls_weights_ise[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][MAX_SUPPORTED_WEIGHT_LEVELS]; +vec4F g_astc_ls_weights_ise[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][MAX_SUPPORTED_WEIGHT_LEVELS]; -static uint8_t g_map_astc_to_linear_order[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][MAX_SUPPORTED_WEIGHT_LEVELS]; // [ise_range][astc_index] -> linear index -static uint8_t 
g_map_linear_to_astc_order[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][MAX_SUPPORTED_WEIGHT_LEVELS]; // [ise_range][linear_index] -> astc_index +uint8_t g_map_astc_to_linear_order[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][MAX_SUPPORTED_WEIGHT_LEVELS]; // [ise_range][astc_index] -> linear index +uint8_t g_map_linear_to_astc_order[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][MAX_SUPPORTED_WEIGHT_LEVELS]; // [ise_range][linear_index] -> astc_index static void encode_astc_hdr_init() { @@ -800,7 +813,7 @@ void astc_hdr_enc_init() astc_hdr_core_init(); - astc_helpers::init_tables(true); + astc_helpers::init_tables(); init_qlog_tables(); @@ -821,7 +834,7 @@ void interpolate_qlog12_colors( { for (uint32_t j = 0; j < 3; j++) { - assert(in_range(e[i][j], 0, 0xFFF)); + assert(is_in_range(e[i][j], 0, 0xFFF)); } } @@ -1128,7 +1141,7 @@ double eval_selectors( const vec3F low_color((float)pDecoded_half[lo_index * 3 + 0], (float)pDecoded_half[lo_index * 3 + 1], (float)pDecoded_half[lo_index * 3 + 2]); const vec3F high_color((float)pDecoded_half[hi_index * 3 + 0], (float)pDecoded_half[hi_index * 3 + 1], (float)pDecoded_half[hi_index * 3 + 2]); const vec3F mid_color((float)pDecoded_half[mid_index * 3 + 0], (float)pDecoded_half[mid_index * 3 + 1], (float)pDecoded_half[mid_index * 3 + 2]); - + const vec3F block_dir(high_color - low_color); for (uint32_t p = 0; p < num_pixels; p++) @@ -1140,11 +1153,11 @@ double eval_selectors( const int64_t desired_half_r_q = q2(desired_r, coptions.m_q_log_bias); const int64_t desired_half_g_q = q2(desired_g, coptions.m_q_log_bias); const int64_t desired_half_b_q = q2(desired_b, coptions.m_q_log_bias); - + // Determine which side of the middle plane the point is for a modest gain vec3F c((float)desired_r - mid_color[0], (float)desired_g - mid_color[1], (float)desired_b - mid_color[2]); float d = c.dot(block_dir); - + int i = 0, high_index = (num_weight_levels / 2) + 1; if (d >= 0.0f) { @@ -1240,11 +1253,11 @@ double eval_selectors_dual_plane( const uint32_t first_channel = 
(channel_index + 1) % 3; const uint32_t second_channel = (channel_index + 2) % 3; - + // First plane const double first_channel_weight = channel_weights[first_channel]; const double second_channel_weight = channel_weights[second_channel]; - + for (uint32_t p = 0; p < num_pixels; p++) { const half_float* pDesired_half = &pBlock_pixels_half[p * 3]; @@ -1284,7 +1297,7 @@ double eval_selectors_dual_plane( const half_float* pDesired_half = &pBlock_pixels_half[p * 3]; const double desired_half_a_q = q(pDesired_half[channel_index], coptions.m_q_log_bias); - + double lowest_e = BIG_FLOAT_VAL; // this is an approximation of MSLE @@ -1664,7 +1677,7 @@ bool pack_astc_mode11_submode(uint32_t submode, uint8_t* pEndpoints, int val_q[2 v4 |= (best_vd0 & 31); v5 |= (best_vd1 & 31); - assert(in_range(v0, 0, 255) && in_range(v1, 0, 255) && in_range(v2, 0, 255) && in_range(v3, 0, 255) && in_range(v4, 0, 255) && in_range(v5, 0, 255)); + assert(is_in_range(v0, 0, 255) && is_in_range(v1, 0, 255) && is_in_range(v2, 0, 255) && is_in_range(v3, 0, 255) && is_in_range(v4, 0, 255) && is_in_range(v5, 0, 255)); pEndpoints[0] = (uint8_t)v0; pEndpoints[1] = (uint8_t)v1; @@ -1711,7 +1724,7 @@ bool pack_astc_mode11_submode(uint32_t submode, uint8_t* pEndpoints, int val_q[2 bool pack_astc_mode11_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& low_q16, const vec3F& high_q16, int& max_clamp_mag, bool early_out_if_clamped, int max_clamp_mag_accept_thresh) { assert(submode <= 7); - + const uint32_t a_bits = 9 + (submode >> 1); const int max_a_val = (1 << a_bits) - 1; @@ -1803,7 +1816,7 @@ void pack_astc_mode11_direct(uint8_t* pEndpoints, vec3F l_q16, vec3F h_q16) // this quantizes R and G as 7 bits vs. 8, for grayscale. 
//l_q = g_half_to_qlog7[bounds_check((uint32_t)l_half, 0U, 32768U)] << 1; //h_q = g_half_to_qlog7[bounds_check((uint32_t)h_half, 0U, 32768U)] << 1; - + l_q = minimum(l_q, MAX_QLOG8); h_q = minimum(h_q, MAX_QLOG8); } @@ -1982,7 +1995,7 @@ bool pack_astc_mode7_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& x1 = get_bit(qlog[0], 8); // R8 x2 = get_bit(qlog[0], 7); // R7 x3 = get_bit(qlog[0], 10); // R10 - x4 = get_bit(qlog[0], 6); // R6 + x4 = get_bit(qlog[0], 6); // R6 x5 = get_bit(qlog[3], 6); // S6 x6 = get_bit(qlog[3], 5); // S5 break; @@ -1996,7 +2009,7 @@ bool pack_astc_mode7_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& x1 = get_bit(qlog[1], 5); // G5 x2 = get_bit(qlog[0], 7); // R7 x3 = get_bit(qlog[2], 5); // B5 - x4 = get_bit(qlog[0], 6); // R6 + x4 = get_bit(qlog[0], 6); // R6 x5 = get_bit(qlog[0], 10); // R10 x6 = get_bit(qlog[0], 9); // R9 break; @@ -2010,7 +2023,7 @@ bool pack_astc_mode7_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& x1 = get_bit(qlog[0], 8); // R8 x2 = get_bit(qlog[0], 7); // R7 x3 = get_bit(qlog[0], 6); // R6 - x4 = get_bit(qlog[3], 7); // S7 + x4 = get_bit(qlog[3], 7); // S7 x5 = get_bit(qlog[3], 6); // S6 x6 = get_bit(qlog[3], 5); // S5 break; @@ -2024,7 +2037,7 @@ bool pack_astc_mode7_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& x1 = get_bit(qlog[1], 5); // G5 x2 = get_bit(qlog[0], 7); // R7 x3 = get_bit(qlog[2], 5); // B5 - x4 = get_bit(qlog[0], 6); // R6 + x4 = get_bit(qlog[0], 6); // R6 x5 = get_bit(qlog[3], 6); // S6 x6 = get_bit(qlog[3], 5); // S5 break; @@ -2039,7 +2052,7 @@ bool pack_astc_mode7_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& x1 = get_bit(qlog[1], 5); // G5 x2 = get_bit(qlog[2], 6); // B6 x3 = get_bit(qlog[2], 5); // B5 - x4 = get_bit(qlog[0], 6); // R6 + x4 = get_bit(qlog[0], 6); // R6 x5 = get_bit(qlog[0], 7); // R7 x6 = get_bit(qlog[3], 5); // S5 break; @@ -2052,7 +2065,7 @@ bool pack_astc_mode7_submode(uint32_t submode, uint8_t* pEndpoints, 
const vec3F& x1 = get_bit(qlog[1], 5); // G5 x2 = get_bit(qlog[2], 6); // B6 x3 = get_bit(qlog[2], 5); // B5 - x4 = get_bit(qlog[0], 6); // R6 + x4 = get_bit(qlog[0], 6); // R6 x5 = get_bit(qlog[3], 6); // S6 x6 = get_bit(qlog[3], 5); // S5 break; @@ -2143,7 +2156,7 @@ bool pack_mode11(mode11_log_desc& desc, uint8_t* pEndpoints) pEndpoints[1] = (uint8_t)desc.m_b1; pEndpoints[3] = (uint8_t)desc.m_d0; pEndpoints[5] = (uint8_t)desc.m_d1 | 128; - + return true; } @@ -2161,9 +2174,9 @@ bool pack_mode11(mode11_log_desc& desc, uint8_t* pEndpoints) return false; const int va = desc.m_a, vb0 = desc.m_b0, vb1 = desc.m_b1, vc = desc.m_c, vd0 = desc.m_d0, vd1 = desc.m_d1; - + int v0 = 0, v1 = 0, v2 = 0, v3 = 0, v4 = 0, v5 = 0; - + int x0 = 0, x1 = 0, x2 = 0, x3 = 0, x4 = 0, x5 = 0; switch (desc.m_submode) { @@ -2222,7 +2235,7 @@ bool pack_mode11(mode11_log_desc& desc, uint8_t* pEndpoints) v4 |= (vd0 & 31); v5 |= (vd1 & 31); - assert(in_range(v0, 0, 255) && in_range(v1, 0, 255) && in_range(v2, 0, 255) && in_range(v3, 0, 255) && in_range(v4, 0, 255) && in_range(v5, 0, 255)); + assert(is_in_range(v0, 0, 255) && is_in_range(v1, 0, 255) && is_in_range(v2, 0, 255) && is_in_range(v3, 0, 255) && is_in_range(v4, 0, 255) && is_in_range(v5, 0, 255)); pEndpoints[0] = (uint8_t)v0; pEndpoints[1] = (uint8_t)v1; @@ -2236,7 +2249,7 @@ bool pack_mode11(mode11_log_desc& desc, uint8_t* pEndpoints) static inline int astc_hdr_sign_extend(int src, int num_src_bits) { - assert(basisu::in_range(num_src_bits, 2, 31)); + assert(basisu::is_in_range(num_src_bits, 2, 31)); const bool negative = (src & (1 << (num_src_bits - 1))) != 0; if (negative) @@ -2261,7 +2274,7 @@ void unpack_mode11(const uint8_t* pEndpoints, mode11_log_desc& desc) desc.m_b1 = pEndpoints[1]; desc.m_d0 = pEndpoints[3]; desc.m_d1 = pEndpoints[5] & 0x7F; - + return; } @@ -2384,7 +2397,7 @@ void decode_cem_7_config(const uint8_t* pEndpoints, int& submode_index, int &maj bool pack_mode11( const vec3F& low_color_q16, const vec3F& 
high_color_q16, - uint32_t ise_endpoint_range, uint8_t* pEndpoints, + uint32_t ise_endpoint_range, uint8_t* pEndpoints, const astc_hdr_codec_base_options& coptions, bool direct_only, int32_t first_submode, int32_t last_submode, bool ignore_clamping, uint32_t& submode_used) { @@ -2521,7 +2534,7 @@ bool pack_mode11( } // d } // c } // if (coptions.m_ultra_quant) - + submode_used = best_submode + 1; return (best_trial_dist != BIG_FLOAT_VAL); @@ -3039,7 +3052,7 @@ bool try_mode7( clear_obj(best_trial_endpoints); double best_trial_dist = BIG_FLOAT_VAL; int best_trial_submode = 0; - + for (int submode = first_submode; submode <= last_submode; submode++) { const int MAX_CLAMP_MAG_ACCEPT_THRESH = 16; @@ -3238,9 +3251,9 @@ double encode_astc_hdr_block_mode_11( high_color_q16 = pBlock_pixels_q16[i]; } } - + vec3F old_low_color_q16(low_color_q16), old_high_color_q16(high_color_q16); - + for (uint32_t i = 0; i < 3; i++) { low_color_q16[i] = lerp(old_low_color_q16[i], old_high_color_q16[i], 1.0f / 64.0f); @@ -3255,7 +3268,7 @@ double encode_astc_hdr_block_mode_11( clear_obj(trial_blk_weights); double trial_blk_error = BIG_FLOAT_VAL; - + bool did_improve = try_mode11(num_pixels, trial_blk_endpoints, trial_blk_weights, trial_blk_error, trial_best_submode, low_color_q16, high_color_q16, pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors, @@ -3332,7 +3345,7 @@ double encode_astc_hdr_block_mode_11( if (!compute_least_squares_endpoints_rgb(num_pixels, trial_blk_weights, &g_astc_ls_weights_ise[ise_weight_range][0], &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16)) break; - + bool was_improved = try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode, l_q16, h_q16, pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors, @@ -3361,7 +3374,7 @@ double encode_astc_hdr_block_mode_11( float lw = 
LOW_EMPHASIS_WEIGHT, mw = MIDDLE_EMPHASIS_WEIGHT, hw = HIGH_EMPHASIS_WEIGHT; if (opt_mode == cWeightedLeastSquaresHeavy) lw = LOW_EMPHASIS_WEIGHT_HEAVY, mw = MIDDLE_EMPHASIS_WEIGHT_HEAVY, hw = HIGH_EMPHASIS_WEIGHT_HEAVY; - + for (uint32_t i = 0; i < num_pixels; i++) { vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16); @@ -3370,7 +3383,7 @@ double encode_astc_hdr_block_mode_11( assert((kd >= l) && (kd <= h)); float v = (kd - l) / (h - l); - + if (v < mid) v = lerp(lw, mw, v / mid); else @@ -3659,7 +3672,7 @@ double encode_astc_hdr_block_downsampled_mode_11( clear_obj(trial_blk_endpoints); clear_obj(trial_blk_weights); - + double trial_blk_error = BIG_FLOAT_VAL; bool could_pack = try_mode11(num_pixels, trial_blk_endpoints, trial_blk_weights, trial_blk_error, trial_best_submode, @@ -3700,7 +3713,7 @@ double encode_astc_hdr_block_downsampled_mode_11( } else if (pass) break; - + if ((opt_mode == cWeightedLeastSquares) || (opt_mode == cWeightedLeastSquaresHeavy)) { float emphasis_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; @@ -3801,7 +3814,7 @@ double encode_astc_hdr_block_mode_11_dual_plane( assert((first_submode >= FIRST_MODE11_SUBMODE_INDEX) && (first_submode <= last_submode)); assert(last_submode <= MAX_MODE11_SUBMODE_INDEX); - + assert(num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS); best_submode = 0; @@ -3877,7 +3890,7 @@ double encode_astc_hdr_block_mode_11_dual_plane( double trial_blk_error = BIG_FLOAT_VAL; bool did_improve = try_mode11_dual_plane(channel_index, num_pixels, trial_blk_endpoints, trial_blk_weights0, trial_blk_weights1, trial_blk_error, trial_best_submode, - low_color_q16, high_color_q16, + low_color_q16, high_color_q16, pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors, first_submode, last_submode, ignore_clamping); @@ -3951,7 +3964,7 @@ double encode_astc_hdr_block_mode_11_dual_plane( memcpy(trial_blk_weights1, blk_weights1, num_pixels); } // pass - + return 
cur_block_error; } @@ -3966,7 +3979,7 @@ double encode_astc_hdr_block_mode_7( uint8_t* blk_endpoints, //[4] uint8_t* blk_weights, // [num_pixels] const astc_hdr_codec_base_options& coptions, - uint32_t ise_endpoint_range, + uint32_t ise_endpoint_range, int first_submode, int last_submode, const encode_astc_block_stats* pBlock_stats) { @@ -4012,7 +4025,7 @@ double encode_astc_hdr_block_mode_7( vec3F diff(high_color_q16 - low_color_q16); - // The mul here (* block_axis_q16[0]) is because the "S" or scale value is subtracted from the high color with a scale of 1.0, + // The mul here (* block_axis_q16[0]) is because the "S" or scale value is subtracted from the high color with a scale of 1.0, // i.e. it's equivalent to a vector of (1,1,1) multiplied by scale before the sub. We want to actually move along the grayscale axis, or (0.577350259, 0.577350259, 0.577350259). float s_q16 = diff.dot(block_axis_q16) * block_axis_q16[0]; @@ -4085,7 +4098,7 @@ double encode_astc_hdr_block_mode_7( vec3F alt_diff(alt_high_color_q16 - alt_low_color_q16); - // The mul here (* block_axis_q16[0]) is because the "S" or scale value is subtracted from the high color with a scale of 1.0, + // The mul here (* block_axis_q16[0]) is because the "S" or scale value is subtracted from the high color with a scale of 1.0, // i.e. it's equivalent to a vector of (1,1,1) multiplied by scale before the sub. We want to actually move along the grayscale axis, or (0.577350259, 0.577350259, 0.577350259). float alt_s_q16 = alt_diff.dot(block_axis_q16) * block_axis_q16[0]; @@ -4221,6 +4234,7 @@ void dequantize_astc_weights(uint32_t n, const uint8_t* pSrc_ise_vals, uint32_t } //-------------------------------------------------------------------------------------------------------------------------- +// Precomputed matrices via SLSQP (Sequential Least-Squares Quadratic Programming - scipy.optimize.minimize). Sharper results vs. other methods (like adjoint). 
// For each output (2x2) sample, the weight of each input (6x6) sample. static const float g_weight_downsample_6x6_to_2x2[4][36] = { @@ -4724,11 +4738,14 @@ static const float g_weight_downsample_6x6_to_6x6[36][36] = { //-------------------------------------------------------------------------------------------------------------------------- -const struct downsample_matrix_6x6 +struct downsample_matrix { uint32_t m_grid_width, m_grid_height; const float* m_p; -} g_downsample_matrices_6x6[] = { +}; + +downsample_matrix g_downsample_matrices_6x6[] = +{ { 2, 2, (const float*)g_weight_downsample_6x6_to_2x2 }, { 3, 2, (const float*)g_weight_downsample_6x6_to_3x2 }, { 4, 2, (const float*)g_weight_downsample_6x6_to_4x2 }, @@ -4757,11 +4774,892 @@ const struct downsample_matrix_6x6 }; //const uint32_t NUM_DOWNSAMPLE_MATRICES_6x6 = sizeof(g_downsample_matrices_6x6) / sizeof(g_downsample_matrices_6x6[0]); +//-------------------------------------------------------------------------------------------------------------------------- +// +// For each output (2x2) sample, the weight of each input (8x6) sample. 
+static const float g_weight_downsample_8x6_to_2x2[4][48] = { +{0.137431f, 0.119592f, 0.085575f, 0.056401f, 0.030751f, 0.000000f, 0.000000f, 0.000000f, 0.108851f, 0.086312f, 0.064884f, 0.039119f, 0.027653f, 0.000000f, 0.000000f, 0.000000f, 0.073703f, 0.067584f, 0.045034f, 0.032697f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024414f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.033828f, 0.058911f, 0.081870f, 0.120975f, 0.137384f, 0.000000f, 0.000000f, 0.000000f, 0.026912f, 0.038126f, 0.065247f, 0.083628f, 0.109730f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.037909f, 0.044325f, 0.065160f, 0.074043f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021952f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024645f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.074133f, 0.065243f, 0.043065f, 0.035114f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.105931f, 0.087385f, 0.065848f, 0.035699f, 0.030068f, 0.000000f, 0.000000f, 0.000000f, 0.136321f, 0.121324f, 0.086171f, 0.057503f, 0.031553f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024251f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.037022f, 0.042379f, 0.063662f, 
0.075871f, 0.000000f, 0.000000f, 0.000000f, 0.031315f, 0.037129f, 0.065785f, 0.084055f, 0.107841f, 0.000000f, 0.000000f, 0.000000f, 0.030537f, 0.057932f, 0.086040f, 0.120055f, 0.136127f}, +}; + +// For each output (3x2) sample, the weight of each input (8x6) sample. +static const float g_weight_downsample_8x6_to_3x2[6][48] = { +{0.212556f, 0.137038f, 0.067006f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.172663f, 0.105023f, 0.058944f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.113989f, 0.074111f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.037147f, 0.021524f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.077366f, 0.142656f, 0.145067f, 0.074900f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.048644f, 0.106713f, 0.104141f, 0.052434f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.048972f, 0.079367f, 0.079508f, 0.040229f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.064479f, 0.139823f, 0.212207f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.053987f, 0.104596f, 0.171728f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.026564f, 0.071759f, 0.119334f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.035524f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.037522f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.115689f, 0.072510f, 0.021389f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.170967f, 0.106096f, 0.061696f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.210888f, 0.137969f, 0.065274f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.045147f, 0.080905f, 0.078591f, 0.043486f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.045421f, 0.106778f, 0.106427f, 0.050794f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.079169f, 0.139959f, 0.144180f, 0.079143f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033940f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021724f, 0.070791f, 0.117496f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.059938f, 0.109787f, 0.170583f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.064517f, 0.139526f, 0.211698f}, +}; + +// For each output (4x2) sample, the weight of each input (8x6) sample. 
+static const float g_weight_downsample_8x6_to_4x2[8][48] = { +{0.275657f, 0.133248f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.225305f, 0.089819f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.147466f, 0.079439f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.049065f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.071558f, 0.188360f, 0.141460f, 0.027429f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.068719f, 0.139588f, 0.107851f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024602f, 0.112032f, 0.076880f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.019401f, 0.000000f, 0.022120f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.025244f, 0.140416f, 0.189606f, 0.065541f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021281f, 0.106671f, 0.142270f, 0.062848f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.068039f, 0.102306f, 0.026541f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.023517f, 0.025720f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.136533f, 0.275463f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.086827f, 0.223674f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.077361f, 0.153684f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.046457f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.048293f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.149189f, 0.077647f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.222753f, 0.093443f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.273639f, 0.135036f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.022695f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.027966f, 0.116923f, 0.074704f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.066610f, 0.140552f, 0.119791f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.070250f, 0.192769f, 0.140414f, 0.027327f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.026026f, 0.032280f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.073723f, 0.105102f, 0.027631f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.113307f, 0.139466f, 0.059915f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.027161f, 0.140907f, 0.189935f, 0.064546f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.045275f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.074412f, 0.151685f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.094074f, 0.223897f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.136604f, 0.274053f}, +}; + +// For each output (5x2) sample, the weight of each input (8x6) sample. +static const float g_weight_downsample_8x6_to_5x2[10][48] = { +{0.298257f, 0.099048f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.242705f, 0.083012f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.155959f, 0.035340f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.054463f, 0.031217f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.149629f, 0.250491f, 0.037003f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.113317f, 0.192720f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.093738f, 0.138010f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.025093f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.193314f, 0.196494f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.163178f, 0.158983f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.112334f, 0.115733f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028572f, 0.031390f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028975f, 0.256222f, 0.142262f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.191874f, 0.111703f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.137754f, 0.096234f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.034976f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.105369f, 0.297279f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.081692f, 0.239675f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.031939f, 0.162333f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.031404f, 0.050308f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.053972f, 0.028379f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.158432f, 0.035219f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.238959f, 0.089734f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.294641f, 0.100664f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.034176f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.090008f, 0.147020f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.103221f, 0.190008f, 0.024843f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.139784f, 0.245082f, 0.025860f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.032527f, 0.032618f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.117780f, 0.108323f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.155910f, 0.159880f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.197210f, 0.195753f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.042681f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.138684f, 0.099059f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.186926f, 0.105714f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.029545f, 0.254477f, 0.142915f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.029953f, 0.051219f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.029174f, 0.163463f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.087461f, 0.240531f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.103819f, 0.294380f}, +}; + +// For each output (6x2) sample, the weight of each input (8x6) sample. 
+static const float g_weight_downsample_8x6_to_6x2[12][48] = { +{0.362153f, 0.050427f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.296074f, 0.031598f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.192551f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.067197f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.240020f, 0.169624f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.196469f, 0.128913f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.131714f, 0.098049f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.035210f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.105361f, 0.301218f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.086270f, 0.220336f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.047552f, 0.171037f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.022966f, 0.045259f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.287211f, 0.111854f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.224383f, 0.097742f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.167408f, 0.037607f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.036827f, 0.036969f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.152162f, 0.235841f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.108280f, 0.202388f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.091687f, 0.151852f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.057789f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.051343f, 0.374208f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.304381f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.207583f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.062485f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.064793f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.193058f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.290484f, 0.038424f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.357650f, 0.055589f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.035640f, 0.019558f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.133571f, 0.100435f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.184400f, 0.125111f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.228117f, 0.173168f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.044711f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.043438f, 0.175074f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.089766f, 0.235789f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.108452f, 0.302770f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.037495f, 0.032008f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.168503f, 0.033572f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.226763f, 0.101709f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.292934f, 0.107016f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.019003f, 0.018791f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.100854f, 0.125828f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.107572f, 0.206978f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.169736f, 0.251237f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.060542f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024678f, 0.204824f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.301594f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.040204f, 0.368158f}, +}; + +// For each output (7x2) sample, the weight of each input (8x6) sample. +static const float g_weight_downsample_8x6_to_7x2[14][48] = { +{0.396534f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.324924f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.210380f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.068162f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.365804f, 0.047637f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.288211f, 0.031570f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.215416f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.051362f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.277573f, 0.121338f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.219048f, 0.084370f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.023178f, 0.000000f, 0.161469f, 0.031346f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.034866f, 0.046814f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.194115f, 0.218789f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.163854f, 0.137782f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.020281f, 0.000000f, 0.127129f, 0.138049f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.089911f, 0.279003f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.100285f, 0.229490f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.026109f, 0.164969f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.036219f, 0.074014f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033369f, 0.385493f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.300028f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.222803f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.058307f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.395806f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.320906f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.218670f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.064618f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.064591f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.213009f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.324054f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.398346f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.052403f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.218943f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.280900f, 0.028228f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.364696f, 0.054830f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.040226f, 0.027986f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.172678f, 0.019447f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.228976f, 0.118935f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.278251f, 0.113500f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.017206f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.022203f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.022373f, 0.000000f, 0.138786f, 0.130317f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024343f, 0.000000f, 0.127713f, 0.134415f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.187440f, 0.195205f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033347f, 0.041046f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.029210f, 0.133093f, 0.000000f, 0.020285f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.102427f, 0.246296f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.104431f, 0.289864f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.027153f, 0.048478f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.032573f, 0.217822f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.278933f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.022617f, 0.372424f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.061793f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.219494f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.324119f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.394594f}, +}; + +// For each output (8x2) sample, the weight of each input (8x6) sample. +static const float g_weight_downsample_8x6_to_8x2[16][48] = { +{0.397679f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.325539f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.208885f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.067897f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.394986f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.323551f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.218305f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.063158f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.400685f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.325867f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.214372f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.059075f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.398573f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.319207f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.212413f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.069808f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.401571f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.323398f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.212771f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.062260f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.404990f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.322008f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.207631f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.065371f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.396891f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.320883f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.212780f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.069447f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.396345f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.321731f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.217640f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.064285f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.064801f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.212540f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.324204f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.398456f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.063907f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.221286f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.319039f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.395768f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.064375f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.221627f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.320522f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.393476f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.067161f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.214405f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.322795f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.395638f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.065100f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.209382f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.325769f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.399749f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.072177f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.207268f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.318619f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.401935f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.063557f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.217484f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.316546f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.402413f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.061762f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.218082f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.324604f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.395552f}, +}; + +// For each output (2x3) sample, the weight of each input (8x6) sample. +static const float g_weight_downsample_8x6_to_2x3[6][48] = { +{0.205910f, 0.181220f, 0.131230f, 0.084091f, 0.045598f, 0.000000f, 0.000000f, 0.000000f, 0.115248f, 0.106195f, 0.073083f, 0.057425f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.054674f, 0.092055f, 0.125587f, 0.176378f, 0.202284f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.055452f, 0.075306f, 0.102574f, 0.115689f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.044070f, 0.029520f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.136903f, 0.115512f, 0.084403f, 0.050846f, 0.035490f, 0.000000f, 0.000000f, 0.000000f, 0.143459f, 0.115683f, 0.085020f, 0.053056f, 0.036572f, 0.000000f, 0.000000f, 0.000000f, 0.043466f, 0.026000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.025190f, 0.040099f, 0.000000f, 0.000000f, 0.000000f, 0.037965f, 0.050927f, 0.083471f, 0.112563f, 0.137468f, 0.000000f, 0.000000f, 0.000000f, 0.033927f, 0.046348f, 0.085573f, 0.114643f, 0.134372f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024810f, 0.028641f, 0.044003f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.111326f, 0.107232f, 0.073233f, 0.050676f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.204047f, 0.179532f, 0.131819f, 0.088809f, 0.053325f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.023277f, 0.054224f, 0.067723f, 0.100097f, 0.113199f, 0.000000f, 0.000000f, 0.000000f, 0.047881f, 
0.085543f, 0.130088f, 0.176198f, 0.201769f}, +}; + +// For each output (3x3) sample, the weight of each input (8x6) sample. +static const float g_weight_downsample_8x6_to_3x3[9][48] = { +{0.327238f, 0.215195f, 0.108640f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.184524f, 0.118385f, 0.046018f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.109423f, 0.206952f, 0.207632f, 0.108494f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.064973f, 0.120899f, 0.114663f, 0.066964f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.107663f, 0.213426f, 0.326644f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.045643f, 0.119988f, 0.186636f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.060005f, 0.030140f, 0.020392f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.193258f, 0.127396f, 0.061395f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.196600f, 0.132656f, 0.063337f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.060793f, 0.029915f, 0.024113f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.032682f, 0.042599f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.070428f, 0.145040f, 0.144782f, 0.074883f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.069308f, 0.145612f, 0.133265f, 0.071190f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.035901f, 0.034311f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.030350f, 0.056939f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.060846f, 0.125850f, 0.201518f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.063906f, 0.129434f, 0.203119f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.035006f, 0.026673f, 0.066360f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.184897f, 0.119434f, 0.045977f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.328093f, 0.217057f, 0.104542f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.064974f, 0.120280f, 0.118724f, 0.069494f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.111457f, 0.199814f, 0.204785f, 0.110472f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.038193f, 0.124885f, 0.182125f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.105011f, 0.218548f, 0.331237f}, +}; + +// For each output (4x3) sample, the weight of each input (8x6) sample. 
+static const float g_weight_downsample_8x6_to_4x3[12][48] = { +{0.424820f, 0.213734f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.237540f, 0.123907f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.101064f, 0.293828f, 0.214193f, 0.045263f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.051229f, 0.170008f, 0.124414f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.043452f, 0.216897f, 0.293802f, 0.110908f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.114842f, 0.173267f, 0.046832f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.204747f, 0.427412f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.126209f, 0.241633f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.087490f, 0.023647f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.277233f, 0.116842f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.282751f, 0.124394f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.087642f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024375f, 0.043221f, 0.025504f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.075199f, 0.165822f, 0.130107f, 0.031544f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.074010f, 0.171441f, 0.131257f, 0.016920f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.037357f, 0.043775f, 0.029468f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.034358f, 0.046676f, 0.025003f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.026567f, 0.127081f, 0.172282f, 0.077309f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028046f, 0.132256f, 0.162992f, 0.075728f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033213f, 0.036679f, 0.021810f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.083610f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.116623f, 0.293550f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.118246f, 0.292686f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.095285f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.234002f, 0.132935f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.422801f, 0.210262f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.037740f, 0.173712f, 0.127636f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.107054f, 0.296425f, 0.213343f, 0.044090f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.122782f, 0.174732f, 0.044321f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.046279f, 0.214323f, 0.289278f, 0.108285f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.125079f, 0.236461f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.208583f, 0.429877f}, +}; + +// For each output (5x3) sample, the weight of each input (8x6) sample. +static const float g_weight_downsample_8x6_to_5x3[15][48] = { +{0.490219f, 0.168976f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.273361f, 0.067444f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.213329f, 0.380538f, 0.048722f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.138224f, 0.219188f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.309867f, 0.312289f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.189101f, 0.188743f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.037522f, 0.380550f, 0.216834f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.225818f, 0.139276f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.164462f, 0.488476f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.072635f, 0.274427f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.085550f, 0.041856f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.277218f, 0.100778f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.279523f, 0.102655f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.086943f, 0.025474f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.018474f, 0.000000f, 0.000000f, 0.023807f, 0.063654f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.142638f, 0.245307f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.145790f, 0.254064f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.047600f, 0.058666f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.047090f, 0.051660f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.197880f, 0.207261f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.205538f, 0.186457f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.052816f, 0.051298f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.018852f, 0.055366f, 0.033613f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.247747f, 0.138008f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.030549f, 0.240788f, 0.147930f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.066598f, 0.020549f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.031861f, 0.081013f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.095562f, 0.286515f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.091897f, 0.287997f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.038590f, 0.086564f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.268683f, 0.083034f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.485628f, 0.162655f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.121869f, 0.229484f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.218817f, 0.384593f, 0.045237f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.182342f, 0.183530f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.320205f, 0.313923f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.217960f, 0.138650f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.051048f, 0.375126f, 0.217217f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.064150f, 0.273673f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.169346f, 0.492831f}, +}; + +// For each output (6x3) sample, the weight of each input (8x6) sample. +static const float g_weight_downsample_8x6_to_6x3[18][48] = { +{0.567729f, 0.085252f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.316321f, 0.030698f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.359927f, 0.264711f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.204426f, 0.170936f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.160854f, 0.493683f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.055911f, 0.289551f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.471204f, 0.180222f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.281132f, 0.067442f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.244512f, 0.369052f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.158920f, 0.227515f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.066465f, 0.597036f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.336500f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.104579f, 0.023148f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.338908f, 0.039468f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.344319f, 0.042826f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.106751f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.059448f, 0.022978f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.245888f, 0.156583f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.251094f, 0.164427f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.073868f, 0.025715f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.047831f, 0.060057f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.116572f, 0.271105f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.108894f, 0.276085f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.039515f, 0.079942f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.080438f, 0.048264f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.267123f, 0.113138f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.263081f, 0.110654f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.077711f, 0.039591f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.020193f, 0.059109f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.154371f, 0.249388f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.148917f, 0.263084f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021121f, 0.083817f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024900f, 0.107003f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.375065f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.378856f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.114175f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.311342f, 0.043011f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.565421f, 0.080225f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.018768f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.192162f, 0.168731f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.354606f, 0.265733f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.069515f, 0.282839f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.159765f, 0.487881f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.278646f, 0.072312f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.480532f, 0.168510f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.157488f, 0.194745f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.261639f, 0.386129f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.043524f, 0.320675f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.055191f, 0.580610f}, +}; + +// For each output (7x3) sample, the weight of each input (8x6) sample. +static const float g_weight_downsample_8x6_to_7x3[21][48] = { +{0.641452f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.358548f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.571435f, 0.068076f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.330216f, 0.030272f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.442607f, 0.191771f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.243785f, 0.063036f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.018329f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.019157f, 0.000000f, 0.021315f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.273064f, 0.307420f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.195541f, 0.177034f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.022294f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024647f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.151030f, 0.456644f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.078617f, 0.291813f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021896f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.060980f, 0.596856f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.342163f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.639429f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.360571f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.114797f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.378786f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.387691f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.118726f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.090755f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.356378f, 0.041502f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.359468f, 0.040845f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.091221f, 0.019830f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.078340f, 0.030772f, 0.000000f, 0.017555f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.267597f, 0.100863f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.271447f, 0.100798f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.064330f, 0.068296f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.044982f, 0.034940f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021793f, 0.000000f, 0.194246f, 
0.216278f, 0.000000f, 0.022234f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.203237f, 0.184740f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.019217f, 0.018086f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.023471f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.016776f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.047044f, 0.060726f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.086110f, 0.270497f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.100587f, 0.267194f, 0.000000f, 0.020092f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.050739f, 0.097011f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.023976f, 0.094747f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.036130f, 0.353791f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.032724f, 0.369552f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.089080f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.107420f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.386732f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.390932f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.114916f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.354042f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.645958f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.337170f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.589668f, 0.073162f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.281005f, 0.071771f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.450506f, 0.196718f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021998f, 0.000000f, 0.000000f, 0.025261f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.032091f, 0.000000f, 0.182952f, 0.186377f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.270805f, 0.280517f, 
0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.020667f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.064614f, 0.248064f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.182212f, 0.484444f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.046780f, 0.341462f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.041817f, 0.569940f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.355095f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.644905f}, +}; + +// For each output (8x3) sample, the weight of each input (8x6) sample. 
+static const float g_weight_downsample_8x6_to_8x3[24][48] = { +{0.642405f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.357595f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.643957f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.356043f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.642833f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.357167f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.637580f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.362420f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.642714f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.357286f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.637481f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.362519f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.646282f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.353718f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.640587f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.359413f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.113933f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.379885f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.389232f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.116950f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.104449f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.396859f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.400104f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.098588f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.102359f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.394242f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.401732f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.101667f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.096440f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.392155f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.400404f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.111000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.114593f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.389960f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.382704f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.112742f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.109021f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.396881f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.388517f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.105580f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.108474f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.389562f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.401518f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.100446f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.106886f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.387604f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.392295f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.113215f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.353573f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.646427f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.356921f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.643079f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.363744f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.636256f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.356177f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.643823f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.354225f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.645775f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.359749f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.640251f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.364443f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.635557f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.353912f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.646088f}, +}; + +// For each output (2x4) sample, the weight of each input (8x6) sample. +static const float g_weight_downsample_8x6_to_2x4[8][48] = { +{0.266475f, 0.237248f, 0.170961f, 0.108932f, 0.059980f, 0.000000f, 0.000000f, 0.000000f, 0.069153f, 0.052080f, 0.035172f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.071584f, 0.118291f, 0.158003f, 0.229344f, 0.262308f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.040608f, 0.047117f, 0.072745f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.133546f, 0.123736f, 0.085634f, 0.071146f, 0.020522f, 0.000000f, 0.000000f, 0.000000f, 0.181365f, 0.152470f, 0.109189f, 0.071277f, 0.051114f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.068769f, 0.083081f, 0.122611f, 0.135462f, 0.000000f, 0.000000f, 0.000000f, 0.052661f, 0.073804f, 0.122675f, 0.158233f, 0.182705f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.185771f, 0.157833f, 0.115265f, 0.071389f, 0.049909f, 0.000000f, 0.000000f, 0.000000f, 0.134315f, 0.122577f, 0.090159f, 0.072782f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.049580f, 0.068443f, 0.120275f, 0.155720f, 0.183091f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.072223f, 0.092680f, 0.123123f, 0.134866f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.061367f, 0.051211f, 0.034360f, 0.000000f, 0.028160f, 0.000000f, 0.000000f, 0.000000f, 0.255536f, 0.224675f, 0.167736f, 0.113503f, 0.063453f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033855f, 0.000000f, 0.030092f, 0.044250f, 0.067673f, 0.000000f, 0.000000f, 0.000000f, 0.059731f, 0.111955f, 0.169044f, 0.224131f, 0.259268f}, +}; + +// For each output (3x4) sample, the weight of each input (8x6) sample. 
+static const float g_weight_downsample_8x6_to_3x4[12][48] = { +{0.405143f, 0.264455f, 0.127900f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.105076f, 0.051679f, 0.045747f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.025952f, 0.148689f, 0.283429f, 0.283899f, 0.145415f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.061558f, 0.051058f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.124702f, 0.268998f, 0.405480f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.043101f, 0.052379f, 0.105340f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.214261f, 0.145181f, 0.047508f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.296952f, 0.196156f, 0.099941f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.084673f, 0.137735f, 0.144414f, 0.077484f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.086806f, 0.178074f, 0.179109f, 0.089543f, 0.022161f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.050723f, 0.149013f, 0.214357f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.101549f, 0.190388f, 0.293970f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.293440f, 0.200404f, 0.104808f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.212205f, 0.141684f, 0.047458f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.085757f, 0.179609f, 0.175648f, 0.084745f, 0.021210f, 0.000000f, 0.000000f, 0.000000f, 0.083231f, 0.140659f, 0.147264f, 0.081878f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.104715f, 0.195444f, 0.297105f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.052478f, 0.135662f, 0.214595f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.105858f, 0.047177f, 0.044681f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.407919f, 0.269431f, 0.124933f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.066066f, 0.061881f, 0.023069f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.149307f, 0.272481f, 0.277246f, 0.149950f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.036865f, 0.065377f, 0.096438f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.123758f, 0.269301f, 0.408262f}, +}; + +// For each output (4x4) sample, the weight of each input (8x6) sample. +static const float g_weight_downsample_8x6_to_4x4[16][48] = { +{0.550981f, 0.273527f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.143555f, 0.031938f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.122629f, 0.360487f, 0.261668f, 0.049773f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.061033f, 0.081604f, 0.062805f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.049839f, 0.269578f, 0.365997f, 0.133966f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.048352f, 0.083803f, 0.048464f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.267525f, 0.553972f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.034129f, 0.144375f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.277118f, 0.159322f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.390449f, 0.173111f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.047384f, 0.191890f, 0.131656f, 0.024565f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.109738f, 0.256529f, 0.192107f, 0.046132f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.031695f, 0.141682f, 0.193059f, 0.054775f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.036195f, 0.182374f, 0.246275f, 0.113945f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.160040f, 0.281798f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.166904f, 0.391257f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.392178f, 0.179451f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.279598f, 0.148773f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.107261f, 0.247609f, 0.198942f, 0.036907f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.054678f, 0.195067f, 0.134127f, 0.025410f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.017019f, 0.017319f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.032887f, 0.182133f, 0.239063f, 0.107658f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.026552f, 0.139058f, 0.187193f, 0.051118f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.169923f, 0.395389f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.148923f, 0.285765f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.142165f, 0.038534f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.547445f, 0.271856f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.044944f, 0.076529f, 0.068448f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.125039f, 0.368874f, 0.262015f, 
0.054151f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.059929f, 0.083064f, 0.044633f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.053433f, 0.265593f, 0.362429f, 0.130919f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.045972f, 0.135681f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.264414f, 0.553933f}, +}; + +// For each output (5x4) sample, the weight of each input (8x6) sample. 
+static const float g_weight_downsample_8x6_to_5x4[20][48] = { +{0.596845f, 0.198746f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.148428f, 0.055981f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.278053f, 0.491329f, 0.050522f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.064229f, 0.115868f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.404918f, 0.399709f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.097883f, 0.097489f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.050295f, 0.498737f, 0.280436f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.117869f, 0.052664f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.200415f, 0.589668f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.063856f, 0.146061f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.306027f, 0.097934f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.428737f, 0.167302f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.155850f, 0.258285f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.187173f, 0.344891f, 0.035315f, 0.000000f, 0.018485f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.212411f, 0.213232f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.283532f, 0.290826f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.022380f, 0.255191f, 0.169763f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.020378f, 0.342025f, 0.190264f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.089095f, 0.316913f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.159089f, 0.434903f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.436982f, 0.169707f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.310539f, 0.082773f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.187439f, 0.337224f, 0.031428f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.167442f, 0.252995f, 0.023472f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.298614f, 0.285810f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.206405f, 0.209172f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.019544f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033200f, 0.325724f, 0.185761f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.030366f, 0.251622f, 0.153784f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.161862f, 0.437691f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.086681f, 0.313765f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.149673f, 0.068654f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.589414f, 0.192260f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.038852f, 0.121054f, 0.025391f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.280331f, 0.492424f, 0.041948f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.095308f, 0.102698f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.407796f, 0.394198f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.106939f, 0.057645f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.058299f, 0.489157f, 0.287960f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.063501f, 0.142763f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.196593f, 0.597142f}, +}; + +// For each output (6x4) sample, the weight of each input (8x6) sample. +static const float g_weight_downsample_8x6_to_6x4[24][48] = { +{0.723801f, 0.094637f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.181562f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.476584f, 0.344817f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.116143f, 0.062457f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.194537f, 0.608409f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.061561f, 0.135493f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.579284f, 0.209203f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.135477f, 0.076035f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.308340f, 0.460085f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.052476f, 0.139411f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.019970f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.019719f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.082209f, 0.732181f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.185611f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.358932f, 0.060659f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.503915f, 0.076494f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.237301f, 0.199098f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.332364f, 0.231237f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.088364f, 0.322995f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.173711f, 0.414930f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.312366f, 0.093336f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.392413f, 0.164056f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.019281f, 0.018548f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.178453f, 0.229682f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.214423f, 0.359860f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.017582f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.071976f, 0.390475f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.537548f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.515147f, 0.078582f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.364623f, 0.041649f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.337054f, 0.220008f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.249141f, 0.193797f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.168802f, 0.423188f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.084285f, 0.323725f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.411061f, 0.182411f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.329651f, 0.076877f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.193953f, 0.352033f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.188543f, 0.265471f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.050266f, 0.555034f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.394700f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.179003f, 0.029987f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.700087f, 0.090924f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.019171f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.099147f, 0.059028f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.470203f, 0.352451f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.075527f, 0.135452f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.184084f, 0.604937f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.136189f, 0.084874f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.576900f, 0.202037f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.041868f, 0.099347f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.343377f, 0.515408f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.044581f, 0.169532f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.062013f, 0.723875f}, +}; + +// For each output (7x4) sample, the weight of each input (8x6) sample. 
+static const float g_weight_downsample_8x6_to_7x4[28][48] = { +{0.798509f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.201491f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.716711f, 0.085583f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.167498f, 0.030208f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.538182f, 0.218008f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.114187f, 0.070138f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.020226f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.020777f, 0.000000f, 0.000000f, 0.018482f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.367283f, 0.403492f, 0.000000f, 0.017972f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.071839f, 0.050645f, 0.000000f, 0.023445f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.020007f, 0.000000f, 0.000000f, 0.000000f, 0.022030f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.023286f, 0.000000f, 0.000000f}, +{0.000000f, 0.026415f, 0.000000f, 0.000000f, 0.165810f, 0.526162f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.086343f, 0.166394f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028875f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.068792f, 0.750632f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.180576f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.798640f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.201360f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.401325f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.563541f, 0.035134f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.393109f, 0.035360f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.514780f, 0.056751f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.286324f, 0.066048f, 0.000000f, 0.022966f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.397320f, 0.167136f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024391f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.018733f, 0.017081f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.228689f, 0.212401f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.027812f, 0.000000f, 0.230123f, 0.251307f, 0.000000f, 0.015952f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.018366f, 0.015349f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.089768f, 0.272262f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.165947f, 0.450195f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021828f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.064329f, 0.394519f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021491f, 0.519661f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.420154f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.579846f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.561993f, 0.042727f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.395280f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.507366f, 0.060806f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.388432f, 0.043397f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.017057f, 0.019075f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.399856f, 0.181694f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.283918f, 0.098400f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.018320f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.261768f, 0.263599f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.210680f, 0.218119f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.027513f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.019283f, 0.018776f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.156143f, 0.407378f, 0.000000f, 0.018410f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.081168f, 0.298842f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.043712f, 0.524648f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.025861f, 0.405779f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.027775f, 0.567781f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.404444f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.202734f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.797266f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.164849f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.736579f, 0.098573f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028573f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.139627f, 0.082102f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.529383f, 0.220315f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.020496f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.031087f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.029563f, 0.000000f, 0.069934f, 0.077745f, 0.000000f, 0.000000f, 0.000000f, 0.019031f, 0.000000f, 0.000000f, 0.369058f, 0.383087f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.072848f, 0.128566f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.206674f, 0.591912f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028891f, 0.164765f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.054845f, 0.751498f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.186782f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.813218f}, +}; + +// For each output (8x4) sample, the weight of each input (8x6) sample. 
+static const float g_weight_downsample_8x6_to_8x4[32][48] = { +{0.800445f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.199555f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.801084f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.198916f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.802438f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.197562f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.800166f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.199834f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.808142f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.191858f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.801414f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.198586f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.798600f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.201400f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.800453f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.199547f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.415774f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.584226f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.409782f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.590218f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.407361f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.592639f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.411487f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.588513f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.416734f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.583266f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.409794f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.590206f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.409782f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.590218f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.419797f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.580203f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.588149f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.411851f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.591287f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.408713f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.587561f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.412439f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.589820f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.410180f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.585460f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.414540f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.590541f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.409459f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.587115f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.412885f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.584462f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.415538f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.200471f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.799529f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.195628f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.804372f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.195562f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.804438f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.194079f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.805921f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.205775f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.794225f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.197129f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.802871f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.193175f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.806825f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.185493f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.814507f}, +}; + +// For each output (2x5) sample, the weight of each input (8x6) sample. 
+static const float g_weight_downsample_8x6_to_2x5[10][48] = { +{0.314987f, 0.280141f, 0.203583f, 0.129696f, 0.071593f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.085378f, 0.141565f, 0.188187f, 0.272403f, 0.312467f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.255395f, 0.217105f, 0.170584f, 0.106646f, 0.072684f, 0.000000f, 0.000000f, 0.000000f, 0.072766f, 0.046537f, 0.029920f, 0.000000f, 0.028363f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.069530f, 0.105913f, 0.164044f, 0.215260f, 0.255339f, 0.000000f, 0.000000f, 0.000000f, 0.025591f, 0.000000f, 0.036814f, 0.050349f, 0.077160f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.152274f, 0.142699f, 0.102993f, 0.080565f, 0.018558f, 0.000000f, 0.000000f, 0.000000f, 0.157267f, 0.135460f, 0.099077f, 0.089287f, 0.021820f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.026396f, 0.087011f, 0.099835f, 0.143472f, 0.149274f, 0.000000f, 0.000000f, 0.000000f, 0.019143f, 0.078700f, 0.099557f, 0.143621f, 0.152993f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.071546f, 0.054560f, 0.034641f, 0.000000f, 0.026492f, 0.000000f, 0.000000f, 0.000000f, 0.253751f, 0.217970f, 0.167740f, 0.101477f, 0.071823f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.031122f, 0.000000f, 0.038539f, 0.044578f, 0.068079f, 0.000000f, 0.000000f, 0.000000f, 0.074011f, 0.104132f, 0.176778f, 0.213248f, 0.249513f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.309516f, 0.271823f, 0.202932f, 0.138334f, 0.077394f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.073235f, 0.136322f, 0.204986f, 0.270837f, 0.314620f}, +}; + +// For each output (3x5) sample, the weight of each input (8x6) sample. 
+static const float g_weight_downsample_8x6_to_3x5[15][48] = { +{0.506870f, 0.329427f, 0.163702f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.029175f, 0.167327f, 0.319880f, 0.321166f, 0.162451f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.158719f, 0.334975f, 0.506306f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.410647f, 0.270965f, 0.135943f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.101890f, 0.048392f, 0.032162f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.022675f, 0.131363f, 0.257700f, 0.263834f, 0.126043f, 0.021278f, 0.000000f, 0.000000f, 0.000000f, 0.022613f, 0.064121f, 0.066389f, 0.023985f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.131149f, 0.266568f, 0.407438f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.041342f, 0.046648f, 0.106854f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.259144f, 0.176197f, 0.070648f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.256402f, 0.170550f, 0.067060f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.085864f, 
0.160352f, 0.153663f, 0.093488f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.093065f, 0.165400f, 0.162870f, 0.085298f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.069632f, 0.177258f, 0.252242f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.066495f, 0.178932f, 0.255440f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.109165f, 0.056989f, 0.043673f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.396795f, 0.263538f, 0.129840f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.022525f, 0.061369f, 0.062101f, 0.020335f, 0.000000f, 0.000000f, 0.000000f, 0.022912f, 0.129308f, 0.258462f, 0.259250f, 0.129291f, 0.034446f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.042198f, 0.051815f, 0.111374f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.136459f, 0.257176f, 0.400979f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.509094f, 0.334982f, 0.155925f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.175231f, 0.321060f, 0.327712f, 0.175997f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.154955f, 0.336566f, 0.508479f}, +}; + +// For each output (4x5) sample, the weight of each input (8x6) sample. +static const float g_weight_downsample_8x6_to_4x5[20][48] = { +{0.669318f, 0.330682f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.147967f, 0.437694f, 0.317636f, 0.064825f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.031879f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.060625f, 0.318845f, 0.433756f, 0.158597f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028176f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.324316f, 0.675684f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.585012f, 0.264010f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.150977f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.134170f, 0.326735f, 0.247128f, 0.055953f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.060565f, 0.080612f, 0.050606f, 0.022675f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021555f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.065736f, 0.255091f, 0.336456f, 0.141260f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.020320f, 0.056879f, 0.083295f, 0.040963f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.247404f, 0.561749f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.037270f, 0.153576f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.313615f, 0.178768f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.317328f, 0.167805f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.022484f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.056200f, 0.226923f, 0.169203f, 0.032339f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.060880f, 0.227803f, 0.168145f, 0.036277f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.022230f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.020809f, 0.161103f, 0.242215f, 0.080276f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.037660f, 0.170123f, 0.226083f, 0.061733f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.170517f, 0.314573f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.183677f, 0.312560f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.018674f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.150066f, 0.037627f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.563093f, 0.249214f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.017288f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.047237f, 0.083719f, 0.064159f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.141594f, 0.343865f, 0.254176f, 0.047961f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.060771f, 0.083714f, 0.056548f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.055519f, 0.260450f, 0.341460f, 0.141538f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033365f, 0.158801f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.243363f, 0.564471f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.027870f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.650693f, 0.321437f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.154390f, 0.455517f, 0.321763f, 0.068330f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.030540f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.067841f, 0.315774f, 0.431982f, 0.153863f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.029780f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.315631f, 0.654589f}, +}; + +// For each output (5x5) sample, the weight of each input (8x6) sample. +static const float g_weight_downsample_8x6_to_5x5[25][48] = { +{0.728974f, 0.241827f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.029199f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.326790f, 0.583809f, 0.061650f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.027751f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.474659f, 0.471971f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.027161f, 0.026208f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.064479f, 0.600103f, 0.335418f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.245795f, 0.727343f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.026862f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.577450f, 0.212083f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.146821f, 0.063646f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.278532f, 0.501669f, 0.039082f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.051617f, 0.129101f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.401558f, 0.402789f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.088129f, 0.087552f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.019972f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.039177f, 0.470310f, 0.275467f, 0.000000f, 0.000000f, 0.000000f, 0.020182f, 0.000000f, 0.000000f, 0.131064f, 0.041994f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021806f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.201719f, 0.586252f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.071189f, 0.140839f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.390859f, 0.113288f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.395284f, 0.100569f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.180479f, 0.291419f, 0.034269f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.179460f, 0.288259f, 0.026114f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.232294f, 0.235881f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.249972f, 0.265992f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.015860f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.020495f, 0.297441f, 0.200057f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.300629f, 0.181378f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.094856f, 0.384959f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.114338f, 0.382484f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.023363f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.142672f, 0.067752f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.579242f, 0.210334f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.050987f, 0.132705f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.278585f, 0.484125f, 0.053597f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.026554f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.092842f, 0.065201f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.385798f, 0.387342f, 0.000000f, 0.000000f, 0.021183f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021080f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.020712f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.044924f, 0.106062f, 0.061499f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.047893f, 0.466019f, 0.252890f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.020637f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.058939f, 0.143896f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.202796f, 0.573732f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033403f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.730809f, 0.235788f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.032140f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.330176f, 0.584667f, 0.053018f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.026110f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.492274f, 0.481616f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.065854f, 0.592001f, 0.342145f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.037025f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.240768f, 0.722207f}, +}; + +// For each output (6x5) sample, the weight of each input (8x6) sample. +static const float g_weight_downsample_8x6_to_6x5[30][48] = { +{0.858351f, 0.111195f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.030454f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.561719f, 0.406108f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.032173f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.234049f, 0.720564f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.045387f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.699282f, 0.247085f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.053633f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.389024f, 0.574352f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.036624f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.092315f, 0.907685f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.700837f, 0.094616f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.181782f, 0.022766f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.478824f, 0.322377f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.106995f, 0.067586f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024218f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.020740f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.019187f, 0.000000f, 0.211821f, 0.554939f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.076920f, 0.116393f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.528826f, 0.215423f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.129030f, 0.084167f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021007f, 0.021548f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.285851f, 0.511729f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.045516f, 0.156904f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.061737f, 0.729570f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.185199f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.023495f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.426048f, 0.065346f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.437353f, 0.050722f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.020531f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.015946f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.269275f, 0.220699f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.271762f, 0.222318f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.107929f, 0.387609f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.097175f, 0.384787f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.022500f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.018661f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.393619f, 0.098786f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.415799f, 0.073135f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.219562f, 0.256847f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.228262f, 0.295329f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.020203f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.066094f, 0.437807f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.023625f, 0.426898f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.025372f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.179453f, 0.029939f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.702329f, 0.088278f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024531f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.109211f, 0.062119f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.483375f, 0.320765f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.017885f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.077080f, 0.134573f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.212908f, 0.535331f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.022223f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.119888f, 0.115275f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.556098f, 0.208739f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.022346f, 0.116179f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.324515f, 0.536960f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.039522f, 0.193447f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.040639f, 0.726391f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033823f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.857552f, 0.108625f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024057f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.029799f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.542169f, 0.403976f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.052699f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.223511f, 0.723790f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.052693f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.702269f, 0.245038f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.402547f, 0.597453f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.031996f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.086881f, 0.881123f}, +}; + +// For each output (7x5) sample, the weight of each input (8x6) sample. +static const float g_weight_downsample_8x6_to_7x5[35][48] = { +{0.964445f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.035555f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.853417f, 0.094561f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.052022f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.657134f, 0.277797f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.020663f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.023601f, 0.000000f, 0.000000f, 0.020806f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.380325f, 0.419839f, 0.000000f, 0.023060f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.032462f, 0.000000f, 0.000000f, 0.025415f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.022865f, 0.000000f, 0.028258f, 0.000000f, 0.023082f, 0.020352f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024341f, 0.000000f, 0.000000f}, +{0.000000f, 0.031003f, 0.000000f, 0.000000f, 0.218422f, 0.657212f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024308f, 0.033400f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.035654f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.070868f, 0.871307f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.057825f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.964400f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.035600f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.771715f, 0.027473f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.200812f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.681017f, 0.087709f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.170219f, 0.037187f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.023867f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.019162f, 0.000000f, 0.019267f, 0.000000f, 0.521425f, 0.210553f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.107845f, 0.064833f, 0.000000f, 0.000000f, 0.000000f, 0.023456f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.016876f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.016582f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.374490f, 0.378533f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.037317f, 0.000000f, 0.070870f, 0.081690f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.019460f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.020149f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.017492f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.198514f, 0.553647f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.069444f, 0.178395f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.077267f, 0.707241f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.191176f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024316f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.777498f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.197118f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.025384f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.457893f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.477045f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024793f, 0.020109f, 0.000000f, 0.020160f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.453272f, 0.036882f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.449988f, 0.037704f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.022154f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.390518f, 0.119870f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.380701f, 0.108911f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.016500f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.017868f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.216278f, 0.228953f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.240939f, 0.263209f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.016253f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.029917f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.096934f, 0.340899f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.088970f, 0.426562f, 0.000000f, 0.000000f, 0.016718f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021872f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.073754f, 0.459232f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.422925f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.022217f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.019775f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.473981f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.020534f, 0.461485f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024225f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.200471f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.772740f, 0.026789f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.025642f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.165170f, 0.033854f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.660678f, 0.089428f, 0.000000f, 0.000000f, 0.000000f, 0.025229f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.016453f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.117847f, 0.083344f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.528281f, 0.230342f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.023732f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.043833f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.077971f, 0.049154f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.382849f, 0.385195f, 0.000000f, 0.022790f, 0.000000f, 0.000000f, 0.020308f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.017900f}, +{0.000000f, 0.000000f, 0.018444f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.017477f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.086693f, 0.093631f, 0.000000f, 0.032653f, 0.000000f, 0.000000f, 0.019144f, 0.000000f, 0.199637f, 0.532319f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.020247f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.035464f, 0.208022f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.065940f, 0.670327f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.209616f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.790384f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.036613f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.963387f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.046570f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.849248f, 0.104183f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.020833f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.049999f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.649521f, 0.279647f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.030284f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.025099f, 0.000000f, 0.000000f, 0.017993f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028953f, 0.000000f, 0.027848f, 0.031988f, 0.000000f, 0.000000f, 0.000000f, 0.022049f, 0.000000f, 0.000000f, 0.397216f, 0.418570f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.026723f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.038960f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.243424f, 0.690894f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.050705f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.071869f, 0.877426f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.036401f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.963599f}, +}; + +// For each output (8x5) sample, the weight of each input (8x6) sample. 
+static const float g_weight_downsample_8x6_to_8x5[40][48] = { +{0.966296f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033704f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.966306f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033694f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.966296f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033704f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.966298f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033702f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966291f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033709f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966291f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033709f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966295f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033705f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966296f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033704f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.793476f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.206524f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.803849f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.196151f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.803624f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.196376f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.797993f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.202007f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.776552f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.195983f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.027465f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.793721f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.206279f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.806466f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.193534f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.797656f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.202344f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.476380f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.496730f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.026890f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.490205f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.485068f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024727f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.498077f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.476651f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.025272f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.474340f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.480228f, 0.045432f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.478505f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.521495f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.478679f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.483579f, 0.000000f, 0.000000f, 0.037742f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.521456f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.478544f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.507379f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.492621f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.204896f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.795104f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.196765f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.803235f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.199650f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.800350f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.203568f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.796432f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.179104f, 0.025788f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.795108f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.198542f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.801458f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.212749f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.787251f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.210279f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.789721f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033704f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966296f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.033709f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966291f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033700f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966300f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033711f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966289f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033705f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966295f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033692f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966308f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033717f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966283f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033731f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966269f}, +}; + +// For each output (2x6) sample, the weight of each input (8x6) sample. 
+static const float g_weight_downsample_8x6_to_2x6[12][48] = { +{0.316864f, 0.281020f, 0.203578f, 0.128737f, 0.069800f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.084099f, 0.140260f, 0.188810f, 0.272909f, 0.313922f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.309774f, 0.274434f, 0.201401f, 0.144203f, 0.070188f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.065514f, 0.142636f, 0.201399f, 0.276345f, 0.314107f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.317592f, 0.277500f, 0.192959f, 0.141457f, 0.070491f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.073241f, 0.142588f, 0.198561f, 0.278233f, 0.307377f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.320020f, 0.275328f, 0.193983f, 0.143663f, 0.067007f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.069519f, 0.132193f, 0.205168f, 0.279209f, 0.313912f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.314759f, 0.279613f, 0.202284f, 0.130432f, 0.072912f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.077965f, 0.136688f, 0.207007f, 0.271208f, 0.307132f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.311744f, 0.272206f, 0.202758f, 0.136022f, 0.077269f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.072611f, 0.134437f, 0.204577f, 0.271631f, 0.316744f}, +}; + +// For each output (3x6) sample, the weight of each input (8x6) sample. +static const float g_weight_downsample_8x6_to_3x6[18][48] = { +{0.509323f, 0.329513f, 0.161164f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.025207f, 0.165943f, 0.323432f, 0.324818f, 0.160600f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.157414f, 0.335022f, 0.507564f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.511584f, 0.329744f, 0.158672f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.031983f, 0.159222f, 0.310218f, 0.312506f, 0.158287f, 0.027785f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.156210f, 0.333357f, 0.510434f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.515123f, 0.331176f, 0.153701f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.026619f, 0.155693f, 0.312956f, 0.312469f, 0.159059f, 0.033204f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.156669f, 0.330733f, 0.512598f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.503816f, 0.332794f, 0.163390f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024597f, 0.154193f, 0.318347f, 0.305757f, 0.159499f, 0.037605f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.158978f, 0.332267f, 0.508755f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.512301f, 0.329905f, 0.157794f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.034639f, 0.152702f, 0.307204f, 0.309309f, 0.167621f, 0.028524f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.152238f, 0.331031f, 0.516731f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.511179f, 0.335760f, 0.153061f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.173463f, 0.322489f, 0.329811f, 0.174238f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.152159f, 0.337011f, 0.510830f}, +}; + +// For each output (4x6) sample, the weight of each input (8x6) sample. +static const float g_weight_downsample_8x6_to_4x6[24][48] = { +{0.671100f, 0.328900f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.148979f, 0.456693f, 0.330185f, 0.064143f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.058158f, 0.330805f, 0.451065f, 0.159972f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.322150f, 
0.677850f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.677593f, 0.322407f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.167723f, 0.446276f, 0.319975f, 0.066025f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.073990f, 0.323047f, 0.441943f, 0.161020f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.326071f, 0.673929f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.679042f, 0.320958f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.152853f, 0.450375f, 0.323919f, 0.072853f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.061203f, 0.320863f, 0.451270f, 0.166664f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.319746f, 0.680254f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.676510f, 0.323490f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.162624f, 0.457726f, 0.332137f, 0.047514f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.063329f, 0.328068f, 0.444798f, 0.163805f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.320574f, 0.679426f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.678066f, 0.321934f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.166497f, 0.448536f, 0.320669f, 0.064298f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.065578f, 0.323791f, 0.452649f, 0.157982f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.322175f, 0.677825f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.671500f, 0.328500f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.150795f, 0.460955f, 0.323971f, 0.064280f, 
0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.066061f, 0.327767f, 0.449877f, 0.156295f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.322687f, 0.677313f}, +}; + +// For each output (5x6) sample, the weight of each input (8x6) sample. 
+static const float g_weight_downsample_8x6_to_5x6[30][48] = { +{0.754364f, 0.245636f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.335285f, 0.602164f, 0.062551f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.500479f, 0.499521f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.057582f, 0.607199f, 0.335218f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.249634f, 0.750366f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.757244f, 0.242756f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.346204f, 0.598435f, 0.055362f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.501490f, 0.498510f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.060219f, 0.591314f, 0.348467f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.244713f, 0.755287f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.752634f, 0.247366f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.342331f, 0.595920f, 0.061748f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.496285f, 0.503715f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.055875f, 0.601113f, 0.343013f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.245684f, 0.754316f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.754642f, 0.245358f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.341881f, 0.605457f, 0.052662f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.506471f, 0.493529f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.052276f, 0.594038f, 0.353686f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.243659f, 0.756341f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.752998f, 0.247002f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.343161f, 0.587149f, 0.069691f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.497737f, 0.502263f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.068745f, 0.600800f, 0.330455f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.249755f, 0.750245f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.760155f, 0.239845f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.341132f, 0.607027f, 0.051841f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.505602f, 0.494398f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.063784f, 0.594541f, 0.341675f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.246784f, 0.753216f}, +}; + +// For each output (6x6) sample, the weight of each input (8x6) sample. +static const float g_weight_downsample_8x6_to_6x6[36][48] = { +{0.891095f, 0.108905f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.581832f, 0.418168f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.242153f, 0.757847f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.741976f, 0.258024f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.403606f, 0.596394f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.087517f, 0.912483f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.889771f, 0.110229f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.562123f, 0.416930f, 0.000000f, 0.020947f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.239798f, 0.760202f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.745430f, 0.254570f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.386117f, 0.613883f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.079820f, 0.920180f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.881826f, 0.118174f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.573611f, 0.426389f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.253276f, 0.746724f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.743647f, 0.256353f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.401870f, 0.598130f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.084584f, 0.915416f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.886496f, 0.113504f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.579329f, 0.420671f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.247079f, 0.752921f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.738480f, 0.261520f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.387849f, 0.612151f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.084296f, 0.915704f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.887045f, 0.112955f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.566292f, 0.413182f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.020526f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.245603f, 0.754397f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.743664f, 0.256336f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.400389f, 0.599611f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.085951f, 0.914049f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.893377f, 0.106623f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.023576f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.559870f, 0.416555f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.230693f, 0.769307f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.743815f, 0.256185f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.401590f, 0.598410f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.084902f, 0.915098f}, +}; + +// For each output (7x6) sample, the weight of each input (8x6) sample. 
+static const float g_weight_downsample_8x6_to_7x6[42][48] = { +{1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.898749f, 0.101251f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.666832f, 0.285944f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024418f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.022807f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.408751f, 0.452880f, 0.000000f, 0.022279f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.020101f, 0.000000f, 0.026406f, 0.000000f, 0.021392f, 
0.021638f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.026554f, 0.000000f, 0.000000f}, +{0.000000f, 0.030824f, 0.000000f, 0.000000f, 0.224222f, 0.683355f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.025094f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.036505f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.074156f, 0.925844f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.898226f, 0.101774f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.026159f, 0.000000f, 0.029283f, 0.000000f, 0.659538f, 0.261285f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.023736f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.465472f, 0.460934f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.047490f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.026105f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.265328f, 0.711299f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.023373f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.070803f, 0.929197f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.898251f, 0.101749f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.031089f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.687826f, 0.256342f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024743f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.032537f, 0.022458f, 0.000000f, 0.000000f, 0.021138f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.410489f, 0.430095f, 0.000000f, 0.017967f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.018781f, 0.000000f, 0.000000f, 0.026567f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.019968f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.246938f, 0.753062f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.075058f, 0.924942f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.901297f, 0.098703f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.030068f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.643429f, 0.251859f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.020470f, 0.024955f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.029220f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.018980f, 0.000000f, 0.000000f, 0.026284f, 0.019861f, 0.000000f, 0.028010f, 0.000000f, 0.000000f, 0.000000f, 0.445381f, 0.461483f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.238827f, 0.737044f, 0.000000f, 0.024129f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.068567f, 0.931433f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.900397f, 0.099603f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.026865f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.019850f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.657029f, 0.271313f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024943f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033697f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.020611f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.026549f, 0.000000f, 0.440563f, 0.453523f, 0.000000f, 0.025057f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.030966f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.255214f, 0.713821f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.075429f, 0.924571f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.889492f, 0.110508f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021347f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.683322f, 0.295331f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.027000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.027842f, 0.000000f, 0.000000f, 0.031873f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028367f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.023945f, 0.000000f, 0.000000f, 0.417773f, 0.443199f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.026004f, 0.000000f, 0.000000f, 0.024319f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.243621f, 0.706056f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.075547f, 0.924453f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f}, +}; + +// For each output (8x6) sample, the weight of each input (8x6) sample. +static const float g_weight_downsample_8x6_to_8x6[48][48] = { +{1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f}, +{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f}, +}; + +downsample_matrix g_downsample_matrices_8x6[] = +{ + { 2, 2, (const float*)g_weight_downsample_8x6_to_2x2 }, + { 3, 2, (const float*)g_weight_downsample_8x6_to_3x2 }, + { 4, 2, (const float*)g_weight_downsample_8x6_to_4x2 }, + { 5, 2, (const float*)g_weight_downsample_8x6_to_5x2 }, + { 6, 2, (const float*)g_weight_downsample_8x6_to_6x2 }, + { 7, 2, (const float*)g_weight_downsample_8x6_to_7x2 }, + { 8, 2, (const float*)g_weight_downsample_8x6_to_8x2 }, + { 2, 3, (const float*)g_weight_downsample_8x6_to_2x3 }, + { 3, 3, (const float*)g_weight_downsample_8x6_to_3x3 }, + { 4, 3, (const float*)g_weight_downsample_8x6_to_4x3 }, + { 5, 3, (const float*)g_weight_downsample_8x6_to_5x3 }, + { 6, 3, (const float*)g_weight_downsample_8x6_to_6x3 }, + { 7, 3, (const float*)g_weight_downsample_8x6_to_7x3 }, + { 8, 3, (const float*)g_weight_downsample_8x6_to_8x3 }, + { 2, 4, (const float*)g_weight_downsample_8x6_to_2x4 }, + { 3, 4, (const float*)g_weight_downsample_8x6_to_3x4 }, + { 4, 4, (const float*)g_weight_downsample_8x6_to_4x4 }, + { 5, 4, (const float*)g_weight_downsample_8x6_to_5x4 }, + { 6, 4, (const float*)g_weight_downsample_8x6_to_6x4 }, + { 7, 4, (const float*)g_weight_downsample_8x6_to_7x4 }, + { 8, 4, (const float*)g_weight_downsample_8x6_to_8x4 }, + { 2, 5, (const float*)g_weight_downsample_8x6_to_2x5 }, + { 3, 5, (const float*)g_weight_downsample_8x6_to_3x5 }, + { 4, 5, (const float*)g_weight_downsample_8x6_to_4x5 }, + { 5, 5, (const float*)g_weight_downsample_8x6_to_5x5 }, + { 6, 5, (const float*)g_weight_downsample_8x6_to_6x5 }, + { 7, 5, (const float*)g_weight_downsample_8x6_to_7x5 }, + { 8, 5, (const float*)g_weight_downsample_8x6_to_8x5 }, + { 2, 6, (const float*)g_weight_downsample_8x6_to_2x6 }, + { 3, 6, (const float*)g_weight_downsample_8x6_to_3x6 }, + { 4, 6, (const float*)g_weight_downsample_8x6_to_4x6 }, 
+ { 5, 6, (const float*)g_weight_downsample_8x6_to_5x6 }, + { 6, 6, (const float*)g_weight_downsample_8x6_to_6x6 }, + { 7, 6, (const float*)g_weight_downsample_8x6_to_7x6 }, + { 8, 6, (const float*)g_weight_downsample_8x6_to_8x6 } +}; + //-------------------------------------------------------------------------------------------------------------------------- const float* get_6x6_downsample_matrix(uint32_t grid_width, uint32_t grid_height) { - // TODO: Use hash or map lookup. + // TODO: Use hash or map lookup, or calc the index directly for (const auto& m : g_downsample_matrices_6x6) if ((m.m_grid_width == grid_width) && (m.m_grid_height == grid_height)) return m.m_p; @@ -4770,6 +5668,121 @@ const float* get_6x6_downsample_matrix(uint32_t grid_width, uint32_t grid_height return nullptr; } +const float* get_8x6_downsample_matrix(uint32_t grid_width, uint32_t grid_height) +{ + // TODO: Use hash or map lookup, or calc the index directly + for (const auto& m : g_downsample_matrices_8x6) + if ((m.m_grid_width == grid_width) && (m.m_grid_height == grid_height)) + return m.m_p; + + assert(0); + return nullptr; +} + +//-------------------------------------------------------------------------------------------------------------------------- + +void compute_upsample_matrix(basisu::vector2D& upsample_matrix, uint32_t block_width, uint32_t block_height, uint32_t grid_width, uint32_t grid_height) +{ + assert((block_width >= 2) && (block_width <= astc_helpers::MAX_BLOCK_DIM)); + assert((block_height >= 2) && (block_height <= astc_helpers::MAX_BLOCK_DIM)); + assert((grid_width >= 2) && (grid_width <= block_width)); + assert((grid_height >= 2) && (grid_height <= block_height)); + + const uint32_t num_block_samples = block_width * block_height; + const uint32_t num_grid_samples = grid_width * grid_height; + + astc_helpers::weighted_sample samples[astc_helpers::MAX_BLOCK_DIM * astc_helpers::MAX_BLOCK_DIM]; + clear_obj(samples); + + astc_helpers::compute_upsample_weights(block_width, 
block_height, grid_width, grid_height, samples); + + // Compute upsample matrix: output num_block_samples (rows), input num_grid_samples (cols) + upsample_matrix.resize_rows_cols(num_block_samples, num_grid_samples); + + basisu::vector weights(num_grid_samples); + + // compute which source sample(s) contribute to it. + for (uint32_t d = 0; d < num_block_samples; d++) + { + const astc_helpers::weighted_sample& ws = samples[d]; + + weights.set_all(0.0f); + + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 2; x++) + { + float w = ws.m_weights[y][x] * (1.0f / 16.0f); + if (!w) + continue; + + assert((ws.m_src_x + x) < grid_width); + assert((ws.m_src_y + y) < grid_height); + + assert(weights[(ws.m_src_x + x) + (ws.m_src_y + y) * grid_width] == 0.0f); + weights[(ws.m_src_x + x) + (ws.m_src_y + y) * grid_width] = w; + } // x + } // y + + for (uint32_t i = 0; i < num_grid_samples; i++) + upsample_matrix.at_row_col(d, i) = weights[i]; + + } // d +} + +//-------------------------------------------------------------------------------------------------------------------------- +// compute At - used for gradient descent + +void compute_upsample_matrix_transposed(basisu::vector& unweighted_downsample_matrix, uint32_t block_width, uint32_t block_height, uint32_t grid_width, uint32_t grid_height) +{ + assert((block_width >= 2) && (block_width <= astc_helpers::MAX_BLOCK_DIM)); + assert((block_height >= 2) && (block_height <= astc_helpers::MAX_BLOCK_DIM)); + assert((grid_width >= 2) && (grid_width <= block_width)); + assert((grid_height >= 2) && (grid_height <= block_height)); + + const uint32_t num_block_samples = block_width * block_height; + const uint32_t num_grid_samples = grid_width * grid_height; + + // Compute upsample matrix: output num_block_samples (rows), input num_grid_samples (cols) + vector2D upsample_matrix; + compute_upsample_matrix(upsample_matrix, block_width, block_height, grid_width, grid_height); + + // downsample matrix At (without any 
scaling): num_grid_samples (rows), num_block_samples (cols) + unweighted_downsample_matrix.resize(num_grid_samples * num_block_samples); + unweighted_downsample_matrix.set_all(0.0f); + + for (uint32_t j = 0; j < num_grid_samples; ++j) + for (uint32_t i = 0; i < num_block_samples; ++i) + unweighted_downsample_matrix[j * num_block_samples + i] = upsample_matrix.at_row_col(i, j); +} + +//-------------------------------------------------------------------------------------------------------------------------- +// Computes downsample matrices - simpler alternative to SLSQP + +//-------------------------------------------------------------------------------------------------------------------------- +// pDst_vec[] - size must be >= num_grid_samples +// vector used for gradient descent + +void compute_diag_AtA_vector(uint32_t block_width, uint32_t block_height, uint32_t grid_width, uint32_t grid_height, const vector2D &upsample_matrix, float* pDst_vec) +{ + const uint32_t num_block_samples = block_width * block_height; + const uint32_t num_grid_samples = grid_width * grid_height; + + memset(pDst_vec, 0, sizeof(float) * num_grid_samples); + + for (uint32_t r = 0; r < num_block_samples; ++r) + { + for (uint32_t c = 0; c < num_grid_samples; ++c) + { + const float arc = upsample_matrix.at_row_col(r, c); + + pDst_vec[c] += arc * arc; + } + } +} + +//-------------------------------------------------------------------------------------------------------------------------- + void downsample_weight_grid( const float* pMatrix_weights, uint32_t bx, uint32_t by, // source/from dimension (block size) @@ -4807,7 +5820,7 @@ void downsample_ise_weights( assert((block_w <= MAX_ASTC_HDR_BLOCK_W) && (block_h <= MAX_ASTC_HDR_BLOCK_H)); assert((grid_w >= 2) && (grid_w <= MAX_ASTC_HDR_BLOCK_W)); assert((grid_h >= 2) && (grid_h <= MAX_ASTC_HDR_BLOCK_H)); - + assert(dequant_weight_ise_range >= astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE); assert(dequant_weight_ise_range <= 
astc_helpers::LAST_VALID_WEIGHT_ISE_RANGE); @@ -4906,7 +5919,7 @@ static bool refine_endpoints_mode11( { for (uint32_t i = 0; i < num_block_pixels; i++) def_pixel_block_ofs[i] = (uint8_t)i; - + pPixel_block_ofs = def_pixel_block_ofs; } @@ -4932,7 +5945,7 @@ static bool refine_endpoints_mode11( trial_blk_raw_weights[i] = upsampled_weights[pPixel_block_ofs[i]]; trial_blk_raw_weightsf[i] = (float)trial_blk_raw_weights[i] * (1.0f / 64.0f); } - + vec3F l_q16, h_q16; if (opt_mode == cOrdinaryLeastSquares) { @@ -4963,7 +5976,7 @@ static bool refine_endpoints_mode11( { float mid = (0.0f - l) / (h - l); mid = clamp(mid, .01f, .99f); - + float lw = LOW_EMPHASIS_WEIGHT, mw = MIDDLE_EMPHASIS_WEIGHT, hw = HIGH_EMPHASIS_WEIGHT; if (opt_mode == cWeightedLeastSquaresHeavy) lw = LOW_EMPHASIS_WEIGHT_HEAVY, mw = MIDDLE_EMPHASIS_WEIGHT_HEAVY, hw = HIGH_EMPHASIS_WEIGHT_HEAVY; @@ -5023,7 +6036,7 @@ static bool refine_endpoints_mode11( } uint8_t trial_endpoints[NUM_MODE11_ENDPOINTS]; - + uint32_t submode_used; bool pack_succeeded = pack_mode11(l_q16, h_q16, endpoint_ise_range, trial_endpoints, coptions, direct_only, first_submode, last_submode, false, submode_used); @@ -5050,7 +6063,7 @@ static bool refine_endpoints_mode11( const float R_WEIGHT = coptions.m_r_err_scale, G_WEIGHT = coptions.m_g_err_scale; double cur_error = 0, trial_error = 0; - + for (uint32_t p = 0; p < num_pixels; p++) { const half_float* pDesired_half = &pBlock_pixels_half[p][0]; @@ -5177,7 +6190,7 @@ static bool refine_endpoints_mode7( vec3F block_mean_color_q16(calc_mean(num_pixels, pBlock_pixels_q16)); vec3F new_high_color_q16(block_mean_color_q16); - + const float one_over_num_pixels = 1.0f / (float)num_pixels; for (uint32_t i = 0; i < num_pixels; i++) @@ -5189,7 +6202,7 @@ static bool refine_endpoints_mode7( new_high_color_q16[1] += k; new_high_color_q16[2] += k; } - + // Given a set of selectors and a high color, try to compute a better S. 
float t = 0.0f; @@ -5201,7 +6214,7 @@ static bool refine_endpoints_mode7( } t *= one_over_num_pixels; - + if (fabs(t) < .0000125f) return false; @@ -5227,7 +6240,7 @@ static bool refine_endpoints_mode7( if (!decode_mode7_to_qlog12(trial_endpoints, trial_e, nullptr, endpoint_ise_range)) return false; - + // -- for (uint32_t i = 0; i < 3; i++) @@ -5358,3 +6371,4 @@ bool refine_endpoints( } } // namespace basisu + diff --git a/external/basis_universal/encoder/basisu_astc_hdr_common.h b/external/basis_universal/encoder/basisu_astc_hdr_common.h index 18f424fe5a..e01999248a 100644 --- a/external/basis_universal/encoder/basisu_astc_hdr_common.h +++ b/external/basis_universal/encoder/basisu_astc_hdr_common.h @@ -12,13 +12,13 @@ namespace basisu const uint32_t MODE11_TOTAL_SUBMODES = 8; // plus an extra hidden submode, directly encoded, for direct, so really 9 (see tables 99/100 of the ASTC spec) const uint32_t MODE7_TOTAL_SUBMODES = 6; - + // [ise_range][0] = # levels // [ise_range][1...] = lerp value [0,64] // in ASTC order // Supported ISE weight ranges: 0 to 11, 12 total const uint32_t MIN_SUPPORTED_ISE_WEIGHT_INDEX = astc_helpers::BISE_2_LEVELS; // ISE 0=2 levels - const uint32_t MAX_SUPPORTED_ISE_WEIGHT_INDEX = astc_helpers::BISE_32_LEVELS; // ISE 11=16 levels + const uint32_t MAX_SUPPORTED_ISE_WEIGHT_INDEX = astc_helpers::BISE_32_LEVELS; // ISE 11=32 levels const uint32_t MIN_SUPPORTED_WEIGHT_LEVELS = 2; const uint32_t MAX_SUPPORTED_WEIGHT_LEVELS = 32; @@ -29,13 +29,17 @@ namespace basisu const float LDR_TO_HDR_NITS = 100.0f; + extern vec4F g_astc_ls_weights_ise[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][MAX_SUPPORTED_WEIGHT_LEVELS]; + extern uint8_t g_map_astc_to_linear_order[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][MAX_SUPPORTED_WEIGHT_LEVELS]; // [ise_range][astc_index] -> linear index + extern uint8_t g_map_linear_to_astc_order[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][MAX_SUPPORTED_WEIGHT_LEVELS]; // [ise_range][linear_index] -> astc_index + struct astc_hdr_codec_base_options { 
float m_r_err_scale, m_g_err_scale; float m_q_log_bias; - + bool m_ultra_quant; - + // If true, the ASTC HDR compressor is allowed to more aggressively vary weight indices for slightly higher compression in non-fastest mode. This will hurt BC6H quality, however. bool m_allow_uber_mode; @@ -45,7 +49,7 @@ namespace basisu bool m_take_first_non_clamping_mode7_submode; bool m_disable_weight_plane_optimization; - + astc_hdr_codec_base_options() { init(); } void init(); @@ -173,7 +177,7 @@ namespace basisu basist::half_float* pDecoded_half, vec3F* pDecoded_float, uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range); - + // Fast high precision piecewise linear approximation of log2(bias+x). // Half may be zero, positive or denormal. No NaN/Inf/negative. BASISU_FORCE_INLINE double q(basist::half_float x, float log_bias) @@ -183,7 +187,7 @@ namespace basisu fi.f = fast_half_to_float_pos_not_inf_or_nan(x); assert(fi.f >= 0.0f); - + fi.f += log_bias; return (double)fi.u; // approx log2f(fi.f), need to return double for the precision @@ -196,7 +200,7 @@ namespace basisu fi.f = fast_half_to_float_pos_not_inf_or_nan(x); assert(fi.f >= 0.0f); - + fi.f += log_bias; return fi.u; @@ -298,8 +302,8 @@ namespace basisu uint32_t ise_endpoint_range, bool uber_mode, bool constrain_ise_weight_selectors, - int32_t first_submode, int32_t last_submode, bool ignore_clamping, - opt_mode_t opt_mode, + int32_t first_submode, int32_t last_submode, bool ignore_clamping, + opt_mode_t opt_mode, const encode_astc_block_stats *pBlock_stats = nullptr); double encode_astc_hdr_block_downsampled_mode_11( @@ -325,7 +329,7 @@ namespace basisu uint32_t ise_endpoint_range, bool uber_mode, bool constrain_ise_weight_selectors, - int32_t first_submode, int32_t last_submode, + int32_t first_submode, int32_t last_submode, bool ignore_clamping); double encode_astc_hdr_block_mode_7( @@ -337,8 +341,8 @@ namespace basisu uint8_t* blk_endpoints, //[4] uint8_t* blk_weights, // [num_pixels] const 
astc_hdr_codec_base_options& coptions, - uint32_t ise_endpoint_range, - int first_submode = 0, int last_submode = MAX_MODE7_SUBMODE_INDEX, + uint32_t ise_endpoint_range, + int first_submode = 0, int last_submode = MAX_MODE7_SUBMODE_INDEX, const encode_astc_block_stats *pBlock_stats = nullptr); //-------------------------------------------------------------------------------------------------------------------------- @@ -373,17 +377,23 @@ namespace basisu bool pack_astc_mode11_submode(uint32_t submode, uint8_t* pEndpoints, int val_q[2][3], int& max_clamp_mag, bool early_out_if_clamped = false, int max_clamp_mag_accept_thresh = 0); bool pack_astc_mode11_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& low_q16, const vec3F& high_q16, int& max_clamp_mag, bool early_out_if_clamped = false, int max_clamp_mag_accept_thresh = 0); void pack_astc_mode11_direct(uint8_t* pEndpoints, vec3F l_q16, vec3F h_q16); - + bool pack_mode11(mode11_log_desc& desc, uint8_t* pEndpoints); void unpack_mode11(const uint8_t* pEndpoints, mode11_log_desc& desc); void decode_cem_11_config(const uint8_t* pEndpoints, int& submode_index, int& maj_index); void decode_cem_7_config(const uint8_t* pEndpoints, int& submode_index, int& maj_index); - + void dequantize_astc_weights(uint32_t n, const uint8_t* pSrc_ise_vals, uint32_t from_ise_range, uint8_t* pDst_raw_weights); const float* get_6x6_downsample_matrix(uint32_t grid_width, uint32_t grid_height); - + const float* get_8x6_downsample_matrix(uint32_t grid_width, uint32_t grid_height); + + void compute_upsample_matrix(basisu::vector2D& upsample_matrix, uint32_t block_width, uint32_t block_height, uint32_t grid_width, uint32_t grid_height); + void compute_upsample_matrix_transposed(basisu::vector& unweighted_downsample_matrix, uint32_t block_width, uint32_t block_height, uint32_t grid_width, uint32_t grid_height); + + void compute_diag_AtA_vector(uint32_t block_width, uint32_t block_height, uint32_t grid_width, uint32_t grid_height, const 
vector2D& upsample_matrix, float* pDst_vec); + void downsample_weight_grid( const float* pMatrix_weights, uint32_t bx, uint32_t by, // source/from dimension (block size) @@ -413,10 +423,11 @@ namespace basisu uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[], const uint8_t* pPixel_block_ofs, // maps this subset's pixels to block offsets astc_hdr_codec_base_options& coptions, opt_mode_t opt_mode); - + extern bool g_astc_hdr_enc_initialized; // This MUST be called before encoding any blocks. void astc_hdr_enc_init(); } // namespace basisu + diff --git a/external/basis_universal/encoder/basisu_astc_ldr_common.cpp b/external/basis_universal/encoder/basisu_astc_ldr_common.cpp new file mode 100644 index 0000000000..9d208db822 --- /dev/null +++ b/external/basis_universal/encoder/basisu_astc_ldr_common.cpp @@ -0,0 +1,5667 @@ +// File: basisu_astc_ldr_common.cpp +// Copyright (C) 2019-2026 Binomial LLC. All Rights Reserved. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "basisu_enc.h"
+#include "../transcoder/basisu_astc_helpers.h"
+#include "../transcoder/basisu_astc_hdr_core.h"
+#include "basisu_astc_hdr_common.h"
+#include "basisu_astc_ldr_common.h"
+
+#define BASISU_ASTC_LDR_DEBUG_MSGS (1)
+
+namespace basisu
+{
+
+namespace astc_ldr
+{
+	static bool g_initialized; // set by global_init() once the table below has been filled
+	static vec4F g_astc_ls_raw_weights_ise[ASTC_LDR_MAX_RAW_WEIGHTS]; // least squares terms for raw weights 0..64, filled by global_init()
+	// Blue contraction, forward direction: r and g become 2*c - encoded_b, clamped to [0,255]; b and a pass through. did_clamp is set (never cleared) when r or g needed clamping.
+	color_rgba blue_contract_enc(color_rgba orig, bool& did_clamp, int encoded_b)
+	{
+		color_rgba enc;
+
+		int tr = orig.r * 2 - encoded_b;
+		int tg = orig.g * 2 - encoded_b;
+		if ((tr < 0) || (tr > 255) || (tg < 0) || (tg > 255))
+			did_clamp = true;
+
+		enc.r = (uint8_t)basisu::clamp(tr, 0, 255);
+		enc.g = (uint8_t)basisu::clamp(tg, 0, 255);
+		enc.b = (uint8_t)orig.b;
+		enc.a = orig.a;
+		return enc;
+	}
+	// Blue contraction, inverse direction: r and g become (c + enc_b) >> 1; b and a are only narrowed to 8 bits.
+	color_rgba blue_contract_dec(int enc_r, int enc_g, int enc_b, int enc_a)
+	{
+		color_rgba dec;
+		dec.r = (uint8_t)((enc_r + enc_b) >> 1);
+		dec.g = (uint8_t)((enc_g + enc_b) >> 1);
+		dec.b = (uint8_t)enc_b;
+		dec.a = (uint8_t)enc_a;
+		return dec;
+	}
+	// Fills g_astc_ls_raw_weights_ise on the first call; later calls return immediately. No locking is done here.
+	void global_init()
+	{
+		if (g_initialized)
+			return;
+
+		// Precomputed weight constants used during least squares fit determination. For each entry: w * w, (1.0f - w) * w, (1.0f - w) * (1.0f - w), w
+		for (uint32_t iw = 0; iw <= 64; iw++)
+		{
+			float w = (float)iw * (1.0f / 64.0f);
+
+			g_astc_ls_raw_weights_ise[iw].set(w * w, (1.0f - w) * w, (1.0f - w) * (1.0f - w), w);
+		}
+
+		g_initialized = true;
+	}
+	// Selects the per-weight least squares term table for weight_ise_range; the result is indexed by selector in the compute_least_squares_endpoints_*() helpers.
+	static inline const vec4F* get_ls_weights_ise(uint32_t weight_ise_range)
+	{
+		assert((weight_ise_range <= astc_helpers::BISE_32_LEVELS) || (weight_ise_range == astc_helpers::BISE_64_LEVELS));
+
+		// astc_helpers::BISE_64_LEVELS indicates raw [0,64] weights (65 total), otherwise ISE weights (<= 32 levels total)
+		return (weight_ise_range == astc_helpers::BISE_64_LEVELS) ? g_astc_ls_raw_weights_ise : &g_astc_ls_weights_ise[weight_ise_range][0];
+	}
+	// Fits scalar endpoints (*pXl, *pXh) to pVals[0..N-1] from each sample's selector. Returns false, leaving the outputs unwritten, when |det| < 1e-8. Results go through saturate(); if bounds_min == bounds_max both endpoints are forced to that value.
+	static bool compute_least_squares_endpoints_1D(
+		uint32_t N, const uint8_t* pSelectors, const vec4F* pSelector_weights,
+		float* pXl, float* pXh, const float* pVals, float bounds_min, float bounds_max)
+	{
+		float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f;
+		float q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f;
+
+		for (uint32_t i = 0; i < N; i++)
+		{
+			const uint32_t sel = pSelectors[i];
+
+			z00 += pSelector_weights[sel][0];
+			z10 += pSelector_weights[sel][1];
+			z11 += pSelector_weights[sel][2];
+
+			float w = pSelector_weights[sel][3];
+
+			q00_r += w * pVals[i];
+			t_r += pVals[i];
+		}
+
+		q10_r = t_r - q00_r;
+
+		z01 = z10;
+		// Invert the symmetric 2x2 matrix [z00 z01; z10 z11]; bail out if it is (nearly) singular.
+		float det = z00 * z11 - z01 * z10;
+		if (fabs(det) < 1e-8f)
+			return false;
+
+		det = 1.0f / det;
+
+		float iz00, iz01, iz10, iz11;
+		iz00 = z11 * det;
+		iz01 = -z01 * det;
+		iz10 = -z10 * det;
+		iz11 = z00 * det;
+
+		*pXh = (float)(iz00 * q00_r + iz01 * q10_r); *pXl = (float)(iz10 * q00_r + iz11 * q10_r);
+
+		float l = saturate(*pXl), h = saturate(*pXh);
+
+		if (bounds_min == bounds_max)
+		{
+			l = bounds_min;
+			h = bounds_max;
+		}
+
+		*pXl = l;
+		*pXh = h;
+
+		return true;
+	}
+	// Two-channel variant of compute_least_squares_endpoints_1D(): same matrix, solved for components 0 and 1 of pColors; the bounds override is applied per channel.
+	static bool compute_least_squares_endpoints_2D(
+		uint32_t N, const uint8_t* pSelectors, const vec4F* pSelector_weights,
+		vec2F* pXl, vec2F* pXh, const vec2F* pColors, const vec2F& bounds_min, const vec2F& bounds_max)
+	{
+		float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f;
+		float q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f;
+		float q00_g = 0.0f, q10_g = 0.0f, t_g = 0.0f;
+
+		for (uint32_t i = 0; i < N; i++)
+		{
+			const uint32_t sel = pSelectors[i];
+
+			z00 += pSelector_weights[sel][0];
+			z10 += pSelector_weights[sel][1];
+			z11 += pSelector_weights[sel][2];
+
+			float w = pSelector_weights[sel][3];
+
+			q00_r += w * pColors[i][0];
+			t_r += pColors[i][0];
+
+			q00_g += w * pColors[i][1];
+			t_g += pColors[i][1];
+		}
+
+		q10_r = t_r - q00_r;
+		q10_g = t_g - q00_g;
+
+		z01 = z10;
+
+		float det = z00 * z11 - z01 * z10;
+		if (fabs(det) < 1e-8f)
+			return false;
+
+		det = 1.0f / det;
+
+		float iz00, iz01, iz10, iz11;
+		iz00 = z11 * det;
+		iz01 = -z01 * det;
+		iz10 = -z10 * det;
+		iz11 = z00 * det;
+
+		(*pXh)[0] = (float)(iz00 * q00_r + iz01 * q10_r); (*pXl)[0] = (float)(iz10 * q00_r + iz11 * q10_r);
+		(*pXh)[1] = (float)(iz00 * q00_g + iz01 * q10_g); (*pXl)[1] = (float)(iz10 * q00_g + iz11 * q10_g);
+
+		for (uint32_t c = 0; c < 2; c++)
+		{
+			float l = saturate((*pXl)[c]), h = saturate((*pXh)[c]);
+
+			if (bounds_min[c] == bounds_max[c])
+			{
+				l = bounds_min[c];
+				h = bounds_max[c];
+			}
+
+			(*pXl)[c] = l;
+			(*pXh)[c] = h;
+		}
+
+		return true;
+	}
+	// Three-channel variant: solves components 0..2 of pColors and writes 0 to component 3 of both outputs.
+	static bool compute_least_squares_endpoints_3D(
+		uint32_t N, const uint8_t* pSelectors, const vec4F* pSelector_weights,
+		vec4F* pXl, vec4F* pXh, const vec4F* pColors, const vec4F& bounds_min, const vec4F& bounds_max)
+	{
+		float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f;
+		float q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f;
+		float q00_g = 0.0f, q10_g = 0.0f, t_g = 0.0f;
+		float q00_b = 0.0f, q10_b = 0.0f, t_b = 0.0f;
+
+		for (uint32_t i = 0; i < N; i++)
+		{
+			const uint32_t sel = pSelectors[i];
+
+			z00 += pSelector_weights[sel][0];
+			z10 += pSelector_weights[sel][1];
+			z11 += pSelector_weights[sel][2];
+
+			float w = pSelector_weights[sel][3];
+
+			q00_r += w * pColors[i][0];
+			t_r += pColors[i][0];
+
+			q00_g += w * pColors[i][1];
+			t_g += pColors[i][1];
+
+			q00_b += w * pColors[i][2];
+			t_b += pColors[i][2];
+		}
+
+		q10_r = t_r - q00_r;
+		q10_g = t_g - q00_g;
+		q10_b = t_b - q00_b;
+
+		z01 = z10;
+
+		float det = z00 * z11 - z01 * z10;
+		if (fabs(det) < 1e-8f)
+			return false;
+
+		det = 1.0f / det;
+
+		float iz00, iz01, iz10, iz11;
+		iz00 = z11 * det;
+		iz01 = -z01 * det;
+		iz10 = -z10 * det;
+		iz11 = z00 * det;
+
+		(*pXh)[0] = (float)(iz00 * q00_r + iz01 * q10_r); (*pXl)[0] = (float)(iz10 * q00_r + iz11 * q10_r);
+		(*pXh)[1] = (float)(iz00 * q00_g + iz01 * q10_g); (*pXl)[1] = (float)(iz10 * q00_g + iz11 * q10_g);
+		(*pXh)[2] = (float)(iz00 * q00_b + iz01 * q10_b); (*pXl)[2] = (float)(iz10 * q00_b + iz11 * q10_b);
+
+		(*pXh)[3] = 0;
+		(*pXl)[3] = 0;
+
+		for (uint32_t c = 0; c < 3; c++)
+		{
+			float l = saturate((*pXl)[c]), h = saturate((*pXh)[c]);
+
+			if (bounds_min[c] == bounds_max[c])
+			{
+				l = bounds_min[c];
+				h = bounds_max[c];
+			}
+
+			(*pXl)[c] = l;
+			(*pXh)[c] = h;
+		}
+
+		return true;
+	}
+	// Four-channel variant: solves all four components of pColors with the same shared 2x2 matrix.
+	static bool compute_least_squares_endpoints_4D(
+		uint32_t N, const uint8_t* pSelectors, const vec4F* pSelector_weights,
+		vec4F* pXl, vec4F* pXh, const vec4F* pColors, const vec4F& bounds_min, const vec4F& bounds_max)
+	{
+		float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f;
+		float q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f;
+		float q00_g = 0.0f, q10_g = 0.0f, t_g = 0.0f;
+		float q00_b = 0.0f, q10_b = 0.0f, t_b = 0.0f;
+		float q00_a = 0.0f, q10_a = 0.0f, t_a = 0.0f;
+
+		for (uint32_t i = 0; i < N; i++)
+		{
+			const uint32_t sel = pSelectors[i];
+			z00 += pSelector_weights[sel][0];
+			z10 += pSelector_weights[sel][1];
+			z11 += pSelector_weights[sel][2];
+
+			float w = pSelector_weights[sel][3];
+			q00_r += w * pColors[i][0]; t_r += pColors[i][0];
+			q00_g += w * pColors[i][1]; t_g += pColors[i][1];
+			q00_b += w * pColors[i][2]; t_b += pColors[i][2];
+			q00_a += w * pColors[i][3]; t_a += pColors[i][3];
+		}
+
+		q10_r = t_r - q00_r;
+		q10_g = t_g - q00_g;
+		q10_b = t_b - q00_b;
+		q10_a = t_a - q00_a;
+
+		z01 = z10;
+
+		float det = z00 * z11 - z01 * z10;
+		if (fabs(det) < 1e-8f)
+			return false;
+
+		det = 1.0f / det;
+
+		float iz00, iz01, iz10, iz11;
+		iz00 = z11 * det;
+		iz01 = -z01 * det;
+		iz10 = -z10 * det;
+		iz11 = z00 * det;
+
+		(*pXh)[0] = (float)(iz00 * q00_r + iz01 * q10_r); (*pXl)[0] = (float)(iz10 * q00_r + iz11 * q10_r);
+		(*pXh)[1] = (float)(iz00 * q00_g + iz01 * q10_g); (*pXl)[1] = (float)(iz10 * q00_g + iz11 * q10_g);
+		(*pXh)[2] = (float)(iz00 * q00_b + iz01 * q10_b); (*pXl)[2] = (float)(iz10 * q00_b + iz11 * q10_b);
+		(*pXh)[3] = (float)(iz00 * q00_a + iz01 * q10_a); (*pXl)[3] = (float)(iz10 * q00_a + iz11 * q10_a);
+
+		for (uint32_t c = 0; c < 4; c++)
+		{
+			float l = saturate((*pXl)[c]), h = saturate((*pXh)[c]);
+
+			if (bounds_min[c] == bounds_max[c])
+			{
+				l = bounds_min[c];
+				h = bounds_max[c];
+			}
+
+			(*pXl)[c] = l;
+			(*pXh)[c] = h;
+		}
+
+		return true;
+	}
+
+#if 0
+	static void dequant_astc_weights(uint32_t n, const uint8_t* pSrc_ise_vals, uint32_t from_ise_range, uint8_t* pDst_raw_weights)
+	{
+		const auto& dequant_tab = astc_helpers::g_dequant_tables.get_weight_tab(from_ise_range).m_ISE_to_val;
+
+		for (uint32_t i = 0; i < n; i++)
+			pDst_raw_weights[i] = dequant_tab[pSrc_ise_vals[i]];
+	}
+#endif
+
+#if 0
+	static void dequant_astc_endpoints(uint32_t n, const uint8_t* pSrc_ise_vals, uint32_t from_ise_range, uint8_t* pDst_raw_weights)
+	{
+		const auto& dequant_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(from_ise_range).m_ISE_to_val;
+
+		for (uint32_t i = 0; i < n; i++)
+			pDst_raw_weights[i] = dequant_tab[pSrc_ise_vals[i]];
+	}
+#endif
+	// Steps an ISE weight value by delta positions in rank order: ISE -> rank, add delta, clamp to [0, levels - 1], rank -> ISE. delta == 0 returns ise_val unchanged.
+	int apply_delta_to_bise_weight_val(uint32_t weight_ise_range, int ise_val, int delta)
+	{
+		if (delta == 0)
+			return ise_val;
+
+		uint32_t num_ise_levels = astc_helpers::get_ise_levels(weight_ise_range);
+
+		const auto& ISE_to_rank = astc_helpers::g_dequant_tables.get_weight_tab(weight_ise_range).m_ISE_to_rank;
+		const auto& rank_to_ISE = astc_helpers::g_dequant_tables.get_weight_tab(weight_ise_range).m_rank_to_ISE;
+
+		int cur_rank = ISE_to_rank[ise_val];
+		int new_rank = basisu::clamp(cur_rank + delta, 0, (int)num_ise_levels - 1);
+
+		return rank_to_ISE[new_rank];
+	}
+
+	// v must be [0,1]
+	// converts to nearest ISE index with proper precise rounding
+	static uint8_t precise_round_bise_endpoint_val(float v, uint32_t endpoint_ise_range)
+	{
+		assert((v >= 0) && (v <= 1.0f));
+
+		const auto& quant_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(endpoint_ise_range).m_val_to_ise;
+		const auto& dequant_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(endpoint_ise_range).m_ISE_to_val;
+
+		v = saturate(v);
+
+		const int iv = clamp((int)std::roundf(v * 255.0f), 0, 255);
+
+		uint8_t ise_index = 0;
+		// Try the table's pick for iv with deltas -1, 0, +1 (presumably its neighbouring levels) and keep whichever dequantizes closest to v.
+		float best_err = BIG_FLOAT_VAL;
+		for (int iscale_delta = -1; iscale_delta <= 1; iscale_delta++)
+		{
+			const int trial_ise_index = astc_helpers::apply_delta_to_bise_endpoint_val(endpoint_ise_range, quant_tab[iv], iscale_delta);
+
+			const float dequant_val = dequant_tab[trial_ise_index] * (1.0f / 255.0f);
+
+			const float dequant_err = fabs(dequant_val - v);
+			if (dequant_err < best_err)
+			{
+				best_err = dequant_err;
+				ise_index = (uint8_t)trial_ise_index;
+			}
+		} // iscale_delta
+
+		return ise_index;
+	}
+
+	// Result of cem_encode_ldr_rgb_or_rgba_direct(): m_is_blue_contracted is true if blue contraction was actually used,
+	// m_endpoints_are_swapped is true if the encoded endpoints ended up swapped, m_any_degen is true if a channel quantized to equal values although l and h differ.
+	// TODO: Pass in vec4F l/h and let it more precisely quantize in here.
+	struct cem_encode_ldr_rgb_or_rgba_direct_result
+	{
+		bool m_is_blue_contracted;
+		bool m_endpoints_are_swapped;
+		bool m_any_degen;
+	};
+	// Quantizes l/h for the RGB or RGBA direct CEM, optionally through blue contraction, and writes 6 (RGB) or 8 (RGBA) ISE values to pEndpoint_vals.
+	static cem_encode_ldr_rgb_or_rgba_direct_result cem_encode_ldr_rgb_or_rgba_direct(
+		uint32_t cem_index, uint32_t endpoint_ise_range, const color_rgba& l, const color_rgba& h, uint8_t* pEndpoint_vals,
+		bool try_blue_contract)
+	{
+		assert((cem_index == astc_helpers::CEM_LDR_RGB_DIRECT) || (cem_index == astc_helpers::CEM_LDR_RGBA_DIRECT));
+
+		cem_encode_ldr_rgb_or_rgba_direct_result res;
+		// Local aliases: every field of res is assigned through these references below.
+		bool& endpoints_are_swapped = res.m_endpoints_are_swapped;
+		bool& any_degen = res.m_any_degen;
+		bool& is_blue_contracted = res.m_is_blue_contracted;
+
+		assert((cem_index == astc_helpers::CEM_LDR_RGB_DIRECT) || (cem_index == astc_helpers::CEM_LDR_RGBA_DIRECT));
+
+		const bool has_alpha = (cem_index == astc_helpers::CEM_LDR_RGBA_DIRECT);
+
+		const auto& quant_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(endpoint_ise_range).m_val_to_ise;
+		const auto& dequant_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(endpoint_ise_range).m_ISE_to_val;
+
+		//const auto &ISE_to_rank = astc_helpers::g_dequant_tables.get_endpoint_tab(endpoint_ise_range).m_ISE_to_rank;
+		//const auto &rank_to_ISE = astc_helpers::g_dequant_tables.get_endpoint_tab(endpoint_ise_range).m_rank_to_ISE;
+
+		color_rgba enc_l(l), enc_h(h);
+		endpoints_are_swapped = false;
+
+		is_blue_contracted = false;
+		if (try_blue_contract)
+		{
+			int enc_v4 = quant_tab[enc_l.b], enc_v5 = quant_tab[enc_h.b];
+			int dec_v4 = dequant_tab[enc_v4], dec_v5 = dequant_tab[enc_v5];
+
+			bool did_clamp = false;
+			enc_l = blue_contract_enc(h, did_clamp, dec_v5); // yes, they're swapped in the spec
+			enc_h = blue_contract_enc(l, did_clamp, dec_v4);
+			// Blue contraction is only kept when neither endpoint needed clamping; otherwise fall back to the plain l/h.
+			if (!did_clamp)
+			{
+				is_blue_contracted = true;
+				endpoints_are_swapped = true;
+			}
+			else
+			{
+				enc_l = l;
+				enc_h = h;
+			}
+		}
+
+		int enc_v0 = quant_tab[enc_l.r], enc_v2 = quant_tab[enc_l.g], enc_v4 = quant_tab[enc_l.b];
+		int enc_v1 = quant_tab[enc_h.r], enc_v3 = quant_tab[enc_h.g], enc_v5 = quant_tab[enc_h.b];
+
+		int enc_v6 = 0, enc_v7 = 0;
+		if (has_alpha)
+		{
+			enc_v6 = quant_tab[enc_l.a];
+			enc_v7 = quant_tab[enc_h.a];
+		}
+		// Degenerate = a channel whose two quantized values are equal even though the requested l and h differ.
+		any_degen = false;
+		if ((enc_v0 == enc_v1) && (l.r != h.r))
+			any_degen = true;
+		if ((enc_v2 == enc_v3) && (l.g != h.g))
+			any_degen = true;
+		if ((enc_v4 == enc_v5) && (l.b != h.b))
+			any_degen = true;
+		if (has_alpha)
+		{
+			if ((enc_v6 == enc_v7) && (l.a != h.a))
+				any_degen = true;
+		}
+
+		int dec_v0 = dequant_tab[enc_v0], dec_v2 = dequant_tab[enc_v2], dec_v4 = dequant_tab[enc_v4];
+		int dec_v1 = dequant_tab[enc_v1], dec_v3 = dequant_tab[enc_v3], dec_v5 = dequant_tab[enc_v5];
+		// The dequantized RGB sums decide the stored order: the result must end with s1 < s0 when blue contracted and s1 >= s0 otherwise (see the _DEBUG check below).
+		int s0 = dec_v0 + dec_v2 + dec_v4;
+		int s1 = dec_v1 + dec_v3 + dec_v5;
+
+		bool should_swap = false;
+
+		if ((s1 == s0) && (is_blue_contracted))
+		{
+			// if sums are equal we can't use blue contraction at all, so undo it
+			enc_l = l;
+			enc_h = h;
+
+			is_blue_contracted = false;
+			endpoints_are_swapped = false;
+
+			enc_v0 = quant_tab[enc_l.r], enc_v2 = quant_tab[enc_l.g], enc_v4 = quant_tab[enc_l.b];
+			enc_v1 = quant_tab[enc_h.r], enc_v3 = quant_tab[enc_h.g], enc_v5 = quant_tab[enc_h.b];
+
+			dec_v0 = dequant_tab[enc_v0], dec_v2 = dequant_tab[enc_v2], dec_v4 = dequant_tab[enc_v4];
+			dec_v1 = dequant_tab[enc_v1], dec_v3 = dequant_tab[enc_v3], dec_v5 = dequant_tab[enc_v5];
+
+			if (has_alpha)
+			{
+				enc_v6 = quant_tab[enc_l.a];
+				enc_v7 = quant_tab[enc_h.a];
+			}
+			// NOTE(review): any_degen is not recomputed for the re-quantized values on this path.
+			s0 = dec_v0 + dec_v2 + dec_v4;
+			s1 = dec_v1 + dec_v3 + dec_v5;
+		}
+
+		if (s1 >= s0)
+		{
+			if (is_blue_contracted)
+				should_swap = true;
+		}
+		else
+		{
+			if (!is_blue_contracted)
+				should_swap = true;
+		}
+
+		if (should_swap)
+		{
+			endpoints_are_swapped = !endpoints_are_swapped;
+
+			std::swap(enc_v0, enc_v1);
+			std::swap(enc_v2, enc_v3);
+			std::swap(enc_v4, enc_v5);
+			std::swap(enc_v6, enc_v7);
+		}
+		// Output layout: low/high pairs interleaved per channel (r, g, b, then a when present).
+		pEndpoint_vals[0] = (uint8_t)enc_v0;
+		pEndpoint_vals[1] = (uint8_t)enc_v1;
+
+		pEndpoint_vals[2] = (uint8_t)enc_v2;
+		pEndpoint_vals[3] = (uint8_t)enc_v3;
+
+		pEndpoint_vals[4] = (uint8_t)enc_v4;
+		pEndpoint_vals[5] = (uint8_t)enc_v5;
+
+		if (has_alpha)
+		{
+			pEndpoint_vals[6] = (uint8_t)enc_v6;
+			pEndpoint_vals[7] = (uint8_t)enc_v7;
+		}
+
+		#ifdef _DEBUG
+		{
+			int check_s0 = dequant_tab[enc_v0] + dequant_tab[enc_v2] + dequant_tab[enc_v4];
+			int check_s1 = dequant_tab[enc_v1] + dequant_tab[enc_v3] + dequant_tab[enc_v5];
+
+			if (check_s1 >= check_s0)
+			{
+				assert(!is_blue_contracted);
+			}
+			else
+			{
+				assert(is_blue_contracted);
+			}
+		}
+		#endif
+
+		return res;
+	}
+
+	// Cannot fail
+	// scale=1 cannot be packed
+	static void cem_encode_ldr_rgb_or_rgba_base_scale(
+		uint32_t cem_index, uint32_t endpoint_ise_range, float scale, float l_a, const vec4F& h, uint8_t* pEndpoint_vals)
+	{
+		assert((cem_index == astc_helpers::CEM_LDR_RGB_BASE_SCALE) || (cem_index == astc_helpers::CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A));
+		assert((scale >= 0.0f) && (scale < 1.0f));
+
+		const bool has_alpha = (cem_index == astc_helpers::CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A);
+
+		const auto& quant_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(endpoint_ise_range).m_val_to_ise;
+		const auto& dequant_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(endpoint_ise_range).m_ISE_to_val;
+
+		const uint32_t total_vals_to_pack = has_alpha ? 6 : 4;
+		// Pack order: h[0], h[1], h[2], scale * (256 / 255) clamped to [0,1], then l_a and h[3] for the alpha variant.
+		float vals_to_pack[6] = { 0 };
+
+		vals_to_pack[0] = h[0];
+		vals_to_pack[1] = h[1];
+		vals_to_pack[2] = h[2];
+		vals_to_pack[3] = clamp(scale * (256.0f / 255.0f), 0.0f, 1.0f);
+
+		if (has_alpha)
+		{
+			vals_to_pack[4] = l_a;
+			vals_to_pack[5] = h[3];
+		}
+
+		for (uint32_t c = 0; c < total_vals_to_pack; c++)
+		{
+			const float v = vals_to_pack[c];
+			const int iv = clamp((int)std::roundf(v * 255.0f), 0, 255);
+			// Same search as precise_round_bise_endpoint_val(): keep the candidate (delta -1, 0, +1) that dequantizes closest to v.
+			float best_err = BIG_FLOAT_VAL;
+			for (int iscale_delta = -1; iscale_delta <= 1; iscale_delta++)
+			{
+				const int trial_ise_index = astc_helpers::apply_delta_to_bise_endpoint_val(endpoint_ise_range, quant_tab[iv], iscale_delta);
+
+				const float dequant_val = dequant_tab[trial_ise_index] * (1.0f / 255.0f);
+
+				const float dequant_err = fabs(dequant_val - v);
+				if (dequant_err < best_err)
+				{
+					best_err = dequant_err;
+					pEndpoint_vals[c] = (uint8_t)trial_ise_index;
+				}
+			} // iscale_delta
+
+		} // c
+	}
+
+#if 0
+	static int clamp6(int val, bool& was_clamped)
+	{
+		if (val < -32)
+		{
+			val = -32;
+			was_clamped = true;
+		}
+		else if (val > 31)
+		{
+			val = 31;
+			was_clamped = true;
+		}
+		return val;
+	}
+#endif
+
+	// Result flags of cem_encode_ldr_rgb_or_rgba_base_offset(); m_used_blue_contraction is true if blue contraction was used
+	// note the encoded endpoints may be swapped (m_endpoints_swapped)
+	struct rgb_base_offset_res
+	{
+		bool m_failed_flag;
+		bool m_used_blue_contraction;
+		bool m_blue_contraction_clamped;
+		bool m_delta_clamped;
+		bool m_any_degen;
+		bool m_endpoints_swapped;
+	};
+
+	// May fail if the tiebreaking logic isn't strong enough.
+	// Packs orig_l/orig_h as an RGB/RGBA base+offset CEM via basist::astc_ldr_t::pack_base_offset() and reports how the pack went. On failure only m_failed_flag is meaningful.
+	// m_used_blue_contraction and m_any_degen are derived by re-reading the packed values, not from the packer's own flags.
+	static rgb_base_offset_res cem_encode_ldr_rgb_or_rgba_base_offset(uint32_t cem_index, uint32_t endpoint_ise_range, const color_rgba& orig_l, const color_rgba& orig_h, uint8_t* pEndpoint_vals, bool use_blue_contract)
+	{
+		assert((cem_index == astc_helpers::CEM_LDR_RGB_BASE_PLUS_OFFSET) || (cem_index == astc_helpers::CEM_LDR_RGBA_BASE_PLUS_OFFSET));
+
+		const bool has_alpha = (cem_index == astc_helpers::CEM_LDR_RGBA_BASE_PLUS_OFFSET);
+
+		rgb_base_offset_res res;
+		res.m_failed_flag = false;
+		res.m_used_blue_contraction = false;
+		res.m_blue_contraction_clamped = false;
+		res.m_delta_clamped = false;
+		res.m_any_degen = false;
+		res.m_endpoints_swapped = false;
+		// The packer's three status outputs go straight into res; the blue-contraction clamp flag used to land in a local that was never copied back.
+		bool status = basist::astc_ldr_t::pack_base_offset(
+			cem_index, endpoint_ise_range, pEndpoint_vals,
+			convert_to_basist_color_rgba(orig_l), convert_to_basist_color_rgba(orig_h),
+			use_blue_contract, true,
+			res.m_blue_contraction_clamped, res.m_delta_clamped, res.m_endpoints_swapped);
+
+		assert(status);
+
+		if (!status)
+		{
+			res.m_failed_flag = true;
+			return res;
+		}
+
+		// Verify the actual BC status by unpacking to be absolutely sure
+		res.m_used_blue_contraction = astc_helpers::used_blue_contraction(cem_index, pEndpoint_vals, endpoint_ise_range);
+
+		color_rgba dec_l, dec_h;
+		astc_ldr::decode_endpoints(cem_index, pEndpoint_vals, endpoint_ise_range, dec_l, dec_h);
+
+		const uint32_t num_comps = (has_alpha ? 4 : 3);
+		for (uint32_t c = 0; c < num_comps; c++)
+		{
+			if (orig_l[c] == orig_h[c])
+				continue;
+
+			// Desired L/H are not equal, but packed are equal=degenerate pack (loss of freedom). Channels that were meant to be equal are skipped above.
+			if (dec_l[c] == dec_h[c])
+			{
+				res.m_any_degen = true;
+				break;
+			}
+		} // c
+
+		return res;
+	}
+
+	// L or LA direct
+	static void encode_cem0_4(uint32_t cem_index, float lum_l, float lum_h, float a_l, float a_h, uint32_t endpoint_ise_range, uint8_t* pEndpoints)
+	{
+		assert((cem_index == astc_helpers::CEM_LDR_LUM_DIRECT) || (cem_index == astc_helpers::CEM_LDR_LUM_ALPHA_DIRECT));
+
+		const bool has_alpha = (cem_index == astc_helpers::CEM_LDR_LUM_ALPHA_DIRECT);
+
+		pEndpoints[0] = precise_round_bise_endpoint_val(lum_l, endpoint_ise_range);
+		pEndpoints[1] = precise_round_bise_endpoint_val(lum_h, endpoint_ise_range);
+
+		if (has_alpha)
+		{
+			pEndpoints[2] = precise_round_bise_endpoint_val(a_l, endpoint_ise_range);
+			pEndpoints[3] = precise_round_bise_endpoint_val(a_h, endpoint_ise_range);
+		}
+	}
+
+	// Returned in ISE order
+	uint32_t get_colors(const color_rgba& l, const color_rgba& h, uint32_t weight_ise_index, color_rgba* pColors, bool decode_mode_srgb)
+	{
+		const uint32_t total_weights = astc_helpers::get_ise_levels(weight_ise_index);
+
+		for (uint32_t i = 0; i < total_weights; i++)
+		{
+			uint32_t w = basisu::g_ise_weight_lerps[weight_ise_index][1 + i];
+
+			for (uint32_t c = 0; c < 4; c++)
+			{
+				int le = l[c], he = h[c];
+
+				// TODO: Investigate alpha handling here vs. latest spec.
+				// https://raw.githubusercontent.com/KhronosGroup/DataFormat/refs/heads/main/astc.txt
+				// The safest thing to do may be to assume non-sRGB in the encoder. I don't know yet.
+				// How should alpha be handled here for lowest divergence from actual ASTC decoding hardware?
+				if (decode_mode_srgb)
+				{
+					le = (le << 8) | 0x80;
+					he = (he << 8) | 0x80;
+				}
+				else
+				{
+					le = (le << 8) | le;
+					he = (he << 8) | he;
+				}
+
+				uint32_t k = astc_helpers::weight_interpolate(le, he, w);
+
+				// See https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_compression_astc_decode_mode.txt
+				// All channels including alpha >>8.
+ pColors[i][c] = (uint8_t)(k >> 8); + } // c + } // i + + return total_weights; + } + + // Returns 65 colors (NOT just 64 - 0-64 weight levels, so 65). + uint32_t get_colors_raw_weights(const color_rgba& l, const color_rgba& h, color_rgba* pColors, bool decode_mode_srgb) + { + for (uint32_t w = 0; w <= 64; w++) + { + for (uint32_t c = 0; c < 4; c++) + { + int le = l[c], he = h[c]; + + // TODO: Investigate alpha handling here vs. latest spec. + // https://raw.githubusercontent.com/KhronosGroup/DataFormat/refs/heads/main/astc.txt + // The safest thing to do may be to assume non-sRGB in the encoder. I don't know yet. + // How should alpha be handled here for lowest divergence from actual ASTC decoding hardware? + if (decode_mode_srgb) + { + le = (le << 8) | 0x80; + he = (he << 8) | 0x80; + } + else + { + le = (le << 8) | le; + he = (he << 8) | he; + } + + uint32_t k = astc_helpers::weight_interpolate(le, he, w); + + // See https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_compression_astc_decode_mode.txt + // All channels including alpha >>8. 
+ pColors[w][c] = (uint8_t)(k >> 8); + + } // c + } // i + + return ASTC_LDR_MAX_RAW_WEIGHTS; + } + + // Assumes ise 20 (256 levels) + void decode_endpoints_ise20(uint32_t cem_index, const uint8_t* pEndpoint_vals, color_rgba& l, color_rgba& h) + { + assert(astc_helpers::is_cem_ldr(cem_index)); + + int ldr_endpoints[4][2]; + astc_helpers::decode_endpoint(cem_index, ldr_endpoints, pEndpoint_vals); + + for (uint32_t c = 0; c < 4; c++) + { + assert((ldr_endpoints[c][0] >= 0) && (ldr_endpoints[c][0] <= 255)); + assert((ldr_endpoints[c][1] >= 0) && (ldr_endpoints[c][1] <= 255)); + + l[c] = (uint8_t)ldr_endpoints[c][0]; + h[c] = (uint8_t)ldr_endpoints[c][1]; + } + } + + void decode_endpoints(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index, color_rgba& l, color_rgba& h, float* pScale) + { + const uint32_t total_endpoint_vals = astc_helpers::get_num_cem_values(cem_index); + + const auto& endpoint_dequant_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(endpoint_ise_index).m_ISE_to_val; + + uint8_t dequantized_endpoints[astc_helpers::MAX_CEM_ENDPOINT_VALS]; + for (uint32_t i = 0; i < total_endpoint_vals; i++) + dequantized_endpoints[i] = endpoint_dequant_tab[pEndpoint_vals[i]]; + + decode_endpoints_ise20(cem_index, dequantized_endpoints, l, h); + + if ((pScale) && ((cem_index == astc_helpers::CEM_LDR_RGB_BASE_SCALE) || (cem_index == astc_helpers::CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A))) + { + *pScale = (float)dequantized_endpoints[3] * (1.0f / 256.0f); + } + } + + uint32_t get_colors(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index, uint32_t weight_ise_index, color_rgba* pColors, bool decode_mode_srgb) + { + color_rgba l, h; + decode_endpoints(cem_index, pEndpoint_vals, endpoint_ise_index, l, h); + + return get_colors(l, h, weight_ise_index, pColors, decode_mode_srgb); + } + + // Decodes 65 colors + uint32_t get_colors_raw_weights(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index, 
color_rgba* pColors, bool decode_mode_srgb) + { + color_rgba l, h; + decode_endpoints(cem_index, pEndpoint_vals, endpoint_ise_index, l, h); + + return get_colors_raw_weights(l, h, pColors, decode_mode_srgb); + } + +#if 0 + static vec4F calc_incremental_pca_4D(uint32_t num_pixels, const vec4F* pPixels, const vec4F& mean_f) + { + vec4F mean_axis(0.0f); + + for (uint32_t i = 0; i < num_pixels; i++) + { + vec4F orig_color(pPixels[i]); + + vec4F color(orig_color - mean_f); + + vec4F a(color * color[0]); + vec4F b(color * color[1]); + vec4F c(color * color[2]); + vec4F d(color * color[3]); + vec4F n(i ? mean_axis : color); + + n.normalize_in_place(); + + mean_axis[0] += a.dot(n); + mean_axis[1] += b.dot(n); + mean_axis[2] += c.dot(n); + mean_axis[3] += d.dot(n); + } + + if (mean_axis.norm() < 1e-5f) + mean_axis = vec4F(1.0f, 1.0f, 1.0f, 1.0f); + + mean_axis.normalize_in_place(); + + return mean_axis; + } +#endif + + // TODO: Try two-step Lanczos iteration/Rayleigh–Ritz approximation in a 2-dimensional Krylov subspace method vs. power method. 
+ static vec4F calc_pca_4D(uint32_t num_pixels, const vec4F* pPixels, const vec4F& mean_f) + { + float m00 = 0, m01 = 0, m02 = 0, m03 = 0; + float m11 = 0, m12 = 0, m13 = 0; + float m22 = 0, m23 = 0; + float m33 = 0; + + for (size_t i = 0; i < num_pixels; ++i) + { + const vec4F v(pPixels[i] - mean_f); + + m00 += v[0] * v[0]; m01 += v[0] * v[1]; m02 += v[0] * v[2]; m03 += v[0] * v[3]; + m11 += v[1] * v[1]; m12 += v[1] * v[2]; m13 += v[1] * v[3]; + m22 += v[2] * v[2]; m23 += v[2] * v[3]; + m33 += v[3] * v[3]; + } + + // TODO: Seed from channel variances + vec4F v(.6f, .75f, .4f, .75f); + + const uint32_t NUM_POW_ITERS = 6; // must be even + for (uint32_t i = 0; i < NUM_POW_ITERS; ++i) + { + vec4F w( + m00 * v[0] + m01 * v[1] + m02 * v[2] + m03 * v[3], + m01 * v[0] + m11 * v[1] + m12 * v[2] + m13 * v[3], + m02 * v[0] + m12 * v[1] + m22 * v[2] + m23 * v[3], + m03 * v[0] + m13 * v[1] + m23 * v[2] + m33 * v[3] + ); + + if (i & 1) + w.normalize_in_place(); + v = w; + } + + if (v.norm() < 1e-5f) + v = vec4F(.5f, .5f, .5f, .5f); + + return v; + } + + static vec4F calc_pca_3D(uint32_t num_pixels, const vec4F* pPixels, const vec4F& mean_f) + { + float cov[6] = { 0, 0, 0, 0, 0, 0 }; + + for (uint32_t i = 0; i < num_pixels; i++) + { + const vec4F& v = pPixels[i]; + float r = v[0] - mean_f[0]; + float g = v[1] - mean_f[1]; + float b = v[2] - mean_f[2]; + cov[0] += r * r; cov[1] += r * g; cov[2] += r * b; cov[3] += g * g; cov[4] += g * b; cov[5] += b * b; + } + + float xr = .9f, xg = 1.0f, xb = .7f; + for (uint32_t iter = 0; iter < 3; iter++) + { + float r = xr * cov[0] + xg * cov[1] + xb * cov[2]; + float g = xr * cov[1] + xg * cov[3] + xb * cov[4]; + float b = xr * cov[2] + xg * cov[4] + xb * cov[5]; + + float m = maximumf(maximumf(fabsf(r), fabsf(g)), fabsf(b)); + if (m > 1e-10f) + { + m = 1.0f / m; + r *= m; g *= m; b *= m; + } + + xr = r; xg = g; xb = b; + } + + float nrm = xr * xr + xg * xg + xb * xb; + + vec4F axis(0.57735027f, 0.57735027f, 0.57735027f, 0.0f); + if (nrm > 
1e-5f) + { + float inv_nrm = 1.0f / sqrtf(nrm); + xr *= inv_nrm; xg *= inv_nrm; xb *= inv_nrm; + axis.set(xr, xg, xb, 0); + } + + return axis; + } + + void pixel_stats_t::init(uint32_t num_pixels, const color_rgba* pPixels) + { + m_num_pixels = num_pixels; + m_has_alpha = false; + + m_min.set(255, 255, 255, 255); + m_max.set(0, 0, 0, 0); + + m_mean_f.clear(); + + for (uint32_t i = 0; i < m_num_pixels; i++) + { + const color_rgba& px = pPixels[i]; + + m_pixels[i] = px; + + m_pixels_f[i].set((float)px.r * (1.0f / 255.0f), (float)px.g * (1.0f / 255.0f), (float)px.b * (1.0f / 255.0f), (float)px.a * (1.0f / 255.0f)); + + m_mean_f += m_pixels_f[i]; + + m_min.r = basisu::minimum(m_min.r, px.r); + m_min.g = basisu::minimum(m_min.g, px.g); + m_min.b = basisu::minimum(m_min.b, px.b); + m_min.a = basisu::minimum(m_min.a, px.a); + + m_max.r = basisu::maximum(m_max.r, px.r); + m_max.g = basisu::maximum(m_max.g, px.g); + m_max.b = basisu::maximum(m_max.b, px.b); + m_max.a = basisu::maximum(m_max.a, px.a); + } + + m_mean_f *= (1.0f / (float)m_num_pixels); + m_mean_f.clamp(0.0f, 1.0f); + + m_min_f.set(m_min.r * (1.0f / 255.0f), m_min.g * (1.0f / 255.0f), m_min.b * (1.0f / 255.0f), m_min.a * (1.0f / 255.0f)); + m_max_f.set(m_max.r * (1.0f / 255.0f), m_max.g * (1.0f / 255.0f), m_max.b * (1.0f / 255.0f), m_max.a * (1.0f / 255.0f)); + + m_has_alpha = (m_min.a < 255); + + // Mean and zero relative RGB (3D) PCA axes + m_mean_rel_axis3 = calc_pca_3D(m_num_pixels, m_pixels_f, m_mean_f); + m_zero_rel_axis3 = calc_pca_3D(m_num_pixels, m_pixels_f, vec4F(0.0f)); + + // Mean and zero relative RGBA (4D) PCA axes + m_mean_rel_axis4 = calc_pca_4D(m_num_pixels, m_pixels_f, m_mean_f); + + for (uint32_t c = 0; c < 4u; c++) + m_rgba_stats[c].calc_simplified_with_range(m_num_pixels, &m_pixels_f[0][c], 4); + } + + static inline uint32_t square_of_diff(int a, int b) + { + assert((a >= 0) && (a <= 255)); + assert((b >= 0) && (b <= 255)); + + int d = a - b; + return (uint32_t)(d * d); + } + + uint64_t 
eval_solution( + const pixel_stats_t& pixel_stats, + uint32_t total_weights, const color_rgba* pWeight_colors, + uint8_t* pWeight_vals, uint32_t weight_ise_index, + const cem_encode_params& params) + { + BASISU_NOTE_UNUSED(weight_ise_index); + assert((total_weights <= 32) || (total_weights == 65)); + + uint64_t total_err = 0; + + if (params.m_pForced_weight_vals0) + { + for (uint32_t c = 0; c < pixel_stats.m_num_pixels; c++) + { + const color_rgba& px = pixel_stats.m_pixels[c]; + + const uint32_t w = params.m_pForced_weight_vals0[c]; + assert(w < total_weights); + + uint32_t err = + params.m_comp_weights[0] * square_of_diff(px.r, pWeight_colors[w].r) + + params.m_comp_weights[1] * square_of_diff(px.g, pWeight_colors[w].g) + + params.m_comp_weights[2] * square_of_diff(px.b, pWeight_colors[w].b) + + params.m_comp_weights[3] * square_of_diff(px.a, pWeight_colors[w].a); + + total_err += err; + + pWeight_vals[c] = (uint8_t)w; + } + } + else + { + for (uint32_t c = 0; c < pixel_stats.m_num_pixels; c++) + { + const color_rgba& px = pixel_stats.m_pixels[c]; + + uint32_t best_err = UINT32_MAX; + uint32_t best_sel = 0; + + for (uint32_t i = 0; i < total_weights; i++) + { + uint32_t err = + params.m_comp_weights[0] * square_of_diff(px.r, pWeight_colors[i].r) + + params.m_comp_weights[1] * square_of_diff(px.g, pWeight_colors[i].g) + + params.m_comp_weights[2] * square_of_diff(px.b, pWeight_colors[i].b) + + params.m_comp_weights[3] * square_of_diff(px.a, pWeight_colors[i].a); + + if (err < best_err) + { + best_err = err; + best_sel = i; + } + } + + total_err += best_err; + pWeight_vals[c] = (uint8_t)best_sel; + } + } // if (params.m_pForced_weight_vals0) + + return total_err; + } + + // Evaluates against raw weights [0,64], or to ISE quantized weights, depending on weight_ise_index. 
+ uint64_t eval_solution( + const pixel_stats_t& pixel_stats, + uint32_t cem_index, + const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index, + uint8_t* pWeight_vals, uint32_t weight_ise_index, + const cem_encode_params& params) + { + assert((weight_ise_index <= astc_helpers::BISE_32_LEVELS) || (weight_ise_index == astc_helpers::BISE_64_LEVELS)); + + color_rgba weight_colors[ASTC_LDR_MAX_RAW_WEIGHTS]; + uint32_t num_weights; + + assert((weight_ise_index <= astc_helpers::BISE_32_LEVELS) || (weight_ise_index == astc_helpers::BISE_64_LEVELS)); + + // 64 levels isn't valid ASTC. It's used for raw weight mode. + if (weight_ise_index == astc_helpers::BISE_64_LEVELS) + num_weights = get_colors_raw_weights(cem_index, pEndpoint_vals, endpoint_ise_index, weight_colors, params.m_decode_mode_srgb); + else + num_weights = get_colors(cem_index, pEndpoint_vals, endpoint_ise_index, weight_ise_index, weight_colors, params.m_decode_mode_srgb); + + assert(num_weights <= std::size(weight_colors)); + + uint64_t trial_err = eval_solution( + pixel_stats, + num_weights, weight_colors, + pWeight_vals, weight_ise_index, + params); + + return trial_err; + } + + // Evaluates against raw weights [0,64], or to ISE quantized weights, depending on weight_ise_index. 
+ uint64_t eval_solution_dp( + uint32_t ccs_index, + const pixel_stats_t& pixel_stats, + uint32_t total_weights, const color_rgba* pWeight_colors, + uint8_t* pWeight_vals0, uint8_t* pWeight_vals1, uint32_t weight_ise_index, + const cem_encode_params& params) + { + BASISU_NOTE_UNUSED(weight_ise_index); + + assert(ccs_index <= 3); + assert((total_weights <= 32) || (total_weights == 65)); + + uint64_t total_err = 0; + + if (params.m_pForced_weight_vals0) + { + for (uint32_t c = 0; c < pixel_stats.m_num_pixels; c++) + { + const color_rgba& px = pixel_stats.m_pixels[c]; + + const uint32_t w = params.m_pForced_weight_vals0[c]; + assert(w < total_weights); + + uint32_t err = 0; + for (uint32_t o = 0; o < 4; o++) + if (o != ccs_index) + err += params.m_comp_weights[o] * square_of_diff(px[o], pWeight_colors[w][o]); + + total_err += err; + + pWeight_vals0[c] = (uint8_t)w; + } + } + else + { + for (uint32_t c = 0; c < pixel_stats.m_num_pixels; c++) + { + const color_rgba& px = pixel_stats.m_pixels[c]; + + uint32_t best_err = UINT32_MAX; + uint32_t best_sel = 0; + + for (uint32_t i = 0; i < total_weights; i++) + { + uint32_t err = 0; + for (uint32_t o = 0; o < 4; o++) + if (o != ccs_index) + err += params.m_comp_weights[o] * square_of_diff(px[o], pWeight_colors[i][o]); + + if (err < best_err) + { + best_err = err; + best_sel = i; + } + } + + total_err += best_err; + pWeight_vals0[c] = (uint8_t)best_sel; + } + } + + if (params.m_pForced_weight_vals1) + { + for (uint32_t c = 0; c < pixel_stats.m_num_pixels; c++) + { + const color_rgba& px = pixel_stats.m_pixels[c]; + + const uint32_t w = params.m_pForced_weight_vals1[c]; + assert(w < total_weights); + + uint32_t err = square_of_diff(px[ccs_index], pWeight_colors[w][ccs_index]); + + total_err += err * params.m_comp_weights[ccs_index]; + pWeight_vals1[c] = (uint8_t)w; + } + } + else + { + for (uint32_t c = 0; c < pixel_stats.m_num_pixels; c++) + { + const color_rgba& px = pixel_stats.m_pixels[c]; + + uint32_t best_err = 
UINT32_MAX; + uint32_t best_sel = 0; + + for (uint32_t i = 0; i < total_weights; i++) + { + uint32_t err = square_of_diff(px[ccs_index], pWeight_colors[i][ccs_index]); + + if (err < best_err) + { + best_err = err; + best_sel = i; + } + } + + total_err += best_err * params.m_comp_weights[ccs_index]; + pWeight_vals1[c] = (uint8_t)best_sel; + } + } + + return total_err; + } + + // Evaluates against raw weights [0,64], or to ISE quantized weights, depending on weight_ise_index. + uint64_t eval_solution_dp( + const pixel_stats_t& pixel_stats, + uint32_t cem_index, uint32_t ccs_index, + const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index, + uint8_t* pWeight_vals0, uint8_t* pWeight_vals1, uint32_t weight_ise_index, + const cem_encode_params& params) + { + assert((weight_ise_index <= astc_helpers::BISE_32_LEVELS) || (weight_ise_index == astc_helpers::BISE_64_LEVELS)); + + color_rgba weight_colors[ASTC_LDR_MAX_RAW_WEIGHTS]; + uint32_t num_weights; + + // 64 levels isn't valid ASTC. It's used for raw weight mode. 
+ if (weight_ise_index == astc_helpers::BISE_64_LEVELS) + num_weights = get_colors_raw_weights(cem_index, pEndpoint_vals, endpoint_ise_index, weight_colors, params.m_decode_mode_srgb); + else + num_weights = get_colors(cem_index, pEndpoint_vals, endpoint_ise_index, weight_ise_index, weight_colors, params.m_decode_mode_srgb); + + uint64_t trial_err = eval_solution_dp( + ccs_index, + pixel_stats, + num_weights, weight_colors, + pWeight_vals0, pWeight_vals1, weight_ise_index, + params); + + return trial_err; + } + + // Direct - refine ISE quantized endpoints from float endpoints + static void refine_cem8_or_12_endpoints(uint32_t cem_index, uint32_t endpoint_ise_range, uint8_t* pTrial_endpoint_vals, const vec4F& low_color_f, const vec4F& high_color_f, bool endpoints_are_swapped) + { + assert((cem_index == astc_helpers::CEM_LDR_RGB_DIRECT) || (cem_index == astc_helpers::CEM_LDR_RGBA_DIRECT)); + + if (endpoint_ise_range == astc_helpers::BISE_256_LEVELS) + return; + + const uint32_t total_comps = (cem_index == astc_helpers::CEM_LDR_RGBA_DIRECT) ? 
4 : 3; + + assert((cem_index == astc_helpers::CEM_LDR_RGB_DIRECT) || (cem_index == astc_helpers::CEM_LDR_RGBA_DIRECT)); + assert((endpoint_ise_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (endpoint_ise_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE)); + + const uint32_t total_endpoint_vals = astc_helpers::get_num_cem_values(cem_index); + const uint32_t num_endpoint_ise_levels = astc_helpers::get_ise_levels(endpoint_ise_range); + + const auto& endpoint_dequant_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(endpoint_ise_range).m_ISE_to_val; + + const auto& ISE_to_rank = astc_helpers::g_dequant_tables.get_endpoint_tab(endpoint_ise_range).m_ISE_to_rank; + const auto& rank_to_ISE = astc_helpers::g_dequant_tables.get_endpoint_tab(endpoint_ise_range).m_rank_to_ISE; + + const bool orig_used_blue_contraction = astc_helpers::cem8_or_12_used_blue_contraction(cem_index, pTrial_endpoint_vals, endpoint_ise_range); + + uint32_t first_comp = 0; + + uint8_t refined_endpoint_vals[astc_helpers::NUM_MODE12_ENDPOINTS]; + memcpy(refined_endpoint_vals, pTrial_endpoint_vals, total_endpoint_vals); + + if (orig_used_blue_contraction) + { + // TODO expensive: 2*3*9 = 54 tries + for (uint32_t e = 0; e < 2; e++) + { + float best_err = BIG_FLOAT_VAL; + uint8_t best_refined_endpoint_vals[3] = { 0, 0, 0 }; + + for (int b_delta = -1; b_delta <= 1; b_delta++) + { + for (int k = 0; k < 9; k++) + { + const int r_delta = (k % 3) - 1; + const int g_delta = (k / 3) - 1; + + const int comp_deltas[3] = { r_delta, g_delta, b_delta }; + + uint8_t trial_refined_endpoint_vals[3] = { 0, 0, 0 }; + + for (uint32_t c = 0; c < 3; c++) + { + const int enc_val = pTrial_endpoint_vals[c * 2 + e]; + + const int orig_rank = ISE_to_rank[enc_val]; + + const int v_delta = comp_deltas[c]; + const int new_rank = basisu::clamp(orig_rank + v_delta, 0, (int)num_endpoint_ise_levels - 1); + const int new_enc_ise_val = rank_to_ISE[new_rank]; + + trial_refined_endpoint_vals[c] = (uint8_t)new_enc_ise_val; + 
+ } // c + + color_rgba trial_refined_endpoints_dequant(blue_contract_dec(endpoint_dequant_tab[trial_refined_endpoint_vals[0]], endpoint_dequant_tab[trial_refined_endpoint_vals[1]], endpoint_dequant_tab[trial_refined_endpoint_vals[2]], 255)); + + vec3F trial_refined_endpoints_dequant_f(0.0f); + for (uint32_t c = 0; c < 3; c++) + trial_refined_endpoints_dequant_f[c] = (float)trial_refined_endpoints_dequant[c] * (1.0f / 255.0f); + + vec3F desired_endpoint; + if (endpoints_are_swapped) + desired_endpoint = (e == 0) ? vec3F(high_color_f) : vec3F(low_color_f); + else + desired_endpoint = (e == 0) ? vec3F(low_color_f) : vec3F(high_color_f); + + float trial_err = desired_endpoint.squared_distance(trial_refined_endpoints_dequant_f); + if (trial_err < best_err) + { + best_err = trial_err; + memcpy(best_refined_endpoint_vals, trial_refined_endpoint_vals, 3); + } + + } // k + + } // b_delta + + for (uint32_t c = 0; c < 3; c++) + { + refined_endpoint_vals[c * 2 + e] = best_refined_endpoint_vals[c]; + } // c + + } // e + + // just refine A now (if it exists) + first_comp = 3; + } + + if (first_comp < total_comps) + { + for (uint32_t e = 0; e < 2; e++) + { + for (uint32_t c = first_comp; c < total_comps; c++) + { + const uint32_t idx = c * 2 + e; + const int enc_val = pTrial_endpoint_vals[idx]; + + const int orig_rank = ISE_to_rank[enc_val]; + + int best_rank = orig_rank; + float best_err = BIG_FLOAT_VAL; + for (int v_delta = -1; v_delta <= 1; v_delta++) + { + int new_rank = basisu::clamp(orig_rank + v_delta, 0, (int)num_endpoint_ise_levels - 1); + int new_enc_ise_val = rank_to_ISE[new_rank]; + + float dequant_val = (float)endpoint_dequant_tab[new_enc_ise_val] * (1.0f / 255.0f); + + float orig_val; + if (endpoints_are_swapped) + orig_val = (e == 0) ? high_color_f[c] : low_color_f[c]; + else + orig_val = (e == 0) ? 
low_color_f[c] : high_color_f[c]; + + float err = fabsf(dequant_val - orig_val); + if (err < best_err) + { + best_err = err; + best_rank = new_rank; + } + } + + refined_endpoint_vals[idx] = (uint8_t)rank_to_ISE[best_rank]; + + } // c + } // e + } + + bool refined_used_blue_contraction = astc_helpers::cem8_or_12_used_blue_contraction(cem_index, refined_endpoint_vals, endpoint_ise_range); + if (refined_used_blue_contraction == orig_used_blue_contraction) + { + memcpy(pTrial_endpoint_vals, refined_endpoint_vals, total_endpoint_vals); + } + } + + // Direct L/LA, single plane + static bool try_cem0_or_4(uint32_t cem_index, + const pixel_stats_t& pixel_stats, const cem_encode_params& enc_params, + uint32_t endpoint_ise_range, uint32_t weight_ise_range, + float lum_l, float lum_h, float a_l, float a_h, + uint8_t* pTrial_endpoint_vals, uint8_t* pTrial_weight_vals, uint64_t& trial_blk_error) + { + assert(g_initialized); + assert((cem_index == astc_helpers::CEM_LDR_LUM_DIRECT) || (cem_index == astc_helpers::CEM_LDR_LUM_ALPHA_DIRECT)); + + const bool cem_has_alpha = (cem_index == astc_helpers::CEM_LDR_LUM_ALPHA_DIRECT); + + const uint32_t num_endpoint_vals = astc_helpers::get_num_cem_values(cem_index); + + uint8_t trial_endpoint_vals[astc_helpers::NUM_MODE4_ENDPOINTS] = { 0 }; + uint8_t trial_weight_vals[ASTC_LDR_MAX_BLOCK_PIXELS] = { 0 }; + + encode_cem0_4(cem_index, lum_l, lum_h, a_l, a_h, endpoint_ise_range, trial_endpoint_vals); + + uint64_t trial_err = eval_solution( + pixel_stats, + cem_index, trial_endpoint_vals, endpoint_ise_range, + trial_weight_vals, weight_ise_range, + enc_params); + + bool improved_flag = false; + if (trial_err < trial_blk_error) + { + trial_blk_error = trial_err; + memcpy(pTrial_endpoint_vals, trial_endpoint_vals, astc_helpers::get_num_cem_values(cem_index)); + memcpy(pTrial_weight_vals, trial_weight_vals, pixel_stats.m_num_pixels); + improved_flag = true; + } + + bool any_degen = false; + if ((trial_endpoint_vals[0] == trial_endpoint_vals[1]) && 
(lum_l != lum_h)) + any_degen = true; + + if (cem_has_alpha) + { + if ((trial_endpoint_vals[2] == trial_endpoint_vals[3]) && (a_l != a_h)) + any_degen = true; + } + + if (any_degen) + { + const int l_delta = (lum_l < lum_h) ? -1 : 1; + const int a_delta = (a_l < a_h) ? -1 : 1; + + for (uint32_t t = 1; t <= 3; t++) + { + uint8_t fixed_endpoint_vals[astc_helpers::NUM_MODE4_ENDPOINTS]; + memcpy(fixed_endpoint_vals, trial_endpoint_vals, num_endpoint_vals); + + if (t & 1) + { + if ((trial_endpoint_vals[0] == trial_endpoint_vals[1]) && (lum_l != lum_h)) + fixed_endpoint_vals[0] = (uint8_t)astc_helpers::apply_delta_to_bise_endpoint_val(endpoint_ise_range, trial_endpoint_vals[0], l_delta); + + if (cem_has_alpha) + { + if ((trial_endpoint_vals[2] == trial_endpoint_vals[3]) && (a_l != a_h)) + fixed_endpoint_vals[2] = (uint8_t)astc_helpers::apply_delta_to_bise_endpoint_val(endpoint_ise_range, trial_endpoint_vals[2], a_delta); + } + } + + if (t & 2) + { + if ((trial_endpoint_vals[0] == trial_endpoint_vals[1]) && (lum_l != lum_h)) + fixed_endpoint_vals[1] = (uint8_t)astc_helpers::apply_delta_to_bise_endpoint_val(endpoint_ise_range, trial_endpoint_vals[1], -l_delta); + + if (cem_has_alpha) + { + if ((trial_endpoint_vals[2] == trial_endpoint_vals[3]) && (a_l != a_h)) + fixed_endpoint_vals[3] = (uint8_t)astc_helpers::apply_delta_to_bise_endpoint_val(endpoint_ise_range, trial_endpoint_vals[3], -a_delta); + } + } + + trial_err = eval_solution( + pixel_stats, + cem_index, fixed_endpoint_vals, endpoint_ise_range, + trial_weight_vals, weight_ise_range, + enc_params); + + if (trial_err < trial_blk_error) + { + trial_blk_error = trial_err; + memcpy(pTrial_endpoint_vals, fixed_endpoint_vals, astc_helpers::get_num_cem_values(cem_index)); + memcpy(pTrial_weight_vals, trial_weight_vals, pixel_stats.m_num_pixels); + improved_flag = true; + } + + } // t + } + + return improved_flag; + } + + static bool try_cem4_dp_a(uint32_t cem_index, + const pixel_stats_t& pixel_stats, const 
cem_encode_params& enc_params, + uint32_t endpoint_ise_range, uint32_t weight_ise_range, + float lum_l, float lum_h, float a_l, float a_h, + uint8_t* pTrial_endpoint_vals, uint8_t* pTrial_weight_vals0, uint8_t* pTrial_weight_vals1, uint64_t& trial_blk_error) + { + assert(g_initialized); + assert(cem_index == astc_helpers::CEM_LDR_LUM_ALPHA_DIRECT); + + const bool cem_has_alpha = (cem_index == astc_helpers::CEM_LDR_LUM_ALPHA_DIRECT); + + const uint32_t num_endpoint_vals = astc_helpers::get_num_cem_values(cem_index); + + uint8_t trial_endpoint_vals[astc_helpers::NUM_MODE4_ENDPOINTS] = { 0 }; + uint8_t trial_weight_vals0[ASTC_LDR_MAX_BLOCK_PIXELS] = { 0 }; + uint8_t trial_weight_vals1[ASTC_LDR_MAX_BLOCK_PIXELS] = { 0 }; + + encode_cem0_4(cem_index, lum_l, lum_h, a_l, a_h, endpoint_ise_range, trial_endpoint_vals); + + uint64_t trial_err = eval_solution_dp( + pixel_stats, cem_index, 3, + trial_endpoint_vals, endpoint_ise_range, + trial_weight_vals0, trial_weight_vals1, weight_ise_range, + enc_params); + + bool improved_flag = false; + if (trial_err < trial_blk_error) + { + trial_blk_error = trial_err; + memcpy(pTrial_endpoint_vals, trial_endpoint_vals, astc_helpers::get_num_cem_values(cem_index)); + memcpy(pTrial_weight_vals0, trial_weight_vals0, pixel_stats.m_num_pixels); + memcpy(pTrial_weight_vals1, trial_weight_vals1, pixel_stats.m_num_pixels); + improved_flag = true; + } + + bool any_degen = false; + if ((trial_endpoint_vals[0] == trial_endpoint_vals[1]) && (lum_l != lum_h)) + any_degen = true; + + if (cem_has_alpha) + { + if ((trial_endpoint_vals[2] == trial_endpoint_vals[3]) && (a_l != a_h)) + any_degen = true; + } + + if (any_degen) + { + const int l_delta = (lum_l < lum_h) ? -1 : 1; + const int a_delta = (a_l < a_h) ? 
-1 : 1; + + for (uint32_t t = 1; t <= 3; t++) + { + uint8_t fixed_endpoint_vals[astc_helpers::NUM_MODE4_ENDPOINTS]; + memcpy(fixed_endpoint_vals, trial_endpoint_vals, num_endpoint_vals); + + if (t & 1) + { + if ((trial_endpoint_vals[0] == trial_endpoint_vals[1]) && (lum_l != lum_h)) + fixed_endpoint_vals[0] = (uint8_t)astc_helpers::apply_delta_to_bise_endpoint_val(endpoint_ise_range, trial_endpoint_vals[0], l_delta); + + if (cem_has_alpha) + { + if ((trial_endpoint_vals[2] == trial_endpoint_vals[3]) && (a_l != a_h)) + fixed_endpoint_vals[2] = (uint8_t)astc_helpers::apply_delta_to_bise_endpoint_val(endpoint_ise_range, trial_endpoint_vals[2], a_delta); + } + } + + if (t & 2) + { + if ((trial_endpoint_vals[0] == trial_endpoint_vals[1]) && (lum_l != lum_h)) + fixed_endpoint_vals[1] = (uint8_t)astc_helpers::apply_delta_to_bise_endpoint_val(endpoint_ise_range, trial_endpoint_vals[1], -l_delta); + + if (cem_has_alpha) + { + if ((trial_endpoint_vals[2] == trial_endpoint_vals[3]) && (a_l != a_h)) + fixed_endpoint_vals[3] = (uint8_t)astc_helpers::apply_delta_to_bise_endpoint_val(endpoint_ise_range, trial_endpoint_vals[3], -a_delta); + } + } + + trial_err = eval_solution_dp( + pixel_stats, cem_index, 3, + fixed_endpoint_vals, endpoint_ise_range, + trial_weight_vals0, trial_weight_vals1, weight_ise_range, + enc_params); + + if (trial_err < trial_blk_error) + { + trial_blk_error = trial_err; + memcpy(pTrial_endpoint_vals, fixed_endpoint_vals, astc_helpers::get_num_cem_values(cem_index)); + memcpy(pTrial_weight_vals0, trial_weight_vals0, pixel_stats.m_num_pixels); + memcpy(pTrial_weight_vals1, trial_weight_vals1, pixel_stats.m_num_pixels); + improved_flag = true; + } + + } // t + } + + return improved_flag; + } + + // Direct RGB/RGBA + // Cannot fail, but may have to fall back to non-blue-contracted + // Returns false if trial solution not improved + static bool try_cem8_12( + uint32_t cem_index, + const pixel_stats_t& pixel_stats, const cem_encode_params& enc_params, + uint32_t 
endpoint_ise_range, uint32_t weight_ise_range, + const vec4F& low_color_f, const vec4F& high_color_f, + uint8_t* pTrial_endpoint_vals, uint8_t* pTrial_weight_vals, uint64_t& trial_blk_error, bool& trial_used_blue_contraction, + bool try_blue_contract, bool& tried_used_blue_contraction) + { + assert(g_initialized); + assert((cem_index == astc_helpers::CEM_LDR_RGB_DIRECT) || (cem_index == astc_helpers::CEM_LDR_RGBA_DIRECT)); + + const uint32_t num_endpoint_vals = astc_helpers::get_num_cem_values(cem_index); + const uint32_t num_comps = (cem_index == astc_helpers::CEM_LDR_RGB_DIRECT) ? 3 : 4; + + color_rgba low_color, high_color; + for (uint32_t c = 0; c < 4; c++) + { + low_color[c] = (uint8_t)basisu::clamp((int)std::round(low_color_f[c] * 255.0f), 0, 255); + high_color[c] = (uint8_t)basisu::clamp((int)std::round(high_color_f[c] * 255.0f), 0, 255); + } + + uint8_t trial_endpoint_vals[astc_helpers::NUM_MODE12_ENDPOINTS] = { 0 }; + uint8_t trial_weight_vals[ASTC_LDR_MAX_BLOCK_PIXELS] = { 0 }; + + // Cannot fail, but may have to fall back to non-blue-contracted + cem_encode_ldr_rgb_or_rgba_direct_result res = cem_encode_ldr_rgb_or_rgba_direct(cem_index, endpoint_ise_range, low_color, high_color, trial_endpoint_vals, try_blue_contract); + + // Let caller know if we tried blue contraction + tried_used_blue_contraction = res.m_is_blue_contracted; + + if (endpoint_ise_range < astc_helpers::BISE_256_LEVELS) + { + refine_cem8_or_12_endpoints(cem_index, endpoint_ise_range, trial_endpoint_vals, low_color_f, high_color_f, res.m_endpoints_are_swapped); + } + + uint64_t trial_err = eval_solution( + pixel_stats, cem_index, + trial_endpoint_vals, endpoint_ise_range, + trial_weight_vals, weight_ise_range, + enc_params); + + bool improved_flag = false; + if (trial_err < trial_blk_error) + { + trial_blk_error = trial_err; + memcpy(pTrial_endpoint_vals, trial_endpoint_vals, astc_helpers::get_num_cem_values(cem_index)); + memcpy(pTrial_weight_vals, trial_weight_vals, 
pixel_stats.m_num_pixels); + trial_used_blue_contraction = res.m_is_blue_contracted; + improved_flag = true; + } + + if (res.m_any_degen) + { + color_rgba dec_l(0), dec_h(0); + decode_endpoints(cem_index, trial_endpoint_vals, endpoint_ise_range, dec_l, dec_h); + + uint32_t s0 = dec_l.r + dec_l.g + dec_l.b + dec_l.a; + uint32_t s1 = dec_h.r + dec_h.g + dec_h.b + dec_h.a; + if (astc_helpers::cem8_or_12_used_blue_contraction(cem_index, trial_endpoint_vals, endpoint_ise_range)) + std::swap(s0, s1); + + for (uint32_t t = 1; t <= 3; t++) + { + uint8_t fixed_endpoint_vals[astc_helpers::NUM_MODE12_ENDPOINTS]; + memcpy(fixed_endpoint_vals, trial_endpoint_vals, num_endpoint_vals); + + if (t & 1) + { + for (uint32_t c = 0; c < num_comps; c++) + { + uint32_t l_idx = c * 2 + 0; + uint32_t h_idx = c * 2 + 1; + + if ((trial_endpoint_vals[l_idx] == trial_endpoint_vals[h_idx]) && (low_color[c] != high_color[c])) + { + int delta = (s0 <= s1) ? -1 : 1; + + fixed_endpoint_vals[l_idx] = (uint8_t)astc_helpers::apply_delta_to_bise_endpoint_val(endpoint_ise_range, trial_endpoint_vals[l_idx], delta); + } + } + } + + if (t & 2) + { + for (uint32_t c = 0; c < num_comps; c++) + { + uint32_t l_idx = c * 2 + 0; + uint32_t h_idx = c * 2 + 1; + + if ((trial_endpoint_vals[l_idx] == trial_endpoint_vals[h_idx]) && (low_color[c] != high_color[c])) + { + int delta = (s0 <= s1) ? 
1 : -1; + + fixed_endpoint_vals[h_idx] = (uint8_t)astc_helpers::apply_delta_to_bise_endpoint_val(endpoint_ise_range, trial_endpoint_vals[h_idx], delta); + } + } + } + + bool fixed_used_blue_contraction = astc_helpers::cem8_or_12_used_blue_contraction(cem_index, fixed_endpoint_vals, endpoint_ise_range); + if (fixed_used_blue_contraction != res.m_is_blue_contracted) + continue; + + trial_err = eval_solution( + pixel_stats, + cem_index, fixed_endpoint_vals, endpoint_ise_range, + trial_weight_vals, weight_ise_range, + enc_params); + + if (trial_err < trial_blk_error) + { + trial_blk_error = trial_err; + memcpy(pTrial_endpoint_vals, fixed_endpoint_vals, astc_helpers::get_num_cem_values(cem_index)); + memcpy(pTrial_weight_vals, trial_weight_vals, pixel_stats.m_num_pixels); + trial_used_blue_contraction = res.m_is_blue_contracted; + improved_flag = true; + } + + } // t + + } // if (res.m_any_degen) + + return improved_flag; + } + + static bool try_cem8_12_dp( + uint32_t cem_index, uint32_t ccs_index, + const pixel_stats_t& pixel_stats, const cem_encode_params& enc_params, + uint32_t endpoint_ise_range, uint32_t weight_ise_range, + const vec4F& low_color_f, const vec4F& high_color_f, + uint8_t* pTrial_endpoint_vals, uint8_t* pTrial_weight_vals0, uint8_t* pTrial_weight_vals1, uint64_t& trial_blk_error, bool& trial_used_blue_contraction, + bool try_blue_contract, bool& tried_used_blue_contraction) + { + assert(g_initialized); + assert((cem_index == astc_helpers::CEM_LDR_RGB_DIRECT) || (cem_index == astc_helpers::CEM_LDR_RGBA_DIRECT)); + + bool improved_flag = false; + + const uint32_t num_endpoint_vals = astc_helpers::get_num_cem_values(cem_index); + const uint32_t num_comps = (cem_index == astc_helpers::CEM_LDR_RGB_DIRECT) ? 
3 : 4; + + color_rgba low_color, high_color; + for (uint32_t c = 0; c < 4; c++) + { + low_color[c] = (uint8_t)basisu::clamp((int)std::round(low_color_f[c] * 255.0f), 0, 255); + high_color[c] = (uint8_t)basisu::clamp((int)std::round(high_color_f[c] * 255.0f), 0, 255); + } + + uint8_t trial_endpoint_vals[astc_helpers::NUM_MODE12_ENDPOINTS] = { 0 }; + uint8_t trial_weight_vals0[ASTC_LDR_MAX_BLOCK_PIXELS] = { 0 }; + uint8_t trial_weight_vals1[ASTC_LDR_MAX_BLOCK_PIXELS] = { 0 }; + + // Cannot fail, but may have to fall back to non-blue-contracted + cem_encode_ldr_rgb_or_rgba_direct_result res = cem_encode_ldr_rgb_or_rgba_direct(cem_index, endpoint_ise_range, low_color, high_color, trial_endpoint_vals, try_blue_contract); + + // Let caller know if we tried blue contraction + tried_used_blue_contraction = res.m_is_blue_contracted; + + if (endpoint_ise_range < astc_helpers::BISE_256_LEVELS) + { + refine_cem8_or_12_endpoints(cem_index, endpoint_ise_range, trial_endpoint_vals, low_color_f, high_color_f, res.m_endpoints_are_swapped); + } + + uint64_t trial_err = eval_solution_dp(pixel_stats, cem_index, ccs_index, trial_endpoint_vals, endpoint_ise_range, trial_weight_vals0, trial_weight_vals1, weight_ise_range, enc_params); + + if (trial_err < trial_blk_error) + { + trial_blk_error = trial_err; + memcpy(pTrial_endpoint_vals, trial_endpoint_vals, astc_helpers::get_num_cem_values(cem_index)); + memcpy(pTrial_weight_vals0, trial_weight_vals0, pixel_stats.m_num_pixels); + memcpy(pTrial_weight_vals1, trial_weight_vals1, pixel_stats.m_num_pixels); + trial_used_blue_contraction = res.m_is_blue_contracted; + improved_flag = true; + } + + if (res.m_any_degen) + { + color_rgba dec_l(0), dec_h(0); + decode_endpoints(cem_index, trial_endpoint_vals, endpoint_ise_range, dec_l, dec_h); + + uint32_t s0 = dec_l.r + dec_l.g + dec_l.b + dec_l.a; + uint32_t s1 = dec_h.r + dec_h.g + dec_h.b + dec_h.a; + if (astc_helpers::cem8_or_12_used_blue_contraction(cem_index, trial_endpoint_vals, 
endpoint_ise_range))
+				std::swap(s0, s1);
+
+			// Up to three fixup candidates: bit 0 of t nudges the low endpoint values, bit 1 nudges the high endpoint values.
+			for (uint32_t t = 1; t <= 3; t++)
+			{
+				uint8_t fixed_endpoint_vals[astc_helpers::NUM_MODE12_ENDPOINTS];
+				memcpy(fixed_endpoint_vals, trial_endpoint_vals, num_endpoint_vals);
+
+				if (t & 1)
+				{
+					for (uint32_t c = 0; c < num_comps; c++)
+					{
+						uint32_t l_idx = c * 2 + 0;
+						uint32_t h_idx = c * 2 + 1;
+
+						// Only touch components whose quantized endpoint values collapsed to the same value even though the source colors differ.
+						if ((trial_endpoint_vals[l_idx] == trial_endpoint_vals[h_idx]) && (low_color[c] != high_color[c]))
+						{
+							int delta = (s0 <= s1) ? -1 : 1;
+
+							fixed_endpoint_vals[l_idx] = (uint8_t)astc_helpers::apply_delta_to_bise_endpoint_val(endpoint_ise_range, trial_endpoint_vals[l_idx], delta);
+						}
+					}
+				}
+
+				if (t & 2)
+				{
+					for (uint32_t c = 0; c < num_comps; c++)
+					{
+						uint32_t l_idx = c * 2 + 0;
+						uint32_t h_idx = c * 2 + 1;
+
+						// Same collapse test as above, but the high value is moved in the opposite direction.
+						if ((trial_endpoint_vals[l_idx] == trial_endpoint_vals[h_idx]) && (low_color[c] != high_color[c]))
+						{
+							int delta = (s0 <= s1) ? 1 : -1;
+
+							fixed_endpoint_vals[h_idx] = (uint8_t)astc_helpers::apply_delta_to_bise_endpoint_val(endpoint_ise_range, trial_endpoint_vals[h_idx], delta);
+						}
+					}
+				}
+
+				// Reject candidates whose blue contraction state no longer matches the state of the original encoding.
+				bool fixed_used_blue_contraction = astc_helpers::cem8_or_12_used_blue_contraction(cem_index, fixed_endpoint_vals, endpoint_ise_range);
+				if (fixed_used_blue_contraction != res.m_is_blue_contracted)
+					continue;
+
+				trial_err = eval_solution_dp(pixel_stats, cem_index, ccs_index, fixed_endpoint_vals, endpoint_ise_range, trial_weight_vals0, trial_weight_vals1, weight_ise_range, enc_params);
+
+				if (trial_err < trial_blk_error)
+				{
+					trial_blk_error = trial_err;
+					memcpy(pTrial_endpoint_vals, fixed_endpoint_vals, astc_helpers::get_num_cem_values(cem_index));
+					memcpy(pTrial_weight_vals0, trial_weight_vals0, pixel_stats.m_num_pixels);
+					memcpy(pTrial_weight_vals1, trial_weight_vals1, pixel_stats.m_num_pixels);
+					// Keep the reported blue contraction flag in sync with the endpoints just written out,
+					// exactly as the single plane try_cem8_12() does. Without this the flag could still hold
+					// a value left over from an earlier, different solution.
+					trial_used_blue_contraction = res.m_is_blue_contracted;
+					improved_flag = true;
+				}
+
+			} // t
+
+		} // if (res.m_any_degen)
+
+		return improved_flag;
+	}
+
+	// base+offset rgb/rgba, single or dual plane
+	static bool try_cem9_13_sp_or_dp(
+		uint32_t cem_index, 
int ccs_index, + const pixel_stats_t& pixel_stats, const cem_encode_params& enc_params, + uint32_t endpoint_ise_range, uint32_t weight_ise_range, + const vec4F& low_color_f, const vec4F& high_color_f, + uint8_t* pTrial_endpoint_vals, uint8_t* pTrial_weight_vals0, uint8_t* pTrial_weight_vals1, uint64_t& trial_blk_error, bool& trial_used_blue_contraction, + bool try_blue_contract, bool& tried_used_blue_contraction, bool &tried_base_ofs_clamped) + { + assert(g_initialized); + assert((cem_index == astc_helpers::CEM_LDR_RGB_BASE_PLUS_OFFSET) || (cem_index == astc_helpers::CEM_LDR_RGBA_BASE_PLUS_OFFSET)); + assert((ccs_index >= -1) && (ccs_index <= 3)); + assert((pixel_stats.m_num_pixels) && (pixel_stats.m_num_pixels <= ASTC_LDR_MAX_BLOCK_PIXELS)); + assert((endpoint_ise_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (endpoint_ise_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE)); + assert(((weight_ise_range >= astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE) && (weight_ise_range <= astc_helpers::LAST_VALID_WEIGHT_ISE_RANGE)) || (weight_ise_range == astc_helpers::BISE_64_LEVELS)); + + assert(pTrial_weight_vals0); + assert((ccs_index == -1) || (pTrial_weight_vals1)); + + //const uint32_t num_endpoint_vals = astc_helpers::get_num_cem_values(cem_index); + const uint32_t num_comps = (cem_index == astc_helpers::CEM_LDR_RGB_BASE_PLUS_OFFSET) ? 
3 : 4; + + color_rgba low_color, high_color; + for (uint32_t c = 0; c < 4; c++) + { + low_color[c] = (uint8_t)basisu::clamp((int)std::round(low_color_f[c] * 255.0f), 0, 255); + high_color[c] = (uint8_t)basisu::clamp((int)std::round(high_color_f[c] * 255.0f), 0, 255); + } + + uint8_t trial_endpoint_vals[astc_helpers::NUM_MODE13_ENDPOINTS] = { 0 }; + uint8_t trial_weight_vals0[ASTC_LDR_MAX_BLOCK_PIXELS] = { 0 }; + uint8_t trial_weight_vals1[ASTC_LDR_MAX_BLOCK_PIXELS] = { 0 }; + + rgb_base_offset_res res = cem_encode_ldr_rgb_or_rgba_base_offset(cem_index, endpoint_ise_range, low_color, high_color, trial_endpoint_vals, try_blue_contract); + + tried_used_blue_contraction = res.m_used_blue_contraction; + tried_base_ofs_clamped = res.m_delta_clamped; + + if (res.m_failed_flag) + return false; + + bool improved_flag = false; + + if (ccs_index == -1) + { + uint64_t trial_err = eval_solution( + pixel_stats, + cem_index, trial_endpoint_vals, endpoint_ise_range, + trial_weight_vals0, weight_ise_range, + enc_params); + + if (trial_err < trial_blk_error) + { + trial_blk_error = trial_err; + memcpy(pTrial_endpoint_vals, trial_endpoint_vals, astc_helpers::get_num_cem_values(cem_index)); + memcpy(pTrial_weight_vals0, trial_weight_vals0, pixel_stats.m_num_pixels); + if (pTrial_weight_vals1) + memset(pTrial_weight_vals1, 0, pixel_stats.m_num_pixels); + trial_used_blue_contraction = res.m_used_blue_contraction; + improved_flag = true; + } + } + else + { + uint64_t trial_err = eval_solution_dp( + pixel_stats, + cem_index, ccs_index, trial_endpoint_vals, endpoint_ise_range, + trial_weight_vals0, trial_weight_vals1, weight_ise_range, + enc_params); + + if (trial_err < trial_blk_error) + { + trial_blk_error = trial_err; + memcpy(pTrial_endpoint_vals, trial_endpoint_vals, astc_helpers::get_num_cem_values(cem_index)); + memcpy(pTrial_weight_vals0, trial_weight_vals0, pixel_stats.m_num_pixels); + memcpy(pTrial_weight_vals1, trial_weight_vals1, pixel_stats.m_num_pixels); + 
trial_used_blue_contraction = res.m_used_blue_contraction; + improved_flag = true; + } + } + + if (res.m_any_degen) + { + color_rgba dec_l(0), dec_h(0); + decode_endpoints(cem_index, trial_endpoint_vals, endpoint_ise_range, dec_l, dec_h); + + // The packing in these modes is so complex that we're going to approximate the biasing, and hope for the best. + const uint32_t num_ise_levels = astc_helpers::get_ise_levels(endpoint_ise_range); + int vals_per_ise_level = (256 + num_ise_levels - 1) / num_ise_levels; + + // TODO: There is potential cross-talk between RGB and A with the way this is done. + for (uint32_t p = 1; p <= 3; p++) + { + color_rgba trial_low_color(low_color), trial_high_color(high_color); + + for (uint32_t c = 0; c < num_comps; c++) + { + if (low_color[c] == high_color[c]) + continue; + + if (dec_l[c] != dec_h[c]) + continue; + + int delta = (low_color[c] < high_color[c]) ? -1 : 1; + if (p & 1) + trial_low_color[c] = (uint8_t)basisu::clamp((int)trial_low_color[c] + vals_per_ise_level * delta, 0, 255); + + if (p & 2) + trial_high_color[c] = (uint8_t)basisu::clamp((int)trial_high_color[c] + vals_per_ise_level * -delta, 0, 255); + } // c + + res = cem_encode_ldr_rgb_or_rgba_base_offset(cem_index, endpoint_ise_range, trial_low_color, trial_high_color, trial_endpoint_vals, try_blue_contract); + + if (res.m_failed_flag) + continue; + + if (ccs_index == -1) + { + uint64_t trial_err = eval_solution( + pixel_stats, + cem_index, trial_endpoint_vals, endpoint_ise_range, + trial_weight_vals0, weight_ise_range, + enc_params); + + if (trial_err < trial_blk_error) + { + trial_blk_error = trial_err; + memcpy(pTrial_endpoint_vals, trial_endpoint_vals, astc_helpers::get_num_cem_values(cem_index)); + memcpy(pTrial_weight_vals0, trial_weight_vals0, pixel_stats.m_num_pixels); + if (pTrial_weight_vals1) + memset(pTrial_weight_vals1, 0, pixel_stats.m_num_pixels); + trial_used_blue_contraction = res.m_used_blue_contraction; + if (res.m_delta_clamped) + tried_base_ofs_clamped = 
true; + improved_flag = true; + } + } + else + { + uint64_t trial_err = eval_solution_dp( + pixel_stats, + cem_index, ccs_index, trial_endpoint_vals, endpoint_ise_range, + trial_weight_vals0, trial_weight_vals1, weight_ise_range, + enc_params); + + if (trial_err < trial_blk_error) + { + trial_blk_error = trial_err; + memcpy(pTrial_endpoint_vals, trial_endpoint_vals, astc_helpers::get_num_cem_values(cem_index)); + memcpy(pTrial_weight_vals0, trial_weight_vals0, pixel_stats.m_num_pixels); + memcpy(pTrial_weight_vals1, trial_weight_vals1, pixel_stats.m_num_pixels); + trial_used_blue_contraction = res.m_used_blue_contraction; + if (res.m_delta_clamped) + tried_base_ofs_clamped = true; + improved_flag = true; + } + } + + } // p + } + else + { + // Now factor in the quantization introduced into the low (base) color, and apply this to the offset, for gain. + color_rgba dec_l(0), dec_h(0); + decode_endpoints(cem_index, trial_endpoint_vals, endpoint_ise_range, dec_l, dec_h); + + if (res.m_endpoints_swapped) + dec_l = low_color; // high color is the quantized base + else + dec_h = high_color; // low color is the quantized base + + res = cem_encode_ldr_rgb_or_rgba_base_offset(cem_index, endpoint_ise_range, dec_l, dec_h, trial_endpoint_vals, try_blue_contract); + + if (!res.m_failed_flag) + { + if (ccs_index == -1) + { + uint64_t trial_err = eval_solution( + pixel_stats, + cem_index, trial_endpoint_vals, endpoint_ise_range, + trial_weight_vals0, weight_ise_range, + enc_params); + + if (trial_err < trial_blk_error) + { + trial_blk_error = trial_err; + memcpy(pTrial_endpoint_vals, trial_endpoint_vals, astc_helpers::get_num_cem_values(cem_index)); + memcpy(pTrial_weight_vals0, trial_weight_vals0, pixel_stats.m_num_pixels); + if (pTrial_weight_vals1) + memset(pTrial_weight_vals1, 0, pixel_stats.m_num_pixels); + trial_used_blue_contraction = res.m_used_blue_contraction; + if (res.m_delta_clamped) + tried_base_ofs_clamped = true; + improved_flag = true; + } + } + else + { + uint64_t 
trial_err = eval_solution_dp( + pixel_stats, + cem_index, ccs_index, trial_endpoint_vals, endpoint_ise_range, + trial_weight_vals0, trial_weight_vals1, weight_ise_range, + enc_params); + + if (trial_err < trial_blk_error) + { + trial_blk_error = trial_err; + memcpy(pTrial_endpoint_vals, trial_endpoint_vals, astc_helpers::get_num_cem_values(cem_index)); + memcpy(pTrial_weight_vals0, trial_weight_vals0, pixel_stats.m_num_pixels); + memcpy(pTrial_weight_vals1, trial_weight_vals1, pixel_stats.m_num_pixels); + trial_used_blue_contraction = res.m_used_blue_contraction; + if (res.m_delta_clamped) + tried_base_ofs_clamped = true; + improved_flag = true; + } + } + } + } + + return improved_flag; + } + + // l/la direct, single plane + static uint64_t encode_cem0_4( + uint32_t cem_index, + const pixel_stats_t& pixel_stats, const cem_encode_params& enc_params, + uint32_t endpoint_ise_range, uint32_t weight_ise_range, + uint8_t* pEndpoint_vals, uint8_t* pWeight_vals, uint64_t cur_blk_error) + { + assert(g_initialized); + assert((cem_index == astc_helpers::CEM_LDR_LUM_DIRECT) || (cem_index == astc_helpers::CEM_LDR_LUM_ALPHA_DIRECT)); + assert((pixel_stats.m_num_pixels) && (pixel_stats.m_num_pixels <= ASTC_LDR_MAX_BLOCK_PIXELS)); + assert((endpoint_ise_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (endpoint_ise_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE)); + assert(((weight_ise_range >= astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE) && (weight_ise_range <= astc_helpers::LAST_VALID_WEIGHT_ISE_RANGE)) || (weight_ise_range == astc_helpers::BISE_64_LEVELS)); + + const bool cem_has_alpha = (cem_index == astc_helpers::CEM_LDR_LUM_ALPHA_DIRECT); + + const uint32_t total_endpoint_vals = astc_helpers::get_num_cem_values(cem_index); + const uint32_t total_weights = pixel_stats.m_num_pixels; + + float lum_l = BIG_FLOAT_VAL, lum_h = -BIG_FLOAT_VAL; + + float pixel1F[ASTC_LDR_MAX_BLOCK_PIXELS]; + vec2F pixel2F[ASTC_LDR_MAX_BLOCK_PIXELS]; + + for (uint32_t i = 0; i < 
pixel_stats.m_num_pixels; i++) + { + const vec4F& px = pixel_stats.m_pixels_f[i]; + + float l = (px[0] + px[1] + px[2]) * (1.0f / 3.0f); + + pixel1F[i] = l; + + pixel2F[i][0] = l; + pixel2F[i][1] = px[3]; + + lum_l = minimum(lum_l, l); + lum_h = maximum(lum_h, l); + } + + const float a_l = pixel_stats.m_min_f[3]; + const float a_h = pixel_stats.m_max_f[3]; + + const vec2F min_pixel2F(lum_l, a_l), max_pixel2F(lum_h, a_h); + + uint8_t trial_blk_endpoints[astc_helpers::MAX_CEM_ENDPOINT_VALS] = { 0 }; + uint8_t trial_blk_weights[ASTC_LDR_MAX_BLOCK_PIXELS] = { 0 }; + uint64_t trial_blk_error = UINT64_MAX; + + bool did_improve = try_cem0_or_4( + cem_index, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + lum_l, lum_h, a_l, a_h, + trial_blk_endpoints, trial_blk_weights, trial_blk_error); + BASISU_NOTE_UNUSED(did_improve); + + if (trial_blk_error == UINT64_MAX) + return cur_blk_error; + + if (trial_blk_error < cur_blk_error) + { + cur_blk_error = trial_blk_error; + memcpy(pEndpoint_vals, trial_blk_endpoints, total_endpoint_vals); + memcpy(pWeight_vals, trial_blk_weights, total_weights); + } + + const uint32_t NUM_LS_OPT_PASSES = 3; + + for (uint32_t pass = 0; pass < NUM_LS_OPT_PASSES; pass++) + { + vec2F xl(lum_l, a_l), xh(lum_h, a_h); + + bool ls_res; + if (cem_has_alpha) + { + ls_res = compute_least_squares_endpoints_2D( + pixel_stats.m_num_pixels, trial_blk_weights, get_ls_weights_ise(weight_ise_range), + &xl, &xh, pixel2F, min_pixel2F, max_pixel2F); + + } + else + { + ls_res = compute_least_squares_endpoints_1D( + pixel_stats.m_num_pixels, trial_blk_weights, get_ls_weights_ise(weight_ise_range), + &xl[0], &xh[0], pixel1F, lum_l, lum_h); + } + if (!ls_res) + break; + + bool did_improve_res = false; + + did_improve_res = try_cem0_or_4( + cem_index, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + xl[0], xh[0], xl[1], xh[1], + trial_blk_endpoints, trial_blk_weights, trial_blk_error); + + BASISU_NOTE_UNUSED(did_improve_res); + + if 
(trial_blk_error >= cur_blk_error) + break; + + cur_blk_error = trial_blk_error; + memcpy(pEndpoint_vals, trial_blk_endpoints, total_endpoint_vals); + memcpy(pWeight_vals, trial_blk_weights, total_weights); + + } // pass + + return cur_blk_error; + } + + // lum+alpha direct, dual plane + static uint64_t encode_cem4_dp_a( + uint32_t cem_index, + const pixel_stats_t& pixel_stats, const cem_encode_params& enc_params, + uint32_t endpoint_ise_range, uint32_t weight_ise_range, + uint8_t* pEndpoint_vals, uint8_t* pWeight_vals0, uint8_t* pWeight_vals1, uint64_t cur_blk_error) + { + assert(g_initialized); + assert(cem_index == astc_helpers::CEM_LDR_LUM_ALPHA_DIRECT); + assert((pixel_stats.m_num_pixels) && (pixel_stats.m_num_pixels <= ASTC_LDR_MAX_BLOCK_PIXELS)); + assert((endpoint_ise_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (endpoint_ise_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE)); + assert(((weight_ise_range >= astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE) && (weight_ise_range <= astc_helpers::LAST_VALID_WEIGHT_ISE_RANGE)) || (weight_ise_range == astc_helpers::BISE_64_LEVELS)); + + const uint32_t total_endpoint_vals = astc_helpers::get_num_cem_values(cem_index); + const uint32_t total_weights = pixel_stats.m_num_pixels; + + float alpha_vals[ASTC_LDR_MAX_BLOCK_PIXELS]; + + for (uint32_t i = 0; i < pixel_stats.m_num_pixels; i++) + { + const vec4F& px = pixel_stats.m_pixels_f[i]; + + alpha_vals[i] = px[3]; + } + + // First get plane0's low/high (lum) + uint8_t lum_endpoints[astc_helpers::MAX_CEM_ENDPOINT_VALS]; + uint8_t lum_weights0[ASTC_LDR_MAX_BLOCK_PIXELS]; + + uint64_t lum_blk_error = encode_cem0_4( + astc_helpers::CEM_LDR_LUM_DIRECT, + pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + lum_endpoints, lum_weights0, UINT64_MAX); + + if (lum_blk_error == UINT64_MAX) + return cur_blk_error; + + const auto& dequant_endpoints_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(endpoint_ise_range).m_ISE_to_val; + + float lum_l = 
(float)dequant_endpoints_tab[lum_endpoints[0]] * (1.0f / 255.0f);
+		float lum_h = (float)dequant_endpoints_tab[lum_endpoints[1]] * (1.0f / 255.0f);
+		float a_l = pixel_stats.m_min_f[3];
+		float a_h = pixel_stats.m_max_f[3];
+
+		uint8_t trial_endpoints[astc_helpers::MAX_CEM_ENDPOINT_VALS];
+		uint8_t trial_weights0[ASTC_LDR_MAX_BLOCK_PIXELS];
+		uint8_t trial_weights1[ASTC_LDR_MAX_BLOCK_PIXELS];
+		uint64_t trial_blk_error = UINT64_MAX;
+
+		// Initial attempt: dequantized lum endpoints from the lum-only solve, alpha endpoints from the block's min/max of component 3.
+		bool did_improve = try_cem4_dp_a(
+			cem_index, pixel_stats, enc_params,
+			endpoint_ise_range, weight_ise_range,
+			lum_l, lum_h, a_l, a_h,
+			trial_endpoints, trial_weights0, trial_weights1, trial_blk_error);
+
+		// trial_blk_error started at UINT64_MAX, so the first attempt is expected to report an improvement.
+		if (!did_improve)
+		{
+			assert(0);
+			return cur_blk_error;
+		}
+
+		if (trial_blk_error < cur_blk_error)
+		{
+			cur_blk_error = trial_blk_error;
+			memcpy(pEndpoint_vals, trial_endpoints, total_endpoint_vals);
+			memcpy(pWeight_vals0, trial_weights0, total_weights);
+			memcpy(pWeight_vals1, trial_weights1, total_weights);
+		}
+
+		const uint32_t NUM_LS_OPT_PASSES = 3;
+
+		// Least squares refinement: the lum endpoints stay fixed, only the alpha endpoints are re-solved from trial_weights1.
+		for (uint32_t pass = 0; pass < NUM_LS_OPT_PASSES; pass++)
+		{
+			float xl = pixel_stats.m_min_f[3], xh = pixel_stats.m_max_f[3];
+
+			bool ls_res = compute_least_squares_endpoints_1D(
+				pixel_stats.m_num_pixels, trial_weights1, get_ls_weights_ise(weight_ise_range),
+				&xl, &xh, alpha_vals, pixel_stats.m_min_f[3], pixel_stats.m_max_f[3]);
+			if (!ls_res)
+				break;
+
+			did_improve = try_cem4_dp_a(
+				cem_index, pixel_stats, enc_params,
+				endpoint_ise_range, weight_ise_range,
+				lum_l, lum_h, xl, xh,
+				trial_endpoints, trial_weights0, trial_weights1, trial_blk_error);
+
+			if (!did_improve)
+				break;
+
+			// An improved trial can still be worse than the solution the caller already holds (cur_blk_error).
+			// Only publish it when it actually beats the best error so far, mirroring the guard used after the
+			// initial attempt above; otherwise the caller's better endpoints/weights would be overwritten.
+			if (trial_blk_error < cur_blk_error)
+			{
+				cur_blk_error = trial_blk_error;
+				memcpy(pEndpoint_vals, trial_endpoints, total_endpoint_vals);
+				memcpy(pWeight_vals0, trial_weights0, total_weights);
+				memcpy(pWeight_vals1, trial_weights1, total_weights);
+			}
+
+		} // pass
+
+		return cur_blk_error;
+	}
+
+	struct weight_refiner
+	{
+		void init(uint32_t weight_ise_range, uint32_t total_pixels, const uint8_t 
*pInitial_ise_weights) + { + m_weight_ise_range = weight_ise_range; + m_total_pixels = total_pixels; + m_pISE_to_rank = &astc_helpers::g_dequant_tables.get_weight_tab(weight_ise_range).m_ISE_to_rank; + m_pRank_to_ise = &astc_helpers::g_dequant_tables.get_weight_tab(weight_ise_range).m_rank_to_ISE; + m_num_weight_levels = astc_helpers::get_ise_levels(weight_ise_range); + + for (uint32_t i = 0; i < total_pixels; i++) + m_start_weights[i] = (*m_pISE_to_rank)[pInitial_ise_weights[i]]; + + m_min_weight = UINT32_MAX; + m_max_weight = 0; + m_sum_weight = 0; + + for (uint32_t i = 0; i < total_pixels; i++) + { + const uint32_t weight = m_start_weights[i]; + m_sum_weight += weight; + m_min_weight = minimumu(m_min_weight, weight); + m_max_weight = maximumu(m_max_weight, weight); + } + } + + void refine(uint32_t pass_index, uint8_t* pTrial_ise_weights) + { + switch (pass_index) + { + case 0: + { + for (uint32_t i = 0; i < m_total_pixels; i++) + { + uint32_t v = m_start_weights[i]; + if ((v == m_min_weight) && (v < (m_num_weight_levels - 1))) + v++; + + pTrial_ise_weights[i] = (*m_pRank_to_ise)[v]; + } + break; + } + case 1: + { + for (uint32_t i = 0; i < m_total_pixels; i++) + { + uint32_t v = m_start_weights[i]; + if ((v == m_max_weight) && (v > 0)) + v--; + + pTrial_ise_weights[i] = (*m_pRank_to_ise)[v]; + } + break; + } + case 2: + { + for (uint32_t i = 0; i < m_total_pixels; i++) + { + uint32_t v = m_start_weights[i]; + if ((v == m_min_weight) && (v < (m_num_weight_levels - 1))) + v++; + else if ((v == m_max_weight) && (v > 0)) + v--; + + pTrial_ise_weights[i] = (*m_pRank_to_ise)[v]; + } + break; + } + case 3: + { + const int max_weight_rank_index = m_num_weight_levels - 1; + int ly = -1, hy = max_weight_rank_index + 1; + + for (uint32_t i = 0; i < m_total_pixels; i++) + { + int s = (int)clampf(floor((float)max_weight_rank_index * ((float)m_start_weights[i] - (float)ly) / ((float)hy - (float)ly) + .5f), 0, (float)max_weight_rank_index); + pTrial_ise_weights[i] = 
(*m_pRank_to_ise)[s];
+				}
+
+				break;
+			}
+			case 4:
+			{
+				// Rescale the start ranks as if the rank range were widened by 2 on both sides, then round and clamp.
+				const int max_weight_rank_index = m_num_weight_levels - 1;
+				int ly = -2, hy = max_weight_rank_index + 2;
+
+				for (uint32_t i = 0; i < m_total_pixels; i++)
+				{
+					int s = (int)clampf(floor((float)max_weight_rank_index * ((float)m_start_weights[i] - (float)ly) / ((float)hy - (float)ly) + .5f), 0, (float)max_weight_rank_index);
+					pTrial_ise_weights[i] = (*m_pRank_to_ise)[s];
+				}
+
+				break;
+			}
+			case 5:
+			{
+				// Same rescale, widened by 1 below and by 2 above.
+				const int max_weight_rank_index = m_num_weight_levels - 1;
+				int ly = -1, hy = max_weight_rank_index + 2;
+
+				for (uint32_t i = 0; i < m_total_pixels; i++)
+				{
+					int s = (int)clampf(floor((float)max_weight_rank_index * ((float)m_start_weights[i] - (float)ly) / ((float)hy - (float)ly) + .5f), 0, (float)max_weight_rank_index);
+					pTrial_ise_weights[i] = (*m_pRank_to_ise)[s];
+				}
+
+				break;
+			}
+			case 6:
+			{
+				// Same rescale, widened by 2 below and by 1 above.
+				const int max_weight_rank_index = m_num_weight_levels - 1;
+				int ly = -2, hy = max_weight_rank_index + 1;
+
+				for (uint32_t i = 0; i < m_total_pixels; i++)
+				{
+					int s = (int)clampf(floor((float)max_weight_rank_index * ((float)m_start_weights[i] - (float)ly) / ((float)hy - (float)ly) + .5f), 0, (float)max_weight_rank_index);
+					pTrial_ise_weights[i] = (*m_pRank_to_ise)[s];
+				}
+
+				break;
+			}
+			case 7:
+			{
+				// Raise every pixel sitting at the minimum start rank by up to two ranks (never past the top rank).
+				for (uint32_t i = 0; i < m_total_pixels; i++)
+				{
+					uint32_t v = m_start_weights[i];
+					if ((v == m_min_weight) && (v < (m_num_weight_levels - 1)))
+					{
+						v++;
+						if (v < (m_num_weight_levels - 1))
+							v++;
+					}
+
+					pTrial_ise_weights[i] = (*m_pRank_to_ise)[v];
+				}
+				break;
+			}
+			case 8:
+			{
+				// Lower every pixel sitting at the maximum start rank by up to two ranks (never below rank 0).
+				for (uint32_t i = 0; i < m_total_pixels; i++)
+				{
+					uint32_t v = m_start_weights[i];
+					if ((v == m_max_weight) && (v > 0))
+					{
+						v--;
+						if (v > 0)
+							v--;
+					}
+
+					pTrial_ise_weights[i] = (*m_pRank_to_ise)[v];
+				}
+				break;
+			}
+			case 9:
+			{
+				for (uint32_t i = 0; i < m_total_pixels; i++)
+				{
+					uint32_t v = m_start_weights[i];
+					if ((v == m_min_weight) && (v < (m_num_weight_levels - 1)))
+					{
+						v++;
+						if (v < 
(m_num_weight_levels - 1)) + v++; + } + else if ((v == m_max_weight) && (v > 0)) + { + v--; + if (v > 0) + v--; + } + + pTrial_ise_weights[i] = (*m_pRank_to_ise)[v]; + } + break; + } + case 10: + { + float mid_weight = (float)m_sum_weight / (float)m_total_pixels; + + for (uint32_t i = 0; i < m_total_pixels; i++) + { + int v = m_start_weights[i]; + + float fv = ((float)v - mid_weight) * .8f + ((float)m_num_weight_levels * .5f); + + v = clamp((int)std::round(fv), 0, m_num_weight_levels - 1); + + pTrial_ise_weights[i] = (*m_pRank_to_ise)[v]; + } + break; + } + case 11: + { + float mid_weight = (float)m_sum_weight / (float)m_total_pixels; + + for (uint32_t i = 0; i < m_total_pixels; i++) + { + int v = m_start_weights[i]; + + float fv = ((float)v - mid_weight) * .9f + ((float)m_num_weight_levels * .5f); + + v = clamp((int)std::round(fv), 0, m_num_weight_levels - 1); + + pTrial_ise_weights[i] = (*m_pRank_to_ise)[v]; + } + break; + } + case 12: + { + float mid_weight = (float)m_sum_weight / (float)m_total_pixels; + + for (uint32_t i = 0; i < m_total_pixels; i++) + { + int v = m_start_weights[i]; + + float fv = ((float)v - mid_weight) * 1.1f + ((float)m_num_weight_levels * .5f); + + v = clamp((int)std::round(fv), 0, m_num_weight_levels - 1); + + pTrial_ise_weights[i] = (*m_pRank_to_ise)[v]; + } + break; + } + case 13: + { + float mid_weight = (float)m_sum_weight / (float)m_total_pixels; + + for (uint32_t i = 0; i < m_total_pixels; i++) + { + int v = m_start_weights[i]; + + float fv; + if (v < mid_weight) + fv = ((float)v - mid_weight) * .8f + ((float)m_num_weight_levels * .5f); + else + fv = (float)v; + + v = clamp((int)std::round(fv), 0, m_num_weight_levels - 1); + + pTrial_ise_weights[i] = (*m_pRank_to_ise)[v]; + } + break; + } + case 14: + { + float mid_weight = (float)m_sum_weight / (float)m_total_pixels; + + for (uint32_t i = 0; i < m_total_pixels; i++) + { + int v = m_start_weights[i]; + + float fv; + if (v >= mid_weight) + fv = ((float)v - mid_weight) * .8f + 
((float)m_num_weight_levels * .5f); + else + fv = (float)v; + + v = clamp((int)std::round(fv), 0, m_num_weight_levels - 1); + + pTrial_ise_weights[i] = (*m_pRank_to_ise)[v]; + } + break; + } + case 15: + { + for (uint32_t i = 0; i < m_total_pixels; i++) + { + uint32_t v = m_start_weights[i]; + if (v < (m_num_weight_levels - 1)) + v++; + + pTrial_ise_weights[i] = (*m_pRank_to_ise)[v]; + } + break; + } + case 16: + { + for (uint32_t i = 0; i < m_total_pixels; i++) + { + uint32_t v = m_start_weights[i]; + if (v) + v--; + + pTrial_ise_weights[i] = (*m_pRank_to_ise)[v]; + } + break; + } + default: + { + assert(0); + memset(pTrial_ise_weights, 0, m_total_pixels); + break; + } + } + } + + uint32_t m_total_pixels; + uint32_t m_weight_ise_range; + uint32_t m_num_weight_levels; + uint8_t m_start_weights[ASTC_LDR_MAX_BLOCK_PIXELS]; // ranks, not ISE + + uint32_t m_min_weight, m_max_weight, m_sum_weight; + + const basisu::vector* m_pISE_to_rank; + const basisu::vector* m_pRank_to_ise; + }; + + // rgb/rgba direct or rgb/rgba base+offset, single plane + static uint64_t encode_cem8_12_9_13( + uint32_t cem_index, + const pixel_stats_t& pixel_stats, const cem_encode_params& enc_params, + uint32_t endpoint_ise_range, uint32_t weight_ise_range, + uint8_t* pEndpoint_vals, uint8_t* pWeight_vals, uint64_t cur_blk_error, bool use_blue_contraction, bool* pBase_ofs_clamped_flag) + { + assert(g_initialized); + assert((cem_index == astc_helpers::CEM_LDR_RGB_DIRECT) || (cem_index == astc_helpers::CEM_LDR_RGBA_DIRECT) || + (cem_index == astc_helpers::CEM_LDR_RGB_BASE_PLUS_OFFSET) || (cem_index == astc_helpers::CEM_LDR_RGBA_BASE_PLUS_OFFSET)); + + assert((pixel_stats.m_num_pixels) && (pixel_stats.m_num_pixels <= ASTC_LDR_MAX_BLOCK_PIXELS)); + assert((endpoint_ise_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (endpoint_ise_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE)); + assert(((weight_ise_range >= astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE) && (weight_ise_range <= 
astc_helpers::LAST_VALID_WEIGHT_ISE_RANGE)) || (weight_ise_range == astc_helpers::BISE_64_LEVELS)); + + if (pBase_ofs_clamped_flag) + *pBase_ofs_clamped_flag = false; + + const bool cem_has_alpha = (cem_index == astc_helpers::CEM_LDR_RGBA_DIRECT) || (cem_index == astc_helpers::CEM_LDR_RGBA_BASE_PLUS_OFFSET); + const bool cem_is_base_offset = (cem_index == astc_helpers::CEM_LDR_RGB_BASE_PLUS_OFFSET) || (cem_index == astc_helpers::CEM_LDR_RGBA_BASE_PLUS_OFFSET); + + const uint32_t total_endpoint_vals = astc_helpers::get_num_cem_values(cem_index); + const uint32_t total_weights = pixel_stats.m_num_pixels; + + float best_l = BIG_FLOAT_VAL, best_h = -BIG_FLOAT_VAL; + //int best_l_index = 0, best_h_index = 0; + + for (uint32_t c = 0; c < pixel_stats.m_num_pixels; c++) + { + const vec4F px(pixel_stats.m_pixels_f[c] - pixel_stats.m_mean_f); + + float p = cem_has_alpha ? px.dot(pixel_stats.m_mean_rel_axis4) : px.dot3(pixel_stats.m_mean_rel_axis3); + if (p < best_l) + { + best_l = p; + //best_l_index = c; + } + + if (p > best_h) + { + best_h = p; + //best_h_index = c; + } + } // c + +#if 0 + vec4F low_color_f(pixel_stats.m_pixels_f[best_l_index]), high_color_f(pixel_stats.m_pixels_f[best_h_index]); +#else + vec4F low_color_f, high_color_f; + if (cem_has_alpha) + { + low_color_f = pixel_stats.m_mean_rel_axis4 * best_l + pixel_stats.m_mean_f; + high_color_f = pixel_stats.m_mean_rel_axis4 * best_h + pixel_stats.m_mean_f; + } + else + { + low_color_f = vec4F(pixel_stats.m_mean_rel_axis3) * best_l + pixel_stats.m_mean_f; + high_color_f = vec4F(pixel_stats.m_mean_rel_axis3) * best_h + pixel_stats.m_mean_f; + } + + low_color_f.clamp(0.0f, 1.0f); + high_color_f.clamp(0.0f, 1.0f); +#endif + + uint8_t trial_blk_endpoints[astc_helpers::MAX_CEM_ENDPOINT_VALS] = { 0 }; + uint8_t trial_blk_weights[ASTC_LDR_MAX_BLOCK_PIXELS] = { 0 }; + uint64_t trial_blk_error = UINT64_MAX; + bool trial_used_blue_contraction = false; + + bool tried_used_blue_contraction = false; + + if (cem_is_base_offset) 
+ { + bool tried_base_ofs_clamped = false; + + try_cem9_13_sp_or_dp( + cem_index, -1, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + low_color_f, high_color_f, + trial_blk_endpoints, trial_blk_weights, nullptr, trial_blk_error, trial_used_blue_contraction, use_blue_contraction, + tried_used_blue_contraction, tried_base_ofs_clamped); + + if ((pBase_ofs_clamped_flag) && (tried_base_ofs_clamped)) + *pBase_ofs_clamped_flag = true; + + if (tried_used_blue_contraction) + { + try_cem9_13_sp_or_dp( + cem_index, -1, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + low_color_f, high_color_f, + trial_blk_endpoints, trial_blk_weights, nullptr, trial_blk_error, trial_used_blue_contraction, false, + tried_used_blue_contraction, tried_base_ofs_clamped); + + if ((pBase_ofs_clamped_flag) && (tried_base_ofs_clamped)) + *pBase_ofs_clamped_flag = true; + } + } + else + { + try_cem8_12( + cem_index, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + low_color_f, high_color_f, + trial_blk_endpoints, trial_blk_weights, trial_blk_error, trial_used_blue_contraction, use_blue_contraction, tried_used_blue_contraction); + + if (tried_used_blue_contraction) + { + try_cem8_12( + cem_index, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + low_color_f, high_color_f, + trial_blk_endpoints, trial_blk_weights, trial_blk_error, trial_used_blue_contraction, false, tried_used_blue_contraction); + } + } + + if (trial_blk_error == UINT64_MAX) + return cur_blk_error; + + if (trial_blk_error < cur_blk_error) + { + cur_blk_error = trial_blk_error; + memcpy(pEndpoint_vals, trial_blk_endpoints, total_endpoint_vals); + memcpy(pWeight_vals, trial_blk_weights, total_weights); + } + + for (uint32_t pass = 0; pass < enc_params.m_max_ls_passes; pass++) + { + vec4F xl, xh; + + bool ls_res; + if (cem_has_alpha) + { + ls_res = compute_least_squares_endpoints_4D( + pixel_stats.m_num_pixels, trial_blk_weights, get_ls_weights_ise(weight_ise_range), + 
&xl, &xh, pixel_stats.m_pixels_f, pixel_stats.m_min_f, pixel_stats.m_max_f); + } + else + { + ls_res = compute_least_squares_endpoints_3D( + pixel_stats.m_num_pixels, trial_blk_weights, get_ls_weights_ise(weight_ise_range), + &xl, &xh, pixel_stats.m_pixels_f, pixel_stats.m_min_f, pixel_stats.m_max_f); + } + if (!ls_res) + break; + + if (cem_is_base_offset) + { + bool tried_base_ofs_clamped = false; + + try_cem9_13_sp_or_dp( + cem_index, -1, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + xl, xh, + trial_blk_endpoints, trial_blk_weights, nullptr, trial_blk_error, trial_used_blue_contraction, use_blue_contraction, tried_used_blue_contraction, tried_base_ofs_clamped); + + if ((pBase_ofs_clamped_flag) && (tried_base_ofs_clamped)) + *pBase_ofs_clamped_flag = true; + + if (tried_used_blue_contraction) + { + // Try without blue contraction for a minor gain. + try_cem9_13_sp_or_dp( + cem_index, -1, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + xl, xh, + trial_blk_endpoints, trial_blk_weights, nullptr, trial_blk_error, trial_used_blue_contraction, false, tried_used_blue_contraction, tried_base_ofs_clamped); + + if ((pBase_ofs_clamped_flag) && (tried_base_ofs_clamped)) + *pBase_ofs_clamped_flag = true; + } + } + else + { + try_cem8_12( + cem_index, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + xl, xh, + trial_blk_endpoints, trial_blk_weights, trial_blk_error, trial_used_blue_contraction, use_blue_contraction, tried_used_blue_contraction); + + if (tried_used_blue_contraction) + { + // Try without blue contraction for a minor gain. 
+ try_cem8_12( + cem_index, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + xl, xh, + trial_blk_endpoints, trial_blk_weights, trial_blk_error, trial_used_blue_contraction, false, tried_used_blue_contraction); + } + } + + if (trial_blk_error >= cur_blk_error) + break; + + cur_blk_error = trial_blk_error; + memcpy(pEndpoint_vals, trial_blk_endpoints, total_endpoint_vals); + memcpy(pWeight_vals, trial_blk_weights, total_weights); + + } // pass + + if ((enc_params.m_total_weight_refine_passes) && ((weight_ise_range != astc_helpers::BISE_2_LEVELS) && (weight_ise_range != astc_helpers::BISE_64_LEVELS))) + { + weight_refiner refiner; + refiner.init(weight_ise_range, pixel_stats.m_num_pixels, pWeight_vals); + + for (uint32_t pass = 0; pass < enc_params.m_total_weight_refine_passes; pass++) + { + refiner.refine(pass, trial_blk_weights); + + vec4F xl, xh; + + bool ls_res; + if (cem_has_alpha) + { + ls_res = compute_least_squares_endpoints_4D( + pixel_stats.m_num_pixels, trial_blk_weights, get_ls_weights_ise(weight_ise_range), + &xl, &xh, pixel_stats.m_pixels_f, pixel_stats.m_min_f, pixel_stats.m_max_f); + } + else + { + ls_res = compute_least_squares_endpoints_3D( + pixel_stats.m_num_pixels, trial_blk_weights, get_ls_weights_ise(weight_ise_range), + &xl, &xh, pixel_stats.m_pixels_f, pixel_stats.m_min_f, pixel_stats.m_max_f); + } + if (!ls_res) + continue; + + if (cem_is_base_offset) + { + bool tried_base_ofs_clamped = false; + + try_cem9_13_sp_or_dp( + cem_index, -1, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + xl, xh, + trial_blk_endpoints, trial_blk_weights, nullptr, trial_blk_error, trial_used_blue_contraction, use_blue_contraction, tried_used_blue_contraction, tried_base_ofs_clamped); + + if ((pBase_ofs_clamped_flag) && (tried_base_ofs_clamped)) + *pBase_ofs_clamped_flag = true; + + if (tried_used_blue_contraction) + { + // Try without blue contraction for a minor gain. 
+ try_cem9_13_sp_or_dp( + cem_index, -1, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + xl, xh, + trial_blk_endpoints, trial_blk_weights, nullptr, trial_blk_error, trial_used_blue_contraction, false, tried_used_blue_contraction, tried_base_ofs_clamped); + + if ((pBase_ofs_clamped_flag) && (tried_base_ofs_clamped)) + *pBase_ofs_clamped_flag = true; + } + } + else + { + try_cem8_12( + cem_index, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + xl, xh, + trial_blk_endpoints, trial_blk_weights, trial_blk_error, trial_used_blue_contraction, use_blue_contraction, tried_used_blue_contraction); + + if (tried_used_blue_contraction) + { + // Try without blue contraction for a minor gain. + try_cem8_12( + cem_index, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + xl, xh, + trial_blk_endpoints, trial_blk_weights, trial_blk_error, trial_used_blue_contraction, false, tried_used_blue_contraction); + } + } + + if (trial_blk_error < cur_blk_error) + { + cur_blk_error = trial_blk_error; + memcpy(pEndpoint_vals, trial_blk_endpoints, total_endpoint_vals); + memcpy(pWeight_vals, trial_blk_weights, total_weights); + } + + } // pass + } + + const uint32_t N = 4; + if ((enc_params.m_worst_weight_nudging_flag) && + (pixel_stats.m_num_pixels > N) && + ((weight_ise_range != astc_helpers::BISE_2_LEVELS) && (weight_ise_range != astc_helpers::BISE_64_LEVELS))) + { + const uint32_t NUM_NUDGING_PASSES = 1; + for (uint32_t pass = 0; pass < NUM_NUDGING_PASSES; pass++) + { + color_rgba l, h; + decode_endpoints(cem_index, pEndpoint_vals, endpoint_ise_range, l, h); + + vec4F dir; + dir[0] = (float)(h[0] - l[0]); + dir[1] = (float)(h[1] - l[1]); + dir[2] = (float)(h[2] - l[2]); + dir[3] = cem_has_alpha ? 
(float)(h[3] - l[3]) : 0.0f; + + dir.normalize_in_place(); + + float errs[ASTC_LDR_MAX_BLOCK_PIXELS]; + float delta_dots[ASTC_LDR_MAX_BLOCK_PIXELS]; + for (uint32_t i = 0; i < pixel_stats.m_num_pixels; i++) + { + vec4F ofs(pixel_stats.m_pixels_f[i] - pixel_stats.m_mean_f); + + float proj = dir.dot(ofs); + + vec4F proj_vec(pixel_stats.m_mean_f + proj * dir); + + vec4F delta_vec(pixel_stats.m_pixels_f[i] - proj_vec); + + delta_dots[i] = dir.dot(delta_vec); + + errs[i] = cem_has_alpha ? vec4F::dot_product(delta_vec, delta_vec) : vec4F::dot_product3(delta_vec, delta_vec); + } + + uint32_t errs_indices[ASTC_LDR_MAX_BLOCK_PIXELS]; + indirect_sort(pixel_stats.m_num_pixels, errs_indices, errs); + + memcpy(trial_blk_weights, pWeight_vals, total_weights); + + for (uint32_t i = 0; i < N; i++) + { + const uint32_t idx = errs_indices[pixel_stats.m_num_pixels - 1 - i]; + + int delta_to_apply = (delta_dots[idx] > 0.0f) ? 1 : -1; + + trial_blk_weights[idx] = (uint8_t)apply_delta_to_bise_weight_val(weight_ise_range, trial_blk_weights[idx], delta_to_apply); + } // i + + vec4F xl, xh; + + bool ls_res; + if (cem_has_alpha) + { + ls_res = compute_least_squares_endpoints_4D( + pixel_stats.m_num_pixels, trial_blk_weights, get_ls_weights_ise(weight_ise_range), + &xl, &xh, pixel_stats.m_pixels_f, pixel_stats.m_min_f, pixel_stats.m_max_f); + } + else + { + ls_res = compute_least_squares_endpoints_3D( + pixel_stats.m_num_pixels, trial_blk_weights, get_ls_weights_ise(weight_ise_range), + &xl, &xh, pixel_stats.m_pixels_f, pixel_stats.m_min_f, pixel_stats.m_max_f); + } + if (!ls_res) + break; + + if (cem_is_base_offset) + { + bool tried_base_ofs_clamped = false; + + try_cem9_13_sp_or_dp( + cem_index, -1, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + xl, xh, + trial_blk_endpoints, trial_blk_weights, nullptr, trial_blk_error, trial_used_blue_contraction, use_blue_contraction, tried_used_blue_contraction, tried_base_ofs_clamped); + + if ((pBase_ofs_clamped_flag) && 
(tried_base_ofs_clamped)) + *pBase_ofs_clamped_flag = true; + + if (tried_used_blue_contraction) + { + // Try without blue contraction for a minor gain. + try_cem9_13_sp_or_dp( + cem_index, -1, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + xl, xh, + trial_blk_endpoints, trial_blk_weights, nullptr, trial_blk_error, trial_used_blue_contraction, false, tried_used_blue_contraction, tried_base_ofs_clamped); + + if ((pBase_ofs_clamped_flag) && (tried_base_ofs_clamped)) + *pBase_ofs_clamped_flag = true; + } + } + else + { + try_cem8_12( + cem_index, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + xl, xh, + trial_blk_endpoints, trial_blk_weights, trial_blk_error, trial_used_blue_contraction, use_blue_contraction, tried_used_blue_contraction); + + if (tried_used_blue_contraction) + { + // Try without blue contraction for a minor gain. + try_cem8_12( + cem_index, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + xl, xh, + trial_blk_endpoints, trial_blk_weights, trial_blk_error, trial_used_blue_contraction, false, tried_used_blue_contraction); + } + } + + if (trial_blk_error < cur_blk_error) + { + cur_blk_error = trial_blk_error; + memcpy(pEndpoint_vals, trial_blk_endpoints, total_endpoint_vals); + memcpy(pWeight_vals, trial_blk_weights, total_weights); + } + else + { + break; + } + } // pass + } + + if (enc_params.m_endpoint_refinement_flag) + { + const uint32_t num_comps = cem_has_alpha ? 
4 : 3; + + for (uint32_t c = 0; c < num_comps; c++) + { + uint8_t base_endpoint_vals[astc_helpers::MAX_CEM_ENDPOINT_VALS]; + memcpy(base_endpoint_vals, pEndpoint_vals, total_endpoint_vals); + + for (int dl = -1; dl <= 1; dl++) + { + for (int dh = -1; dh <= 1; dh++) + { + if (!dl && !dh) + continue; + + memcpy(trial_blk_endpoints, base_endpoint_vals, total_endpoint_vals); + + trial_blk_endpoints[c * 2 + 0] = (uint8_t)astc_helpers::apply_delta_to_bise_endpoint_val(endpoint_ise_range, trial_blk_endpoints[c * 2 + 0], dl); + trial_blk_endpoints[c * 2 + 1] = (uint8_t)astc_helpers::apply_delta_to_bise_endpoint_val(endpoint_ise_range, trial_blk_endpoints[c * 2 + 1], dh); + + if (!use_blue_contraction) + { + const bool uses_blue_contraction = astc_helpers::used_blue_contraction(cem_index, trial_blk_endpoints, endpoint_ise_range); + if (uses_blue_contraction) + continue; + } + + trial_blk_error = eval_solution( + pixel_stats, + cem_index, trial_blk_endpoints, endpoint_ise_range, + trial_blk_weights, weight_ise_range, + enc_params); + + if (trial_blk_error < cur_blk_error) + { + cur_blk_error = trial_blk_error; + memcpy(pEndpoint_vals, trial_blk_endpoints, total_endpoint_vals); + memcpy(pWeight_vals, trial_blk_weights, total_weights); + } + + } // dh + + } // dl + } + } + + return cur_blk_error; + } + + // rgb/rgba direct, or rgb/rgba base+offset, dual plane + static uint64_t encode_cem8_12_9_13_dp( + uint32_t cem_index, uint32_t ccs_index, + const pixel_stats_t& pixel_stats, const cem_encode_params& enc_params, + uint32_t endpoint_ise_range, uint32_t weight_ise_range, + uint8_t* pEndpoint_vals, uint8_t* pWeight_vals0, uint8_t* pWeight_vals1, + uint64_t cur_blk_error, bool use_blue_contraction, bool *pBase_ofs_clamped_flag) + { + assert(g_initialized); + assert(ccs_index <= 3); + assert((pixel_stats.m_num_pixels) && (pixel_stats.m_num_pixels <= ASTC_LDR_MAX_BLOCK_PIXELS)); + assert((endpoint_ise_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (endpoint_ise_range <= 
astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE)); + assert(((weight_ise_range >= astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE) && (weight_ise_range <= astc_helpers::LAST_VALID_WEIGHT_ISE_RANGE)) || (weight_ise_range == astc_helpers::BISE_64_LEVELS)); + + if (pBase_ofs_clamped_flag) + *pBase_ofs_clamped_flag = false; + + bool cem_has_alpha = false, cem_is_base_offset = false; + switch (cem_index) + { + case astc_helpers::CEM_LDR_RGB_DIRECT: break; + case astc_helpers::CEM_LDR_RGBA_DIRECT: cem_has_alpha = true; break; + case astc_helpers::CEM_LDR_RGB_BASE_PLUS_OFFSET: cem_is_base_offset = true; break; + case astc_helpers::CEM_LDR_RGBA_BASE_PLUS_OFFSET: cem_is_base_offset = true; cem_has_alpha = true; break; + default: + assert(0); + return false; + } + + assert((ccs_index <= 2) || cem_has_alpha); + + const uint32_t total_endpoint_vals = astc_helpers::get_num_cem_values(cem_index); + const uint32_t total_weights = pixel_stats.m_num_pixels; + + // Remove influence of the 2nd plane's values, recalc principle axis on other values. 
+ vec4F flattened_pixels[ASTC_LDR_MAX_BLOCK_PIXELS]; + for (uint32_t i = 0; i < pixel_stats.m_num_pixels; i++) + { + flattened_pixels[i] = pixel_stats.m_pixels_f[i]; + flattened_pixels[i][ccs_index] = 0.0f; + + if (!cem_has_alpha) + flattened_pixels[i][3] = 0.0f; + } + + vec4F flattened_pixels_mean(pixel_stats.m_mean_f); + flattened_pixels_mean[ccs_index] = 0.0f; + + if (!cem_has_alpha) + flattened_pixels_mean[3] = 0.0f; + + vec4F flattened_axis; + if (!cem_has_alpha) + flattened_axis = calc_pca_3D(pixel_stats.m_num_pixels, flattened_pixels, flattened_pixels_mean); + else + flattened_axis = calc_pca_4D(pixel_stats.m_num_pixels, flattened_pixels, flattened_pixels_mean); + + float best_l = BIG_FLOAT_VAL, best_h = -BIG_FLOAT_VAL; + //int best_l_index = 0, best_h_index = 0; + + for (uint32_t c = 0; c < pixel_stats.m_num_pixels; c++) + { + const vec4F px(flattened_pixels[c] - flattened_pixels_mean); + + float p = px.dot(flattened_axis); + if (p < best_l) + { + best_l = p; + //best_l_index = c; + } + + if (p > best_h) + { + best_h = p; + //best_h_index = c; + } + } // c + +#if 0 + vec4F low_color_f(pixel_stats.m_pixels_f[best_l_index]), high_color_f(pixel_stats.m_pixels_f[best_h_index]); +#else + vec4F low_color_f, high_color_f; + low_color_f = flattened_pixels_mean + flattened_axis * best_l; + high_color_f = flattened_pixels_mean + flattened_axis * best_h; + + low_color_f.clamp(0.0f, 1.0f); + high_color_f.clamp(0.0f, 1.0f); +#endif + + low_color_f[ccs_index] = pixel_stats.m_min_f[ccs_index]; + high_color_f[ccs_index] = pixel_stats.m_max_f[ccs_index]; + + uint8_t trial_blk_endpoints[astc_helpers::MAX_CEM_ENDPOINT_VALS] = { 0 }; + uint8_t trial_blk_weights0[ASTC_LDR_MAX_BLOCK_PIXELS] = { 0 }; + uint8_t trial_blk_weights1[ASTC_LDR_MAX_BLOCK_PIXELS] = { 0 }; + uint64_t trial_blk_error = UINT64_MAX; + bool trial_used_blue_contraction = false; + + bool tried_used_blue_contraction = false; + + if (cem_is_base_offset) + { + bool tried_base_ofs_clamped = false; + + 
try_cem9_13_sp_or_dp( + cem_index, ccs_index, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + low_color_f, high_color_f, + trial_blk_endpoints, trial_blk_weights0, trial_blk_weights1, + trial_blk_error, trial_used_blue_contraction, use_blue_contraction, tried_used_blue_contraction, tried_base_ofs_clamped); + + if ((pBase_ofs_clamped_flag) && (tried_base_ofs_clamped)) + *pBase_ofs_clamped_flag = true; + + if (tried_used_blue_contraction) + { + try_cem9_13_sp_or_dp( + cem_index, ccs_index, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + low_color_f, high_color_f, + trial_blk_endpoints, trial_blk_weights0, trial_blk_weights1, trial_blk_error, trial_used_blue_contraction, false, tried_used_blue_contraction, tried_base_ofs_clamped); + + if ((pBase_ofs_clamped_flag) && (tried_base_ofs_clamped)) + *pBase_ofs_clamped_flag = true; + } + } + else + { + try_cem8_12_dp( + cem_index, ccs_index, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + low_color_f, high_color_f, + trial_blk_endpoints, trial_blk_weights0, trial_blk_weights1, + trial_blk_error, trial_used_blue_contraction, use_blue_contraction, tried_used_blue_contraction); + + if (tried_used_blue_contraction) + { + try_cem8_12_dp( + cem_index, ccs_index, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + low_color_f, high_color_f, + trial_blk_endpoints, trial_blk_weights0, trial_blk_weights1, trial_blk_error, trial_used_blue_contraction, false, tried_used_blue_contraction); + } + } + + if (trial_blk_error == UINT64_MAX) + return cur_blk_error; + + if (trial_blk_error < cur_blk_error) + { + cur_blk_error = trial_blk_error; + memcpy(pEndpoint_vals, trial_blk_endpoints, total_endpoint_vals); + memcpy(pWeight_vals0, trial_blk_weights0, total_weights); + memcpy(pWeight_vals1, trial_blk_weights1, total_weights); + } + + vec4F flattened_pixels_min_f(pixel_stats.m_min_f); + flattened_pixels_min_f[ccs_index] = 0; + + vec4F 
flattened_pixels_max_f(pixel_stats.m_max_f); + flattened_pixels_max_f[ccs_index] = 0; + + for (uint32_t pass = 0; pass < enc_params.m_max_ls_passes; pass++) + { + vec4F xl, xh; + + // TODO: Switch between 4D or 3D + if (!compute_least_squares_endpoints_4D( + pixel_stats.m_num_pixels, trial_blk_weights0, get_ls_weights_ise(weight_ise_range), + &xl, &xh, flattened_pixels, flattened_pixels_min_f, flattened_pixels_max_f)) + { + break; + } + + color_rgba dec_l(0), dec_h(0); + decode_endpoints(cem_index, trial_blk_endpoints, endpoint_ise_range, dec_l, dec_h); + + xl[ccs_index] = dec_l[ccs_index] * (1.0f / 255.0f); + xh[ccs_index] = dec_h[ccs_index] * (1.0f / 255.0f); + + if (cem_is_base_offset) + { + bool tried_base_ofs_clamped = false; + + try_cem9_13_sp_or_dp( + cem_index, ccs_index, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + xl, xh, + trial_blk_endpoints, trial_blk_weights0, trial_blk_weights1, trial_blk_error, trial_used_blue_contraction, + use_blue_contraction, tried_used_blue_contraction, tried_base_ofs_clamped); + + if ((pBase_ofs_clamped_flag) && (tried_base_ofs_clamped)) + *pBase_ofs_clamped_flag = true; + + if (tried_used_blue_contraction) + { + // Try without blue contraction for a minor gain. 
+ try_cem9_13_sp_or_dp( + cem_index, ccs_index, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + xl, xh, + trial_blk_endpoints, trial_blk_weights0, trial_blk_weights1, trial_blk_error, trial_used_blue_contraction, + false, tried_used_blue_contraction, tried_base_ofs_clamped); + + if ((pBase_ofs_clamped_flag) && (tried_base_ofs_clamped)) + *pBase_ofs_clamped_flag = true; + } + } + else + { + try_cem8_12_dp( + cem_index, ccs_index, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + xl, xh, + trial_blk_endpoints, trial_blk_weights0, trial_blk_weights1, trial_blk_error, trial_used_blue_contraction, + use_blue_contraction, tried_used_blue_contraction); + + if (tried_used_blue_contraction) + { + // Try without blue contraction for a minor gain. + try_cem8_12_dp( + cem_index, ccs_index, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + xl, xh, + trial_blk_endpoints, trial_blk_weights0, trial_blk_weights1, trial_blk_error, trial_used_blue_contraction, + false, tried_used_blue_contraction); + } + } + + if (trial_blk_error >= cur_blk_error) + break; + + cur_blk_error = trial_blk_error; + memcpy(pEndpoint_vals, trial_blk_endpoints, total_endpoint_vals); + memcpy(pWeight_vals0, trial_blk_weights0, total_weights); + memcpy(pWeight_vals1, trial_blk_weights1, total_weights); + + } // pass + + const float ccs_bounds_min = pixel_stats.m_min_f[ccs_index]; + const float ccs_bounds_max = pixel_stats.m_max_f[ccs_index]; + float ccs_vals[ASTC_LDR_MAX_BLOCK_PIXELS]; + + if (ccs_bounds_min != ccs_bounds_max) + { + for (uint32_t i = 0; i < pixel_stats.m_num_pixels; i++) + ccs_vals[i] = pixel_stats.m_pixels_f[i][ccs_index]; + + for (uint32_t pass = 0; pass < enc_params.m_max_ls_passes; pass++) + { + float xl = 0.0f, xh = 0.0f; + + if (!compute_least_squares_endpoints_1D( + pixel_stats.m_num_pixels, trial_blk_weights1, get_ls_weights_ise(weight_ise_range), + &xl, &xh, ccs_vals, ccs_bounds_min, ccs_bounds_max)) + { + break; + } + + color_rgba 
dec_l(0), dec_h(0); + decode_endpoints(cem_index, trial_blk_endpoints, endpoint_ise_range, dec_l, dec_h); + + vec4F vl, vh; + for (uint32_t c = 0; c < 4; c++) + { + if (c == ccs_index) + { + vl[c] = xl; + vh[c] = xh; + } + else + { + vl[c] = (float)dec_l[c] * (1.0f / 255.0f); + vh[c] = (float)dec_h[c] * (1.0f / 255.0f); + } + } + + if (cem_is_base_offset) + { + bool tried_base_ofs_clamped = false; + + try_cem9_13_sp_or_dp( + cem_index, ccs_index, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + vl, vh, + trial_blk_endpoints, trial_blk_weights0, trial_blk_weights1, trial_blk_error, trial_used_blue_contraction, + use_blue_contraction, tried_used_blue_contraction, tried_base_ofs_clamped); + + if ((pBase_ofs_clamped_flag) && (tried_base_ofs_clamped)) + *pBase_ofs_clamped_flag = true; + + if (tried_used_blue_contraction) + { + // Try without blue contraction for a minor gain. + try_cem9_13_sp_or_dp( + cem_index, ccs_index, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + vl, vh, + trial_blk_endpoints, trial_blk_weights0, trial_blk_weights1, trial_blk_error, trial_used_blue_contraction, + false, tried_used_blue_contraction, tried_base_ofs_clamped); + + if ((pBase_ofs_clamped_flag) && (tried_base_ofs_clamped)) + *pBase_ofs_clamped_flag = true; + } + } + else + { + try_cem8_12_dp( + cem_index, ccs_index, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + vl, vh, + trial_blk_endpoints, trial_blk_weights0, trial_blk_weights1, trial_blk_error, trial_used_blue_contraction, + use_blue_contraction, tried_used_blue_contraction); + + if (tried_used_blue_contraction) + { + // Try without blue contraction for a minor gain. 
+ try_cem8_12_dp( + cem_index, ccs_index, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + vl, vh, + trial_blk_endpoints, trial_blk_weights0, trial_blk_weights1, trial_blk_error, trial_used_blue_contraction, + false, tried_used_blue_contraction); + } + } + + if (trial_blk_error >= cur_blk_error) + break; + + cur_blk_error = trial_blk_error; + memcpy(pEndpoint_vals, trial_blk_endpoints, total_endpoint_vals); + memcpy(pWeight_vals0, trial_blk_weights0, total_weights); + memcpy(pWeight_vals1, trial_blk_weights1, total_weights); + + } // pass + } + + if ((enc_params.m_total_weight_refine_passes) && ((weight_ise_range != astc_helpers::BISE_2_LEVELS) && (weight_ise_range != astc_helpers::BISE_64_LEVELS))) + { + weight_refiner refiner; + refiner.init(weight_ise_range, pixel_stats.m_num_pixels, pWeight_vals0); + + for (uint32_t pass = 0; pass < enc_params.m_total_weight_refine_passes; pass++) + { + refiner.refine(pass, trial_blk_weights0); + + vec4F xl, xh; + + if (!compute_least_squares_endpoints_4D( + pixel_stats.m_num_pixels, trial_blk_weights0, get_ls_weights_ise(weight_ise_range), + &xl, &xh, flattened_pixels, flattened_pixels_min_f, flattened_pixels_max_f)) + { + break; + } + + color_rgba dec_l(0), dec_h(0); + decode_endpoints(cem_index, trial_blk_endpoints, endpoint_ise_range, dec_l, dec_h); + + xl[ccs_index] = dec_l[ccs_index] * (1.0f / 255.0f); + xh[ccs_index] = dec_h[ccs_index] * (1.0f / 255.0f); + + if (cem_is_base_offset) + { + bool tried_base_ofs_clamped = false; + + try_cem9_13_sp_or_dp( + cem_index, ccs_index, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + xl, xh, + trial_blk_endpoints, trial_blk_weights0, trial_blk_weights1, trial_blk_error, trial_used_blue_contraction, + use_blue_contraction, tried_used_blue_contraction, tried_base_ofs_clamped); + + if ((pBase_ofs_clamped_flag) && (tried_base_ofs_clamped)) + *pBase_ofs_clamped_flag = true; + + if (tried_used_blue_contraction) + { + // Try without blue contraction for a 
minor gain. + try_cem9_13_sp_or_dp( + cem_index, ccs_index, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + xl, xh, + trial_blk_endpoints, trial_blk_weights0, trial_blk_weights1, trial_blk_error, trial_used_blue_contraction, + false, tried_used_blue_contraction, tried_base_ofs_clamped); + + if ((pBase_ofs_clamped_flag) && (tried_base_ofs_clamped)) + *pBase_ofs_clamped_flag = true; + } + } + else + { + try_cem8_12_dp( + cem_index, ccs_index, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + xl, xh, + trial_blk_endpoints, trial_blk_weights0, trial_blk_weights1, trial_blk_error, trial_used_blue_contraction, + use_blue_contraction, tried_used_blue_contraction); + + if (tried_used_blue_contraction) + { + // Try without blue contraction for a minor gain. + try_cem8_12_dp( + cem_index, ccs_index, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + xl, xh, + trial_blk_endpoints, trial_blk_weights0, trial_blk_weights1, trial_blk_error, trial_used_blue_contraction, + false, tried_used_blue_contraction); + } + } + + if (trial_blk_error >= cur_blk_error) + continue; + + cur_blk_error = trial_blk_error; + memcpy(pEndpoint_vals, trial_blk_endpoints, total_endpoint_vals); + memcpy(pWeight_vals0, trial_blk_weights0, total_weights); + memcpy(pWeight_vals1, trial_blk_weights1, total_weights); + + } // pass + + if (ccs_bounds_min != ccs_bounds_max) + { + refiner.init(weight_ise_range, pixel_stats.m_num_pixels, pWeight_vals1); + + for (uint32_t pass = 0; pass < WEIGHT_REFINER_MAX_PASSES; pass++) + { + refiner.refine(pass, trial_blk_weights1); + + float xl = 0.0f, xh = 0.0f; + + if (!compute_least_squares_endpoints_1D( + pixel_stats.m_num_pixels, trial_blk_weights1, get_ls_weights_ise(weight_ise_range), + &xl, &xh, ccs_vals, ccs_bounds_min, ccs_bounds_max)) + { + break; + } + + color_rgba dec_l(0), dec_h(0); + decode_endpoints(cem_index, trial_blk_endpoints, endpoint_ise_range, dec_l, dec_h); + + vec4F vl, vh; + for (uint32_t c = 0; c 
< 4; c++) + { + if (c == ccs_index) + { + vl[c] = xl; + vh[c] = xh; + } + else + { + vl[c] = (float)dec_l[c] * (1.0f / 255.0f); + vh[c] = (float)dec_h[c] * (1.0f / 255.0f); + } + } + + bool did_improve_res = false; + + if (cem_is_base_offset) + { + bool tried_base_ofs_clamped = false; + + did_improve_res = try_cem9_13_sp_or_dp( + cem_index, ccs_index, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + vl, vh, + trial_blk_endpoints, trial_blk_weights0, trial_blk_weights1, trial_blk_error, trial_used_blue_contraction, + use_blue_contraction, tried_used_blue_contraction, tried_base_ofs_clamped); + BASISU_NOTE_UNUSED(did_improve_res); + + if ((pBase_ofs_clamped_flag) && (tried_base_ofs_clamped)) + *pBase_ofs_clamped_flag = true; + + if (tried_used_blue_contraction) + { + // Try without blue contraction for a minor gain. + did_improve_res = try_cem9_13_sp_or_dp( + cem_index, ccs_index, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + vl, vh, + trial_blk_endpoints, trial_blk_weights0, trial_blk_weights1, trial_blk_error, trial_used_blue_contraction, + false, tried_used_blue_contraction, tried_base_ofs_clamped); + + if ((pBase_ofs_clamped_flag) && (tried_base_ofs_clamped)) + *pBase_ofs_clamped_flag = true; + } + } + else + { + did_improve_res = try_cem8_12_dp( + cem_index, ccs_index, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + vl, vh, + trial_blk_endpoints, trial_blk_weights0, trial_blk_weights1, trial_blk_error, trial_used_blue_contraction, + use_blue_contraction, tried_used_blue_contraction); + + if (tried_used_blue_contraction) + { + // Try without blue contraction for a minor gain. 
+ did_improve_res = try_cem8_12_dp( + cem_index, ccs_index, pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + vl, vh, + trial_blk_endpoints, trial_blk_weights0, trial_blk_weights1, trial_blk_error, trial_used_blue_contraction, + false, tried_used_blue_contraction); + } + } + + if (trial_blk_error >= cur_blk_error) + continue; + + cur_blk_error = trial_blk_error; + memcpy(pEndpoint_vals, trial_blk_endpoints, total_endpoint_vals); + memcpy(pWeight_vals0, trial_blk_weights0, total_weights); + memcpy(pWeight_vals1, trial_blk_weights1, total_weights); + + } // pass + } + } + + return cur_blk_error; + } + + // base scale rgb/rgba + // returns true if improved + static bool try_cem6_10( + uint32_t cem_index, + const pixel_stats_t& pixel_stats, const cem_encode_params& enc_params, + uint32_t endpoint_ise_range, uint32_t weight_ise_range, + float scale, float low_a_f, const vec4F& high_color_f, + uint8_t* pTrial_endpoint_vals, uint8_t* pTrial_weight_vals, uint64_t& trial_blk_error) + { + assert(g_initialized); + assert((cem_index == astc_helpers::CEM_LDR_RGB_BASE_SCALE) || (cem_index == astc_helpers::CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A)); + assert((pixel_stats.m_num_pixels) && (pixel_stats.m_num_pixels <= ASTC_LDR_MAX_BLOCK_PIXELS)); + assert((endpoint_ise_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (endpoint_ise_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE)); + assert(((weight_ise_range >= astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE) && (weight_ise_range <= astc_helpers::LAST_VALID_WEIGHT_ISE_RANGE)) || (weight_ise_range == astc_helpers::BISE_64_LEVELS)); + + uint8_t trial_endpoint_vals[astc_helpers::NUM_MODE10_ENDPOINTS] = { 0 }; + uint8_t trial_weight_vals[ASTC_LDR_MAX_BLOCK_PIXELS] = { 0 }; + + cem_encode_ldr_rgb_or_rgba_base_scale(cem_index, endpoint_ise_range, scale, low_a_f, high_color_f, trial_endpoint_vals); + + uint64_t trial_err = eval_solution( + pixel_stats, cem_index, trial_endpoint_vals, endpoint_ise_range, + 
trial_weight_vals, weight_ise_range, + enc_params); + + bool improved_flag = false; + if (trial_err < trial_blk_error) + { + trial_blk_error = trial_err; + memcpy(pTrial_endpoint_vals, trial_endpoint_vals, astc_helpers::get_num_cem_values(cem_index)); + memcpy(pTrial_weight_vals, trial_weight_vals, pixel_stats.m_num_pixels); + improved_flag = true; + } + + const uint32_t num_endpoint_vals = astc_helpers::get_num_cem_values(cem_index); + + // TODO + for (int delta = -1; delta <= 1; delta += 1) + { + if (!delta) + continue; + + uint8_t fixed_endpoint_vals[astc_helpers::NUM_MODE10_ENDPOINTS]; + memcpy(fixed_endpoint_vals, trial_endpoint_vals, num_endpoint_vals); + + fixed_endpoint_vals[3] = (uint8_t)astc_helpers::apply_delta_to_bise_endpoint_val(endpoint_ise_range, fixed_endpoint_vals[3], delta); + + trial_err = eval_solution( + pixel_stats, cem_index, fixed_endpoint_vals, endpoint_ise_range, + trial_weight_vals, weight_ise_range, + enc_params); + + if (trial_err < trial_blk_error) + { + trial_blk_error = trial_err; + memcpy(pTrial_endpoint_vals, fixed_endpoint_vals, astc_helpers::get_num_cem_values(cem_index)); + memcpy(pTrial_weight_vals, trial_weight_vals, pixel_stats.m_num_pixels); + improved_flag = true; + } + } + + return improved_flag; + } + + static bool try_cem6_10_dp( + uint32_t cem_index, uint32_t ccs_index, + const pixel_stats_t& pixel_stats, const cem_encode_params& enc_params, + uint32_t endpoint_ise_range, uint32_t weight_ise_range, + float scale, float low_a_f, const vec4F& high_color_f, + uint8_t* pTrial_endpoint_vals, uint8_t* pTrial_weight_vals0, uint8_t* pTrial_weight_vals1, uint64_t& trial_blk_error) + { + assert(g_initialized); + assert((cem_index == astc_helpers::CEM_LDR_RGB_BASE_SCALE) || (cem_index == astc_helpers::CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A)); + assert(ccs_index <= 3); + assert((pixel_stats.m_num_pixels) && (pixel_stats.m_num_pixels <= ASTC_LDR_MAX_BLOCK_PIXELS)); + assert((endpoint_ise_range >= 
astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (endpoint_ise_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE)); + assert(((weight_ise_range >= astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE) && (weight_ise_range <= astc_helpers::LAST_VALID_WEIGHT_ISE_RANGE)) || (weight_ise_range == astc_helpers::BISE_64_LEVELS)); + assert(pTrial_weight_vals0 && pTrial_weight_vals1); + + uint8_t trial_endpoint_vals[astc_helpers::NUM_MODE10_ENDPOINTS] = { 0 }; + uint8_t trial_weight_vals0[ASTC_LDR_MAX_BLOCK_PIXELS] = { 0 }; + uint8_t trial_weight_vals1[ASTC_LDR_MAX_BLOCK_PIXELS] = { 0 }; + + cem_encode_ldr_rgb_or_rgba_base_scale(cem_index, endpoint_ise_range, scale, low_a_f, high_color_f, trial_endpoint_vals); + + uint64_t trial_err = eval_solution_dp( + pixel_stats, cem_index, ccs_index, + trial_endpoint_vals, endpoint_ise_range, + trial_weight_vals0, trial_weight_vals1, weight_ise_range, + enc_params); + + bool improved_flag = false; + if (trial_err < trial_blk_error) + { + trial_blk_error = trial_err; + memcpy(pTrial_endpoint_vals, trial_endpoint_vals, astc_helpers::get_num_cem_values(cem_index)); + memcpy(pTrial_weight_vals0, trial_weight_vals0, pixel_stats.m_num_pixels); + memcpy(pTrial_weight_vals1, trial_weight_vals1, pixel_stats.m_num_pixels); + improved_flag = true; + } + + const uint32_t num_endpoint_vals = astc_helpers::get_num_cem_values(cem_index); + + for (int delta = -1; delta <= 1; delta += 1) + { + if (!delta) + continue; + + uint8_t fixed_endpoint_vals[astc_helpers::NUM_MODE10_ENDPOINTS]; + memcpy(fixed_endpoint_vals, trial_endpoint_vals, num_endpoint_vals); + + fixed_endpoint_vals[3] = (uint8_t)astc_helpers::apply_delta_to_bise_endpoint_val(endpoint_ise_range, fixed_endpoint_vals[3], delta); + + trial_err = eval_solution_dp( + pixel_stats, cem_index, ccs_index, + fixed_endpoint_vals, endpoint_ise_range, + trial_weight_vals0, trial_weight_vals1, weight_ise_range, + enc_params); + + if (trial_err < trial_blk_error) + { + trial_blk_error = trial_err; + 
memcpy(pTrial_endpoint_vals, fixed_endpoint_vals, astc_helpers::get_num_cem_values(cem_index)); + memcpy(pTrial_weight_vals0, trial_weight_vals0, pixel_stats.m_num_pixels); + memcpy(pTrial_weight_vals1, trial_weight_vals1, pixel_stats.m_num_pixels); + improved_flag = true; + } + } + + return improved_flag; + } + + // rgb/rgba base+scale + static uint64_t encode_cem6_10( + uint32_t cem_index, + const pixel_stats_t& pixel_stats, const cem_encode_params& enc_params, + uint32_t endpoint_ise_range, uint32_t weight_ise_range, + uint8_t* pEndpoint_vals, uint8_t* pWeight_vals, uint64_t cur_blk_error) + { + assert(g_initialized); + assert((cem_index == astc_helpers::CEM_LDR_RGB_BASE_SCALE) || (cem_index == astc_helpers::CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A)); + assert((pixel_stats.m_num_pixels) && (pixel_stats.m_num_pixels <= ASTC_LDR_MAX_BLOCK_PIXELS)); + assert((endpoint_ise_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (endpoint_ise_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE)); + assert(((weight_ise_range >= astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE) && (weight_ise_range <= astc_helpers::LAST_VALID_WEIGHT_ISE_RANGE)) || (weight_ise_range == astc_helpers::BISE_64_LEVELS)); + + const bool cem_has_alpha = (cem_index == astc_helpers::CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A); + + const uint32_t total_endpoint_vals = astc_helpers::get_num_cem_values(cem_index); + const uint32_t total_weights = pixel_stats.m_num_pixels; + + float best_l = BIG_FLOAT_VAL, best_h = -BIG_FLOAT_VAL; + //int best_l_index = 0, best_h_index = 0; + + for (uint32_t c = 0; c < pixel_stats.m_num_pixels; c++) + { + const vec3F px(pixel_stats.m_pixels_f[c]); + + float p = px.dot(pixel_stats.m_zero_rel_axis3); + + if (p < best_l) + { + best_l = p; + //best_l_index = c; + } + + if (p > best_h) + { + best_h = p; + //best_h_index = c; + } + } // c + + const float MAX_S = 255.0f / 256.0f; + const float EPS = 1e-6f; + + uint64_t trial_blk_error = UINT64_MAX; + uint8_t 
trial_blk_endpoints[astc_helpers::NUM_MODE10_ENDPOINTS] = { 0 }; + uint8_t trial_blk_weights[ASTC_LDR_MAX_BLOCK_PIXELS] = { 0 }; + + uint64_t best_blk_error = UINT64_MAX; + uint8_t best_blk_endpoints[astc_helpers::NUM_MODE10_ENDPOINTS] = { 0 }; + uint8_t best_blk_weights[ASTC_LDR_MAX_BLOCK_PIXELS] = { 0 }; + + vec3F low_color3_f(best_l * pixel_stats.m_zero_rel_axis3); + low_color3_f.clamp(0.0f, 1.0f); + + vec3F high_color3_f(best_h * pixel_stats.m_zero_rel_axis3); + high_color3_f.clamp(0.0f, 1.0f); + + float scale = MAX_S; + + float d = low_color3_f.dot(high_color3_f); + float nrm = high_color3_f.norm(); + if (nrm > 0.0f) + scale = saturate(d / nrm); + scale = minimum(scale, MAX_S); + + vec4F low_color_f(low_color3_f[0], low_color3_f[1], low_color3_f[2], pixel_stats.m_min_f[3]); + vec4F high_color_f(high_color3_f[0], high_color3_f[1], high_color3_f[2], pixel_stats.m_max_f[3]); + + try_cem6_10( + cem_index, + pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + scale, low_color_f[3], high_color_f, + trial_blk_endpoints, trial_blk_weights, trial_blk_error); + + best_blk_error = trial_blk_error; + memcpy(best_blk_endpoints, trial_blk_endpoints, total_endpoint_vals); + memcpy(best_blk_weights, trial_blk_weights, total_weights); + + const uint32_t NUM_PASSES = 2; + for (uint32_t pass = 0; pass < NUM_PASSES; pass++) + { + color_rgba actual_l(0), actual_h(0); + float actual_scale = 0; + decode_endpoints(cem_index, trial_blk_endpoints, endpoint_ise_range, actual_l, actual_h, &actual_scale); + + vec3F actual_high_f((float)actual_h[0], (float)actual_h[1], (float)actual_h[2]); + actual_high_f *= (1.0f / 255.0f); + + // invalid on raw weights + const auto& dequant_weights_tab = astc_helpers::g_dequant_tables.get_weight_tab(minimum(astc_helpers::BISE_32_LEVELS, weight_ise_range)).m_ISE_to_val; + + vec3F Pa(0.0f), Pb(0.0f); + float A = 0.0f, B = 0.0f, C = 0.0f; + + for (uint32_t i = 0; i < pixel_stats.m_num_pixels; i++) + { + const vec3F 
px(pixel_stats.m_pixels_f[i]); + + const int iw = (weight_ise_range == astc_helpers::BISE_64_LEVELS) ? trial_blk_weights[i] : dequant_weights_tab[trial_blk_weights[i]]; + float t = (float)iw * (1.0f / 64.0f); + float bi = t, ai = 1.0f - t; + + Pa += px * ai; + Pb += px * bi; + + A += ai * ai; + B += ai * bi; + C += bi * bi; + } + + vec3F new_high = actual_high_f; + float new_scale = actual_scale; + + float h2 = actual_high_f.norm(); + if ((h2 > EPS) && (A > EPS)) + { + new_scale = (Pa.dot(actual_high_f) / h2 - B) / A; + new_scale = clamp(new_scale, 0.0f, MAX_S); + } + + const float den = A * new_scale * new_scale + 2.0f * B * new_scale + C; + if (den > EPS) + { + new_high = (Pb + Pa * new_scale) / den; + } + + h2 = new_high.norm(); + if ((h2 > EPS) && (A > EPS)) + { + new_scale = (Pa.dot(new_high) / h2 - B) / A; + new_scale = clamp(new_scale, 0.0f, MAX_S); + } + + try_cem6_10( + cem_index, + pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + new_scale, (float)actual_l[3] * (1.0f / 255.0f), vec4F(new_high[0], new_high[1], new_high[2], (float)actual_h[3] * (1.0f / 255.0f)), + trial_blk_endpoints, trial_blk_weights, trial_blk_error); + + if (trial_blk_error >= best_blk_error) + break; + + best_blk_error = trial_blk_error; + memcpy(best_blk_endpoints, trial_blk_endpoints, total_endpoint_vals); + memcpy(best_blk_weights, trial_blk_weights, total_weights); + + } // pass + + if (cem_has_alpha) + { + // Try to refine low a/high given the current selectors. 
+ float bounds_min = pixel_stats.m_min_f[3]; + float bounds_max = pixel_stats.m_max_f[3]; + if (bounds_min != bounds_max) + { + float a_vals[ASTC_LDR_MAX_BLOCK_PIXELS]; + for (uint32_t i = 0; i < pixel_stats.m_num_pixels; i++) + a_vals[i] = pixel_stats.m_pixels_f[i][3]; + + const uint32_t TOTAL_PASSES = 1; + for (uint32_t pass = 0; pass < TOTAL_PASSES; pass++) + { + float xl = 0.0f, xh = 0.0f; + + if (compute_least_squares_endpoints_1D( + pixel_stats.m_num_pixels, best_blk_weights, get_ls_weights_ise(weight_ise_range), + &xl, &xh, a_vals, bounds_min, bounds_max)) + { + color_rgba actual_l(0), actual_h(0); + float actual_scale = 0; + decode_endpoints(cem_index, trial_blk_endpoints, endpoint_ise_range, actual_l, actual_h, &actual_scale); + + try_cem6_10( + cem_index, + pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + actual_scale, xl, vec4F((float)actual_h[0] * (1.0f / 255.0f), (float)actual_h[1] * (1.0f / 255.0f), (float)actual_h[2] * (1.0f / 255.0f), xh), // scale decoded RGB to [0,1] to match xl/xh, as the refinement loop above does + trial_blk_endpoints, trial_blk_weights, trial_blk_error); + + if (trial_blk_error < best_blk_error) + { + best_blk_error = trial_blk_error; + memcpy(best_blk_endpoints, trial_blk_endpoints, total_endpoint_vals); + memcpy(best_blk_weights, trial_blk_weights, total_weights); + } + else + { + break; + } + } + else + { + break; + } + } // pass + } + } + + if (best_blk_error < cur_blk_error) + { + cur_blk_error = best_blk_error; + memcpy(pEndpoint_vals, trial_blk_endpoints, total_endpoint_vals); + memcpy(pWeight_vals, trial_blk_weights, total_weights); + } + + return cur_blk_error; + } + + // rgba base+scale, dual plane a, ccs_index must be 3 + static uint64_t encode_cem10_dp_a( + uint32_t cem_index, + const pixel_stats_t& pixel_stats, const cem_encode_params& enc_params, + uint32_t endpoint_ise_range, uint32_t weight_ise_range, + uint8_t* pEndpoint_vals, uint8_t* pWeight_vals0, uint8_t* pWeight_vals1, uint64_t cur_blk_error) + { + assert(g_initialized); + assert(cem_index == astc_helpers::CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A); + assert((pixel_stats.m_num_pixels) && 
(pixel_stats.m_num_pixels <= ASTC_LDR_MAX_BLOCK_PIXELS)); + assert((endpoint_ise_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (endpoint_ise_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE)); + assert(((weight_ise_range >= astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE) && (weight_ise_range <= astc_helpers::LAST_VALID_WEIGHT_ISE_RANGE)) || (weight_ise_range == astc_helpers::BISE_64_LEVELS)); + + // RGB uses plane0, alpha plane1. So solve RGB first. + uint8_t rgba_endpoint_vals[astc_helpers::NUM_MODE10_ENDPOINTS] = { 0 }; + uint8_t rgb_weight_vals[ASTC_LDR_MAX_BLOCK_PIXELS] = { 0 }; + uint8_t a_weight_vals[ASTC_LDR_MAX_BLOCK_PIXELS] = { 0 }; + + // First just solve RGB, single plane. + uint64_t rgb_blk_error = encode_cem6_10( + astc_helpers::CEM_LDR_RGB_BASE_SCALE, + pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + rgba_endpoint_vals, rgb_weight_vals, UINT64_MAX); + + assert(rgb_blk_error != UINT64_MAX); + + if (rgb_blk_error == UINT64_MAX) + return cur_blk_error; + + const auto& endpoint_quant_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(endpoint_ise_range).m_val_to_ise; + + rgba_endpoint_vals[4] = endpoint_quant_tab[pixel_stats.m_min[3]]; + rgba_endpoint_vals[5] = endpoint_quant_tab[pixel_stats.m_max[3]]; + + uint64_t rgba_blk_error = eval_solution_dp( + pixel_stats, + cem_index, 3, + rgba_endpoint_vals, endpoint_ise_range, + rgb_weight_vals, a_weight_vals, weight_ise_range, + enc_params); + + assert(rgba_blk_error != UINT64_MAX); + + if (rgba_blk_error < cur_blk_error) + { + cur_blk_error = rgba_blk_error; + memcpy(pEndpoint_vals, rgba_endpoint_vals, astc_helpers::NUM_MODE10_ENDPOINTS); + memcpy(pWeight_vals0, rgb_weight_vals, pixel_stats.m_num_pixels); + memcpy(pWeight_vals1, a_weight_vals, pixel_stats.m_num_pixels); + + if (!cur_blk_error) + return cur_blk_error; + } + + float bounds_min = pixel_stats.m_min_f[3], bounds_max = pixel_stats.m_max_f[3]; + if (bounds_min != bounds_max) + { + float 
a_vals[ASTC_LDR_MAX_BLOCK_PIXELS]; + for (uint32_t i = 0; i < pixel_stats.m_num_pixels; i++) + a_vals[i] = pixel_stats.m_pixels_f[i][3]; + + const uint32_t TOTAL_PASSES = 2; + + uint8_t trial_rgba_endpoint_vals[astc_helpers::NUM_MODE10_ENDPOINTS] = { 0 }; + uint8_t trial_rgb_weight_vals[ASTC_LDR_MAX_BLOCK_PIXELS] = { 0 }; + uint8_t trial_a_weight_vals[ASTC_LDR_MAX_BLOCK_PIXELS] = { 0 }; + + for (uint32_t pass = 0; pass < TOTAL_PASSES; pass++) + { + float xl = 0.0f, xh = 0.0f; + + if (compute_least_squares_endpoints_1D( + pixel_stats.m_num_pixels, pass ? trial_a_weight_vals : a_weight_vals, get_ls_weights_ise(weight_ise_range), + &xl, &xh, a_vals, bounds_min, bounds_max)) + { + memcpy(trial_rgba_endpoint_vals, rgba_endpoint_vals, astc_helpers::NUM_MODE10_ENDPOINTS); + + trial_rgba_endpoint_vals[4] = precise_round_bise_endpoint_val(xl, endpoint_ise_range); + trial_rgba_endpoint_vals[5] = precise_round_bise_endpoint_val(xh, endpoint_ise_range); + + uint64_t trial_rgba_blk_error = eval_solution_dp( + pixel_stats, + cem_index, 3, + trial_rgba_endpoint_vals, endpoint_ise_range, + trial_rgb_weight_vals, trial_a_weight_vals, weight_ise_range, + enc_params); + + assert(trial_rgba_blk_error != UINT64_MAX); + + if (trial_rgba_blk_error < cur_blk_error) + { + cur_blk_error = trial_rgba_blk_error; + memcpy(pEndpoint_vals, trial_rgba_endpoint_vals, astc_helpers::NUM_MODE10_ENDPOINTS); + memcpy(pWeight_vals0, trial_rgb_weight_vals, pixel_stats.m_num_pixels); + memcpy(pWeight_vals1, trial_a_weight_vals, pixel_stats.m_num_pixels); + } + else + { + break; + } + } + else + { + break; + } + } // pass + } + + return cur_blk_error; + } + + // rgb/rgba base+scale, dual plane rgb (not a!) 
+ static uint64_t encode_cem6_10_dp_rgb( + uint32_t cem_index, uint32_t ccs_index, + const pixel_stats_t& pixel_stats, const cem_encode_params& enc_params, + uint32_t endpoint_ise_range, uint32_t weight_ise_range, + uint8_t* pEndpoint_vals, uint8_t* pWeight_vals0, uint8_t* pWeight_vals1, uint64_t cur_blk_error) + { + assert(g_initialized); + assert((cem_index == astc_helpers::CEM_LDR_RGB_BASE_SCALE) || (cem_index == astc_helpers::CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A)); + assert(ccs_index <= 2); + assert((pixel_stats.m_num_pixels) && (pixel_stats.m_num_pixels <= ASTC_LDR_MAX_BLOCK_PIXELS)); + assert((endpoint_ise_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (endpoint_ise_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE)); + assert(((weight_ise_range >= astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE) && (weight_ise_range <= astc_helpers::LAST_VALID_WEIGHT_ISE_RANGE)) || (weight_ise_range == astc_helpers::BISE_64_LEVELS)); + assert(pWeight_vals0 && pWeight_vals1); + + // First solve using a single plane, then we'll introduce the other plane's weights and tune the encoded H/s values + uint8_t sp_endpoint_vals[astc_helpers::NUM_MODE10_ENDPOINTS] = { 0 }; + uint8_t sp_weight_vals[ASTC_LDR_MAX_BLOCK_PIXELS] = { 0 }; + + uint64_t sp_block_err = encode_cem6_10( + cem_index, + pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + sp_endpoint_vals, sp_weight_vals, UINT64_MAX); + + assert(sp_block_err != UINT64_MAX); + BASISU_NOTE_UNUSED(sp_block_err); + + // Now compute both plane's weights using the initial H/s values + uint8_t trial_weights0_vals[ASTC_LDR_MAX_BLOCK_PIXELS] = { 0 }; + uint8_t trial_weights1_vals[ASTC_LDR_MAX_BLOCK_PIXELS] = { 0 }; + uint64_t dp_blk_error = eval_solution_dp( + pixel_stats, + cem_index, ccs_index, + sp_endpoint_vals, endpoint_ise_range, + trial_weights0_vals, trial_weights1_vals, weight_ise_range, + enc_params); + + if (dp_blk_error < cur_blk_error) + { + cur_blk_error = dp_blk_error; + memcpy(pEndpoint_vals, 
sp_endpoint_vals, astc_helpers::NUM_MODE10_ENDPOINTS); + memcpy(pWeight_vals0, trial_weights0_vals, pixel_stats.m_num_pixels); + memcpy(pWeight_vals1, trial_weights1_vals, pixel_stats.m_num_pixels); + + if (!cur_blk_error) + return cur_blk_error; + } + + // Compute refined H/s values using the current weights. + const float MAX_S = 255.0f / 256.0f; + const float EPS = 1e-6f; + + vec3F Pa(0.0f); // (Pa_r,Pa_g,Pa_b) + vec3F Pb(0.0f); // (Pb_r,Pb_g,Pb_b) + float A[3] = { 0 }, B[3] = { 0 }, C[3] = { 0 }; // per-channel + + // invalid on raw weights + const auto& dequant_weights_tab = astc_helpers::g_dequant_tables.get_weight_tab(minimum(astc_helpers::BISE_32_LEVELS, weight_ise_range)).m_ISE_to_val; + + for (uint32_t i = 0; i < pixel_stats.m_num_pixels; i++) + { + float w0, w1; + if (weight_ise_range == astc_helpers::BISE_64_LEVELS) + { + w0 = (float)trial_weights0_vals[i] * (1.0f / 64.0f); + w1 = (float)trial_weights1_vals[i] * (1.0f / 64.0f); + } + else + { + w0 = dequant_weights_tab[trial_weights0_vals[i]] * (1.0f / 64.0f); + w1 = dequant_weights_tab[trial_weights1_vals[i]] * (1.0f / 64.0f); + } + + float w[3] = { w0, w0, w0 }; + w[ccs_index] = w1; + + const vec3F& p = pixel_stats.m_pixels_f[i]; + + for (int c = 0; c < 3; ++c) + { + const float a = 1.0f - w[c]; + const float b = w[c]; + + Pa[c] += a * p[c]; + Pb[c] += b * p[c]; + A[c] += a * a; + B[c] += a * b; + C[c] += b * b; + } // c + } // i + + color_rgba actual_l(0), actual_h(0); + float actual_scale = 0; + decode_endpoints(cem_index, sp_endpoint_vals, endpoint_ise_range, actual_l, actual_h, &actual_scale); + + vec3F H((float)actual_h[0], (float)actual_h[1], (float)actual_h[2]); + H *= (1.0f / 255.0f); + + const float S1 = H[0] * Pa[0] + H[1] * Pa[1] + H[2] * Pa[2]; + float S2 = 0.0f, S3 = 0.0f; + for (int c = 0; c < 3; c++) + { + const float H2 = H[c] * H[c]; + S2 += H2 * A[c]; + S3 += H2 * B[c]; + } + + float new_s = actual_scale; + if (S2 > EPS) + new_s = (S1 - S3) / S2; + + new_s = clamp(new_s, 0.0f, 
MAX_S); + + vec3F new_H(0.0f); + for (int c = 0; c < 3; ++c) + { + const float den = A[c] * new_s * new_s + 2.0f * B[c] * new_s + C[c]; + + float Hc = 0.0f; + if (den > EPS) + { + const float num = Pb[c] + new_s * Pa[c]; + Hc = num / den; + } + new_H[c] = Hc; + } + + bool improved_flag = try_cem6_10_dp( + cem_index, ccs_index, + pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + new_s, (float)actual_l[3] * (1.0f / 255.0f), vec4F(new_H[0], new_H[1], new_H[2], (float)actual_h[3] * (1.0f / 255.0f)), + pEndpoint_vals, pWeight_vals0, pWeight_vals1, cur_blk_error); + (void)improved_flag; + + return cur_blk_error; + } + + // dispatcher + uint64_t cem_encode_pixels( + uint32_t cem_index, int ccs_index, + const pixel_stats_t& pixel_stats, const cem_encode_params& enc_params, + uint32_t endpoint_ise_range, uint32_t weight_ise_range, + uint8_t* pEndpoint_vals, uint8_t* pWeight_vals0, uint8_t* pWeight_vals1, uint64_t cur_blk_error, + bool use_blue_contraction, bool *pBase_ofs_clamped_flag) + { + assert(g_initialized); + assert((ccs_index >= -1) && (ccs_index <= 3)); + assert(astc_helpers::is_cem_ldr(cem_index)); + assert(pEndpoint_vals); + assert(pWeight_vals0); + + const bool dual_plane = (ccs_index >= 0); + + if (pBase_ofs_clamped_flag) + *pBase_ofs_clamped_flag = false; + + uint64_t blk_error = UINT64_MAX; + + switch (cem_index) + { + case astc_helpers::CEM_LDR_LUM_DIRECT: + { + assert(!dual_plane); + + blk_error = encode_cem0_4( + cem_index, + pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + pEndpoint_vals, pWeight_vals0, cur_blk_error); + + break; + } + case astc_helpers::CEM_LDR_LUM_ALPHA_DIRECT: + { + if (dual_plane) + { + assert(ccs_index == 3); + assert(pWeight_vals1); + + blk_error = encode_cem4_dp_a( + cem_index, + pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + pEndpoint_vals, pWeight_vals0, pWeight_vals1, cur_blk_error); + } + else + { + blk_error = encode_cem0_4( + cem_index, + pixel_stats, enc_params, + 
endpoint_ise_range, weight_ise_range, + pEndpoint_vals, pWeight_vals0, cur_blk_error); + } + break; + } + + case astc_helpers::CEM_LDR_RGB_DIRECT: + case astc_helpers::CEM_LDR_RGBA_DIRECT: + case astc_helpers::CEM_LDR_RGB_BASE_PLUS_OFFSET: + case astc_helpers::CEM_LDR_RGBA_BASE_PLUS_OFFSET: + { + if (dual_plane) + { + assert(pWeight_vals1); + blk_error = encode_cem8_12_9_13_dp( + cem_index, ccs_index, + pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + pEndpoint_vals, pWeight_vals0, pWeight_vals1, cur_blk_error, use_blue_contraction, pBase_ofs_clamped_flag); + } + else + { + blk_error = encode_cem8_12_9_13( + cem_index, + pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + pEndpoint_vals, pWeight_vals0, cur_blk_error, use_blue_contraction, pBase_ofs_clamped_flag); + } + break; + } + case astc_helpers::CEM_LDR_RGB_BASE_SCALE: + { + if (dual_plane) + { + assert(ccs_index <= 2); + assert(pWeight_vals1); + + blk_error = encode_cem6_10_dp_rgb( + cem_index, ccs_index, + pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + pEndpoint_vals, pWeight_vals0, pWeight_vals1, cur_blk_error); + } + else + { + blk_error = encode_cem6_10( + cem_index, + pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + pEndpoint_vals, pWeight_vals0, cur_blk_error); + } + break; + } + case astc_helpers::CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A: + { + if (dual_plane) + { + assert(pWeight_vals1); + + if (ccs_index == 3) + { + blk_error = encode_cem10_dp_a( + cem_index, + pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + pEndpoint_vals, pWeight_vals0, pWeight_vals1, cur_blk_error); + } + else + { + blk_error = encode_cem6_10_dp_rgb( + cem_index, ccs_index, + pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + pEndpoint_vals, pWeight_vals0, pWeight_vals1, cur_blk_error); + } + } + else + { + blk_error = encode_cem6_10( + cem_index, + pixel_stats, enc_params, + endpoint_ise_range, weight_ise_range, + pEndpoint_vals, 
pWeight_vals0, cur_blk_error); + } + break; + } + default: + { + assert(0); + break; + } + } + + return blk_error; + } + + //--------------------------------------------------------------------------------------------- + + float surrogate_evaluate_rgba_sp(const pixel_stats_t& ps, const vec4F& l, const vec4F& h, float* pWeights0, uint32_t num_weight_levels, + const cem_encode_params& enc_params, uint32_t flags) + { + assert(g_initialized); + assert((ps.m_num_pixels) && (ps.m_num_pixels <= ASTC_LDR_MAX_BLOCK_PIXELS)); + assert(pWeights0); + + const float wr = (float)enc_params.m_comp_weights[0], wg = (float)enc_params.m_comp_weights[1], + wb = (float)enc_params.m_comp_weights[2], wa = (float)enc_params.m_comp_weights[3]; + + float total_err = 0; + + const bool compute_error = ((flags & cFlagNoError) == 0); + + float lr = l[0], lg = l[1], lb = l[2], la = l[3]; + float dr = h[0] - lr, dg = h[1] - lg, db = h[2] - lb, da = h[3] - la; + float delta_col_nrm = dr * dr + dg * dg + db * db + da * da; + + if (flags & cFlagDisableQuant) + { + float f = (float)1.0f / (delta_col_nrm + REALLY_SMALL_FLOAT_VAL); + + lr *= -dr; lg *= -dg; lb *= -db; la *= -da; + + dr *= f; dg *= f; db *= f; da *= f; + float l_sum = (lr + lg + lb + la) * f; + + for (uint32_t i = 0; i < ps.m_num_pixels; i++) + { + const vec4F& p = ps.m_pixels_f[i]; + const float r = p[0], g = p[1], b = p[2], a = p[3]; + + float w = r * dr + g * dg + b * db + a * da + l_sum; + + if (w < 0.0f) + w = 0.0f; + else if (w > 1.0f) + w = 1.0f; + + pWeights0[i] = w; + + if (compute_error) + { + float one_minus_w = 1.0f - w; + + float dec_r = l[0] * one_minus_w + h[0] * w; + float dec_g = l[1] * one_minus_w + h[1] * w; + float dec_b = l[2] * one_minus_w + h[2] * w; + float dec_a = l[3] * one_minus_w + h[3] * w; + + float diff_r = r - dec_r; + float diff_g = g - dec_g; + float diff_b = b - dec_b; + float diff_a = a - dec_a; + + total_err += (wr * diff_r * diff_r) + (wg * diff_g * diff_g) + (wb * diff_b * diff_b) + (wa * diff_a * 
diff_a); + } + + } // i + } + else + { + const float inv_weight_levels = 1.0f / (float)(num_weight_levels - 1); + + float f = (float)(num_weight_levels - 1) / (delta_col_nrm + REALLY_SMALL_FLOAT_VAL); + + lr *= -dr; lg *= -dg; lb *= -db; la *= -da; + + dr *= f; dg *= f; db *= f; da *= f; + float l_sum_biased = (lr + lg + lb + la) * f + .5f; + + for (uint32_t i = 0; i < ps.m_num_pixels; i++) + { + const vec4F& p = ps.m_pixels_f[i]; + const float r = p[0], g = p[1], b = p[2], a = p[3]; + + float w = (float)fast_floorf_int(r * dr + g * dg + b * db + a * da + l_sum_biased) * inv_weight_levels; + + if (w < 0.0f) + w = 0.0f; + else if (w > 1.0f) + w = 1.0f; + + pWeights0[i] = w; + + if (compute_error) + { + float one_minus_w = 1.0f - w; + + float dec_r = l[0] * one_minus_w + h[0] * w; + float dec_g = l[1] * one_minus_w + h[1] * w; + float dec_b = l[2] * one_minus_w + h[2] * w; + float dec_a = l[3] * one_minus_w + h[3] * w; + + float diff_r = r - dec_r; + float diff_g = g - dec_g; + float diff_b = b - dec_b; + float diff_a = a - dec_a; + + total_err += (wr * diff_r * diff_r) + (wg * diff_g * diff_g) + (wb * diff_b * diff_b) + (wa * diff_a * diff_a); + } + + } // i + } + + return total_err; + + } + + float surrogate_evaluate_rgba_dp(uint32_t ccs_index, const pixel_stats_t& ps, const vec4F& l, const vec4F& h, float* pWeights0, float* pWeights1, uint32_t num_weight_levels, + const cem_encode_params& enc_params, uint32_t flags) + { + assert(g_initialized); + assert(ccs_index <= 3); + assert((ps.m_num_pixels) && (ps.m_num_pixels <= ASTC_LDR_MAX_BLOCK_PIXELS)); + assert(pWeights0 && pWeights1); + + const float inv_weight_levels = 1.0f / (float)(num_weight_levels - 1); + + const uint32_t c0 = (ccs_index + 1) & 3, c1 = (ccs_index + 2) & 3, c2 = (ccs_index + 3) & 3; + + const float orig_lx = l[c0], orig_ly = l[c1], orig_lz = l[c2], orig_lw = l[ccs_index]; + const float orig_hx = h[c0], orig_hy = h[c1], orig_hz = h[c2], orig_hw = h[ccs_index]; + + const float wx = 
(float)enc_params.m_comp_weights[c0], wy = (float)enc_params.m_comp_weights[c1], + wz = (float)enc_params.m_comp_weights[c2], ww = (float)enc_params.m_comp_weights[ccs_index]; + + float total_err = 0; + + const bool compute_error = ((flags & cFlagNoError) == 0); + + if (flags & cFlagDisableQuant) + { + // Plane 0 + { + float dx = orig_hx - orig_lx, dy = orig_hy - orig_ly, dz = orig_hz - orig_lz; + + float delta_col_nrm = dx * dx + dy * dy + dz * dz; + + float f = (float)1.0f / (delta_col_nrm + REALLY_SMALL_FLOAT_VAL); + + float lx = orig_lx, ly = orig_ly, lz = orig_lz; + lx *= -dx; ly *= -dy; lz *= -dz; + + dx *= f; dy *= f; dz *= f; + float l_sum = (lx + ly + lz) * f; + + for (uint32_t i = 0; i < ps.m_num_pixels; i++) + { + const vec4F& p = ps.m_pixels_f[i]; + const float x = p[c0], y = p[c1], z = p[c2]; + + float weight = x * dx + y * dy + z * dz + l_sum; + + if (weight < 0.0f) + weight = 0.0f; + else if (weight > 1.0f) + weight = 1.0f; + + pWeights0[i] = weight; + + if (compute_error) + { + float one_minus_weight = 1.0f - weight; + + float dec_x = orig_lx * one_minus_weight + orig_hx * weight; + float dec_y = orig_ly * one_minus_weight + orig_hy * weight; + float dec_z = orig_lz * one_minus_weight + orig_hz * weight; + + float diff_x = x - dec_x; + float diff_y = y - dec_y; + float diff_z = z - dec_z; + + total_err += (wx * diff_x * diff_x) + (wy * diff_y * diff_y) + (wz * diff_z * diff_z); + } + + } // i + } + + // Plane 1 + { + const float delta_w = orig_hw - orig_lw; + const float f = (fabsf(delta_w) > REALLY_SMALL_FLOAT_VAL) ? (1.0f / delta_w) : 0.0f; + + for (uint32_t i = 0; i < ps.m_num_pixels; i++) + { + const vec4F& p = ps.m_pixels_f[i]; + const float w = p[ccs_index]; + + float weight = (w - orig_lw) * f; + + if (weight < 0.0f) + weight = 0.0f; + else if (weight > 1.0f) + weight = 1.0f; + + pWeights1[i] = weight; + + if (compute_error) + { + // Error for DP here is 0 if there's no quant and L/H are sufficient to cover the entire span. 
+ if ((w < orig_lw) || (w > orig_hw)) + { + float one_minus_weight = 1.0f - weight; + + float dec_w = orig_lw * one_minus_weight + orig_hw * weight; + + float diff_w = w - dec_w; + + total_err += (ww * diff_w * diff_w); + } + } + + } // i + } + } + else + { + // Plane 0 + { + float dx = orig_hx - orig_lx, dy = orig_hy - orig_ly, dz = orig_hz - orig_lz; + + float delta_col_nrm = dx * dx + dy * dy + dz * dz; + + float f = (float)(num_weight_levels - 1) / (delta_col_nrm + REALLY_SMALL_FLOAT_VAL); + + float lx = orig_lx, ly = orig_ly, lz = orig_lz; + lx *= -dx; ly *= -dy; lz *= -dz; + + dx *= f; dy *= f; dz *= f; + float l_sum_biased = (lx + ly + lz) * f + .5f; + + for (uint32_t i = 0; i < ps.m_num_pixels; i++) + { + const vec4F& p = ps.m_pixels_f[i]; + const float x = p[c0], y = p[c1], z = p[c2]; + + float weight = (float)fast_floorf_int(x * dx + y * dy + z * dz + l_sum_biased) * inv_weight_levels; + + if (weight < 0.0f) + weight = 0.0f; + else if (weight > 1.0f) + weight = 1.0f; + + pWeights0[i] = weight; + + if (compute_error) + { + float one_minus_weight = 1.0f - weight; + + float dec_x = orig_lx * one_minus_weight + orig_hx * weight; + float dec_y = orig_ly * one_minus_weight + orig_hy * weight; + float dec_z = orig_lz * one_minus_weight + orig_hz * weight; + + float diff_x = x - dec_x; + float diff_y = y - dec_y; + float diff_z = z - dec_z; + + total_err += (wx * diff_x * diff_x) + (wy * diff_y * diff_y) + (wz * diff_z * diff_z); + } + + } // i + } + + // Plane 1 + { + const float delta_w = orig_hw - orig_lw; + const float f = (fabs(delta_w) > REALLY_SMALL_FLOAT_VAL) ? 
((float)(num_weight_levels - 1) / delta_w) : 0.0f; + + for (uint32_t i = 0; i < ps.m_num_pixels; i++) + { + const vec4F& p = ps.m_pixels_f[i]; + const float w = p[ccs_index]; + + float weight = (float)fast_floorf_int((w - orig_lw) * f + .5f) * inv_weight_levels; + + if (weight < 0.0f) + weight = 0.0f; + else if (weight > 1.0f) + weight = 1.0f; + + pWeights1[i] = weight; + + if (compute_error) + { + float one_minus_weight = 1.0f - weight; + + float dec_w = orig_lw * one_minus_weight + orig_hw * weight; + + float diff_w = w - dec_w; + + total_err += (ww * diff_w * diff_w); + } + + } // i + } + } + + return total_err; + } + + //--------------------------------------------------------------------------------------------- + + float surrogate_quant_endpoint_val(float e, uint32_t num_endpoint_levels, uint32_t flags) + { + assert((e >= 0.0f) && (e <= 1.0f)); + + if (flags & cFlagDisableQuant) + return e; + + const float endpoint_levels_minus_1 = (float)(num_endpoint_levels - 1); + const float inv_endpoint_levels = 1.0f / endpoint_levels_minus_1; + return (float)fast_roundf_pos_int(e * endpoint_levels_minus_1) * inv_endpoint_levels; + } + + vec4F surrogate_quant_endpoint(const vec4F& e, uint32_t num_endpoint_levels, uint32_t flags) + { + if (flags & cFlagDisableQuant) + return e; + + const float endpoint_levels_minus_1 = (float)(num_endpoint_levels - 1); + const float inv_endpoint_levels = 1.0f / endpoint_levels_minus_1; + + assert((e[0] >= 0.0f) && (e[0] <= 1.0f)); + assert((e[1] >= 0.0f) && (e[1] <= 1.0f)); + assert((e[2] >= 0.0f) && (e[2] <= 1.0f)); + assert((e[3] >= 0.0f) && (e[3] <= 1.0f)); + + vec4F res; + res[0] = (float)fast_roundf_pos_int(e[0] * endpoint_levels_minus_1) * inv_endpoint_levels; + res[1] = (float)fast_roundf_pos_int(e[1] * endpoint_levels_minus_1) * inv_endpoint_levels; + res[2] = (float)fast_roundf_pos_int(e[2] * endpoint_levels_minus_1) * inv_endpoint_levels; + res[3] = (float)fast_roundf_pos_int(e[3] * endpoint_levels_minus_1) * 
inv_endpoint_levels; + + return res; + } + + static uint32_t get_num_weight_levels(uint32_t weight_ise_range) + { + // astc_helpers::BISE_64_LEVELS=raw weights ([0,64], NOT [0,63]) + const uint32_t num_weight_levels = (weight_ise_range == astc_helpers::BISE_64_LEVELS) ? 65 : astc_helpers::get_ise_levels(weight_ise_range); + return num_weight_levels; + } + + //--------------------------------------------------------------------------------------------- + + static float cem_surrogate_encode_cem6_10_sp( + uint32_t cem_index, + const pixel_stats_t& ps, const cem_encode_params& enc_params, + uint32_t endpoint_ise_range, uint32_t weight_ise_range, + vec4F& low_endpoint, vec4F& high_endpoint, float &s, float* pWeights0, uint32_t flags) + { + const uint32_t num_endpoint_levels = astc_helpers::get_ise_levels(endpoint_ise_range); + + // astc_helpers::BISE_64_LEVELS=raw weights ([0,64], NOT [0,63]) + const uint32_t num_weight_levels = get_num_weight_levels(weight_ise_range); + + const bool cem_has_alpha = (cem_index == astc_helpers::CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A); + + float d_min = BIG_FLOAT_VAL, d_max = -BIG_FLOAT_VAL; + + for (uint32_t i = 0; i < ps.m_num_pixels; i++) + { + const vec4F p(ps.m_pixels_f[i]); + + float dot = p.dot3(ps.m_zero_rel_axis3); + + if (dot < d_min) + d_min = dot; + + if (dot > d_max) + d_max = dot; + } + + vec3F low_color3_f(d_min * ps.m_zero_rel_axis3); + low_color3_f.clamp(0.0f, 1.0f); + + vec3F high_color3_f(d_max * ps.m_zero_rel_axis3); + high_color3_f.clamp(0.0f, 1.0f); + + const float MAX_S = 255.0f / 256.0f; + + float scale = MAX_S; + + float d = low_color3_f.dot(high_color3_f); + float nrm = high_color3_f.norm(); + if (nrm > 0.0f) + scale = d / nrm; + + scale = clamp(scale, 0.0f, MAX_S); + + scale = surrogate_quant_endpoint_val(scale * (256.0f / 255.0f), num_endpoint_levels, flags); + + s = scale; + + high_endpoint = surrogate_quant_endpoint(vec4F(high_color3_f[0], high_color3_f[1], high_color3_f[2], cem_has_alpha ? 
ps.m_max_f[3] : 1.0f), num_endpoint_levels, flags); + + low_endpoint = vec4F(high_endpoint[0] * scale, high_endpoint[1] * scale, high_endpoint[2] * scale, cem_has_alpha ? ps.m_min_f[3] : 1.0f); + + return surrogate_evaluate_rgba_sp(ps, low_endpoint, high_endpoint, pWeights0, num_weight_levels, enc_params, flags); + } + + static float cem_surrogate_encode_cem6_10_dp( + uint32_t cem_index, uint32_t ccs_index, + const pixel_stats_t& ps, const cem_encode_params& enc_params, + uint32_t endpoint_ise_range, uint32_t weight_ise_range, + vec4F& low_endpoint, vec4F& high_endpoint, float& s, float* pWeights0, float* pWeights1, uint32_t flags) + { + const bool cem_has_alpha = (cem_index == astc_helpers::CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A); + BASISU_NOTE_UNUSED(cem_has_alpha); + + // astc_helpers::BISE_64_LEVELS=raw weights ([0,64], NOT [0,63]) + const uint32_t num_weight_levels = get_num_weight_levels(weight_ise_range); + + assert(cem_has_alpha || (ccs_index <= 2)); + + float temp_weights[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + cem_surrogate_encode_cem6_10_sp( + (ccs_index == 3) ? 
(uint32_t)astc_helpers::CEM_LDR_RGB_BASE_SCALE : cem_index, + ps, enc_params, endpoint_ise_range, weight_ise_range, low_endpoint, high_endpoint, s, temp_weights, flags); + + if (ccs_index == 3) + { + low_endpoint[3] = ps.m_min_f[3]; + high_endpoint[3] = ps.m_max_f[3]; + } + + return surrogate_evaluate_rgba_dp(ccs_index, ps, low_endpoint, high_endpoint, pWeights0, pWeights1, num_weight_levels, enc_params, flags); + } + + static float cem_surrogate_encode_cem8_12_sp( + uint32_t cem_index, + const pixel_stats_t& ps, const cem_encode_params& enc_params, + uint32_t endpoint_ise_range, uint32_t weight_ise_range, + vec4F& low_endpoint, vec4F& high_endpoint, float* pWeights0, uint32_t flags) + { + const uint32_t num_endpoint_levels = astc_helpers::get_ise_levels(endpoint_ise_range); + + // astc_helpers::BISE_64_LEVELS=raw weights ([0,64], NOT [0,63]) + const uint32_t num_weight_levels = get_num_weight_levels(weight_ise_range); + + const bool cem_has_alpha = (cem_index == astc_helpers::CEM_LDR_RGBA_DIRECT); + const uint32_t num_comps = cem_has_alpha ? 4 : 3; + + float d_min = BIG_FLOAT_VAL, d_max = -BIG_FLOAT_VAL; + uint32_t l_idx = 0, h_idx = 0; + + for (uint32_t i = 0; i < ps.m_num_pixels; i++) + { + const vec4F p(ps.m_pixels_f[i] - ps.m_mean_f); + + float dot = cem_has_alpha ? 
p.dot(ps.m_mean_rel_axis4) : p.dot3(ps.m_mean_rel_axis3); + + if (dot < d_min) + { + d_min = dot; + l_idx = i; + } + + if (dot > d_max) + { + d_max = dot; + h_idx = i; + } + } + + low_endpoint = surrogate_quant_endpoint(ps.m_pixels_f[l_idx], num_endpoint_levels, flags); + high_endpoint = surrogate_quant_endpoint(ps.m_pixels_f[h_idx], num_endpoint_levels, flags); + + if (!cem_has_alpha) + { + low_endpoint[3] = 1.0f; + high_endpoint[3] = 1.0f; + } + + if (low_endpoint.dot(vec4F(1.0f)) > high_endpoint.dot(vec4F(1.0f))) + std::swap(low_endpoint, high_endpoint); + + if ((flags & cFlagDisableQuant) == 0) + { + for (uint32_t i = 0; i < num_comps; i++) + { + if ((low_endpoint[i] == high_endpoint[i]) && (ps.m_min_f[i] != ps.m_max_f[i])) + { + const float inv_endpoint_levels = 1.0f / (float)(num_endpoint_levels - 1); + + float best_dist = BIG_FLOAT_VAL; + float best_l = 0.0f, best_h = 0.0f; + + for (int ld = -2; ld <= 0; ld++) + { + float actual_l = saturate(low_endpoint[i] + (float)ld * inv_endpoint_levels); + + for (int hd = 0; hd <= 2; hd++) + { + float actual_h = saturate(high_endpoint[i] + (float)hd * inv_endpoint_levels); + + float v0 = lerp(actual_l, actual_h, 1.0f / 3.0f); + float v1 = lerp(actual_l, actual_h, 2.0f / 3.0f); + assert(v0 <= v1); + + float dist0 = v0 - ps.m_min_f[0]; + float dist1 = v1 - ps.m_max_f[0]; + + float total_dist = dist0 * dist0 + dist1 * dist1; + if (total_dist < best_dist) + { + best_dist = total_dist; + best_l = actual_l; + best_h = actual_h; + } + } // hd + } // ld + + low_endpoint[i] = best_l; + high_endpoint[i] = best_h; + } + } + } + + return surrogate_evaluate_rgba_sp(ps, low_endpoint, high_endpoint, pWeights0, num_weight_levels, enc_params, flags); + } + + static float cem_surrogate_encode_cem8_12_dp( + uint32_t cem_index, uint32_t ccs_index, + const pixel_stats_t& ps, const cem_encode_params& enc_params, + uint32_t endpoint_ise_range, uint32_t weight_ise_range, + vec4F& low_endpoint, vec4F& high_endpoint, float* pWeights0, float 
*pWeights1, uint32_t flags) + { + assert(ccs_index <= 3); + const uint32_t num_endpoint_levels = astc_helpers::get_ise_levels(endpoint_ise_range); + + // astc_helpers::BISE_64_LEVELS=raw weights ([0,64], NOT [0,63]) + const uint32_t num_weight_levels = get_num_weight_levels(weight_ise_range); + + const bool cem_has_alpha = (cem_index == astc_helpers::CEM_LDR_RGBA_DIRECT); + const uint32_t num_comps = cem_has_alpha ? 4 : 3; + + assert(cem_has_alpha || (ccs_index <= 2)); + + vec4F flattened_pixels[ASTC_LDR_MAX_BLOCK_PIXELS]; + for (uint32_t i = 0; i < ps.m_num_pixels; i++) + { + flattened_pixels[i] = ps.m_pixels_f[i]; + + flattened_pixels[i][ccs_index] = 0.0f; + + if (!cem_has_alpha) + flattened_pixels[i][3] = 0.0f; + } + + vec4F flattened_pixels_mean(ps.m_mean_f); + flattened_pixels_mean[ccs_index] = 0.0f; + + if (!cem_has_alpha) + flattened_pixels_mean[3] = 0.0f; + + // suppress bogus gcc warning on flattened_pixels +#ifndef __clang__ +#if defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#endif +#endif + const vec4F flattened_axis(calc_pca_4D(ps.m_num_pixels, flattened_pixels, flattened_pixels_mean)); + +#ifndef __clang__ +#if defined(__GNUC__) +#pragma GCC diagnostic pop +#endif +#endif + + float best_dl = BIG_FLOAT_VAL, best_dh = -BIG_FLOAT_VAL; + int best_l_index = 0, best_h_index = 0; + + for (uint32_t c = 0; c < ps.m_num_pixels; c++) + { + const vec4F px(flattened_pixels[c] - flattened_pixels_mean); + + float p = px.dot(flattened_axis); + if (p < best_dl) + { + best_dl = p; + best_l_index = c; + } + + if (p > best_dh) + { + best_dh = p; + best_h_index = c; + } + } // c + + vec4F low_color_f(ps.m_pixels_f[best_l_index]), high_color_f(ps.m_pixels_f[best_h_index]); + + low_color_f[ccs_index] = 0.0f; + high_color_f[ccs_index] = 0.0f; + + if (!cem_has_alpha) + { + low_color_f[3] = 1.0f; + high_color_f[3] = 1.0f; + } + + if (low_color_f.dot(vec4F(1.0f)) > high_color_f.dot(vec4F(1.0f))) + std::swap(low_color_f, 
high_color_f); + + low_color_f[ccs_index] = ps.m_min_f[ccs_index]; + high_color_f[ccs_index] = ps.m_max_f[ccs_index]; + + if (!cem_has_alpha) + { + low_color_f[3] = 1.0f; + high_color_f[3] = 1.0f; + } + + low_endpoint = surrogate_quant_endpoint(low_color_f, num_endpoint_levels, flags); + high_endpoint = surrogate_quant_endpoint(high_color_f, num_endpoint_levels, flags); + + if ((flags & cFlagDisableQuant) == 0) + { + for (uint32_t i = 0; i < num_comps; i++) + { + if ((low_endpoint[i] == high_endpoint[i]) && (ps.m_min_f[i] != ps.m_max_f[i])) + { + const float inv_endpoint_levels = 1.0f / (float)(num_endpoint_levels - 1); + + float best_dist = BIG_FLOAT_VAL; + float best_l = 0.0f, best_h = 0.0f; + + for (int ld = -2; ld <= 0; ld++) + { + float actual_l = saturate(low_endpoint[i] + (float)ld * inv_endpoint_levels); + + for (int hd = 0; hd <= 2; hd++) + { + float actual_h = saturate(high_endpoint[i] + (float)hd * inv_endpoint_levels); + + float v0 = lerp(actual_l, actual_h, 1.0f / 3.0f); + float v1 = lerp(actual_l, actual_h, 2.0f / 3.0f); + assert(v0 <= v1); + + //if (v0 > v1) + // std::swap(v0, v1); + + float dist0 = v0 - ps.m_min_f[0]; + float dist1 = v1 - ps.m_max_f[0]; + + float total_dist = dist0 * dist0 + dist1 * dist1; + if (total_dist < best_dist) + { + best_dist = total_dist; + best_l = actual_l; + best_h = actual_h; + } + } // hd + } // ld + + low_endpoint[i] = best_l; + high_endpoint[i] = best_h; + } + } + } + + return surrogate_evaluate_rgba_dp(ccs_index, ps, low_endpoint, high_endpoint, pWeights0, pWeights1, num_weight_levels, enc_params, flags); + } + + static float cem_surrogate_encode_cem0_4_sp_or_dp( + uint32_t cem_index, int ccs_index, + const pixel_stats_t& ps, const cem_encode_params& enc_params, + uint32_t endpoint_ise_range, uint32_t weight_ise_range, + vec4F& low_endpoint, vec4F& high_endpoint, float* pWeights0, float *pWeights1, uint32_t flags) + { + const bool cem_has_alpha = (cem_index == astc_helpers::CEM_LDR_LUM_ALPHA_DIRECT); + const bool 
dual_plane = (ccs_index == 3); + + if (cem_index == astc_helpers::CEM_LDR_LUM_ALPHA_DIRECT) + { + assert((ccs_index == -1) || (ccs_index == 3)); + } + else + { + assert(cem_index == astc_helpers::CEM_LDR_LUM_DIRECT); + assert(ccs_index == -1); + } + + const uint32_t num_endpoint_levels = astc_helpers::get_ise_levels(endpoint_ise_range); + const uint32_t num_weight_levels = get_num_weight_levels(weight_ise_range); + + float lum_l = BIG_FLOAT_VAL, lum_h = -BIG_FLOAT_VAL; + + for (uint32_t i = 0; i < ps.m_num_pixels; i++) + { + const vec4F& px = ps.m_pixels_f[i]; + + float l = (px[0] + px[1] + px[2]) * (1.0f / 3.0f); + + lum_l = minimum(lum_l, l); + lum_h = maximum(lum_h, l); + } + + const float a_l = cem_has_alpha ? ps.m_min_f[3] : 1.0f; + const float a_h = cem_has_alpha ? ps.m_max_f[3] : 1.0f; + + low_endpoint.set(lum_l, lum_l, lum_l, a_l); + high_endpoint.set(lum_h, lum_h, lum_h, a_h); + + low_endpoint = surrogate_quant_endpoint(low_endpoint, num_endpoint_levels, flags); + high_endpoint = surrogate_quant_endpoint(high_endpoint, num_endpoint_levels, flags); + + if (dual_plane) + return surrogate_evaluate_rgba_dp(ccs_index, ps, low_endpoint, high_endpoint, pWeights0, pWeights1, num_weight_levels, enc_params, flags); + else + return surrogate_evaluate_rgba_sp(ps, low_endpoint, high_endpoint, pWeights0, num_weight_levels, enc_params, flags); + } + + float cem_surrogate_encode_pixels( + uint32_t cem_index, int ccs_index, + const pixel_stats_t& ps, const cem_encode_params& enc_params, + uint32_t endpoint_ise_range, uint32_t weight_ise_range, + vec4F &low_endpoint, vec4F &high_endpoint, float &s, float* pWeights0, float* pWeights1, uint32_t flags) + { + assert(g_initialized); + assert((ccs_index >= -1) && (ccs_index <= 3)); + assert(astc_helpers::is_cem_ldr(cem_index)); + assert(pWeights0 && pWeights1); + + const bool dual_plane = (ccs_index >= 0); + + switch (cem_index) + { + case astc_helpers::CEM_LDR_LUM_DIRECT: + case astc_helpers::CEM_LDR_LUM_ALPHA_DIRECT: + { + 
return cem_surrogate_encode_cem0_4_sp_or_dp( + cem_index, ccs_index, + ps, enc_params, + endpoint_ise_range, weight_ise_range, + low_endpoint, high_endpoint, pWeights0, pWeights1, flags); + } + case astc_helpers::CEM_LDR_RGB_BASE_SCALE: + case astc_helpers::CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A: + { + if (dual_plane) + { + return cem_surrogate_encode_cem6_10_dp( + cem_index, ccs_index, + ps, enc_params, + endpoint_ise_range, weight_ise_range, + low_endpoint, high_endpoint, s, pWeights0, pWeights1, flags); + } + else + { + return cem_surrogate_encode_cem6_10_sp( + cem_index, + ps, enc_params, + endpoint_ise_range, weight_ise_range, + low_endpoint, high_endpoint, s, pWeights0, flags); + } + break; + } + case astc_helpers::CEM_LDR_RGB_DIRECT: + case astc_helpers::CEM_LDR_RGBA_DIRECT: + { + if (dual_plane) + { + return cem_surrogate_encode_cem8_12_dp( + cem_index, ccs_index, + ps, enc_params, + endpoint_ise_range, weight_ise_range, + low_endpoint, high_endpoint, pWeights0, pWeights1, flags); + } + else + { + return cem_surrogate_encode_cem8_12_sp( + cem_index, + ps, enc_params, + endpoint_ise_range, weight_ise_range, + low_endpoint, high_endpoint, pWeights0, flags); + } + + break; + } + default: + assert(0); + break; + } + + return BIG_FLOAT_VAL; + } + + //--------------------------------------------------------------------------------------------- + + uint8_t g_part3_mapping[NUM_PART3_MAPPINGS][3] = + { + { 0, 1, 2 }, + { 1, 2, 0 }, + { 2, 0, 1 }, + { 0, 2, 1 }, + { 1, 0, 2 }, + { 2, 1, 0 } + }; + + partition_pattern_vec::partition_pattern_vec() + { + clear(); + } + + partition_pattern_vec::partition_pattern_vec(const partition_pattern_vec& other) + { + *this = other; + } + + partition_pattern_vec::partition_pattern_vec(uint32_t width, uint32_t height, const uint8_t *pParts) : + m_width(width), m_height(height) + { + if (pParts) + { + memcpy(m_parts, pParts, get_total()); + } + } + + void partition_pattern_vec::init(uint32_t width, uint32_t height, const uint8_t* pParts) 
+ { + m_width = width; + m_height = height; + if (pParts) + { + const uint32_t num_texels = get_total(); + memcpy(m_parts, pParts, num_texels); + } + } + + void partition_pattern_vec::clear() + { + m_width = 0; + m_height = 0; + memset(m_parts, 0, sizeof(m_parts)); + } + + partition_pattern_vec& partition_pattern_vec::operator= (const partition_pattern_vec& rhs) + { + if (this == &rhs) + return *this; + + m_width = rhs.m_width; + m_height = rhs.m_height; + memcpy(m_parts, rhs.m_parts, get_total()); + + return *this; + } + + // misnamed- just SAD distance, not square + int partition_pattern_vec::get_squared_distance(const partition_pattern_vec& other) const + { + const uint32_t total_pixels = get_total(); + + int total_dist = 0; + for (uint32_t i = 0; i < total_pixels; i++) + total_dist += iabs((int)m_parts[i] - (int)other.m_parts[i]); + + return total_dist; + } + + partition_pattern_vec partition_pattern_vec::get_permuted2(uint32_t permute_index) const + { + assert(permute_index <= 1); + const uint32_t total_pixels = get_total(); + + partition_pattern_vec res(m_width, m_height); + for (uint32_t i = 0; i < total_pixels; i++) + { + assert(m_parts[i] <= 1); + res.m_parts[i] = (uint8_t)(m_parts[i] ^ permute_index); + } + + return res; + } + + partition_pattern_vec partition_pattern_vec::get_permuted3(uint32_t permute_index) const + { + assert(permute_index <= 5); + const uint32_t total_pixels = get_total(); + + partition_pattern_vec res(m_width, m_height); + for (uint32_t i = 0; i < total_pixels; i++) + { + assert(m_parts[i] <= 2); + res.m_parts[i] = g_part3_mapping[permute_index][m_parts[i]]; + } + + return res; + } + + partition_pattern_vec partition_pattern_vec::get_canonicalized() const + { + partition_pattern_vec res(m_width, m_height); + + const uint32_t total_pixels = get_total(); + + int new_labels[4] = { -1, -1, -1, -1 }; + + uint32_t next_index = 0; + for (uint32_t i = 0; i < total_pixels; i++) + { + uint32_t p = m_parts[i]; + assert(p <= 3); + + if 
(new_labels[p] == -1) + new_labels[p] = next_index++; + + res.m_parts[i] = (uint8_t)new_labels[p]; + } + + return res; + } + + // This requires no redundant patterns, i.e. all must be unique. + bool vp_tree::init(uint32_t n, const partition_pattern_vec* pUnique_pats) + { + clear(); + + uint_vec pat_indices(n); + for (uint32_t i = 0; i < n; i++) + pat_indices[i] = i; + + std::pair root_idx = find_best_vantage_point(n, pUnique_pats, pat_indices); + + if (root_idx.first == -1) + return false; + + m_nodes.resize(1); + m_nodes[0].m_vantage_point = pUnique_pats[root_idx.first]; + m_nodes[0].m_point_index = root_idx.first; + m_nodes[0].m_dist = root_idx.second; + m_nodes[0].m_inner_node = -1; + m_nodes[0].m_outer_node = -1; + + uint_vec inner_list, outer_list; + + inner_list.reserve(n / 2); + outer_list.reserve(n / 2); + + for (uint32_t pat_index = 0; pat_index < n; pat_index++) + { + if ((int)pat_index == root_idx.first) + continue; + + const float dist = m_nodes[0].m_vantage_point.get_distance(pUnique_pats[pat_index]); + + if (dist <= root_idx.second) + inner_list.push_back(pat_index); + else + outer_list.push_back(pat_index); + } + + if (inner_list.size()) + { + m_nodes[0].m_inner_node = create_node(n, pUnique_pats, inner_list); + if (m_nodes[0].m_inner_node < 0) + return false; + } + + if (outer_list.size()) + { + m_nodes[0].m_outer_node = create_node(n, pUnique_pats, outer_list); + if (m_nodes[0].m_outer_node < 0) + return false; + } + + return true; + } + + void vp_tree::find_nearest(uint32_t num_subsets, const partition_pattern_vec& desired_pat, result_queue& results, uint32_t max_results) const + { + assert((num_subsets >= 2) && (num_subsets <= 3)); + + results.clear(); + + if (!m_nodes.size()) + return; + + uint32_t num_desired_pats; + partition_pattern_vec desired_pats[NUM_PART3_MAPPINGS]; + + if (num_subsets == 2) + { + num_desired_pats = 2; + for (uint32_t i = 0; i < 2; i++) + desired_pats[i] = desired_pat.get_permuted2(i); + } + else + { + num_desired_pats = 
NUM_PART3_MAPPINGS; + for (uint32_t i = 0; i < NUM_PART3_MAPPINGS; i++) + desired_pats[i] = desired_pat.get_permuted3(i); + } + +#if 0 + find_nearest_at_node(0, num_desired_pats, desired_pats, results, max_results); +#else + find_nearest_at_node_non_recursive(0, num_desired_pats, desired_pats, results, max_results); +#endif + } + + void vp_tree::find_nearest_at_node(int node_index, uint32_t num_desired_pats, const partition_pattern_vec* pDesired_pats, result_queue& results, uint32_t max_results) const + { + float best_dist_to_vantage = BIG_FLOAT_VAL; + uint32_t best_mapping = 0; + for (uint32_t i = 0; i < num_desired_pats; i++) + { + float dist = pDesired_pats[i].get_distance(m_nodes[node_index].m_vantage_point); + if (dist < best_dist_to_vantage) + { + best_dist_to_vantage = dist; + best_mapping = i; + } + } + + result r; + r.m_dist = best_dist_to_vantage; + r.m_mapping_index = best_mapping; + r.m_pat_index = m_nodes[node_index].m_point_index; + + results.insert(r, max_results); + + if (best_dist_to_vantage <= m_nodes[node_index].m_dist) + { + // inner first + if (m_nodes[node_index].m_inner_node >= 0) + find_nearest_at_node(m_nodes[node_index].m_inner_node, num_desired_pats, pDesired_pats, results, max_results); + + if (m_nodes[node_index].m_outer_node >= 0) + { + if ((results.get_size() < max_results) || + ((m_nodes[node_index].m_dist - best_dist_to_vantage) <= results.get_highest_dist()) + ) + { + find_nearest_at_node(m_nodes[node_index].m_outer_node, num_desired_pats, pDesired_pats, results, max_results); + } + } + } + else + { + // outer first + if (m_nodes[node_index].m_outer_node >= 0) + find_nearest_at_node(m_nodes[node_index].m_outer_node, num_desired_pats, pDesired_pats, results, max_results); + + if (m_nodes[node_index].m_inner_node >= 0) + { + if ((results.get_size() < max_results) || + ((best_dist_to_vantage - m_nodes[node_index].m_dist) <= results.get_highest_dist()) + ) + { + find_nearest_at_node(m_nodes[node_index].m_inner_node, num_desired_pats, 
pDesired_pats, results, max_results); + } + } + } + } + + void vp_tree::find_nearest_at_node_non_recursive(int init_node_index, uint32_t num_desired_pats, const partition_pattern_vec* pDesired_pats, result_queue& results, uint32_t max_results) const + { + uint_vec node_stack; + node_stack.reserve(16); + node_stack.push_back(init_node_index); + + do + { + const uint32_t node_index = node_stack.back(); + node_stack.pop_back(); + + float best_dist_to_vantage = BIG_FLOAT_VAL; + uint32_t best_mapping = 0; + for (uint32_t i = 0; i < num_desired_pats; i++) + { + float dist = pDesired_pats[i].get_distance(m_nodes[node_index].m_vantage_point); + if (dist < best_dist_to_vantage) + { + best_dist_to_vantage = dist; + best_mapping = i; + } + } + + result r; + r.m_dist = best_dist_to_vantage; + r.m_mapping_index = best_mapping; + r.m_pat_index = m_nodes[node_index].m_point_index; + + results.insert(r, max_results); + + if (best_dist_to_vantage <= m_nodes[node_index].m_dist) + { + if (m_nodes[node_index].m_outer_node >= 0) + { + if ((results.get_size() < max_results) || + ((m_nodes[node_index].m_dist - best_dist_to_vantage) <= results.get_highest_dist()) + ) + { + node_stack.push_back(m_nodes[node_index].m_outer_node); + } + } + + // inner first + if (m_nodes[node_index].m_inner_node >= 0) + { + node_stack.push_back(m_nodes[node_index].m_inner_node); + } + } + else + { + if (m_nodes[node_index].m_inner_node >= 0) + { + if ((results.get_size() < max_results) || + ((best_dist_to_vantage - m_nodes[node_index].m_dist) <= results.get_highest_dist()) + ) + { + node_stack.push_back(m_nodes[node_index].m_inner_node); + } + } + + // outer first + if (m_nodes[node_index].m_outer_node >= 0) + { + node_stack.push_back(m_nodes[node_index].m_outer_node); + } + } + + } while (!node_stack.empty()); + } + + // returns the index of the new node, or -1 on error + int vp_tree::create_node(uint32_t n, const partition_pattern_vec* pUnique_pats, const uint_vec& pat_indices) + { + std::pair root_idx = 
find_best_vantage_point(n, pUnique_pats, pat_indices); + + if (root_idx.first < 0) + return -1; + + m_nodes.resize(m_nodes.size() + 1); + const uint32_t new_node_index = m_nodes.size_u32() - 1; + + m_nodes[new_node_index].m_vantage_point = pUnique_pats[root_idx.first]; + m_nodes[new_node_index].m_point_index = root_idx.first; + m_nodes[new_node_index].m_dist = root_idx.second; + m_nodes[new_node_index].m_inner_node = -1; + m_nodes[new_node_index].m_outer_node = -1; + + uint_vec inner_list, outer_list; + + inner_list.reserve(pat_indices.size_u32() / 2); + outer_list.reserve(pat_indices.size_u32() / 2); + + for (uint32_t pat_indices_iter = 0; pat_indices_iter < pat_indices.size(); pat_indices_iter++) + { + const uint32_t pat_index = pat_indices[pat_indices_iter]; + + if ((int)pat_index == root_idx.first) + continue; + + const float dist = m_nodes[new_node_index].m_vantage_point.get_distance(pUnique_pats[pat_index]); + + if (dist <= root_idx.second) + inner_list.push_back(pat_index); + else + outer_list.push_back(pat_index); + } + + if (inner_list.size()) + m_nodes[new_node_index].m_inner_node = create_node(n, pUnique_pats, inner_list); + + if (outer_list.size()) + m_nodes[new_node_index].m_outer_node = create_node(n, pUnique_pats, outer_list); + + return new_node_index; + } + + // returns the pattern index of the vantage point (-1 on error), and the optimal split distance + std::pair vp_tree::find_best_vantage_point(uint32_t num_unique_pats, const partition_pattern_vec* pUnique_pats, const uint_vec& pat_indices) + { + BASISU_NOTE_UNUSED(num_unique_pats); + + const uint32_t n = pat_indices.size_u32(); + + assert(n); + if (n == 1) + return std::pair(pat_indices[0], 0.0f); + + float best_split_metric = -1.0f; + int best_split_pat = -1; + float best_split_dist = 0.0f; + float best_split_var = 0.0f; + + basisu::vector< std::pair > dists; + dists.reserve(n); + + float_vec float_dists; + float_dists.reserve(n); + + for (uint32_t pat_indices_iter = 0; pat_indices_iter < n; 
pat_indices_iter++) + { + const uint32_t split_pat_index = pat_indices[pat_indices_iter]; + assert(split_pat_index < num_unique_pats); + + const partition_pattern_vec& trial_vantage = pUnique_pats[split_pat_index]; + + dists.resize(0); + float_dists.resize(0); + + for (uint32_t j = 0; j < n; j++) + { + const uint32_t pat_index = pat_indices[j]; + assert(pat_index < num_unique_pats); + + if (pat_index == split_pat_index) + continue; + + float dist = trial_vantage.get_distance(pUnique_pats[pat_index]); + dists.emplace_back(std::pair(dist, pat_index)); + + float_dists.push_back(dist); + } + + stats s; + s.calc(float_dists.size_u32(), float_dists.data()); + + std::sort(dists.begin(), dists.end(), [](const auto& a, const auto& b) { + return a.first < b.first; + }); + + const uint32_t num_dists = dists.size_u32(); + float split_dist = dists[num_dists / 2].first; + if ((num_dists & 1) == 0) + split_dist = (split_dist + dists[(num_dists / 2) - 1].first) * .5f; + + uint32_t total_inner = 0, total_outer = 0; + + for (uint32_t j = 0; j < n; j++) + { + const uint32_t pat_index = pat_indices[j]; + if (pat_index == split_pat_index) + continue; + + float dist = trial_vantage.get_distance(pUnique_pats[pat_index]); + + if (dist <= split_dist) + total_inner++; + else + total_outer++; + } + + float split_metric = (float)minimum(total_inner, total_outer) / (float)maximum(total_inner, total_outer); + + if ((split_metric > best_split_metric) || + ((split_metric == best_split_metric) && (s.m_var > best_split_var))) + { + best_split_metric = split_metric; + best_split_dist = split_dist; + best_split_pat = split_pat_index; + best_split_var = (float)s.m_var; + } + } + + return std::pair(best_split_pat, best_split_dist); + } + + void partitions_data::init(uint32_t num_partitions, uint32_t block_width, uint32_t block_height, bool init_vp_tree) + { + assert((num_partitions >= 2) && (num_partitions <= 4)); + + //const uint32_t total_texels = block_width * block_height; + + m_width = 
block_width; + m_height = block_height; + m_num_partitions = num_partitions; + + m_part_vp_tree.clear(); + + for (uint32_t i = 0; i < 1024; i++) + { + m_part_seed_to_unique_index[i] = -1; + m_unique_index_to_part_seed[i] = -1; + } + + //const bool is_small_block = astc_helpers::is_small_block(block_width, block_height); + + partition_hash_map part_hash; + part_hash.reserve(1024); + m_total_unique_patterns = 0; + + clear_obj(m_partition_pat_histograms); + + for (uint32_t seed_index = 0; seed_index < astc_helpers::NUM_PARTITION_PATTERNS; seed_index++) + { + partition_pattern_vec pat; + uint32_t part_hist[4] = { 0 }; + + pat.init(block_width, block_height); + + for (uint32_t y = 0; y < block_height; y++) + { + for (uint32_t x = 0; x < block_width; x++) + { + //const uint8_t p = (uint8_t)astc_helpers::compute_texel_partition(seed_index, x, y, 0, m_num_partitions, is_small_block); + const uint8_t p = (uint8_t)astc_helpers::get_precomputed_texel_partition(block_width, block_height, seed_index, x, y, num_partitions); + + assert((p < m_num_partitions) && (p < 4)); + + pat(x, y) = p; + + part_hist[p]++; + } // x + } // y + + bool skip_pat = false; + for (uint32_t i = 0; i < m_num_partitions; i++) + { + if (!part_hist[i]) + { + skip_pat = true; + break; + } + } + if (skip_pat) + continue; + + partition_pattern_vec std_pat(pat.get_canonicalized()); + + if (part_hash.contains(std_pat)) + continue; + + if (num_partitions == 2) + { + assert(!part_hash.contains(pat)); + assert(!part_hash.contains(pat.get_permuted2(1))); + } + else if (num_partitions == 3) + { + for (uint32_t i = 0; i < partition_pattern_vec::cMaxPermute3Index; i++) + { + assert(!part_hash.contains(pat.get_permuted3(i))); + } + } + + for (uint32_t c = 0; c < 4; c++) + m_partition_pat_histograms[m_total_unique_patterns].m_hist[c] = (uint8_t)part_hist[c]; + + part_hash.insert(std_pat, std::make_pair(seed_index, m_total_unique_patterns)); + + m_part_seed_to_unique_index[seed_index] = (int16_t)m_total_unique_patterns; 
+ m_unique_index_to_part_seed[m_total_unique_patterns] = (int16_t)seed_index; + + m_partition_pats[m_total_unique_patterns] = pat; + + m_total_unique_patterns++; + + } // seed_index + + if (init_vp_tree) + m_part_vp_tree.init(m_total_unique_patterns, m_partition_pats); + } + +} // namespace astc_ldr + +} // namespace basisu diff --git a/external/basis_universal/encoder/basisu_astc_ldr_common.h b/external/basis_universal/encoder/basisu_astc_ldr_common.h new file mode 100644 index 0000000000..76e7e3f1ff --- /dev/null +++ b/external/basis_universal/encoder/basisu_astc_ldr_common.h @@ -0,0 +1,445 @@ +// File: basisu_astc_ldr_common.h +#pragma once +#include "basisu_enc.h" +#include "basisu_gpu_texture.h" +#include + +namespace basisu +{ + +namespace astc_ldr +{ + const uint32_t ASTC_LDR_MAX_BLOCK_WIDTH = astc_helpers::MAX_BLOCK_DIM; // 12 + const uint32_t ASTC_LDR_MAX_BLOCK_HEIGHT = astc_helpers::MAX_BLOCK_DIM; // 12 + const uint32_t ASTC_LDR_MAX_BLOCK_PIXELS = astc_helpers::MAX_BLOCK_PIXELS; // 144 + const uint32_t ASTC_LDR_MAX_RAW_WEIGHTS = astc_helpers::MAX_WEIGHT_INTERPOLANT_VALUE + 1; // 65 + + const uint32_t WEIGHT_REFINER_MAX_PASSES = 17; + + inline basist::color_rgba convert_to_basist_color_rgba(const color_rgba& c) + { + return basist::color_rgba(c.r, c.g, c.b, c.a); + } + + struct cem_encode_params + { + uint32_t m_comp_weights[4]; + bool m_decode_mode_srgb; // todo: store astc_helpers::cDecodeModeSRGB8 : astc_helpers::cDecodeModeLDR8 instead, also the alpha mode for srgb because the decoders are broken + + const uint8_t* m_pForced_weight_vals0; + const uint8_t* m_pForced_weight_vals1; + + uint32_t m_max_ls_passes, m_total_weight_refine_passes; + bool m_worst_weight_nudging_flag; + bool m_endpoint_refinement_flag; + + cem_encode_params() + { + init(); + } + + void init() + { + m_comp_weights[0] = 1; + m_comp_weights[1] = 1; + m_comp_weights[2] = 1; + m_comp_weights[3] = 1; + + m_decode_mode_srgb = true; + + m_pForced_weight_vals0 = nullptr; + 
m_pForced_weight_vals1 = nullptr; + + m_max_ls_passes = 3; + m_total_weight_refine_passes = 0; + m_worst_weight_nudging_flag = false; + m_endpoint_refinement_flag = false; + } + + float get_total_comp_weights() const + { + return (float)(m_comp_weights[0] + m_comp_weights[1] + m_comp_weights[2] + m_comp_weights[3]); + } + }; + + struct pixel_stats_t + { + uint32_t m_num_pixels; + + color_rgba m_pixels[ASTC_LDR_MAX_BLOCK_PIXELS]; + vec4F m_pixels_f[ASTC_LDR_MAX_BLOCK_PIXELS]; + + color_rgba m_min, m_max; + + vec4F m_min_f, m_max_f; + vec4F m_mean_f; + + // Always 3D, ignoring alpha + vec3F m_mean_rel_axis3; + vec3F m_zero_rel_axis3; + + // Always 4D + vec4F m_mean_rel_axis4; + + bool m_has_alpha; + + stats m_rgba_stats[4]; + + void clear() + { + clear_obj(*this); + } + + void init(uint32_t num_pixels, const color_rgba* pPixels); + + }; // struct struct pixel_stats + + void global_init(); + + void bit_transfer_signed_enc(int& a, int& b); + void bit_transfer_signed_dec(int& a, int& b); // transfers MSB from a to b, a is then [-32,31] + color_rgba blue_contract_enc(color_rgba orig, bool& did_clamp, int encoded_b); + int quant_preserve2(uint32_t ise_range, uint32_t v); + + uint32_t get_colors(const color_rgba& l, const color_rgba& h, uint32_t weight_ise_index, color_rgba* pColors, bool decode_mode_srgb); + uint32_t get_colors_raw_weights(const color_rgba& l, const color_rgba& h, color_rgba* pColors, bool decode_mode_srgb); + void decode_endpoints_ise20(uint32_t cem_index, const uint8_t* pEndpoint_vals, color_rgba& l, color_rgba& h); // assume BISE 20 + void decode_endpoints(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index, color_rgba& l, color_rgba& h, float* pScale = nullptr); + uint32_t get_colors(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index, uint32_t weight_ise_index, color_rgba* pColors, bool decode_mode_srgb); + uint32_t get_colors_raw_weights(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t 
endpoint_ise_index, color_rgba* pColors, bool decode_mode_srgb); + + //int apply_delta_to_bise_endpoint_val(uint32_t endpoint_ise_range, int ise_val, int delta); + int apply_delta_to_bise_weight_val(uint32_t weight_ise_range, int ise_val, int delta); + + uint64_t eval_solution( + const pixel_stats_t& pixel_stats, + uint32_t total_weights, const color_rgba* pWeight_colors, + uint8_t* pWeight_vals, uint32_t weight_ise_index, + const cem_encode_params& params); + + uint64_t eval_solution( + const pixel_stats_t& pixel_stats, + uint32_t cem_index, + const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index, + uint8_t* pWeight_vals, uint32_t weight_ise_index, + const cem_encode_params& params); + + uint64_t eval_solution_dp( + uint32_t ccs_index, + const pixel_stats_t& pixel_stats, + uint32_t total_weights, const color_rgba* pWeight_colors, + uint8_t* pWeight_vals0, uint8_t* pWeight_vals1, uint32_t weight_ise_index, + const cem_encode_params& params); + + uint64_t eval_solution_dp( + const pixel_stats_t& pixel_stats, + uint32_t cem_index, uint32_t ccs_index, + const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index, + uint8_t* pWeight_vals0, uint8_t* pWeight_vals1, uint32_t weight_ise_index, + const cem_encode_params& params); + + //bool cem8_or_12_used_blue_contraction(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index); + //bool cem9_or_13_used_blue_contraction(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index); + //bool used_blue_contraction(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index); + + uint64_t cem_encode_pixels( + uint32_t cem_index, int ccs_index, + const pixel_stats_t& pixel_stats, const cem_encode_params& enc_params, + uint32_t endpoint_ise_range, uint32_t weight_ise_range, + uint8_t* pEndpoint_vals, uint8_t* pWeight_vals0, uint8_t* pWeight_vals1, uint64_t cur_blk_error, + bool use_blue_contraction, bool* pBase_ofs_clamped_flag); + + // TODO: Rename, confusing vs. 
std::vector or basisu::vector or vec4F etc. + struct partition_pattern_vec + { + uint32_t m_width, m_height; + uint8_t m_parts[ASTC_LDR_MAX_BLOCK_PIXELS]; + + partition_pattern_vec(); + + partition_pattern_vec(const partition_pattern_vec& other); + + partition_pattern_vec(uint32_t width, uint32_t height, const uint8_t* pParts = nullptr); + + void init(uint32_t width, uint32_t height, const uint8_t* pParts = nullptr); + + void init_part_hist(); + + void clear(); + + partition_pattern_vec& operator= (const partition_pattern_vec& rhs); + + uint32_t get_width() const { return m_width; } + uint32_t get_height() const { return m_height; } + uint32_t get_total() const { return m_width * m_height; } + + uint8_t operator[] (uint32_t i) const { assert(i < get_total()); return m_parts[i]; } + uint8_t& operator[] (uint32_t i) { assert(i < get_total()); return m_parts[i]; } + + uint8_t operator() (uint32_t x, uint32_t y) const { assert((x < m_width) && (y < m_height)); return m_parts[x + y * m_width]; } + uint8_t& operator() (uint32_t x, uint32_t y) { assert((x < m_width) && (y < m_height)); return m_parts[x + y * m_width]; } + + int get_squared_distance(const partition_pattern_vec& other) const; + + float get_distance(const partition_pattern_vec& other) const + { + return sqrtf((float)get_squared_distance(other)); + } + + enum { cMaxPermute2Index = 1 }; + partition_pattern_vec get_permuted2(uint32_t permute_index) const; + + enum { cMaxPermute3Index = 5 }; + partition_pattern_vec get_permuted3(uint32_t permute_index) const; + + partition_pattern_vec get_canonicalized() const; + + bool operator== (const partition_pattern_vec& rhs) const + { + if ((m_width != rhs.m_width) || (m_height != rhs.m_height)) + return false; + + return memcmp(m_parts, rhs.m_parts, get_total()) == 0; + } + + operator size_t() const + { + return basist::hash_hsieh(m_parts, get_total()); + } + }; + + struct vp_tree_node + { + partition_pattern_vec m_vantage_point; + uint32_t m_point_index; + float m_dist; 
int m_inner_node, m_outer_node;
	};

	// Number of label permutations of a 3-subset pattern, and the permutation table itself.
	const uint32_t NUM_PART3_MAPPINGS = 6;
	extern uint8_t g_part3_mapping[NUM_PART3_MAPPINGS][3];

	// Vantage-point tree over partition patterns, queried with find_nearest().
	class vp_tree
	{
	public:
		vp_tree()
		{
		}

		void clear()
		{
			m_nodes.clear();
		}

		// This requires no redundant patterns, i.e. all must be unique.
		bool init(uint32_t n, const partition_pattern_vec* pUnique_pats);

		// One search hit; ordered by m_dist.
		struct result
		{
			uint32_t m_pat_index;
			uint32_t m_mapping_index;
			float m_dist;

			bool operator< (const result& rhs) const { return m_dist < rhs.m_dist; }
			bool operator> (const result& rhs) const { return m_dist > rhs.m_dist; }
		};

		// Fixed-capacity binary max-heap of results keyed on m_dist.
		// Elements are stored 1-based: the largest-distance entry is m_elements[1]
		// and slot 0 is unused by the heap operations.
		class result_queue
		{
			enum { MaxSupportedSize = 512 + 1 };

		public:
			result_queue() :
				m_cur_size(0)
			{
			}

			size_t get_size() const
			{
				return m_cur_size;
			}

			bool empty() const
			{
				return !m_cur_size;
			}

			// NOTE(review): the template arguments of std::array appear to be missing here
			// (possibly lost in extraction) — confirm against the original header.
			typedef std::array result_array_type;

			const result_array_type& get_elements() const { return m_elements; }
			result_array_type& get_elements() { return m_elements; }

			void clear()
			{
				m_cur_size = 0;
			}

			// No-op: storage is a fixed-size array.
			void reserve(uint32_t n)
			{
				BASISU_NOTE_UNUSED(n);
			}

			// Entry with the largest m_dist. The queue must not be empty.
			const result& top() const
			{
				assert(m_cur_size);
				return m_elements[1];
			}

			// Adds val; if the queue then holds more than max_size entries, the entry
			// with the largest distance is removed again. Returns false (without
			// inserting) only when the queue already holds MaxSupportedSize entries.
			bool insert(const result& val, uint32_t max_size)
			{
				assert(max_size < MaxSupportedSize);

				if (m_cur_size >= MaxSupportedSize)
					return false;

				m_elements[++m_cur_size] = val;
				up_heap(m_cur_size);

				if (m_cur_size > max_size)
					pop();

				return true;
			}

			// Removes the entry with the largest distance. Returns false if the queue is empty.
			bool pop()
			{
				if (m_cur_size == 0)
					return false;

				// Move the last element to the root and restore the heap order.
				m_elements[1] = m_elements[m_cur_size--];
				down_heap(1);
				return true;
			}

			// Largest distance currently held, or 0.0f when the queue is empty.
			float get_highest_dist() const
			{
				if (!m_cur_size)
					return 0.0f;

				return top().m_dist;
			}

		private:
			result_array_type m_elements;
			size_t m_cur_size;

			// Moves the element at index towards the root while it is larger than its parent (index >> 1).
			void up_heap(size_t index)
			{
				while ((index > 1) && (m_elements[index] > m_elements[index >> 1]))
				{
					std::swap(m_elements[index], m_elements[index >> 1]);
					index >>= 1;
				}
			}

			// Moves the element at index away from the root, swapping with its larger
			// child (2*index, 2*index+1) until neither child is larger.
			void down_heap(size_t index)
			{
				for (; ; )
				{
					size_t largest = index, left_child = 2 * index, right_child = 2 * index + 1;

					if ((left_child <= m_cur_size) && (m_elements[left_child] > m_elements[largest]))
						largest = left_child;

					if ((right_child <= m_cur_size) && (m_elements[right_child] > m_elements[largest]))
						largest = right_child;

					if (largest == index)
						break;

					std::swap(m_elements[index], m_elements[largest]);
					index = largest;
				}
			}
		};

		// Fills results with up to max_results entries for desired_pat;
		// num_subsets is asserted to be 2 or 3 by the implementation.
		void find_nearest(uint32_t num_subsets, const partition_pattern_vec& desired_pat, result_queue& results, uint32_t max_results) const;

	private:
		// NOTE(review): element type appears to be missing (possibly lost in extraction);
		// the implementation accesses vp_tree_node fields through m_nodes — confirm.
		basisu::vector m_nodes;

		void find_nearest_at_node(int node_index, uint32_t num_desired_pats, const partition_pattern_vec* pDesired_pats, result_queue& results, uint32_t max_results) const;

		void find_nearest_at_node_non_recursive(int init_node_index, uint32_t num_desired_pats, const partition_pattern_vec* pDesired_pats, result_queue& results, uint32_t max_results) const;

		// returns the index of the new node, or -1 on error
		int create_node(uint32_t n, const partition_pattern_vec* pUnique_pats, const uint_vec& pat_indices);

		// returns the pattern index of the vantage point (-1 on error), and the optimal split distance
		// NOTE(review): std::pair's template arguments appear to be missing here — confirm.
		std::pair find_best_vantage_point(uint32_t num_unique_pats, const partition_pattern_vec* pUnique_pats, const uint_vec& pat_indices);
	};

	// NOTE(review): the hash_map template arguments appear to be partly missing here — confirm.
	typedef basisu::hash_map > partition_hash_map;

	// Per-pattern texel count for each of the (up to 4) partition labels.
	struct partition_pattern_hist
	{
		uint8_t m_hist[4];

		partition_pattern_hist() { clear(); }

		void clear() { clear_obj(m_hist); }
	};

	// Unique partition patterns for one block size and partition count.
	struct partitions_data
	{
		uint32_t m_width, m_height, m_num_partitions;
		partition_pattern_vec m_partition_pats[astc_helpers::NUM_PARTITION_PATTERNS]; // indexed by unique index, NOT the 10-bit ASTC seed/pattern index

		partition_pattern_hist m_partition_pat_histograms[astc_helpers::NUM_PARTITION_PATTERNS]; // indexed by unique index, histograms of each pattern

		// ASTC seed to
unique index and vice versa + int16_t m_part_seed_to_unique_index[astc_helpers::NUM_PARTITION_PATTERNS]; + int16_t m_unique_index_to_part_seed[astc_helpers::NUM_PARTITION_PATTERNS]; + + // Total number of unique patterns + uint32_t m_total_unique_patterns; + + // VP tree used to rapidly find nearby/similar patterns. + vp_tree m_part_vp_tree; + + void init(uint32_t num_partitions, uint32_t block_width, uint32_t block_height, bool init_vp_tree = true); + }; + + float surrogate_quant_endpoint_val(float e, uint32_t num_endpoint_levels, uint32_t flags); + vec4F surrogate_quant_endpoint(const vec4F& e, uint32_t num_endpoint_levels, uint32_t flags); + + float surrogate_evaluate_rgba_sp(const pixel_stats_t& ps, const vec4F& l, const vec4F& h, float* pWeights0, uint32_t num_weight_levels, const cem_encode_params& enc_params, uint32_t flags); + float surrogate_evaluate_rgba_dp(uint32_t ccs_index, const pixel_stats_t& ps, const vec4F& l, const vec4F& h, float* pWeights0, float* pWeights1, uint32_t num_weight_levels, const cem_encode_params& enc_params, uint32_t flags); + + enum + { + cFlagDisableQuant = 1, + cFlagNoError = 2 + } + ; + float cem_surrogate_encode_pixels( + uint32_t cem_index, int ccs_index, + const pixel_stats_t& pixel_stats, const cem_encode_params& enc_params, + uint32_t endpoint_ise_range, uint32_t weight_ise_range, + vec4F& low_endpoint, vec4F& high_endpoint, float& s, float* pWeights0, float* pWeights1, uint32_t flags = 0); + +#if 0 + bool requantize_ise_endpoints(uint32_t cem, + uint32_t src_ise_endpoint_range, const uint8_t* pSrc_endpoints, + uint32_t dst_ise_endpoint_range, uint8_t* pDst_endpoints); + + uint32_t get_base_cem_without_alpha(uint32_t cem); + + bool pack_base_offset( + uint32_t cem_index, uint32_t dst_ise_endpoint_range, uint8_t* pPacked_endpoints, + const color_rgba& l, const color_rgba& h, + bool use_blue_contraction, bool auto_disable_blue_contraction_if_clamped, + bool& blue_contraction_clamped_flag, bool& base_ofs_clamped_flag, bool& 
endpoints_swapped); + + bool convert_endpoints_across_cems( + uint32_t prev_cem, uint32_t prev_endpoint_ise_range, const uint8_t* pPrev_endpoints, + uint32_t dst_cem, uint32_t dst_endpoint_ise_range, uint8_t* pDst_endpoints, + bool always_repack, + bool use_blue_contraction, bool auto_disable_blue_contraction_if_clamped, + bool& blue_contraction_clamped_flag, bool& base_ofs_clamped_flag); +#endif + +} // namespace astc_ldr + +} // namespace basisu diff --git a/external/basis_universal/encoder/basisu_astc_ldr_encode.cpp b/external/basis_universal/encoder/basisu_astc_ldr_encode.cpp new file mode 100644 index 0000000000..302cb2e386 --- /dev/null +++ b/external/basis_universal/encoder/basisu_astc_ldr_encode.cpp @@ -0,0 +1,11098 @@ +// File: basisu_astc_ldr_encode.cpp +// Copyright (C) 2019-2026 Binomial LLC. All Rights Reserved. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "basisu_enc.h" +#include "basisu_astc_ldr_encode.h" +#include "basisu_astc_hdr_common.h" +#include "basisu_astc_ldr_common.h" +#include "3rdparty/android_astc_decomp.h" + +// pick up BASISD_SUPPORT_KTX2_ZSTD macro (this defines it automatically and sets to 1 if not defined) +#include "../transcoder/basisu_transcoder.h" + +#include + +#ifndef BASISD_SUPPORT_KTX2_ZSTD +#error BASISD_SUPPORT_KTX2_ZSTD must be defined here +#endif + +#if BASISD_SUPPORT_KTX2_ZSTD +#include "../zstd/zstd.h" +#endif + +namespace basisu { +namespace astc_ldr { + +const bool g_devel_messages = true; +const bool ASTC_LDR_CONSISTENCY_CHECKING = true; + +bool g_initialized; + +const uint32_t EXPECTED_SUPERBUCKET_HASH_SIZE = 8192; +const uint32_t EXPECTED_SHORTLIST_HASH_SIZE = 4096; + +const uint32_t MAX_BASE_PARTS2 = 128; +const uint32_t MAX_BASE_PARTS3 = 128; + +const uint32_t PART_ESTIMATE_STAGE1_MULTIPLIER = 4; + +const uint32_t MAX_WIDTH = 65535, MAX_HEIGHT = 65535; + +void code_block_weights( + basist::astc_ldr_t::grid_weight_dct &gw_dct, + float q, uint32_t plane_index, + const astc_helpers::log_astc_block& log_blk, + const basist::astc_ldr_t::astc_block_grid_data* pGrid_data, + basisu::bitwise_coder& c, + basist::astc_ldr_t::dct_syms& syms) +{ + assert(q > 0.0f); + + syms.clear(); + + const uint32_t grid_width = log_blk.m_grid_width, grid_height = log_blk.m_grid_height; + const uint32_t total_grid_samples = grid_width * grid_height; + const uint32_t num_planes = log_blk.m_dual_plane ? 
2 : 1; + + //const auto& dequant_tab = astc_helpers::g_dequant_tables.get_weight_tab(log_blk.m_weight_ise_range).m_ISE_to_val; + //const auto& quant_tab = astc_helpers::g_dequant_tables.get_weight_tab(log_blk.m_weight_ise_range).m_val_to_ise; + + uint8_t dequantized_raw_weights0[astc_helpers::MAX_BLOCK_PIXELS]; + + for (uint32_t i = 0; i < grid_width * grid_height; i++) + dequantized_raw_weights0[i] = astc_helpers::g_dequant_tables.get_weight_tab(log_blk.m_weight_ise_range).m_ISE_to_val[log_blk.m_weights[i * num_planes + plane_index]]; + + auto grid_dim_vals_iter = gw_dct.m_grid_dim_key_vals.find(basist::astc_ldr_t::grid_dim_key(grid_width, grid_height)); + assert(grid_dim_vals_iter != gw_dct.m_grid_dim_key_vals.end()); + + auto& grid_dim_vals = grid_dim_vals_iter->second; + + float orig_weights[astc_helpers::MAX_BLOCK_PIXELS]; + float weight_sum = 0; + for (uint32_t y = 0; y < grid_height; y++) + { + for (uint32_t x = 0; x < grid_width; x++) + { + orig_weights[x + y * grid_width] = dequantized_raw_weights0[x + y * grid_width]; + weight_sum += orig_weights[x + y * grid_width]; + } + } + + float scaled_weight_coding_scale = basist::astc_ldr_t::SCALED_WEIGHT_BASE_CODING_SCALE; + if (log_blk.m_weight_ise_range <= astc_helpers::BISE_8_LEVELS) + scaled_weight_coding_scale = 1.0f / 8.0f; + + float scaled_mean_weight = std::round((float)scaled_weight_coding_scale * (weight_sum / total_grid_samples)); + scaled_mean_weight = basisu::clamp(scaled_mean_weight, 0.0f, 64.0f * (float)scaled_weight_coding_scale); + + float mean_weight = scaled_mean_weight / (float)scaled_weight_coding_scale; + + for (uint32_t y = 0; y < grid_height; y++) + for (uint32_t x = 0; x < grid_width; x++) + orig_weights[x + y * grid_width] -= mean_weight; + + const float span_len = gw_dct.get_max_span_len(log_blk, plane_index); + + float dct_weights[astc_helpers::MAX_BLOCK_PIXELS]; + + // TODO - temp alloc + basist::astc_ldr_t::fvec dct_work; + grid_dim_vals.m_dct.forward(orig_weights, dct_weights, 
dct_work); + + const float level_scale = gw_dct.compute_level_scale(q, span_len, pGrid_data->m_weight_gamma, grid_width, grid_height, log_blk.m_weight_ise_range); + + int dct_quant_tab[astc_helpers::MAX_BLOCK_PIXELS]; + gw_dct.compute_quant_table(q, grid_width, grid_height, level_scale, dct_quant_tab); + +#if defined(DEBUG) || defined(_DEBUG) + // sanity checking + basist::astc_ldr_t::sample_quant_table_state quant_state; + quant_state.init(q, gw_dct.m_block_width, gw_dct.m_block_height, level_scale); +#endif + + c.put_truncated_binary((int)scaled_mean_weight, (uint32_t)(64.0f * scaled_weight_coding_scale) + 1); + + syms.m_dc_sym = (int)scaled_mean_weight; + syms.m_num_dc_levels = (uint32_t)(64.0f * scaled_weight_coding_scale) + 1; + assert(syms.m_num_dc_levels == gw_dct.get_num_weight_dc_levels(log_blk.m_weight_ise_range)); + + int dct_coeffs[astc_helpers::MAX_BLOCK_PIXELS]; + + for (uint32_t y = 0; y < grid_height; y++) + { + for (uint32_t x = 0; x < grid_width; x++) + { + if (!x && !y) + { + dct_coeffs[0] = 0; + continue; + } + + const int levels = dct_quant_tab[x + y * grid_width]; + +#if defined(DEBUG) || defined(_DEBUG) + // sanity checking + assert(levels == gw_dct.sample_quant_table(quant_state, x, y)); +#endif + + float d = dct_weights[x + y * grid_width]; + + int id = gw_dct.quantize_deadzone(d, levels, basist::astc_ldr_t::DEADZONE_ALPHA, x, y); + + dct_coeffs[x + y * grid_width] = id; + + } // x + + } // y + + const basisu::int_vec& zigzag = grid_dim_vals.m_zigzag; + assert(zigzag.size() == total_grid_samples); + + int total_zeros = 0; + for (uint32_t i = 0; i < total_grid_samples; i++) + { + uint32_t dct_idx = zigzag[i]; + if (!dct_idx) + continue; + + int coeff = dct_coeffs[dct_idx]; + if (!coeff) + { + total_zeros++; + continue; + } + + basist::astc_ldr_t::dct_syms::coeff cf; + cf.m_num_zeros = basisu::safe_cast_uint16(total_zeros); + cf.m_coeff = basisu::safe_cast_int16(coeff); + syms.m_coeffs.push_back(cf); + syms.m_max_coeff_mag = 
basisu::maximum(syms.m_max_coeff_mag, basisu::iabs(coeff)); + syms.m_max_zigzag_index = basisu::maximum(syms.m_max_zigzag_index, i); + + c.put_rice(total_zeros, gw_dct.m_zero_run); + total_zeros = 0; + + c.put_bits(coeff < 0 ? 1 : 0, 1); + + if (coeff < 0) + coeff = -coeff; + + c.put_rice(coeff, gw_dct.m_coeff); + } + + if (total_zeros) + { + basist::astc_ldr_t::dct_syms::coeff cf; + cf.m_num_zeros = basisu::safe_cast_uint16(total_zeros); + cf.m_coeff = INT16_MAX; + syms.m_coeffs.push_back(cf); + + c.put_rice(total_zeros, gw_dct.m_zero_run); + } +} + +void astc_ldr_requantize_astc_weights(uint32_t n, const uint8_t* pSrc_ise_vals, uint32_t from_ise_range, uint8_t* pDst_ise_vals, uint32_t to_ise_range) +{ + if (from_ise_range == to_ise_range) + { + if (pDst_ise_vals != pSrc_ise_vals) + memcpy(pDst_ise_vals, pSrc_ise_vals, n); + return; + } + + // from/to BISE ranges not equal + if (from_ise_range == astc_helpers::BISE_64_LEVELS) + { + // from [0,64] + const auto& quant_tab = astc_helpers::g_dequant_tables.get_weight_tab(to_ise_range).m_val_to_ise; + + for (uint32_t i = 0; i < n; i++) + pDst_ise_vals[i] = quant_tab[pSrc_ise_vals[i]]; + } + else if (to_ise_range == astc_helpers::BISE_64_LEVELS) + { + // to [0,64] + const auto& dequant_tab = astc_helpers::g_dequant_tables.get_weight_tab(from_ise_range).m_ISE_to_val; + + for (uint32_t i = 0; i < n; i++) + pDst_ise_vals[i] = dequant_tab[pSrc_ise_vals[i]]; + } + else + { + // from/to any other + const auto& dequant_tab = astc_helpers::g_dequant_tables.get_weight_tab(from_ise_range).m_ISE_to_val; + const auto& quant_tab = astc_helpers::g_dequant_tables.get_weight_tab(to_ise_range).m_val_to_ise; + + for (uint32_t i = 0; i < n; i++) + pDst_ise_vals[i] = quant_tab[dequant_tab[pSrc_ise_vals[i]]]; + } +} + +void astc_ldr_downsample_ise_weights( + uint32_t dequant_weight_ise_range, uint32_t quant_weight_ise_range, + uint32_t block_w, uint32_t block_h, + uint32_t grid_w, uint32_t grid_h, + const uint8_t* pSrc_weights, uint8_t* 
pDst_weights, + const float* pDownsample_matrix) +{ + assert((block_w <= astc_ldr::ASTC_LDR_MAX_BLOCK_WIDTH) && (block_h <= astc_ldr::ASTC_LDR_MAX_BLOCK_HEIGHT)); + assert((grid_w >= 2) && (grid_w <= block_w)); + assert((grid_h >= 2) && (grid_h <= block_h)); + + assert(((dequant_weight_ise_range >= astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE) && (dequant_weight_ise_range <= astc_helpers::LAST_VALID_WEIGHT_ISE_RANGE)) || + (dequant_weight_ise_range == astc_helpers::BISE_64_LEVELS)); + + assert(((quant_weight_ise_range >= astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE) && (quant_weight_ise_range <= astc_helpers::LAST_VALID_WEIGHT_ISE_RANGE)) || + (quant_weight_ise_range == astc_helpers::BISE_64_LEVELS)); + + assert(pDownsample_matrix); + + if ((block_w == grid_w) && (block_h == grid_h)) + { + if (dequant_weight_ise_range != quant_weight_ise_range) + { + astc_ldr_requantize_astc_weights(block_w * block_h, pSrc_weights, dequant_weight_ise_range, pDst_weights, quant_weight_ise_range); + } + else + { + if (pDst_weights != pSrc_weights) + memcpy(pDst_weights, pSrc_weights, block_w * block_h); + } + + return; + } + + uint8_t desired_weights[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + + if (dequant_weight_ise_range == astc_helpers::BISE_64_LEVELS) + { + memcpy(desired_weights, pSrc_weights, block_w * block_h); + } + else + { + const auto& dequant_tab = astc_helpers::g_dequant_tables.get_weight_tab(dequant_weight_ise_range).m_ISE_to_val; + + for (uint32_t by = 0; by < block_h; by++) + for (uint32_t bx = 0; bx < block_w; bx++) + desired_weights[bx + by * block_w] = dequant_tab[pSrc_weights[bx + by * block_w]]; + } + + if (quant_weight_ise_range == astc_helpers::BISE_64_LEVELS) + { + downsample_weight_grid( + pDownsample_matrix, + block_w, block_h, // source/from dimension (block size) + grid_w, grid_h, // dest/to dimension (grid size) + desired_weights, // these are dequantized weights, NOT ISE symbols, [by][bx] + pDst_weights); // [wy][wx] + } + else + { + uint8_t 
raw_downsampled_weights[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + + downsample_weight_grid( + pDownsample_matrix, + block_w, block_h, // source/from dimension (block size) + grid_w, grid_h, // dest/to dimension (grid size) + desired_weights, // these are dequantized weights, NOT ISE symbols, [by][bx] + raw_downsampled_weights); // [wy][wx] + + const auto& weight_quant_tab = astc_helpers::g_dequant_tables.get_weight_tab(quant_weight_ise_range).m_val_to_ise; + + for (uint32_t gy = 0; gy < grid_h; gy++) + for (uint32_t gx = 0; gx < grid_w; gx++) + pDst_weights[gx + gy * grid_w] = weight_quant_tab[raw_downsampled_weights[gx + gy * grid_w]]; + } +} + +void downsample_weight_residual_grid( + const float* pMatrix_weights, + uint32_t bx, uint32_t by, // source/from dimension (block size) + uint32_t wx, uint32_t wy, // dest/to dimension (grid size) + const int* pSrc_weights, // these are dequantized weights, NOT ISE symbols, [by][bx] + float* pDst_weights) // [wy][wx] +{ + const uint32_t total_block_samples = bx * by; + + for (uint32_t y = 0; y < wy; y++) + { + for (uint32_t x = 0; x < wx; x++) + { + float total = 0.0f; + + for (uint32_t i = 0; i < total_block_samples; i++) + if (pMatrix_weights[i]) + total += pMatrix_weights[i] * (float)pSrc_weights[i]; + + pDst_weights[x + y * wx] = total; + + pMatrix_weights += total_block_samples; + } + } +} + +void downsample_weightsf( + const float* pMatrix_weights, + uint32_t bx, uint32_t by, // source/from dimension (block size) + uint32_t wx, uint32_t wy, // dest/to dimension (grid size) + const float* pSrc_weights, // these are dequantized weights, NOT ISE symbols, [by][bx] + float* pDst_weights) // [wy][wx] +{ + const uint32_t total_block_samples = bx * by; + + for (uint32_t y = 0; y < wy; y++) + { + for (uint32_t x = 0; x < wx; x++) + { + float total = 0.0f; + + for (uint32_t i = 0; i < total_block_samples; i++) + if (pMatrix_weights[i]) + total += pMatrix_weights[i] * pSrc_weights[i]; + + pDst_weights[x + y * wx] = total; + + 
pMatrix_weights += total_block_samples; + } + } +} + +static inline uint32_t weighted_color_error(const color_rgba& a, const color_rgba& b, const astc_ldr::cem_encode_params& p) +{ + uint32_t total_e = 0; + for (uint32_t c = 0; c < 4; c++) + { + int av = a[c]; + int bv = b[c]; + int ev = av - bv; + total_e += (uint32_t)(ev * ev) * p.m_comp_weights[c]; + } + + return total_e; +} + +uint64_t eval_error( + uint32_t block_width, uint32_t block_height, + const astc_helpers::log_astc_block& enc_log_block, + const astc_ldr::pixel_stats_t& pixel_stats, + const astc_ldr::cem_encode_params& params) +{ + color_rgba dec_block_pixels[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + bool status = astc_helpers::decode_block_xuastc_ldr(enc_log_block, dec_block_pixels, block_width, block_height, params.m_decode_mode_srgb ? astc_helpers::cDecodeModeSRGB8 : astc_helpers::cDecodeModeLDR8); + if (!status) + { + // Shouldn't ever happen + assert(0); + return UINT64_MAX; + } + +#if defined(_DEBUG) || defined(DEBUG) + // Sanity check vs. unoptimized decoder + color_rgba dec_block_pixels_alt[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + bool alt_status = astc_helpers::decode_block(enc_log_block, dec_block_pixels_alt, block_width, block_height, params.m_decode_mode_srgb ? 
astc_helpers::cDecodeModeSRGB8 : astc_helpers::cDecodeModeLDR8); + if (!alt_status) + { + // Shouldn't ever happen + assert(0); + return UINT64_MAX; + } + + if (memcmp(dec_block_pixels, dec_block_pixels_alt, sizeof(color_rgba) * block_width * block_height) != 0) + { + // Very bad + assert(0); + return UINT64_MAX; + } +#endif + + uint64_t total_err = 0; + + const uint32_t total_block_pixels = block_width * block_height; + for (uint32_t i = 0; i < total_block_pixels; i++) + total_err += weighted_color_error(dec_block_pixels[i], pixel_stats.m_pixels[i], params); + + return total_err; +} + +uint64_t eval_error( + uint32_t block_width, uint32_t block_height, + const astc_ldr::pixel_stats_t& pixel_stats, + uint32_t cem_index, + bool dual_plane_flag, int ccs_index, + uint32_t endpoint_ise_range, uint32_t weight_ise_range, + uint32_t grid_width, uint32_t grid_height, + const uint8_t* pEndpoint_vals, const uint8_t* pWeight_grid_vals0, const uint8_t* pWeight_grid_vals1, + const astc_ldr::cem_encode_params& params) +{ + const uint32_t total_block_pixels = block_width * block_height; + const uint32_t total_grid_pixels = grid_width * grid_height; + + astc_helpers::log_astc_block enc_log_block; + + enc_log_block.clear(); + enc_log_block.m_grid_width = (uint8_t)grid_width; + enc_log_block.m_grid_height = (uint8_t)grid_height; + enc_log_block.m_weight_ise_range = (uint8_t)weight_ise_range; + enc_log_block.m_endpoint_ise_range = (uint8_t)endpoint_ise_range; + enc_log_block.m_color_endpoint_modes[0] = (uint8_t)cem_index; + enc_log_block.m_num_partitions = 1; + + memcpy(enc_log_block.m_endpoints, pEndpoint_vals, astc_helpers::get_num_cem_values(cem_index)); + + if (dual_plane_flag) + { + assert((ccs_index >= 0) && (ccs_index <= 3)); + + enc_log_block.m_dual_plane = true; + enc_log_block.m_color_component_selector = (uint8_t)ccs_index; + + for (uint32_t i = 0; i < total_grid_pixels; i++) + { + enc_log_block.m_weights[i * 2 + 0] = pWeight_grid_vals0[i]; + enc_log_block.m_weights[i * 2 
+ 1] = pWeight_grid_vals1[i]; + } + } + else + { + assert(ccs_index < 0); + + memcpy(enc_log_block.m_weights, pWeight_grid_vals0, total_grid_pixels); + } + + color_rgba decoded_pixels[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + bool status = astc_helpers::decode_block(enc_log_block, decoded_pixels, block_width, block_height, params.m_decode_mode_srgb ? astc_helpers::cDecodeModeSRGB8 : astc_helpers::cDecodeModeLDR8); + assert(status); + + if (!status) + return UINT64_MAX; + + uint64_t total_err = 0; + + for (uint32_t i = 0; i < total_block_pixels; i++) + total_err += weighted_color_error(pixel_stats.m_pixels[i], decoded_pixels[i], params); + + return total_err; +} + +float compute_psnr_from_wsse(uint32_t block_width, uint32_t block_height, uint64_t sse, float total_comp_weights) +{ + const uint32_t total_block_pixels = block_width * block_height; + const float wmse = (float)sse / (total_comp_weights * (float)total_block_pixels); + const float wpsnr = (wmse > 1e-5f) ? (20.0f * log10f(255.0f / sqrtf(wmse))) : 10000.0f; + return wpsnr; +} + +// quantized coordinate descent (QCD), quadratic objective +namespace qcd +{ + struct qcd_min_solver + { + // geometry / sizes + int m_N = 0; // texels + int m_K = 0; // controls + int m_Q = 0; // label count + + // inputs (not owned), (N x K) row-major + const float* m_pU = nullptr; // grid to texel upsample matrix + + // cached + float_vec m_ucols; // N*K, column k at &m_ucols[k*m_N] + float_vec m_alpha; // K, ||u_k||^2 (>= eps) + float_vec m_labels; // Q, sorted unique u-labels (ints in [0..64]), ASTC raw [0,64] weights + + bool m_ready_flag = false; + + // init: cache columns, norms, and label set + bool init(const float* pU_rowmajor, int N, int K, const int* pLabels_u, int Q) + { + if ((!pU_rowmajor) || (!pLabels_u) || (N <= 0) || (K <= 0) || (Q <= 0)) + return false; + + m_pU = pU_rowmajor; + m_N = N; + m_K = K; + m_Q = Q; + + // cache columns + m_ucols.assign(size_t(N) * K, 0.0f); + + for (int k = 0; k < K; ++k) + { + float* 
pDst = &m_ucols[size_t(k) * size_t(N)]; + const float* pSrc = m_pU + k; // first element of column k + for (int t = 0; t < N; ++t) + pDst[t] = pSrc[size_t(t) * size_t(K)]; + } + + // column norms + m_alpha.resize(K); + + for (int k = 0; k < K; ++k) + { + const float* pUK = &m_ucols[size_t(k) * size_t(N)]; + + float a = 0.0f; + for (int t = 0; t < N; ++t) + a += pUK[t] * pUK[t]; + + if (!(a > 0.0f)) + a = 1e-8f; + + m_alpha[k] = a; + } + + m_labels.assign(pLabels_u, pLabels_u + Q); + +#if defined(_DEBUG) || defined(DEBUG) + for (size_t i = 1; i < m_labels.size(); ++i) + { + assert(m_labels[i] > m_labels[i - 1]); // strictly increasing + assert((m_labels[i] >= 0) && (m_labels[i] <= 64)); + } +#endif + + m_Q = (int)m_labels.size(); + if (m_Q <= 0) + return false; + + m_ready_flag = true; + return true; + } + + // compute residual r = U*g - w* (uses label IDs -> u-values) + void build_residual(const int* pG_idx, const float* pW_star, float* pR_out) const + { + assert(m_ready_flag && pG_idx && pW_star && pR_out); + + // r = sum_k (u_label[pG_idx[k]] * ucol_k) - pW_star + std::fill(pR_out, pR_out + m_N, 0.0f); + + for (int k = 0; k < m_K; ++k) + { + const float* pUK = &m_ucols[size_t(k) * size_t(m_N)]; + const float s = m_labels[pG_idx[k]]; + + for (int t = 0; t < m_N; ++t) + pR_out[t] += s * pUK[t]; + } + + for (int t = 0; t < m_N; ++t) + pR_out[t] -= pW_star[t]; + } + + // one QCD sweep: returns num moves accepted (strict dE < -eps) + int sweep(int* pG_idx, float* pR_io, float accept_eps = 1e-6f) const + { + assert(m_ready_flag && pG_idx && pR_io); + int num_moved = 0; + + for (int k = 0; k < m_K; ++k) + { + const float* pUK = &m_ucols[size_t(k) * size_t(m_N)]; + + // beta = + float beta = 0.0f; + for (int t = 0; t < m_N; ++t) + beta += pR_io[t] * pUK[t]; + + const float a = m_alpha[k]; // >= 1e-8 + + const float cur_u = m_labels[pG_idx[k]]; + const float s_star = cur_u - beta / a; // continuous minimizer (u-domain) + + // nearest label index to s_star (binary search) 
+ const int j0 = nearest_label_idx(s_star); + + const int cand[3] = + { + j0, + (j0 + 1 < m_Q) ? (j0 + 1) : j0, + (j0 - 1 >= 0) ? (j0 - 1) : j0 + }; + + int best_j = pG_idx[k]; + float best_dE = 0.0f; + + for (int c = 0; c < 3; ++c) + { + const int j = cand[c]; + if (j == pG_idx[k]) + continue; + + const float s = m_labels[j]; + const float d = s - cur_u; // u-change at coord k + const float dE = 2.0f * d * beta + d * d * a; // exact delta E + + if ((best_j == pG_idx[k]) || (dE < best_dE)) + { + best_dE = dE; + best_j = j; + } + } + + if ((best_j != pG_idx[k]) && (best_dE < -accept_eps)) + { + // commit: update residual and label ID + const float d = m_labels[best_j] - cur_u; + + for (int t = 0; t < m_N; ++t) + pR_io[t] += d * pUK[t]; + + pG_idx[k] = best_j; + ++num_moved; + } + } // k + + return num_moved; + } + + // utility: energy from residual (sum r^2) + float residual_energy(const float* pR) const + { + assert(pR); + + float E = 0.0f; + for (int t = 0; t < m_N; ++t) + E += pR[t] * pR[t]; + + return E; + } + + private: + // nearest label index by u-value (handles non-uniform spacing) + int nearest_label_idx(float x) const + { + const int Q = m_Q; + + if (Q <= 1) + return 0; + if (x <= m_labels.front()) + return 0; + if (x >= m_labels.back()) + return Q - 1; + + int lo = 0, hi = Q - 1; + while (hi - lo > 1) + { + const int mid = (lo + hi) >> 1; + (x >= m_labels[mid]) ? lo = mid : hi = mid; + } + + const float dlo = std::fabs(x - m_labels[lo]); + const float dhi = std::fabs(x - m_labels[hi]); + return (dlo <= dhi) ? 
lo : hi; + } + }; + +} // namespace qcd + +// 1-3 subsets, requires initial weights +bool polish_block_weights( + uint32_t block_width, uint32_t block_height, + const astc_ldr::pixel_stats_t& pixel_stats, + astc_helpers::log_astc_block& enc_log_block, // assumes there is already a good encoding to improve here + const astc_ldr::cem_encode_params& params, + const astc_ldr::partition_pattern_vec* pPat, + bool& improved_flag, + bool gradient_descent_flag, bool polish_weights_flag, bool qcd_enabled_flag) +{ + improved_flag = false; + + if (!gradient_descent_flag && !polish_weights_flag && !qcd_enabled_flag) + return true; + + const uint32_t grid_width = enc_log_block.m_grid_width, grid_height = enc_log_block.m_grid_height; + const uint32_t cem_index = enc_log_block.m_color_endpoint_modes[0]; + const uint32_t num_subsets = enc_log_block.m_num_partitions; + const bool dual_plane_flag = enc_log_block.m_dual_plane; + //const uint32_t num_planes = dual_plane_flag ? 2 : 1; + const int ccs_index = dual_plane_flag ? 
enc_log_block.m_color_component_selector : -1; + + const uint32_t endpoint_ise_range = enc_log_block.m_endpoint_ise_range; + const uint32_t weight_ise_range = enc_log_block.m_weight_ise_range; + + const auto& dequant_tab = astc_helpers::g_dequant_tables.get_weight_tab(weight_ise_range).m_ISE_to_val; + const auto& quant_tab = astc_helpers::g_dequant_tables.get_weight_tab(weight_ise_range).m_val_to_ise; + + //const bool is_downsampling = (grid_width < block_width) || (grid_height < block_height); + +#if defined(_DEBUG) || defined(DEBUG) + if (num_subsets > 1) + { + for (uint32_t i = 1; i < num_subsets; i++) + { + assert(enc_log_block.m_color_endpoint_modes[i] == cem_index); + } + } +#endif + + //const astc_block_grid_data* pBlock_grid_data = find_astc_block_grid_data(block_width, block_height, grid_width, grid_height); + + const uint32_t total_block_pixels = block_width * block_height; + const uint32_t total_grid_pixels = grid_width * grid_height; + + uint64_t cur_err = eval_error(block_width, block_height, enc_log_block, pixel_stats, params); + + uint8_t weights0[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + uint8_t weights1[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + + astc_helpers::extract_weights(enc_log_block, weights0, 0); + + if (dual_plane_flag) + astc_helpers::extract_weights(enc_log_block, weights1, 1); + + const bool global_gradient_desc_enabled = true; + const bool global_qcd_enabled = true; + const bool global_polish_weights_enabled = true; + + const uint32_t NUM_WEIGHT_POLISH_PASSES = 1; + + // Gradient descent + if ((gradient_descent_flag) && (global_gradient_desc_enabled)) + { + // Downsample the residuals to grid res + vector2D upsample_matrix; + compute_upsample_matrix(upsample_matrix, block_width, block_height, grid_width, grid_height); + + // First compute the block's ideal raw weights given the current endpoints at full block/texel res + // TODO: Move to helper + uint8_t ideal_block_raw_weights0[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS], 
ideal_block_raw_weights1[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + + if (num_subsets == 1) + { + if (dual_plane_flag) + astc_ldr::eval_solution_dp(pixel_stats, cem_index, ccs_index, enc_log_block.m_endpoints, endpoint_ise_range, ideal_block_raw_weights0, ideal_block_raw_weights1, astc_helpers::BISE_64_LEVELS, params); + else + astc_ldr::eval_solution(pixel_stats, cem_index, enc_log_block.m_endpoints, endpoint_ise_range, ideal_block_raw_weights0, astc_helpers::BISE_64_LEVELS, params); + } + else + { + // Extract each subset's texels, compute the raw weights, place back into full res texel/block weight grid. + color_rgba part_pixels[astc_helpers::MAX_PARTITIONS][astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + uint32_t num_part_pixels[astc_helpers::MAX_PARTITIONS] = { 0 }; + + for (uint32_t y = 0; y < block_height; y++) + { + for (uint32_t x = 0; x < block_width; x++) + { + const color_rgba& px = pixel_stats.m_pixels[x + y * block_width]; + + const uint32_t part_index = (*pPat)(x, y); + assert(part_index < num_subsets); + + // Sanity check + assert(part_index == (uint32_t)astc_helpers::compute_texel_partition(enc_log_block.m_partition_id, x, y, 0, num_subsets, astc_helpers::is_small_block(block_width, block_height))); + + part_pixels[part_index][num_part_pixels[part_index]] = px; + num_part_pixels[part_index]++; + } // x + } // y + + astc_ldr::pixel_stats_t part_pixel_stats[astc_helpers::MAX_PARTITIONS]; + + for (uint32_t i = 0; i < num_subsets; i++) + part_pixel_stats[i].clear(); + + uint8_t part_raw_weights[astc_helpers::MAX_PARTITIONS][astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + + for (uint32_t part_index = 0; part_index < num_subsets; part_index++) + { + part_pixel_stats[part_index].init(num_part_pixels[part_index], &part_pixels[part_index][0]); + + const uint8_t* pPart_endpoints = astc_helpers::get_endpoints(enc_log_block, part_index); + + astc_ldr::eval_solution(part_pixel_stats[part_index], cem_index, pPart_endpoints, endpoint_ise_range, &part_raw_weights[part_index][0], 
astc_helpers::BISE_64_LEVELS, params); + + } // part_index + + clear_obj(num_part_pixels); + + for (uint32_t y = 0; y < block_height; y++) + { + for (uint32_t x = 0; x < block_width; x++) + { + const uint32_t part_index = (*pPat)(x, y); + assert(part_index < num_subsets); + + ideal_block_raw_weights0[x + y * block_width] = part_raw_weights[part_index][num_part_pixels[part_index]]; + num_part_pixels[part_index]++; + } // x + } // y + } + +#if 1 + // Now compute the current block/texel res (upsampled) raw [0,64] weights given the current quantized grid weights. Dequant then upsample. + // This is what an ASTC decoder would use during unpacking. + uint8_t dequantized_grid_weights0[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS], dequantized_grid_weights1[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + uint8_t dequantized_block_weights_upsampled0[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS], dequantized_block_weights_upsampled1[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + + astc_ldr_requantize_astc_weights(total_grid_pixels, weights0, weight_ise_range, dequantized_grid_weights0, astc_helpers::BISE_64_LEVELS); + + if (dual_plane_flag) + astc_ldr_requantize_astc_weights(total_grid_pixels, weights1, weight_ise_range, dequantized_grid_weights1, astc_helpers::BISE_64_LEVELS); + + astc_helpers::upsample_weight_grid( + block_width, block_height, // destination/to dimension + grid_width, grid_height, // source/from dimension + dequantized_grid_weights0, // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx] + dequantized_block_weights_upsampled0); // [by][bx] + + if (dual_plane_flag) + { + astc_helpers::upsample_weight_grid( + block_width, block_height, // destination/to dimension + grid_width, grid_height, // source/from dimension + dequantized_grid_weights1, // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx] + dequantized_block_weights_upsampled1); // [by][bx] + } + + // Now compute residuals at the block res + int 
weight_block_raw_residuals0[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS], weight_block_raw_residuals1[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + + for (uint32_t i = 0; i < total_block_pixels; i++) + weight_block_raw_residuals0[i] = ideal_block_raw_weights0[i] - dequantized_block_weights_upsampled0[i]; + + if (dual_plane_flag) + { + for (uint32_t i = 0; i < total_block_pixels; i++) + weight_block_raw_residuals1[i] = ideal_block_raw_weights1[i] - dequantized_block_weights_upsampled1[i]; + } + + float weight_grid_residuals_downsampled0[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS], weight_grid_residuals_downsampled1[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + + basisu::vector unweighted_downsample_matrix; + + // TODO: precompute, store in weight grid data + compute_upsample_matrix_transposed(unweighted_downsample_matrix, block_width, block_height, grid_width, grid_height); + + basisu::vector diag_AtA(total_grid_pixels); + compute_diag_AtA_vector(block_width, block_height, grid_width, grid_height, upsample_matrix, diag_AtA.get_ptr()); + + downsample_weight_residual_grid( + unweighted_downsample_matrix.get_ptr(), + block_width, block_height, // source/from dimension (block size) + grid_width, grid_height, // dest/to dimension (grid size) + weight_block_raw_residuals0, // these are dequantized weights, NOT ISE symbols, [by][bx] + weight_grid_residuals_downsampled0); // [wy][wx] + + for (uint32_t i = 0; i < total_grid_pixels; i++) + weight_grid_residuals_downsampled0[i] /= diag_AtA[i]; + + if (dual_plane_flag) + { + downsample_weight_residual_grid( + unweighted_downsample_matrix.get_ptr(), + block_width, block_height, // source/from dimension (block size) + grid_width, grid_height, // dest/to dimension (grid size) + weight_block_raw_residuals1, // these are dequantized weights, NOT ISE symbols, [by][bx] + weight_grid_residuals_downsampled1); // [wy][wx] + + for (uint32_t i = 0; i < total_grid_pixels; i++) + weight_grid_residuals_downsampled1[i] /= diag_AtA[i]; + } + + // Apply the residuals at 
grid res and quantize + const float Q = 1.0f; + + uint8_t refined_grid_weights0[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS], refined_grid_weights1[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + + for (uint32_t i = 0; i < total_grid_pixels; i++) + { + float v = (float)dequant_tab[weights0[i]] + weight_grid_residuals_downsampled0[i] * Q; + int iv = clamp((int)std::roundf(v), 0, 64); + refined_grid_weights0[i] = quant_tab[iv]; + } + + if (dual_plane_flag) + { + for (uint32_t i = 0; i < total_grid_pixels; i++) + { + float v = (float)dequant_tab[weights1[i]] + weight_grid_residuals_downsampled1[i] * Q; + int iv = clamp((int)std::roundf(v), 0, 64); + refined_grid_weights1[i] = quant_tab[iv]; + } + } +#else + uint8_t refined_grid_weights0[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS], refined_grid_weights1[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + + for (uint32_t i = 0; i < total_grid_pixels; i++) + refined_grid_weights0[i] = weights0[i]; + + if (dual_plane_flag) + { + for (uint32_t i = 0; i < total_grid_pixels; i++) + refined_grid_weights1[i] = weights1[i]; + } +#endif + + astc_helpers::log_astc_block refined_log_block(enc_log_block); + + // TODO: This refines both weight planes simultanously, probably not optimal, could do individually. + astc_helpers::set_weights(refined_log_block, refined_grid_weights0, 0); + + if (dual_plane_flag) + astc_helpers::set_weights(refined_log_block, refined_grid_weights1, 1); + + uint64_t refined_err = eval_error(block_width, block_height, refined_log_block, pixel_stats, params); + + if (refined_err < cur_err) + { + cur_err = refined_err; + + memcpy(weights0, refined_grid_weights0, total_grid_pixels); + + if (dual_plane_flag) + memcpy(weights1, refined_grid_weights1, total_grid_pixels); + + improved_flag = true; + } + + // QCD - not a huge boost (.05-.75 dB), but on the toughest blocks it does help. 
+ if ((qcd_enabled_flag) && (global_qcd_enabled)) + { + float ideal_block_weights0[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS], ideal_block_weights1[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + for (uint32_t i = 0; i < total_block_pixels; i++) + { + ideal_block_weights0[i] = (float)ideal_block_raw_weights0[i]; + + if (dual_plane_flag) + ideal_block_weights1[i] = (float)ideal_block_raw_weights1[i]; + } + + const float* pUpsample_matrix = basist::astc_ldr_t::find_astc_block_grid_data(block_width, block_height, grid_width, grid_height)->m_upsample_matrix.get_ptr(); + + qcd::qcd_min_solver solver; + + const uint32_t num_weight_levels = astc_helpers::get_ise_levels(weight_ise_range); + + assert(num_weight_levels <= 32); + int labels[32 + 1]; + + for (uint32_t i = 0; i < num_weight_levels; i++) + labels[i] = astc_helpers::g_dequant_tables.get_weight_tab(weight_ise_range).get_rank_to_val(i); + + solver.init(pUpsample_matrix, total_block_pixels, total_grid_pixels, labels, num_weight_levels); + + int grid_idx0[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS], grid_idx1[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + + const auto& ise_to_rank = astc_helpers::g_dequant_tables.get_weight_tab(weight_ise_range).m_ISE_to_rank; + + for (uint32_t i = 0; i < total_grid_pixels; i++) + { + grid_idx0[i] = ise_to_rank[refined_grid_weights0[i]]; + + if (dual_plane_flag) + grid_idx1[i] = ise_to_rank[refined_grid_weights1[i]]; + } + + float resid0[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS], resid1[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + + solver.build_residual(grid_idx0, ideal_block_weights0, resid0); + + const uint32_t MAX_QCD_SWEEPS = 5; + for (uint32_t t = 0; t < MAX_QCD_SWEEPS; t++) + { + int moved0 = solver.sweep(grid_idx0, resid0); + if (!moved0) + break; + } + + if (dual_plane_flag) + { + solver.build_residual(grid_idx1, ideal_block_weights1, resid1); + + for (uint32_t t = 0; t < MAX_QCD_SWEEPS; t++) + { + int moved1 = solver.sweep(grid_idx1, resid1); + if (!moved1) + break; + } + } + + const auto& rank_to_ise = 
astc_helpers::g_dequant_tables.get_weight_tab(weight_ise_range).m_rank_to_ISE; + + for (uint32_t i = 0; i < total_grid_pixels; i++) + { + refined_grid_weights0[i] = rank_to_ise[grid_idx0[i]]; + + if (dual_plane_flag) + refined_grid_weights1[i] = rank_to_ise[grid_idx1[i]]; + } + + refined_log_block = enc_log_block; + + astc_helpers::set_weights(refined_log_block, refined_grid_weights0, 0); + + if (dual_plane_flag) + astc_helpers::set_weights(refined_log_block, refined_grid_weights1, 1); + + refined_err = eval_error(block_width, block_height, refined_log_block, pixel_stats, params); + + if (refined_err < cur_err) + { + cur_err = refined_err; + + memcpy(weights0, refined_grid_weights0, total_grid_pixels); + + if (dual_plane_flag) + memcpy(weights1, refined_grid_weights1, total_grid_pixels); + + improved_flag = true; + } + } + } // if (qcd_enabled) + + if ((polish_weights_flag) && (global_polish_weights_enabled)) + { + // Final, expensive, weight polish. Much can be done to improve this, but it's hopefully not ran much in the first place. + // TODO: The dB gain from this is large, must optimize. + for (uint32_t polish_pass = 0; polish_pass < NUM_WEIGHT_POLISH_PASSES; polish_pass++) + { + for (uint32_t y = 0; y < grid_height; y++) + { + for (uint32_t x = 0; x < grid_width; x++) + { + for (uint32_t plane_iter = 0; plane_iter < (dual_plane_flag ? 
2u : 1u); plane_iter++) + { + uint8_t base_grid_weights0[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS], base_grid_weights1[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + + memcpy(base_grid_weights0, weights0, total_grid_pixels); + if (dual_plane_flag) + memcpy(base_grid_weights1, weights1, total_grid_pixels); + + for (int delta = -1; delta <= 1; delta += 2) + { + uint8_t trial_grid_weights0[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS], trial_grid_weights1[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + + memcpy(trial_grid_weights0, base_grid_weights0, total_grid_pixels); + + if (dual_plane_flag) + memcpy(trial_grid_weights1, base_grid_weights1, total_grid_pixels); + + if (plane_iter == 0) + trial_grid_weights0[x + y * grid_width] = (uint8_t)astc_ldr::apply_delta_to_bise_weight_val(weight_ise_range, base_grid_weights0[x + y * grid_width], delta); + else + trial_grid_weights1[x + y * grid_width] = (uint8_t)astc_ldr::apply_delta_to_bise_weight_val(weight_ise_range, base_grid_weights1[x + y * grid_width], delta); + + astc_helpers::log_astc_block trial_log_block(enc_log_block); + + astc_helpers::set_weights(trial_log_block, trial_grid_weights0, 0); + + if (dual_plane_flag) + astc_helpers::set_weights(trial_log_block, trial_grid_weights1, 1); + + uint64_t trial_err = eval_error(block_width, block_height, trial_log_block, pixel_stats, params); + + if (trial_err < cur_err) + { + cur_err = trial_err; + + memcpy(weights0, trial_grid_weights0, total_grid_pixels); + + if (dual_plane_flag) + memcpy(weights1, trial_grid_weights1, total_grid_pixels); + + improved_flag = true; + } + + } // delta + + } // plane_iter + + } // x + } // y + + } // polish_pass + + } // polish_flag + + astc_helpers::log_astc_block new_log_block(enc_log_block); + + astc_helpers::set_weights(new_log_block, weights0, 0); + + if (dual_plane_flag) + astc_helpers::set_weights(new_log_block, weights1, 1); + +#if defined(_DEBUG) || defined(DEBUG) + uint64_t new_err = eval_error(block_width, block_height, new_log_block, pixel_stats, params); + 
+ assert(cur_err == new_err); + + if (improved_flag) + { + uint64_t orig_err = eval_error(block_width, block_height, enc_log_block, pixel_stats, params); + + assert(new_err < orig_err); + } +#endif + + enc_log_block = new_log_block; + + return true; +} + +bool encode_trial_subsets( + uint32_t block_width, uint32_t block_height, + const astc_ldr::pixel_stats_t& pixel_stats, + uint32_t cem_index, uint32_t num_parts, + uint32_t pat_seed_index, const astc_ldr::partition_pattern_vec* pPat, // seed index is a ASTC partition pattern index + uint32_t endpoint_ise_range, uint32_t weight_ise_range, + uint32_t grid_width, uint32_t grid_height, + astc_helpers::log_astc_block& enc_log_block, + const astc_ldr::cem_encode_params& params, + bool refine_only_flag = false, + bool gradient_descent_flag = true, bool polish_weights_flag = true, bool qcd_enabled_flag = true, + bool use_blue_contraction = true, + bool* pBase_ofs_clamped_flag = nullptr) +{ + assert((num_parts >= 2) && (num_parts <= astc_helpers::MAX_PARTITIONS)); + assert(pPat); + assert(pat_seed_index < astc_helpers::NUM_PARTITION_PATTERNS); + + if (pBase_ofs_clamped_flag) + *pBase_ofs_clamped_flag = false; + + const bool is_downsampling = (grid_width < block_width) || (grid_height < block_height); + //const uint32_t total_block_pixels = block_width * block_height; + const uint32_t total_grid_pixels = grid_width * grid_height; + + color_rgba part_pixels[astc_helpers::MAX_PARTITIONS][astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + uint32_t num_part_pixels[astc_helpers::MAX_PARTITIONS] = { 0 }; + + for (uint32_t y = 0; y < block_height; y++) + { + for (uint32_t x = 0; x < block_width; x++) + { + const color_rgba& px = pixel_stats.m_pixels[x + y * block_width]; + + const uint32_t part_index = (*pPat)(x, y); + assert(part_index < num_parts); + + part_pixels[part_index][num_part_pixels[part_index]] = px; + num_part_pixels[part_index]++; + } // x + } // y + +#if defined(_DEBUG) || defined(DEBUG) + for (uint32_t i = 0; i < num_parts; 
i++) + assert(num_part_pixels[i]); +#endif + + astc_ldr::pixel_stats_t part_pixel_stats[astc_helpers::MAX_PARTITIONS]; + + for (uint32_t i = 0; i < num_parts; i++) + part_pixel_stats[i].clear(); + + uint8_t part_endpoints[astc_helpers::MAX_PARTITIONS][astc_helpers::MAX_CEM_ENDPOINT_VALS]; + uint8_t part_weights[astc_helpers::MAX_PARTITIONS][astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + + for (uint32_t part_index = 0; part_index < num_parts; part_index++) + { + part_pixel_stats[part_index].init(num_part_pixels[part_index], &part_pixels[part_index][0]); + + if (!refine_only_flag) + { + bool base_ofs_clamped_flag = false; + + // Encode at block res, but with quantized weights + uint64_t block_err = astc_ldr::cem_encode_pixels(cem_index, -1, part_pixel_stats[part_index], params, + endpoint_ise_range, weight_ise_range, + &part_endpoints[part_index][0], &part_weights[part_index][0], nullptr, UINT64_MAX, use_blue_contraction, &base_ofs_clamped_flag); + + if (block_err == UINT64_MAX) + return false; + + if ((pBase_ofs_clamped_flag) && (base_ofs_clamped_flag)) + *pBase_ofs_clamped_flag = true; + } + + } // part_index + + const uint32_t num_endpoint_vals = astc_helpers::get_num_cem_values(cem_index); + + if (!refine_only_flag) + { + uint8_t block_weights[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + + clear_obj(num_part_pixels); + + for (uint32_t y = 0; y < block_height; y++) + { + for (uint32_t x = 0; x < block_width; x++) + { + const uint32_t part_index = (*pPat)(x, y); + assert(part_index < num_parts); + + block_weights[x + y * block_width] = part_weights[part_index][num_part_pixels[part_index]]; + num_part_pixels[part_index]++; + } // x + } // y + + enc_log_block.clear(); + + enc_log_block.m_grid_width = (uint8_t)grid_width; + enc_log_block.m_grid_height = (uint8_t)grid_height; + enc_log_block.m_weight_ise_range = (uint8_t)weight_ise_range; + enc_log_block.m_endpoint_ise_range = (uint8_t)endpoint_ise_range; + + enc_log_block.m_num_partitions = (uint8_t)num_parts; + for (uint32_t i = 
0; i < num_parts; i++) + enc_log_block.m_color_endpoint_modes[i] = (uint8_t)cem_index; + enc_log_block.m_partition_id = (uint16_t)pat_seed_index; + + if (is_downsampling) + { + // TODO: Make the downsample step faster + const float* pDownsample_matrix = basist::astc_ldr_t::find_astc_block_grid_data(block_width, block_height, grid_width, grid_height)->m_downsample_matrix.get_ptr(); + + // Now downsample the weight grid (quantized to quantized) + astc_ldr_downsample_ise_weights( + weight_ise_range, weight_ise_range, + block_width, block_height, + grid_width, grid_height, + block_weights, enc_log_block.m_weights, + pDownsample_matrix); + } + else + { + memcpy(enc_log_block.m_weights, block_weights, total_grid_pixels); + } + + for (uint32_t p = 0; p < num_parts; p++) + memcpy(enc_log_block.m_endpoints + num_endpoint_vals * p, &part_endpoints[p][0], num_endpoint_vals); + } + + // attempt endpoint refinement given the current weights + // TODO: Expose to caller + const uint32_t NUM_REFINEMENT_PASSES = 3; + for (uint32_t refine_pass = 0; refine_pass < NUM_REFINEMENT_PASSES; refine_pass++) + { + uint8_t dequantized_raw_weights0[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + uint8_t upsampled_weights0[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; // raw weights, NOT ISE + + for (uint32_t i = 0; i < total_grid_pixels; i++) + dequantized_raw_weights0[i] = astc_helpers::g_dequant_tables.get_weight_tab(weight_ise_range).m_ISE_to_val[enc_log_block.m_weights[i]]; + + astc_helpers::upsample_weight_grid(block_width, block_height, grid_width, grid_height, dequantized_raw_weights0, upsampled_weights0); + + astc_helpers::log_astc_block alt_enc_log_block(enc_log_block); + + uint8_t raw_part_weights[astc_helpers::MAX_PARTITIONS][astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + + clear_obj(num_part_pixels); + + for (uint32_t y = 0; y < block_height; y++) + { + for (uint32_t x = 0; x < block_width; x++) + { + const uint32_t part_index = (*pPat)(x, y); + assert(part_index < num_parts); + + 
raw_part_weights[part_index][num_part_pixels[part_index]] = upsampled_weights0[x + y * block_width]; + num_part_pixels[part_index]++; + } // x + } // y + + for (uint32_t part_index = 0; part_index < num_parts; part_index++) + { + assert(num_part_pixels[part_index] == part_pixel_stats[part_index].m_num_pixels); + + astc_ldr::cem_encode_params temp_params(params); + temp_params.m_pForced_weight_vals0 = &raw_part_weights[part_index][0]; + + uint8_t temp_weights[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + + bool base_ofs_clamped_flag = false; + + // Encode at block res, but with quantized weights + uint64_t block_err = astc_ldr::cem_encode_pixels(cem_index, -1, part_pixel_stats[part_index], temp_params, + endpoint_ise_range, astc_helpers::BISE_64_LEVELS, + &alt_enc_log_block.m_endpoints[num_endpoint_vals * part_index], temp_weights, nullptr, UINT64_MAX, use_blue_contraction, &base_ofs_clamped_flag); + + if (block_err == UINT64_MAX) + return false; + + if ((pBase_ofs_clamped_flag) && (base_ofs_clamped_flag)) + *pBase_ofs_clamped_flag = true; + +#if defined(_DEBUG) || defined(DEBUG) + for (uint32_t i = 0; i < part_pixel_stats[part_index].m_num_pixels; i++) + { + assert(temp_weights[i] == temp_params.m_pForced_weight_vals0[i]); + } +#endif + + } // part_index + + uint64_t cur_err = eval_error(block_width, block_height, enc_log_block, pixel_stats, params); + uint64_t ref_err = eval_error(block_width, block_height, alt_enc_log_block, pixel_stats, params); + + if (ref_err < cur_err) + { + memcpy(&enc_log_block, &alt_enc_log_block, sizeof(astc_helpers::log_astc_block)); + } + + if (refine_pass == (NUM_REFINEMENT_PASSES - 1)) + break; + + if ((is_downsampling) && (gradient_descent_flag || polish_weights_flag)) + { + bool improved_flag = false; + bool status = polish_block_weights(block_width, block_height, pixel_stats, enc_log_block, params, pPat, improved_flag, gradient_descent_flag, polish_weights_flag, qcd_enabled_flag); + if (!status) + { + assert(0); + } + + if 
(!improved_flag) + break; + } + else + { + break; + } + } // refine_pass + + return true; +} + +bool encode_trial( + uint32_t block_width, uint32_t block_height, + const astc_ldr::pixel_stats_t& pixel_stats, + uint32_t cem_index, + bool dual_plane_flag, int ccs_index, + uint32_t endpoint_ise_range, uint32_t weight_ise_range, + uint32_t grid_width, uint32_t grid_height, + astc_helpers::log_astc_block& enc_log_block, + const astc_ldr::cem_encode_params& params, + bool gradient_descent_flag = true, bool polish_weights_flag = true, bool qcd_enabled_flag = true, + bool use_blue_contraction = true, + bool* pBase_ofs_clamped_flag = nullptr) +{ + assert(dual_plane_flag || (ccs_index == -1)); + + if (pBase_ofs_clamped_flag) + *pBase_ofs_clamped_flag = false; + + const bool is_downsampling = (grid_width < block_width) || (grid_height < block_height); + + const basist::astc_ldr_t::astc_block_grid_data* pBlock_grid_data = basist::astc_ldr_t::find_astc_block_grid_data(block_width, block_height, grid_width, grid_height); + + const float* pDownsample_matrix = nullptr; + if (is_downsampling) + pDownsample_matrix = pBlock_grid_data->m_downsample_matrix.get_ptr(); + + //const uint32_t total_block_pixels = block_width * block_height; + const uint32_t total_grid_pixels = grid_width * grid_height; + + const auto& dequant_tab = astc_helpers::g_dequant_tables.get_weight_tab(weight_ise_range).m_ISE_to_val; + //const auto& quant_tab = astc_helpers::g_dequant_tables.get_weight_tab(weight_ise_range).m_val_to_ise; + + enc_log_block.clear(); + + enc_log_block.m_grid_width = (uint8_t)grid_width; + enc_log_block.m_grid_height = (uint8_t)grid_height; + enc_log_block.m_weight_ise_range = (uint8_t)weight_ise_range; + enc_log_block.m_endpoint_ise_range = (uint8_t)endpoint_ise_range; + + enc_log_block.m_dual_plane = dual_plane_flag; + if (dual_plane_flag) + { + assert((ccs_index >= 0) && (ccs_index <= 3)); + enc_log_block.m_color_component_selector = (uint8_t)ccs_index; + } + else + { + 
assert(ccs_index == -1); + } + + enc_log_block.m_num_partitions = 1; + enc_log_block.m_color_endpoint_modes[0] = (uint8_t)cem_index; + + uint8_t fullres_endpoints[astc_helpers::MAX_CEM_ENDPOINT_VALS]; + uint8_t weights0[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + uint8_t weights1[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + + if ((grid_width == block_width) && (grid_height == block_height)) + { + bool base_ofs_clamped_flag = false; + + uint64_t block_err = astc_ldr::cem_encode_pixels(cem_index, ccs_index, pixel_stats, params, + endpoint_ise_range, weight_ise_range, + fullres_endpoints, weights0, weights1, UINT64_MAX, use_blue_contraction, &base_ofs_clamped_flag); + + if (block_err == UINT64_MAX) + return false; + + if ((pBase_ofs_clamped_flag) && (base_ofs_clamped_flag)) + *pBase_ofs_clamped_flag = base_ofs_clamped_flag; + + if (dual_plane_flag) + { + for (uint32_t i = 0; i < total_grid_pixels; i++) + { + enc_log_block.m_weights[i * 2 + 0] = weights0[i]; + enc_log_block.m_weights[i * 2 + 1] = weights1[i]; + } + } + else + { + memcpy(enc_log_block.m_weights, weights0, total_grid_pixels); + } + + memcpy(enc_log_block.m_endpoints, fullres_endpoints, astc_helpers::get_num_cem_values(cem_index)); + + return true; + } + + // Handle downsampled weight grids case + + uint8_t fullres_raw_weights0[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + uint8_t fullres_raw_weights1[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + + bool base_ofs_clamped_flag = false; + + // Encode at block res, but with quantized weights + uint64_t block_err = astc_ldr::cem_encode_pixels(cem_index, ccs_index, pixel_stats, params, + endpoint_ise_range, weight_ise_range, + fullres_endpoints, fullres_raw_weights0, fullres_raw_weights1, UINT64_MAX, use_blue_contraction, &base_ofs_clamped_flag); + + if (block_err == UINT64_MAX) + return false; + + if ((pBase_ofs_clamped_flag) && (base_ofs_clamped_flag)) + *pBase_ofs_clamped_flag = base_ofs_clamped_flag; + + // Now downsample the weight grid (quantized to quantized) + 
astc_ldr_downsample_ise_weights( + weight_ise_range, weight_ise_range, + block_width, block_height, + grid_width, grid_height, + fullres_raw_weights0, weights0, + pDownsample_matrix); + + astc_helpers::set_weights(enc_log_block, weights0, 0); + + if (dual_plane_flag) + { + astc_ldr_downsample_ise_weights( + weight_ise_range, weight_ise_range, + block_width, block_height, + grid_width, grid_height, + fullres_raw_weights1, weights1, + pDownsample_matrix); + } + + if (dual_plane_flag) + astc_helpers::set_weights(enc_log_block, weights1, 1); + + memcpy(enc_log_block.m_endpoints, fullres_endpoints, astc_helpers::get_num_cem_values(cem_index)); + + // TODO: Expose to caller + const uint32_t NUM_OUTER_PASSES = 3; + for (uint32_t outer_pass = 0; outer_pass < NUM_OUTER_PASSES; outer_pass++) + { + // endpoint refinement, given current upsampled weights + { + astc_helpers::extract_weights(enc_log_block, weights0, 0); + + if (dual_plane_flag) + astc_helpers::extract_weights(enc_log_block, weights1, 1); + + // Plane 0 + uint8_t dequantized_raw_weights0[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + uint8_t upsampled_weights0[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; // raw weights, NOT ISE + + for (uint32_t i = 0; i < total_grid_pixels; i++) + dequantized_raw_weights0[i] = dequant_tab[weights0[i]]; + + astc_helpers::upsample_weight_grid(block_width, block_height, grid_width, grid_height, dequantized_raw_weights0, upsampled_weights0); + + // Plane 1 + uint8_t dequantized_raw_weights1[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + uint8_t upsampled_weights1[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; // raw weights, NOT ISE + + if (dual_plane_flag) + { + for (uint32_t i = 0; i < total_grid_pixels; i++) + dequantized_raw_weights1[i] = dequant_tab[weights1[i]]; + astc_helpers::upsample_weight_grid(block_width, block_height, grid_width, grid_height, dequantized_raw_weights1, upsampled_weights1); + } + + // Jam in the weights to the actual raw [0,64] weights the decoder is going to use after upsampling the 
grid. + astc_ldr::cem_encode_params refine_params(params); + refine_params.m_pForced_weight_vals0 = upsampled_weights0; + if (dual_plane_flag) + refine_params.m_pForced_weight_vals1 = upsampled_weights1; + + uint8_t refined_endpoints[astc_helpers::MAX_CEM_ENDPOINT_VALS]; + uint8_t refined_weights0[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + uint8_t refined_weights1[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + + uint64_t refined_block_err = astc_ldr::cem_encode_pixels(cem_index, ccs_index, pixel_stats, refine_params, + endpoint_ise_range, astc_helpers::BISE_64_LEVELS, + refined_endpoints, refined_weights0, refined_weights1, UINT64_MAX, use_blue_contraction, &base_ofs_clamped_flag); + assert(refined_block_err != UINT64_MAX); + + if ((pBase_ofs_clamped_flag) && (base_ofs_clamped_flag)) + *pBase_ofs_clamped_flag = base_ofs_clamped_flag; + + if (refined_block_err != UINT64_MAX) + { + uint64_t cur_err = eval_error( + block_width, block_height, + pixel_stats, + cem_index, + dual_plane_flag, ccs_index, + endpoint_ise_range, weight_ise_range, + grid_width, grid_height, + enc_log_block.m_endpoints, weights0, weights1, + params); + + if (refined_block_err < cur_err) + { + memcpy(enc_log_block.m_endpoints, refined_endpoints, astc_helpers::get_num_cem_values(cem_index)); + } + } + } + + if (outer_pass == (NUM_OUTER_PASSES - 1)) + break; + + if ((!gradient_descent_flag) && (!polish_weights_flag)) + break; + + bool improved_flag = false; + + bool status = polish_block_weights( + block_width, block_height, + pixel_stats, + enc_log_block, // assumes there is already a good encoding to improve here + params, + nullptr, + improved_flag, + gradient_descent_flag, + polish_weights_flag, + qcd_enabled_flag); + + if (!status) + { + assert(0); + return false; + } + + if (!improved_flag) + break; + + } // outer_pass + + return true; +} + +// 1 part only, refines endpoints given current weights +bool encode_trial_refine_only( + uint32_t block_width, uint32_t block_height, + const 
astc_ldr::pixel_stats_t& pixel_stats, + astc_helpers::log_astc_block& enc_log_block, + const astc_ldr::cem_encode_params& params, + bool use_blue_contraction = true, + bool* pBase_ofs_clamped_flag = nullptr) +{ + assert(enc_log_block.m_num_partitions == 1); + + if (pBase_ofs_clamped_flag) + *pBase_ofs_clamped_flag = false; + + const uint32_t cem_index = enc_log_block.m_color_endpoint_modes[0]; + const bool dual_plane_flag = enc_log_block.m_dual_plane; + const int ccs_index = dual_plane_flag ? enc_log_block.m_color_component_selector : -1; + const uint32_t endpoint_ise_range = enc_log_block.m_endpoint_ise_range; + const uint32_t weight_ise_range = enc_log_block.m_weight_ise_range; + const uint32_t grid_width = enc_log_block.m_grid_width; + const uint32_t grid_height = enc_log_block.m_grid_height; + + //const bool is_downsampling = (grid_width < block_width) || (grid_height < block_height); + + //const uint32_t total_block_pixels = block_width * block_height; + const uint32_t total_grid_pixels = grid_width * grid_height; + + uint8_t dequantized_raw_weights0[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + uint8_t upsampled_weights0[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; // raw weights, NOT ISE + + for (uint32_t i = 0; i < total_grid_pixels; i++) + dequantized_raw_weights0[i] = astc_helpers::g_dequant_tables.get_weight_tab(weight_ise_range).m_ISE_to_val[astc_helpers::get_weight(enc_log_block, 0, i)]; + + // suppress bogus gcc warning on dequantized_raw_weights0 +#ifndef __clang__ +#if defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#endif +#endif + + astc_helpers::upsample_weight_grid(block_width, block_height, grid_width, grid_height, dequantized_raw_weights0, upsampled_weights0); + +#ifndef __clang__ +#if defined(__GNUC__) +#pragma GCC diagnostic pop +#endif +#endif + + uint8_t dequantized_raw_weights1[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + uint8_t upsampled_weights1[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; // raw weights, 
NOT ISE + + if (dual_plane_flag) + { + for (uint32_t i = 0; i < total_grid_pixels; i++) + dequantized_raw_weights1[i] = astc_helpers::g_dequant_tables.get_weight_tab(weight_ise_range).m_ISE_to_val[astc_helpers::get_weight(enc_log_block, 1, i)]; + astc_helpers::upsample_weight_grid(block_width, block_height, grid_width, grid_height, dequantized_raw_weights1, upsampled_weights1); + } + + astc_ldr::cem_encode_params refine_params(params); + refine_params.m_pForced_weight_vals0 = upsampled_weights0; + if (dual_plane_flag) + refine_params.m_pForced_weight_vals1 = upsampled_weights1; + + uint8_t refined_endpoints[astc_helpers::MAX_CEM_ENDPOINT_VALS]; + uint8_t refined_weights0[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + uint8_t refined_weights1[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + + //bool use_blue_contraction = true; + + bool base_ofs_clamped_flag = false; + + uint64_t refined_block_err = astc_ldr::cem_encode_pixels(cem_index, ccs_index, pixel_stats, refine_params, + endpoint_ise_range, astc_helpers::BISE_64_LEVELS, + refined_endpoints, refined_weights0, refined_weights1, UINT64_MAX, use_blue_contraction, &base_ofs_clamped_flag); + assert(refined_block_err != UINT64_MAX); + + if ((pBase_ofs_clamped_flag) && (base_ofs_clamped_flag)) + *pBase_ofs_clamped_flag = base_ofs_clamped_flag; + +#if defined(_DEBUG) || defined(DEBUG) + for (uint32_t i = 0; i < total_grid_pixels; i++) + { + assert(refined_weights0[i] == upsampled_weights0[i]); + + if (dual_plane_flag) + { + assert(refined_weights1[i] == upsampled_weights1[i]); + } + } +#endif + + if (refined_block_err != UINT64_MAX) + { + astc_helpers::log_astc_block alt_enc_log_block(enc_log_block); + memcpy(alt_enc_log_block.m_endpoints, refined_endpoints, astc_helpers::get_num_cem_values(cem_index)); + +#if defined(_DEBUG) || defined(DEBUG) + // refined_block_err was computed on the actual ASTC [0,64] upsampled weights the decoder would use. But double check this for sanity. 
+ { + uint64_t ref_err = eval_error(block_width, block_height, alt_enc_log_block, pixel_stats, params); + assert(ref_err == refined_block_err); + } +#endif + + uint64_t cur_err = eval_error(block_width, block_height, enc_log_block, pixel_stats, params); + + if (refined_block_err < cur_err) + { + memcpy(enc_log_block.m_endpoints, refined_endpoints, astc_helpers::get_num_cem_values(cem_index)); + } + } + + return true; +} + +struct log_surrogate_astc_blk +{ + int m_grid_width, m_grid_height; + + uint32_t m_cem_index; // base+scale or direct variants only + int m_ccs_index; // -1 for single plane + + uint32_t m_num_endpoint_levels; + uint32_t m_num_weight_levels; + + uint32_t m_num_parts; // 1-3 + uint32_t m_seed_index; // ASTC seed index, 10-bits if m_num_parts > 1 + + vec4F m_endpoints[astc_helpers::MAX_PARTITIONS][2]; // [subset_index][l/h endpoint] + float m_scales[astc_helpers::MAX_PARTITIONS]; // scale factor used for each subset + + float m_weights0[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + float m_weights1[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + + void clear() + { + memset((void *)this, 0, sizeof(*this)); + } + + void decode(uint32_t block_width, uint32_t block_height, vec4F* pPixels, const astc_ldr::partition_pattern_vec* pPat) const; + void decode(uint32_t block_width, uint32_t block_height, vec4F* pPixels, const astc_ldr::partitions_data* pPat_data) const; +}; + +void upsample_surrogate_weights( + const astc_helpers::weighted_sample* pWeighted_samples, + const float* pSrc_weights, + float* pDst_weights, + uint32_t by, uint32_t bx, + uint32_t wx, uint32_t wy, + uint32_t num_weight_levels) +{ + const uint32_t total_src_weights = wx * wy; + const float weight_levels_minus_1 = (float)(num_weight_levels - 1) * (1.0f / 16.0f); + const float inv_weight_levels = 1.0f / (float)(num_weight_levels - 1); + + const astc_helpers::weighted_sample* pS = pWeighted_samples; + + for (uint32_t y = 0; y < by; y++) + { + for (uint32_t x = 0; x < bx; x++, ++pS) + { + const uint32_t 
w00 = pS->m_weights[0][0]; + const uint32_t w01 = pS->m_weights[0][1]; + const uint32_t w10 = pS->m_weights[1][0]; + const uint32_t w11 = pS->m_weights[1][1]; + + assert(w00 || w01 || w10 || w11); + + const uint32_t sx = pS->m_src_x, sy = pS->m_src_y; + + float total = 0.0f; + + if (w00) total += pSrc_weights[bounds_check(sx + sy * wx, 0U, total_src_weights)] * (float)w00; + if (w01) total += pSrc_weights[bounds_check(sx + 1 + sy * wx, 0U, total_src_weights)] * (float)w01; + if (w10) total += pSrc_weights[bounds_check(sx + (sy + 1) * wx, 0U, total_src_weights)] * (float)w10; + if (w11) total += pSrc_weights[bounds_check(sx + 1 + (sy + 1) * wx, 0U, total_src_weights)] * (float)w11; + + float w = (float)fast_roundf_pos_int(total * weight_levels_minus_1) * inv_weight_levels; + + pDst_weights[x + y * bx] = w; + } // x + } // y +} + +void log_surrogate_astc_blk::decode(uint32_t block_width, uint32_t block_height, vec4F* pPixels, const astc_ldr::partition_pattern_vec* pPat) const +{ + const bool dual_plane = (m_ccs_index >= 0); + + const uint32_t total_block_pixels = block_width * block_height; + const uint32_t total_grid_pixels = m_grid_width * m_grid_height; + + const bool needs_upsampling = total_grid_pixels < total_block_pixels; + + const bool is_small_block = total_block_pixels < 31; // astc_helpers::is_small_block(block_width, block_height); + BASISU_NOTE_UNUSED(is_small_block); + + float upsampled_weights0[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS], upsampled_weights1[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + + const float* pWeights0 = m_weights0; + const float* pWeights1 = m_weights1; + + if (needs_upsampling) + { + // TODO: Precompute these in tables + astc_helpers::weighted_sample up_weights[astc_helpers::MAX_BLOCK_DIM * astc_helpers::MAX_BLOCK_DIM]; + astc_helpers::compute_upsample_weights(block_width, block_height, m_grid_width, m_grid_height, up_weights); + + upsample_surrogate_weights(up_weights, m_weights0, upsampled_weights0, block_width, block_height, 
m_grid_width, m_grid_height, m_num_weight_levels); + pWeights0 = upsampled_weights0; + + if (dual_plane) + { + upsample_surrogate_weights(up_weights, m_weights1, upsampled_weights1, block_width, block_height, m_grid_width, m_grid_height, m_num_weight_levels); + pWeights1 = upsampled_weights1; + } + } + + for (uint32_t y = 0; y < block_height; y++) + { + for (uint32_t x = 0; x < block_width; x++) + { + uint32_t part_index = 0; + if (m_num_parts > 1) + { + part_index = (*pPat)(x, y); + assert(part_index < m_num_parts); + + assert(part_index == (uint32_t)astc_helpers::compute_texel_partition(m_seed_index, x, y, 0, m_num_parts, is_small_block)); + } + + const vec4F& l = m_endpoints[part_index][0]; + const vec4F& h = m_endpoints[part_index][1]; + + vec4F& dst = pPixels[x + y * block_width]; + + for (uint32_t c = 0; c < 4; c++) + { + float w = ((int)c == m_ccs_index) ? pWeights1[x + y * block_width] : pWeights0[x + y * block_width]; + + //dst[c] = lerp(l[c], h[c], w); + + const float one_minus_w = 1.0f - w; + dst[c] = l[c] * one_minus_w + h[c] * w; + } // c + + } // x + } // y +} + +void log_surrogate_astc_blk::decode(uint32_t block_width, uint32_t block_height, vec4F* pPixels, const astc_ldr::partitions_data* pPat_data) const +{ + if (m_num_parts == 1) + return decode(block_width, block_height, pPixels, (const astc_ldr::partition_pattern_vec*)nullptr); + + uint32_t unique_pat_index = pPat_data->m_part_seed_to_unique_index[m_seed_index]; + assert(unique_pat_index < pPat_data->m_total_unique_patterns); + + return decode(block_width, block_height, pPixels, &pPat_data->m_partition_pats[unique_pat_index]); +} + +void downsample_float_weight_grid( + const float* pMatrix_weights, + uint32_t bx, uint32_t by, // source/from dimension (block size) + uint32_t wx, uint32_t wy, // dest/to dimension (grid size) + const float* pSrc_weights, // these are dequantized weights, NOT ISE symbols, [by][bx] + float* pDst_weights, // [wy][wx] + uint32_t num_weight_levels) +{ + const uint32_t 
total_block_samples = bx * by; + const float weight_levels_minus_1 = (float)(num_weight_levels - 1); + const float inv_weight_levels = 1.0f / (float)(num_weight_levels - 1); + + for (uint32_t y = 0; y < wy; y++) + { + for (uint32_t x = 0; x < wx; x++) + { + float total = 0.0f; + + // TODO - optimize! + for (uint32_t i = 0; i < total_block_samples; i++) + if (pMatrix_weights[i]) + total += pMatrix_weights[i] * (float)pSrc_weights[i]; + + pDst_weights[x + y * wx] = (float)fast_roundf_pos_int(total * weight_levels_minus_1) * inv_weight_levels; + + pMatrix_weights += total_block_samples; + } + } +} + +float decode_surrogate_and_compute_error( + uint32_t block_width, uint32_t block_height, + const astc_ldr::pixel_stats_t& pixel_stats, + log_surrogate_astc_blk& log_block, + const astc_ldr::partition_pattern_vec* pPat, + const astc_ldr::cem_encode_params& params) +{ + vec4F dec_pixels[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + log_block.decode(block_width, block_height, dec_pixels, pPat); + + const float wr = (float)params.m_comp_weights[0]; + const float wg = (float)params.m_comp_weights[1]; + const float wb = (float)params.m_comp_weights[2]; + const float wa = (float)params.m_comp_weights[3]; + + float total_err = 0.0f; + for (uint32_t by = 0; by < block_height; by++) + { + for (uint32_t bx = 0; bx < block_width; bx++) + { + const vec4F& s = pixel_stats.m_pixels_f[bx + by * block_width]; + const vec4F& d = dec_pixels[bx + by * block_width]; + + float dr = s[0] - d[0]; + float dg = s[1] - d[1]; + float db = s[2] - d[2]; + float da = s[3] - d[3]; + + total_err += (wr * dr * dr) + (wg * dg * dg) + (wb * db * db) + (wa * da * da); + } // bx + + } // by + + return total_err; +} + +// Returns WSSE error +float encode_surrogate_trial( + uint32_t block_width, uint32_t block_height, + const astc_ldr::pixel_stats_t& pixel_stats, + uint32_t cem_index, + int ccs_index, + uint32_t endpoint_ise_range, uint32_t weight_ise_range, + uint32_t grid_width, uint32_t grid_height, + 
log_surrogate_astc_blk& log_block, + const astc_ldr::cem_encode_params& params, + uint32_t flags) +{ + const bool is_downsampling = (grid_width < block_width) || (grid_height < block_height); + const bool dual_plane_flag = (ccs_index >= 0); + + const basist::astc_ldr_t::astc_block_grid_data* pBlock_grid_data = basist::astc_ldr_t::find_astc_block_grid_data(block_width, block_height, grid_width, grid_height); + + const float* pDownsample_matrix = nullptr; + if (is_downsampling) + pDownsample_matrix = pBlock_grid_data->m_downsample_matrix.get_ptr(); + + //const uint32_t total_block_pixels = block_width * block_height; + //const uint32_t total_grid_pixels = grid_width * grid_height; + + log_block.m_cem_index = cem_index; + log_block.m_ccs_index = ccs_index; + log_block.m_grid_width = grid_width; + log_block.m_grid_height = grid_height; + log_block.m_num_parts = 1; + log_block.m_seed_index = 0; + clear_obj(log_block.m_scales); + log_block.m_num_endpoint_levels = astc_helpers::get_ise_levels(endpoint_ise_range); + log_block.m_num_weight_levels = astc_helpers::get_ise_levels(weight_ise_range); + + float wsse_err = 0.0f; + + if (is_downsampling) + { + float temp_weights0[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS], temp_weights1[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + + astc_ldr::cem_surrogate_encode_pixels( + cem_index, ccs_index, + pixel_stats, params, + endpoint_ise_range, weight_ise_range, + log_block.m_endpoints[0][0], log_block.m_endpoints[0][1], log_block.m_scales[0], temp_weights0, temp_weights1, + flags); + + downsample_float_weight_grid( + pDownsample_matrix, + block_width, block_height, + grid_width, grid_height, + temp_weights0, + log_block.m_weights0, + log_block.m_num_weight_levels); + + if (dual_plane_flag) + { + downsample_float_weight_grid( + pDownsample_matrix, + block_width, block_height, + grid_width, grid_height, + temp_weights1, + log_block.m_weights1, + log_block.m_num_weight_levels); + } + + wsse_err = decode_surrogate_and_compute_error(block_width, 
block_height, pixel_stats, log_block, nullptr, params); + } + else + { + wsse_err = astc_ldr::cem_surrogate_encode_pixels( + cem_index, ccs_index, + pixel_stats, params, + endpoint_ise_range, weight_ise_range, + log_block.m_endpoints[0][0], log_block.m_endpoints[0][1], log_block.m_scales[0], log_block.m_weights0, log_block.m_weights1, + flags); + +#if defined(_DEBUG) || defined(DEBUG) + { + float alt_wsse_err = decode_surrogate_and_compute_error(block_width, block_height, pixel_stats, log_block, nullptr, params); + assert(fabs(wsse_err - alt_wsse_err) < .00125f); + } +#endif + } + + return wsse_err; +} + +float encode_surrogate_trial_subsets( + uint32_t block_width, uint32_t block_height, + const astc_ldr::pixel_stats_t& pixel_stats, + uint32_t cem_index, + uint32_t num_subsets, uint32_t pat_seed_index, const astc_ldr::partition_pattern_vec* pPat, + uint32_t endpoint_ise_range, uint32_t weight_ise_range, + uint32_t grid_width, uint32_t grid_height, + log_surrogate_astc_blk& log_block, + const astc_ldr::cem_encode_params& params, + uint32_t flags) +{ + assert((num_subsets >= 2) && (num_subsets <= astc_helpers::MAX_PARTITIONS)); + + const bool is_downsampling = (grid_width < block_width) || (grid_height < block_height); + //const uint32_t total_block_pixels = block_width * block_height; + //const uint32_t total_grid_pixels = grid_width * grid_height; + + const uint32_t num_weight_levels = astc_helpers::get_ise_levels(weight_ise_range); + const uint32_t num_endpoint_levels = astc_helpers::get_ise_levels(endpoint_ise_range); + + const basist::astc_ldr_t::astc_block_grid_data* pBlock_grid_data = basist::astc_ldr_t::find_astc_block_grid_data(block_width, block_height, grid_width, grid_height); + + const float* pDownsample_matrix = nullptr; + if (is_downsampling) + pDownsample_matrix = pBlock_grid_data->m_downsample_matrix.get_ptr(); + + color_rgba part_pixels[astc_helpers::MAX_PARTITIONS][astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + uint32_t 
num_part_pixels[astc_helpers::MAX_PARTITIONS] = { 0 }; + + for (uint32_t y = 0; y < block_height; y++) + { + for (uint32_t x = 0; x < block_width; x++) + { + const color_rgba& px = pixel_stats.m_pixels[x + y * block_width]; + + const uint32_t part_index = (*pPat)(x, y); + assert(part_index < num_subsets); + + part_pixels[part_index][num_part_pixels[part_index]] = px; + num_part_pixels[part_index]++; + } // x + } // y + +#if defined(_DEBUG) || defined(DEBUG) + for (uint32_t i = 0; i < num_subsets; i++) + assert(num_part_pixels[i] > 0); +#endif + + astc_ldr::pixel_stats_t part_pixel_stats[astc_helpers::MAX_PARTITIONS]; + + for (uint32_t i = 0; i < num_subsets; i++) + part_pixel_stats[i].clear(); + + float part_weights[astc_helpers::MAX_PARTITIONS][astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + float temp_block_weights[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + + double total_subset_err = 0.0f; + for (uint32_t part_index = 0; part_index < num_subsets; part_index++) + { + part_pixel_stats[part_index].init(num_part_pixels[part_index], &part_pixels[part_index][0]); + + float subset_err = astc_ldr::cem_surrogate_encode_pixels( + cem_index, -1, + part_pixel_stats[part_index], params, + endpoint_ise_range, weight_ise_range, + log_block.m_endpoints[part_index][0], log_block.m_endpoints[part_index][1], + log_block.m_scales[part_index], part_weights[part_index], temp_block_weights, + flags); + + total_subset_err += subset_err; + + } // part_index + + float* pDst_weights = is_downsampling ? 
temp_block_weights : log_block.m_weights0; + + clear_obj(num_part_pixels); + + for (uint32_t y = 0; y < block_height; y++) + { + for (uint32_t x = 0; x < block_width; x++) + { + const uint32_t part_index = (*pPat)(x, y); + assert(part_index < num_subsets); + + pDst_weights[x + y * block_width] = part_weights[part_index][num_part_pixels[part_index]]; + num_part_pixels[part_index]++; + } // x + } // y + + log_block.m_cem_index = cem_index; + log_block.m_ccs_index = -1; + log_block.m_num_endpoint_levels = num_endpoint_levels; + log_block.m_num_weight_levels = num_weight_levels; + log_block.m_grid_width = grid_width; + log_block.m_grid_height = grid_height; + log_block.m_num_parts = num_subsets; + log_block.m_seed_index = pat_seed_index; + + if (is_downsampling) + { + downsample_float_weight_grid( + pDownsample_matrix, + block_width, block_height, + grid_width, grid_height, + temp_block_weights, + log_block.m_weights0, + astc_helpers::get_ise_levels(weight_ise_range)); + + total_subset_err = decode_surrogate_and_compute_error(block_width, block_height, pixel_stats, log_block, pPat, params); + } + +#if defined(_DEBUG) || defined(DEBUG) + if (!is_downsampling) + { + float alt_subset_err = decode_surrogate_and_compute_error(block_width, block_height, pixel_stats, log_block, pPat, params); + + assert(fabs(total_subset_err - alt_subset_err) < .00125f); + } +#endif + + return (float)total_subset_err; +} + +#if 0 +static inline vec4F vec4F_norm_approx(vec4F axis) +{ + float l = axis.norm(); + axis = (fabs(l) >= SMALL_FLOAT_VAL) ? 
(axis * bu_math::inv_sqrt(l)) : vec4F(.5f); + return axis; +} +#endif + +static bool estimate_partition2( + uint32_t block_width, uint32_t block_height, + const astc_ldr::pixel_stats_t& pixels, + int* pBest_parts, uint32_t num_best_parts, // unique indices, not ASTC seeds + const astc_ldr::partitions_data* pPart_data, bool brute_force_flag) +{ + assert(num_best_parts && (num_best_parts <= pPart_data->m_total_unique_patterns)); + + const uint32_t num_block_pixels = block_width * block_height; + + if (brute_force_flag) + { + int desired_parts[astc_ldr::ASTC_LDR_MAX_BLOCK_HEIGHT][astc_ldr::ASTC_LDR_MAX_BLOCK_WIDTH]; // [y][x] + + for (uint32_t i = 0; i < num_block_pixels; i++) + { + float proj = (pixels.m_pixels_f[i] - pixels.m_mean_f).dot(pixels.m_mean_rel_axis4); + + desired_parts[i / block_width][i % block_width] = proj < 0.0f; + } + + uint32_t part_similarity[astc_helpers::NUM_PARTITION_PATTERNS]; + + for (uint32_t part_index = 0; part_index < pPart_data->m_total_unique_patterns; part_index++) + { + const astc_ldr::partition_pattern_vec& pat_vec = pPart_data->m_partition_pats[part_index]; + + int total_sim_non_inv = 0; + int total_sim_inv = 0; + + for (uint32_t y = 0; y < block_height; y++) + { + for (uint32_t x = 0; x < block_width; x++) + { + int part = pat_vec[x + y * block_width]; + + if (part == desired_parts[y][x]) + total_sim_non_inv++; + + if ((part ^ 1) == desired_parts[y][x]) + total_sim_inv++; + } + } + + int total_sim = maximum(total_sim_non_inv, total_sim_inv); + + part_similarity[part_index] = (total_sim << 16) | part_index; + + } // part_index; + + std::sort(part_similarity, part_similarity + pPart_data->m_total_unique_patterns); + + for (uint32_t i = 0; i < num_best_parts; i++) + pBest_parts[i] = part_similarity[(pPart_data->m_total_unique_patterns - 1) - i] & 0xFFFF; + } + else + { + astc_ldr::partition_pattern_vec desired_part(block_width, block_height); + + for (uint32_t i = 0; i < num_block_pixels; i++) + { + float proj = (pixels.m_pixels_f[i] 
- pixels.m_mean_f).dot(pixels.m_mean_rel_axis4); + + desired_part.m_parts[i] = proj < 0.0f; + } + + astc_ldr::vp_tree::result_queue results; + results.reserve(num_best_parts); + + pPart_data->m_part_vp_tree.find_nearest(2, desired_part, results, num_best_parts); + + assert(results.get_size() == num_best_parts); + + const auto& elements = results.get_elements(); + + for (uint32_t i = 0; i < results.get_size(); i++) + pBest_parts[i] = elements[1 + i].m_pat_index; + } + + return true; +} + +static bool estimate_partition3( + uint32_t block_width, uint32_t block_height, + const astc_ldr::pixel_stats_t& pixels, + int* pBest_parts, uint32_t num_best_parts, + const astc_ldr::partitions_data* pPart_data, bool brute_force_flag) +{ + assert(num_best_parts && (num_best_parts <= pPart_data->m_total_unique_patterns)); + + vec4F training_vecs[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS], mean(0.0f); + + const uint32_t num_block_pixels = block_width * block_height, NUM_SUBSETS = 3; + + float brightest_inten = 0.0f, darkest_inten = BIG_FLOAT_VAL; + vec4F cluster_centroids[NUM_SUBSETS]; + clear_obj(cluster_centroids); + + for (uint32_t i = 0; i < num_block_pixels; i++) + { + vec4F& v = training_vecs[i]; + + v = pixels.m_pixels_f[i]; + + float inten = v.dot(vec4F(1.0f)); + if (inten < darkest_inten) + { + darkest_inten = inten; + cluster_centroids[0] = v; + } + + if (inten > brightest_inten) + { + brightest_inten = inten; + cluster_centroids[1] = v; + } + } + + if (cluster_centroids[0] == cluster_centroids[1]) + return false; + + float furthest_dist2 = 0.0f; + for (uint32_t i = 0; i < num_block_pixels; i++) + { + vec4F& v = training_vecs[i]; + + float dist_a = v.squared_distance(cluster_centroids[0]); + if (dist_a == 0.0f) + continue; + + float dist_b = v.squared_distance(cluster_centroids[1]); + if (dist_b == 0.0f) + continue; + + float dist2 = dist_a + dist_b; + if (dist2 > furthest_dist2) + { + furthest_dist2 = dist2; + cluster_centroids[2] = v; + } + } + + if ((cluster_centroids[0] == 
cluster_centroids[2]) || (cluster_centroids[1] == cluster_centroids[2])) + return false; + + uint32_t cluster_pixels[NUM_SUBSETS][astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + uint32_t num_cluster_pixels[NUM_SUBSETS]; + vec4F new_cluster_means[NUM_SUBSETS]; + + const uint32_t NUM_ITERS = 4; + + for (uint32_t s = 0; s < NUM_ITERS; s++) + { + memset(num_cluster_pixels, 0, sizeof(num_cluster_pixels)); + memset((void *)new_cluster_means, 0, sizeof(new_cluster_means)); + + for (uint32_t i = 0; i < num_block_pixels; i++) + { + float d[NUM_SUBSETS] = { + training_vecs[i].squared_distance(cluster_centroids[0]), + training_vecs[i].squared_distance(cluster_centroids[1]), + training_vecs[i].squared_distance(cluster_centroids[2]) }; + + float min_d = d[0]; + uint32_t min_idx = 0; + for (uint32_t j = 1; j < NUM_SUBSETS; j++) + { + if (d[j] < min_d) + { + min_d = d[j]; + min_idx = j; + } + } + + cluster_pixels[min_idx][num_cluster_pixels[min_idx]] = i; + new_cluster_means[min_idx] += training_vecs[i]; + num_cluster_pixels[min_idx]++; + } // i + + // Can skip updating the centroids on the last iteration - all we care about is the final partitioning. 
+ if (s == (NUM_ITERS - 1)) + { + for (uint32_t j = 0; j < NUM_SUBSETS; j++) + { + if (!num_cluster_pixels[j]) + return false; + } + } + else + { + for (uint32_t j = 0; j < NUM_SUBSETS; j++) + { + if (!num_cluster_pixels[j]) + return false; + + cluster_centroids[j] = new_cluster_means[j] / (float)num_cluster_pixels[j]; + } // j + } + + } // s + + astc_ldr::partition_pattern_vec desired_part(block_width, block_height); + + for (uint32_t p = 0; p < NUM_SUBSETS; p++) + { + for (uint32_t i = 0; i < num_cluster_pixels[p]; i++) + { + const uint32_t pix_index = cluster_pixels[p][i]; + desired_part[pix_index] = (uint8_t)p; + } // i + } // p + + if (brute_force_flag) + { + astc_ldr::partition_pattern_vec desired_parts[astc_ldr::NUM_PART3_MAPPINGS]; + for (uint32_t j = 0; j < astc_ldr::NUM_PART3_MAPPINGS; j++) + desired_parts[j] = desired_part.get_permuted3(j); + + uint32_t part_similarity[astc_helpers::NUM_PARTITION_PATTERNS]; + + for (uint32_t part_index = 0; part_index < pPart_data->m_total_unique_patterns; part_index++) + { + const astc_ldr::partition_pattern_vec& pat = pPart_data->m_partition_pats[part_index]; + + uint32_t lowest_pat_dist = UINT32_MAX; + for (uint32_t p = 0; p < astc_ldr::NUM_PART3_MAPPINGS; p++) + { + uint32_t dist = pat.get_squared_distance(desired_parts[p]); + if (dist < lowest_pat_dist) + lowest_pat_dist = dist; + } + + part_similarity[part_index] = (lowest_pat_dist << 16) | part_index; + + } // part_index; + + std::sort(part_similarity, part_similarity + pPart_data->m_total_unique_patterns); + + for (uint32_t i = 0; i < num_best_parts; i++) + pBest_parts[i] = part_similarity[i] & 0xFFFF; + } + else + { + astc_ldr::vp_tree::result_queue results; + results.reserve(num_best_parts); + + pPart_data->m_part_vp_tree.find_nearest(3, desired_part, results, num_best_parts); + + assert(results.get_size() == num_best_parts); + + const auto& elements = results.get_elements(); + + for (uint32_t i = 0; i < results.get_size(); i++) + pBest_parts[i] = elements[1 + 
i].m_pat_index; + } + + return true; +} + +//--------------------------------------------------------------------- + +static const float g_sobel_x[3][3] = // [y][x] +{ + { -1.0f, 0.0f, 1.0f }, + { -2.0f, 0.0f, 2.0f }, + { -1.0f, 0.0f, 1.0f } +}; + +static const float g_sobel_y[3][3] = // [y][x] +{ + { -1.0f, -2.0f, -1.0f }, + { 0.0f, 0.0f, 0.0f }, + { 1.0f, 2.0f, 1.0f } +}; + +void compute_sobel(const image& orig, image& dest, const float* pMatrix_3x3) +{ + const uint32_t width = orig.get_width(); + const uint32_t height = orig.get_height(); + + dest.resize(width, height); + + for (int y = 0; y < (int)height; y++) + { + for (int x = 0; x < (int)width; x++) + { + vec4F d(128.0f); + + for (int my = -1; my <= 1; my++) + { + for (int mx = -1; mx <= 1; mx++) + { + float w = pMatrix_3x3[(my + 1) * 3 + (mx + 1)]; + if (w == 0.0f) + continue; + + const color_rgba& s = orig.get_clamped(x + mx, y + my); + + for (uint32_t c = 0; c < 4; c++) + d[c] += w * (float)s[c]; + + } // mx + + } // my + + dest(x, y).set(fast_roundf_int(d[0]), fast_roundf_int(d[1]), fast_roundf_int(d[2]), fast_roundf_int(d[3])); + + } // x + } // y +} + +void compute_energy_from_dct(uint32_t block_width, uint32_t block_height, float* pDCT) +{ + const uint32_t num_texels = block_width * block_height; + + for (uint32_t i = 1; i < num_texels; i++) + pDCT[i] = square(pDCT[i]); + + pDCT[0] = 0.0f; +} + +// Results scaled by # block texels (block-SSE in weight space) +float compute_preserved_dct_energy(uint32_t block_width, uint32_t block_height, const float* pEnergy, uint32_t grid_w, uint32_t grid_h) +{ + float tot = 0.0f; + + for (uint32_t y = 0; y < block_height; y++) + { + for (uint32_t x = 0; x < block_width; x++) + { + if ((x < grid_w) && (y < grid_h)) + tot += pEnergy[x + y * block_width]; + } + } + + return tot; +} + +// Results scaled by # block texels (block-SSE in weight space) +inline float compute_lost_dct_energy(uint32_t block_width, uint32_t block_height, const float* pEnergy, uint32_t grid_w, 
uint32_t grid_h) +{ + float tot = 0.0f; + + for (uint32_t y = 0; y < block_height; y++) + { + for (uint32_t x = 0; x < block_width; x++) + { + if ((x < grid_w) && (y < grid_h)) + continue; + + tot += pEnergy[x + y * block_width]; + } + } + + return tot; +} + +struct ldr_astc_lowlevel_block_encoder_params +{ + ldr_astc_lowlevel_block_encoder_params() + { + clear(); + } + + void clear() + { + clear_obj(*this); + + for (uint32_t i = 0; i < 4; i++) + m_dp_active_chans[i] = true; + + m_subsets_edge_filtering = true; + + m_use_superbuckets = true; + m_bucket_pruning_passes = true; + m_use_dual_planes = true; + + m_superbucket_max_to_retain[0] = 4; + m_superbucket_max_to_retain[1] = 8; + m_superbucket_max_to_retain[2] = 16; + + m_shortlist_buckets_to_examine_fract = 1.0f; // after high-level bucket surrogate encoding and pruning stages, 1.0=effectively disabled + m_shortlist_buckets_to_examine_min = 1; + m_shortlist_buckets_to_examine_max = 1024; + + // TODO: Expose these at a higher level. Add alpha specific? 
+ m_num_similar_modes_in_bucket_to_shortlist_fract = .33f; + m_num_similar_modes_in_bucket_to_shortlist_fract_min = 2; + m_num_similar_modes_in_bucket_to_shortlist_fract_max = 4096; + + m_final_shortlist_fraction[0] = .2f; + m_final_shortlist_fraction[1] = .3f; + m_final_shortlist_fraction[2] = .5f; + m_final_shortlist_min_size[0] = 1; + m_final_shortlist_min_size[1] = 1; + m_final_shortlist_min_size[2] = 1; + m_final_shortlist_max_size[0] = 4096; + m_final_shortlist_max_size[1] = 4096; + m_final_shortlist_max_size[2] = 4096; + + m_gradient_descent_flag = true; + m_polish_weights_flag = true; + m_qcd_enabled_flag = true; + + m_final_encode_try_base_ofs = true; + m_final_encode_always_try_rgb_direct = false; // if true, even if base_ofs succeeds, we try RGB/RGBA direct too + + m_use_parts_std_dev_thresh = (8.0f / 255.0f); + m_use_parts_std_dev_thresh2 = (40.0f / 255.0f); + m_sobel_energy_thresh1 = 3200.0f; + m_sobel_energy_thresh2 = 30000.0f; + m_sobel_energy_thresh3 = 50000.0f; + + m_part2_fraction_to_keep = 2; + m_part3_fraction_to_keep = 2; + m_base_parts2 = 32; + m_base_parts3 = 32; + + // TODO: Prehaps expose this at a higher level. 
+ m_use_blue_contraction = true; + } + + uint32_t m_bx, m_by, m_block_width, m_block_height, m_total_block_pixels; + + const image* m_pOrig_img_sobel_xy_t; + + const astc_ldr::partitions_data* m_pPart_data_p2; + const astc_ldr::partitions_data* m_pPart_data_p3; + + const astc_ldr::cem_encode_params* m_pEnc_params; + + // RGB or alpha trial lists (shouldn't have both in same lists) + uint32_t m_num_trial_modes; + const basist::astc_ldr_t::trial_mode* m_pTrial_modes; + + const basist::astc_ldr_t::grouped_trial_modes* m_pGrouped_trial_modes; + + uint32_t m_superbucket_max_to_retain[3]; // [block_complexity_index] + + float m_shortlist_buckets_to_examine_fract; + uint32_t m_shortlist_buckets_to_examine_min; + uint32_t m_shortlist_buckets_to_examine_max; + + float m_num_similar_modes_in_bucket_to_shortlist_fract; + uint32_t m_num_similar_modes_in_bucket_to_shortlist_fract_min; + uint32_t m_num_similar_modes_in_bucket_to_shortlist_fract_max; + + float m_final_shortlist_fraction[3]; + uint32_t m_final_shortlist_min_size[3]; + uint32_t m_final_shortlist_max_size[3]; + + bool m_use_superbuckets; + bool m_bucket_pruning_passes; + + // true if this is a trial mode list containing alpha + bool m_alpha_cems; + + bool m_use_alpha_or_opaque_modes; // true for only alpha cems, false of only opaque cems; + bool m_use_lum_direct_modes; + bool m_use_base_scale_modes; + bool m_use_direct_modes; + bool m_use_dual_planes; + + bool m_grid_hv_filtering; + bool m_filter_horizontally_flag; // = h_energy_lost < v_energy_lost, if true it's visually better to resample the block on the X axis vs. Y + bool m_use_small_grids_only; + + bool m_dp_active_chans[4]; + + bool m_subsets_enabled; + bool m_subsets_edge_filtering; + + // TODO: Make polishing controllable per superpass. 
+ bool m_gradient_descent_flag; + bool m_polish_weights_flag; + bool m_qcd_enabled_flag; + + bool m_final_encode_try_base_ofs; + bool m_final_encode_always_try_rgb_direct; + + bool m_brute_force_est_parts; + bool m_disable_part_est_stage2; // only use single stage partition estimation + + bool m_use_blue_contraction; // currently global enable/disable + + float m_use_parts_std_dev_thresh; + float m_use_parts_std_dev_thresh2; + float m_sobel_energy_thresh1; + float m_sobel_energy_thresh2; + float m_sobel_energy_thresh3; + + uint32_t m_part2_fraction_to_keep; + uint32_t m_part3_fraction_to_keep; + uint32_t m_base_parts2; + uint32_t m_base_parts3; + + float m_early_stop_wpsnr; + float m_early_stop2_wpsnr; + + basist::astc_ldr_t::dct2f* m_pDCT2F; // at block size +}; + +struct trial_surrogate +{ + uint32_t m_trial_mode_index; + float m_err; + + log_surrogate_astc_blk m_log_blk; + + void clear() + { + m_trial_mode_index = 0; + m_err = 0; + m_log_blk.clear(); + } + + bool operator < (const trial_surrogate& rhs) const + { + return m_err < rhs.m_err; + } +}; + +struct encode_block_output +{ + int16_t m_trial_mode_index; // -1 = solid, no trial mode + uint16_t m_blur_id; // blur index + + astc_helpers::log_astc_block m_log_blk; + + // Packed per-plane DCT data + basist::astc_ldr_t::dct_syms m_packed_dct_plane_data[2]; + + uint64_t m_sse; + + void clear() + { + m_trial_mode_index = -1; + m_blur_id = 0; + m_log_blk.clear(); + m_sse = 0; + } +}; + +struct encode_block_stats +{ + uint32_t m_total_superbuckets_created; + uint32_t m_total_buckets_created; + uint32_t m_total_surrogate_encodes; + uint32_t m_total_shortlist_candidates; + uint32_t m_total_full_encodes; + + encode_block_stats() { clear(); } + + void clear() + { + clear_obj(*this); + } +}; + +struct chan_mse_est +{ + float m_ep; + float m_wp; + + chan_mse_est() {} + chan_mse_est(float ep, float wp) : m_ep(ep), m_wp(wp) {} +}; + +struct weight_terms +{ + float m_mean; + float m_var; + float m_endpoint_factor; + float 
m_weight_spread_scale; + + void calc(uint32_t n, const float* pWeights) + { + assert(n); + + float weight_total = 0.0f; + for (uint32_t i = 0; i < n; i++) + { + assert(is_in_range(pWeights[i], 0.0f, 1.0f)); + weight_total += pWeights[i]; + } + m_mean = weight_total / (float)n; + + float weight_var = 0.0f; + for (uint32_t i = 0; i < n; i++) + weight_var += squaref(pWeights[i] - m_mean); + m_var = weight_var / (float)n; + + // drops below 2/3 on smooth blocks and tends to 2/3 when weights are well spread + m_endpoint_factor = (1.0f + 2.0f * m_var + 2.0f * m_mean * m_mean - 2.0f * m_mean) / (2.0f / 3.0f); + m_endpoint_factor = clamp(m_endpoint_factor, .25f, 1.50f); + + const float UNIFORM_VAR = 1.0f / 12.0f; + float s = m_var / UNIFORM_VAR; + + // shrinks the weight term on smooth blocks and is ~1 when weights are spread. + m_weight_spread_scale = saturate(s); + } +}; + +// weight_gamma is block size/grid size specific factor (0,1] (the amount of MSE quant error remaining taking into account bilinear smoothing) +inline chan_mse_est compute_quantized_channel_mse_estimates(uint32_t num_endpoint_levels, uint32_t num_weight_levels, float span_size, float weight_gamma, const weight_terms* pWeight_terms = nullptr) +{ + assert(num_endpoint_levels >= 2); + assert(num_weight_levels >= 2); + + const float Dep = 1.0f / (float)(num_endpoint_levels - 1); // endpoint quant step + const float Dw = 1.0f / (float)(num_weight_levels - 1); // weight quant step + + // Endpoint quant MSE estimate is not span dependent + float ep_lower = (Dep * Dep) / 12.0f * (2.0f / 3.0f); + + // Weight quant MSE estimate is span dependent + float wq_lower = (Dw * Dw) / 12.0f * weight_gamma * (span_size * span_size); + + if (pWeight_terms) + { + ep_lower *= pWeight_terms->m_endpoint_factor; + wq_lower *= pWeight_terms->m_weight_spread_scale; + } + + return chan_mse_est(ep_lower, wq_lower); +} + +inline float compute_quantized_channel_endpoint_mse_estimate(uint32_t num_endpoint_levels, const weight_terms* 
pWeight_terms = nullptr) +{ + assert(num_endpoint_levels >= 2); + + const float Dep = 1.0f / (float)(num_endpoint_levels - 1); // endpoint quant step + + // Endpoint quant MSE estimate is not span dependent + float ep_lower = (Dep * Dep) / 12.0f * (2.0f / 3.0f); + + if (pWeight_terms) + ep_lower *= pWeight_terms->m_endpoint_factor; + + return ep_lower; +} + +inline float compute_quantized_channel_weight_mse_estimate(uint32_t num_weight_levels, float span_size, float weight_gamma, const weight_terms* pWeight_terms = nullptr) +{ + assert(num_weight_levels >= 2); + + const float Dw = 1.0f / (float)(num_weight_levels - 1); // weight quant step + + // Weight quant MSE estimate is span dependent + float wq_lower = (Dw * Dw) / 12.0f * weight_gamma * (span_size * span_size); + + if (pWeight_terms) + wq_lower *= pWeight_terms->m_weight_spread_scale; + + return wq_lower; +} + +const float BLUE_CONTRACTION_BASE_OFS_DISCOUNT = .9f; +const float SKIP_IF_BUCKET_WORSE_MULTIPLIER = 5.0f; + +struct shortlist_bucket +{ + bool m_examined_flag; + int8_t m_grid_width, m_grid_height; + int8_t m_ccs_index; + + uint8_t m_cem_index; + uint8_t m_num_parts; + uint16_t m_unique_seed_index; + + log_surrogate_astc_blk m_surrogate_log_blk; + float m_sse; + + shortlist_bucket() + { + } + + shortlist_bucket(int grid_width, int grid_height, uint32_t cem_index, int ccs_index, uint32_t num_parts, uint32_t unique_seed_index) : + m_grid_width((int8_t)grid_width), m_grid_height((int8_t)grid_height), + m_ccs_index((int8_t)ccs_index), + m_cem_index((uint8_t)cem_index), + m_num_parts((uint8_t)num_parts), + m_unique_seed_index((uint16_t)unique_seed_index) + { + m_surrogate_log_blk.clear(); + m_sse = 0.0f; + m_examined_flag = false; + } + + operator size_t() const + { +#define ADD_HASH(H) h ^= basist::hash_hsieh((uint8_t*)&(H), sizeof(H)); + size_t h = 0; + ADD_HASH(m_grid_width); + ADD_HASH(m_grid_height); + ADD_HASH(m_ccs_index); + ADD_HASH(m_cem_index); + ADD_HASH(m_num_parts); + 
ADD_HASH(m_unique_seed_index); +#undef ADD_HASH + return h; + } + + // equality for hashing + bool operator== (const shortlist_bucket& rhs) const + { + return (m_grid_width == rhs.m_grid_width) && (m_grid_height == rhs.m_grid_height) && (m_cem_index == rhs.m_cem_index) && (m_ccs_index == rhs.m_ccs_index) && + (m_num_parts == rhs.m_num_parts) && (m_unique_seed_index == rhs.m_unique_seed_index); + } +}; + +typedef static_vector trial_mode_index_vec; +typedef basisu::hash_map shortlist_bucket_hash_t; + +#pragma pack(push, 1) +struct trial_mode_estimate_superbucket_key +{ + // All member vars from beginning to m_last will be hashed. Be careful of alignment. + uint8_t m_cem_index; + int8_t m_ccs_index; + uint16_t m_subset_unique_index; + + uint8_t m_num_subsets; + uint8_t m_last; + uint8_t m_unused[2]; + + trial_mode_estimate_superbucket_key() + { + static_assert((sizeof(*this) % 4) == 0, "struct size must be divisible by 4"); + } + + void clear() + { + clear_obj(*this); + } + + operator size_t() const + { + return basist::hash_hsieh((const uint8_t*)this, BASISU_OFFSETOF(trial_mode_estimate_superbucket_key, m_last)); + } + + bool operator== (const trial_mode_estimate_superbucket_key& rhs) const + { +#define COMP(e) if (e != rhs.e) return false; + COMP(m_cem_index); + COMP(m_ccs_index); + COMP(m_subset_unique_index); + COMP(m_num_subsets); +#undef COMP + return true; + } +}; +#pragma pack(pop) + +struct trial_mode_estimate_superbucket_value +{ + basisu::vector m_trial_mode_list; +}; + +typedef hash_map trial_mode_estimate_superbucket_hash; + +struct trial_mode_estimate +{ + trial_mode_estimate_superbucket_key m_superbucket_key; + + uint32_t m_trial_mode_index; + float m_wsse; + + bool operator< (const trial_mode_estimate& rhs) const + { + return m_wsse < rhs.m_wsse; + } +}; + +struct ranked_shortlist_bucket +{ + shortlist_bucket m_bucket; + trial_mode_index_vec m_trial_mode_indices; + + bool operator < (const ranked_shortlist_bucket& rhs) const { return m_bucket.m_sse < 
rhs.m_bucket.m_sse; } +}; + +struct ldr_astc_lowlevel_block_encoder +{ + ldr_astc_lowlevel_block_encoder() : + m_used_flag(false) + { + clear(); + } + + // Warning: These objects can migrate between threads (be cautious of determinism issues with containers/hash tables!) + bool m_used_flag; + + // Thread-local data follows + uint_vec m_trial_modes_to_estimate; + + trial_mode_estimate_superbucket_hash m_superbucket_hash; + + std::priority_queue m_trial_mode_estimate_priority_queue; + + basist::astc_ldr_t::fvec m_dct_work; + + shortlist_bucket_hash_t m_shortlist_hash0; + shortlist_bucket_hash_t m_shortlist_hash1; + + basisu::vector m_trial_surrogates; + + float m_sobel_energy; + float m_max_std_dev; + + uint32_t m_block_complexity_index; // [0,2] + bool m_strong_edges; + bool m_very_strong_edges; + bool m_super_strong_edges; + + bool m_used_superbuckets; + + int m_best_parts2[2][MAX_BASE_PARTS2 * PART_ESTIMATE_STAGE1_MULTIPLIER]; // [rgb[a]direct/rgbs][est_part] + int m_num_est_parts2[2]; + + int m_best_parts3[2][MAX_BASE_PARTS3 * PART_ESTIMATE_STAGE1_MULTIPLIER]; // [rgb[a]direct/rgbs][est_part] + int m_num_est_parts3[2]; + + basisu::vector m_ranked_buckets; + + void clear() + { + m_trial_modes_to_estimate.resize(0); + m_superbucket_hash.reset(); + + m_trial_surrogates.resize(0); + + m_sobel_energy = 0; + m_max_std_dev = 0; + m_block_complexity_index = 0; + m_strong_edges = false; + m_very_strong_edges = false; + m_super_strong_edges = false; + + m_used_superbuckets = false; + + clear_obj(m_best_parts2); + clear_obj(m_num_est_parts2); + + clear_obj(m_best_parts3); + clear_obj(m_num_est_parts3); + + m_ranked_buckets.resize(0); + } + + bool init( + const ldr_astc_lowlevel_block_encoder_params& p, + const astc_ldr::pixel_stats_t& pixel_stats, + basisu::vector& out_blocks, + uint32_t blur_id, + encode_block_stats& stats) + { + BASISU_NOTE_UNUSED(blur_id); + BASISU_NOTE_UNUSED(out_blocks); + BASISU_NOTE_UNUSED(stats); + + // TODO: This sums the *original* (not blurred) 
block's energy - precompute this? Replace with DCT? + m_sobel_energy = 0.0f; + for (uint32_t y = 0; y < p.m_block_height; y++) + { + for (uint32_t x = 0; x < p.m_block_width; x++) + { + const color_rgba& s = p.m_pOrig_img_sobel_xy_t->get_clamped(p.m_bx * p.m_block_width + x, p.m_by * p.m_block_height + y); + + // TODO: sum max of all channels instead? + m_sobel_energy += s[0] * s[0] + s[1] * s[1] + s[2] * s[2] + s[3] * s[3]; + } // x + } // y + + m_sobel_energy /= (float)p.m_total_block_pixels; + + m_max_std_dev = 0.0f; + for (uint32_t i = 0; i < 4; i++) + m_max_std_dev = maximum(m_max_std_dev, pixel_stats.m_rgba_stats[i].m_std_dev); + + m_strong_edges = (m_max_std_dev > p.m_use_parts_std_dev_thresh) && (m_sobel_energy > p.m_sobel_energy_thresh1); + m_very_strong_edges = (m_max_std_dev > p.m_use_parts_std_dev_thresh2) && (m_sobel_energy > p.m_sobel_energy_thresh2); + m_super_strong_edges = (m_max_std_dev > p.m_use_parts_std_dev_thresh2) && (m_sobel_energy > p.m_sobel_energy_thresh3); + + m_block_complexity_index = m_super_strong_edges ? 2 : (m_very_strong_edges ? 1 : 0); + + return true; + } + + bool partition_triage( + const ldr_astc_lowlevel_block_encoder_params& p, + const astc_ldr::pixel_stats_t& pixel_stats, + basisu::vector& out_blocks, + uint32_t blur_id, + encode_block_stats& stats) + { + BASISU_NOTE_UNUSED(blur_id); + BASISU_NOTE_UNUSED(out_blocks); + + clear_obj(m_num_est_parts2); + clear_obj(m_num_est_parts3); + + if (!p.m_subsets_enabled) + return true; + + if (p.m_subsets_edge_filtering) + { + if (!m_strong_edges) + return true; + } + + assert(p.m_base_parts2 <= MAX_BASE_PARTS2); + assert(p.m_base_parts3 <= MAX_BASE_PARTS3); + + // 2 subsets + int total_parts2 = m_super_strong_edges ? (p.m_base_parts2 * PART_ESTIMATE_STAGE1_MULTIPLIER) : (m_very_strong_edges ? 
(p.m_base_parts2 * 2) : p.m_base_parts2); + total_parts2 = minimum(total_parts2, MAX_BASE_PARTS2 * PART_ESTIMATE_STAGE1_MULTIPLIER); + total_parts2 = minimum(total_parts2, p.m_pPart_data_p2->m_total_unique_patterns); + + const uint32_t surrogate_encode_flags = 0; + + if (total_parts2) + { + int best_parts2_temp[MAX_BASE_PARTS2 * PART_ESTIMATE_STAGE1_MULTIPLIER]; + assert(total_parts2 <= (int)std::size(best_parts2_temp)); + + // Stage 1: kmeans+vptree + const bool has_est_parts2 = estimate_partition2( + p.m_block_width, p.m_block_height, + pixel_stats, + best_parts2_temp, total_parts2, + p.m_pPart_data_p2, p.m_brute_force_est_parts); + + if (has_est_parts2) + { + // Always try direct, optionally base+scale cem's + for (uint32_t s = 0; s < 2; s++) + { + if ((s) && (!p.m_use_base_scale_modes)) + continue; + + if (p.m_disable_part_est_stage2) + { + m_num_est_parts2[s] = total_parts2; + memcpy(m_best_parts2[s], best_parts2_temp, m_num_est_parts2[s] * sizeof(int)); + continue; + } + + uint32_t cem_to_surrogate_encode = p.m_alpha_cems ? astc_helpers::CEM_LDR_RGBA_DIRECT : astc_helpers::CEM_LDR_RGB_DIRECT; + if (s) + cem_to_surrogate_encode = p.m_alpha_cems ? 
astc_helpers::CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A : astc_helpers::CEM_LDR_RGB_BASE_SCALE; + + // Stage 2: Analytic surrogate WSSE + basisu::vector part_sses(total_parts2); + + for (int i = 0; i < total_parts2; i++) + { + const astc_ldr::partitions_data* pPart_data = p.m_pPart_data_p2; + + const uint32_t unique_seed_index = best_parts2_temp[i]; + const uint32_t part_seed_index = pPart_data->m_unique_index_to_part_seed[unique_seed_index]; + + const astc_ldr::partition_pattern_vec* pPat = &pPart_data->m_partition_pats[unique_seed_index]; + + log_surrogate_astc_blk surrogate_log_blk; + float sse = encode_surrogate_trial_subsets( + p.m_block_width, p.m_block_height, + pixel_stats, + cem_to_surrogate_encode, 2, part_seed_index, pPat, + astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_64_LEVELS, + p.m_block_width, p.m_block_height, + surrogate_log_blk, + *p.m_pEnc_params, surrogate_encode_flags); + + stats.m_total_surrogate_encodes++; + + part_sses[i] = sse; + } // i + + basisu::vector part_sses_ranks(total_parts2); + + indirect_sort(total_parts2, part_sses_ranks.get_ptr(), part_sses.get_ptr()); + + m_num_est_parts2[s] = maximum(1, (total_parts2 + p.m_part2_fraction_to_keep - 1) / p.m_part2_fraction_to_keep); + + for (int i = 0; i < m_num_est_parts2[s]; i++) + { + const uint32_t rank_index = part_sses_ranks[i]; + const uint32_t unique_seed_unique = best_parts2_temp[rank_index]; + m_best_parts2[s][i] = unique_seed_unique; + } // i + + } // s + + } // if (has_est_parts2) + + } // if (total_parts2) + + // 3 subsets + int total_parts3 = m_super_strong_edges ? (p.m_base_parts3 * PART_ESTIMATE_STAGE1_MULTIPLIER) : (m_very_strong_edges ? 
(p.m_base_parts3 * 2) : p.m_base_parts3); + total_parts3 = minimum(total_parts3, MAX_BASE_PARTS3 * PART_ESTIMATE_STAGE1_MULTIPLIER); + total_parts3 = minimum(total_parts3, p.m_pPart_data_p3->m_total_unique_patterns); + + if (total_parts3) + { + int best_parts3_temp[MAX_BASE_PARTS3 * PART_ESTIMATE_STAGE1_MULTIPLIER]; + assert(total_parts3 <= (int)std::size(best_parts3_temp)); + + // Stage 1: kmeans+vptree + const bool has_est_parts3 = estimate_partition3( + p.m_block_width, p.m_block_height, + pixel_stats, + best_parts3_temp, total_parts3, + p.m_pPart_data_p3, p.m_brute_force_est_parts); + + if (has_est_parts3) + { + // Always try direct, optionally base+scale cem's + for (uint32_t s = 0; s < 2; s++) + { + if ((s) && (!p.m_use_base_scale_modes)) + continue; + + if (p.m_disable_part_est_stage2) + { + m_num_est_parts3[s] = total_parts3; + memcpy(m_best_parts3[s], best_parts3_temp, m_num_est_parts3[s] * sizeof(int)); + continue; + } + + uint32_t cem_to_surrogate_encode = p.m_alpha_cems ? astc_helpers::CEM_LDR_RGBA_DIRECT : astc_helpers::CEM_LDR_RGB_DIRECT; + if (s) + cem_to_surrogate_encode = p.m_alpha_cems ? 
astc_helpers::CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A : astc_helpers::CEM_LDR_RGB_BASE_SCALE; + + // Stage 2: Analytic surrogate WSSE + basisu::vector part_sses(total_parts3); + for (int i = 0; i < total_parts3; i++) + { + const astc_ldr::partitions_data* pPart_data = p.m_pPart_data_p3; + + const uint32_t unique_seed_index = best_parts3_temp[i]; + const uint32_t part_seed_index = pPart_data->m_unique_index_to_part_seed[unique_seed_index]; + + const astc_ldr::partition_pattern_vec* pPat = &pPart_data->m_partition_pats[unique_seed_index]; + + log_surrogate_astc_blk surrogate_log_blk; + float sse = encode_surrogate_trial_subsets( + p.m_block_width, p.m_block_height, + pixel_stats, + cem_to_surrogate_encode, 3, part_seed_index, pPat, + astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_64_LEVELS, + p.m_block_width, p.m_block_height, + surrogate_log_blk, + *p.m_pEnc_params, surrogate_encode_flags); + + stats.m_total_surrogate_encodes++; + + part_sses[i] = sse; + } // i + + basisu::vector part_sses_ranks(total_parts3); + + indirect_sort(total_parts3, part_sses_ranks.get_ptr(), part_sses.get_ptr()); + + m_num_est_parts3[s] = maximum(1, (total_parts3 + p.m_part3_fraction_to_keep - 1) / p.m_part3_fraction_to_keep); + + for (int i = 0; i < m_num_est_parts3[s]; i++) + { + const uint32_t rank_index = part_sses_ranks[i]; + const uint32_t unique_seed_unique = best_parts3_temp[rank_index]; + m_best_parts3[s][i] = unique_seed_unique; + } // i + + } // s + + } // if (has_est_parts3) + + } // if (total_parts3) + + return true; + } + + bool trivial_triage( + const ldr_astc_lowlevel_block_encoder_params& p, + const astc_ldr::pixel_stats_t& pixel_stats, + basisu::vector& out_blocks, + uint32_t blur_id, + encode_block_stats& stats) + { + BASISU_NOTE_UNUSED(pixel_stats); + BASISU_NOTE_UNUSED(stats); + BASISU_NOTE_UNUSED(out_blocks); + BASISU_NOTE_UNUSED(blur_id); + + if (m_trial_modes_to_estimate.capacity() < 1024) + m_trial_modes_to_estimate.reserve(1024); + 
m_trial_modes_to_estimate.resize(0); + + assert((astc_helpers::CEM_LDR_RGBA_BASE_PLUS_OFFSET + 1) == basist::astc_ldr_t::OTM_NUM_CEMS); + + for (uint32_t cem_index = astc_helpers::CEM_LDR_LUM_DIRECT; cem_index < basist::astc_ldr_t::OTM_NUM_CEMS; cem_index++) + { + if (astc_helpers::does_cem_have_alpha(cem_index) != p.m_alpha_cems) + continue; + + const bool cem_has_alpha = astc_helpers::does_cem_have_alpha(cem_index); + if (cem_has_alpha != p.m_use_alpha_or_opaque_modes) + continue; + + bool accept_flag = false; + switch (cem_index) + { + case astc_helpers::CEM_LDR_LUM_DIRECT: + case astc_helpers::CEM_LDR_LUM_ALPHA_DIRECT: + { + accept_flag = p.m_use_lum_direct_modes; + break; + } + case astc_helpers::CEM_LDR_RGB_DIRECT: + case astc_helpers::CEM_LDR_RGBA_DIRECT: + { + accept_flag = p.m_use_direct_modes; + break; + } + case astc_helpers::CEM_LDR_RGB_BASE_SCALE: + case astc_helpers::CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A: + { + accept_flag = p.m_use_base_scale_modes; + break; + } + default: + break; + } + + if (!accept_flag) + continue; + + const uint32_t s = astc_helpers::cem_is_ldr_base_scale(cem_index) ? 1 : 0; + + for (uint32_t subsets_index = 0; subsets_index < basist::astc_ldr_t::OTM_NUM_SUBSETS; subsets_index++) + { + if (subsets_index == 1) + { + if (!m_num_est_parts2[s]) + continue; + } + else if (subsets_index == 2) + { + if (!m_num_est_parts3[s]) + continue; + } + + const uint32_t ccs_max_index = (p.m_use_dual_planes ? 
basist::astc_ldr_t::OTM_NUM_CCS : 1); + for (uint32_t ccs_index = 0; ccs_index < ccs_max_index; ccs_index++) + { + if (ccs_index) + { + if (!p.m_dp_active_chans[ccs_index - 1]) + continue; + } + + for (uint32_t grid_size_index = 0; grid_size_index < basist::astc_ldr_t::OTM_NUM_GRID_SIZES; grid_size_index++) + { + if (grid_size_index) // if large grid + { + if (p.m_use_small_grids_only) + continue; + } + + for (uint32_t grid_anisos_index = 0; grid_anisos_index < basist::astc_ldr_t::OTM_NUM_GRID_ANISOS; grid_anisos_index++) + { + if (p.m_grid_hv_filtering) + { + if (grid_anisos_index == 1) + { + // W>=H + if (p.m_filter_horizontally_flag) + continue; + } + else if (grid_anisos_index == 2) + { + // Wm_tm_groups[cem_index][subsets_index][ccs_index][grid_size_index][grid_anisos_index]); + + } // grid_aniso_index + + } // grid_size_index + + } // ccs_index + + } // subsets_index + + } // cem_iter + + if (!m_trial_modes_to_estimate.size()) + { + assert(0); + return false; + } + + return true; + } + + bool analytic_triage( + const ldr_astc_lowlevel_block_encoder_params& p, + const astc_ldr::pixel_stats_t& pixel_stats, + basisu::vector& out_blocks, + uint32_t blur_id, + encode_block_stats& stats) + { + BASISU_NOTE_UNUSED(blur_id); + BASISU_NOTE_UNUSED(out_blocks); + + //--------------------------------- superbucket analytical estimation + + shortlist_bucket_hash_t& shortlist_buckets = m_shortlist_hash0; + + if (m_shortlist_hash0.get_table_size() != EXPECTED_SHORTLIST_HASH_SIZE) + { + const bool was_allocated = m_shortlist_hash0.get_table_size() > 0; + + m_shortlist_hash0.clear(); + m_shortlist_hash0.reserve(EXPECTED_SHORTLIST_HASH_SIZE / 2); + + if ((g_devel_messages) && (was_allocated)) + fmt_debug_printf("shortlist hash0 thrash\n"); + } + else + { + m_shortlist_hash0.reset(); + } + + m_used_superbuckets = false; + + if (p.m_use_superbuckets) + { + m_used_superbuckets = true; + + // This may thrash if it grows larger on another thread, but we must avoid determinism issues. 
+ if (m_superbucket_hash.get_table_size() != EXPECTED_SUPERBUCKET_HASH_SIZE) + { + const bool was_allocated = m_superbucket_hash.get_table_size() > 0; + + m_superbucket_hash.clear(); + m_superbucket_hash.reserve(EXPECTED_SUPERBUCKET_HASH_SIZE >> 1); + + if ((g_devel_messages) && (was_allocated)) + fmt_debug_printf("superbucket hash thrash\n"); + } + else + { + m_superbucket_hash.reset(); + } + + trial_mode_estimate_superbucket_key new_key; + new_key.clear(); + + trial_mode_estimate_superbucket_value new_val; + + // Create superbuckets + uint32_t max_superbucket_tm_indices = 0; + for (uint32_t j = 0; j < m_trial_modes_to_estimate.size(); j++) + { + const uint32_t trial_mode_iter = m_trial_modes_to_estimate[j]; + + assert(trial_mode_iter < p.m_num_trial_modes); + const basist::astc_ldr_t::trial_mode& tm = p.m_pTrial_modes[trial_mode_iter]; + + new_key.m_cem_index = safe_cast_uint8(tm.m_cem); + new_key.m_ccs_index = safe_cast_int8(tm.m_ccs_index); + + new_key.m_subset_unique_index = 0; + new_key.m_num_subsets = (uint8_t)tm.m_num_parts; + + if (tm.m_num_parts == 1) + { + auto ins_res = m_superbucket_hash.insert(new_key, new_val); + const bool created_flag = ins_res.second; + + assert(ins_res.first->first.m_cem_index == tm.m_cem); + assert(ins_res.first->first.m_ccs_index == tm.m_ccs_index); + assert(ins_res.first->first.m_num_subsets == tm.m_num_parts); + + trial_mode_estimate_superbucket_value& v = (ins_res.first)->second; + + if (created_flag) + v.m_trial_mode_list.reserve(256); + + v.m_trial_mode_list.push_back(trial_mode_iter); + + max_superbucket_tm_indices = maximum(max_superbucket_tm_indices, v.m_trial_mode_list.size_u32()); + } + else + { + //const astc_ldr::partitions_data* pPart_data = (tm.m_num_parts == 2) ? p.m_pPart_data_p2 : p.m_pPart_data_p3; + + const uint32_t s = astc_helpers::cem_is_ldr_base_scale(tm.m_cem) ? 1 : 0; + const uint32_t num_est_parts_to_try = (tm.m_num_parts == 2) ? 
m_num_est_parts2[s] : m_num_est_parts3[s]; + + for (uint32_t est_part_iter = 0; est_part_iter < num_est_parts_to_try; est_part_iter++) + { + const uint32_t part_unique_index = (tm.m_num_parts == 2) ? m_best_parts2[s][est_part_iter] : m_best_parts3[s][est_part_iter]; + + new_key.m_subset_unique_index = safe_cast_uint16(part_unique_index); + + auto ins_res = m_superbucket_hash.insert(new_key, new_val); + const bool created_flag = ins_res.second; + + assert(ins_res.first->first.m_cem_index == tm.m_cem); + assert(ins_res.first->first.m_ccs_index == tm.m_ccs_index); + assert(ins_res.first->first.m_num_subsets == tm.m_num_parts); + + trial_mode_estimate_superbucket_value& v = (ins_res.first)->second; + if (created_flag) + v.m_trial_mode_list.reserve(256); + + v.m_trial_mode_list.push_back(trial_mode_iter); + + max_superbucket_tm_indices = maximum(max_superbucket_tm_indices, v.m_trial_mode_list.size_u32()); + + } // est_part_iter + } + + } // j + + //fmt_debug_printf("Total superbucket entries: {}\n", m_superbucket_hash.size()); + //fmt_debug_printf("Max superbucket tm indices: {}\n", max_superbucket_tm_indices); + + const uint32_t total_block_texels = p.m_total_block_pixels; + const float inv_total_block_texels = 1.0f / (float)total_block_texels; + + while (m_trial_mode_estimate_priority_queue.size()) + m_trial_mode_estimate_priority_queue.pop(); + + const uint32_t max_priority_queue_size = p.m_superbucket_max_to_retain[m_block_complexity_index]; + + // purposely downscale lost scale energy relative to the other error sources + // this biased the encoder towards smaller grids + const float SLAM_TO_LINE_WEIGHT = 1.5f; // upweight STL relative to other errors to give the estimator more of a signal especially for dual plane + const float QUANT_ERROR_WEIGHT = 1.0f; // quant error is naturally quite pessimistic + const float SCALE_ERROR_WEIGHT = 3.0f; // weight grid downsample (scale) error + + // Discount for blue contraction encoding and base+offset CEM's. 
+ const float BLUE_CONTRACTION_ENDPOINT_QUANT_DISCOUNT = .5f; + + // Iterate over all superbuckets, surrogate encode to compute slam to line error, DCT of weight grid(s) to estimate energy lost during weight grid downsampling. + // TODO: priority queue and aggressive early outs + for (auto superbucket_iter = m_superbucket_hash.begin(); superbucket_iter != m_superbucket_hash.end(); ++superbucket_iter) + { + const trial_mode_estimate_superbucket_key& key = superbucket_iter->first; + const trial_mode_estimate_superbucket_value& val = superbucket_iter->second; + + //const bool cem_has_alpha = astc_helpers::does_cem_have_alpha(key.m_cem_index); + + log_surrogate_astc_blk log_blk; + + const astc_ldr::partitions_data* pPart_data = nullptr; + const astc_ldr::partition_pattern_vec* pPat = nullptr; + + //const uint32_t num_planes = (key.m_ccs_index >= 0) ? 2 : 1; + + const float worst_wsse_found_so_far = (m_trial_mode_estimate_priority_queue.size() >= max_priority_queue_size) ? m_trial_mode_estimate_priority_queue.top().m_wsse : 1e+9f; + + float slam_to_line_wsse = 0; + if (key.m_num_subsets == 1) + { + slam_to_line_wsse = encode_surrogate_trial( + p.m_block_width, p.m_block_height, + pixel_stats, + key.m_cem_index, + key.m_ccs_index, + astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_64_LEVELS, + p.m_block_width, p.m_block_height, + log_blk, + *p.m_pEnc_params, + astc_ldr::cFlagDisableQuant); + } + else + { + pPart_data = (key.m_num_subsets == 3) ? 
p.m_pPart_data_p3 : p.m_pPart_data_p2; + + const uint32_t unique_seed_index = key.m_subset_unique_index; + const uint32_t part_seed_index = pPart_data->m_unique_index_to_part_seed[unique_seed_index]; + + pPat = &pPart_data->m_partition_pats[unique_seed_index]; + + slam_to_line_wsse = encode_surrogate_trial_subsets( + p.m_block_width, p.m_block_height, + pixel_stats, + key.m_cem_index, key.m_num_subsets, part_seed_index, pPat, + astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_64_LEVELS, + p.m_block_width, p.m_block_height, + log_blk, + *p.m_pEnc_params, + astc_ldr::cFlagDisableQuant); + } + + stats.m_total_surrogate_encodes++; + + // Early out: Slam to line error is so high it's impossible for any blocks in this bucket to win. + if ((SLAM_TO_LINE_WEIGHT * slam_to_line_wsse) >= worst_wsse_found_so_far) + continue; + + bool can_use_base_ofs = false; + if ((key.m_cem_index == astc_helpers::CEM_LDR_RGB_DIRECT) || (key.m_cem_index == astc_helpers::CEM_LDR_RGBA_DIRECT)) + { + float max_span_size = 0.0f; + + for (uint32_t subset_index = 0; subset_index < key.m_num_subsets; subset_index++) + { + const vec4F subset_chan_spans(log_blk.m_endpoints[subset_index][1] - log_blk.m_endpoints[subset_index][0]); + for (uint32_t c = 0; c < 4; c++) + { + float span_size = fabs(subset_chan_spans[c]); + max_span_size = maximum(max_span_size, span_size); + } + } + + can_use_base_ofs = (max_span_size < .25f); + } + + assert(p.m_pDCT2F); + + assert((p.m_pDCT2F->rows() == p.m_block_height) && (p.m_pDCT2F->cols() == p.m_block_width)); + + float weight0_energy[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + float weight1_energy[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + + basist::astc_ldr_t::fvec& dct_work = m_dct_work; + + // Forward DCT in normalized weight (surrogate) space + p.m_pDCT2F->forward(log_blk.m_weights0, weight0_energy, dct_work); + compute_energy_from_dct(p.m_block_width, p.m_block_height, weight0_energy); + + if (key.m_ccs_index >= 0) + { + p.m_pDCT2F->forward(log_blk.m_weights1, 
weight1_energy, dct_work); + compute_energy_from_dct(p.m_block_width, p.m_block_height, weight1_energy); + } + + weight_terms weight0_terms, weight1_terms; + weight_terms* pWeight0_terms = &weight0_terms; + weight_terms* pWeight1_terms = nullptr; + weight0_terms.calc(total_block_texels, log_blk.m_weights0); + if (key.m_ccs_index >= 0) + { + weight1_terms.calc(total_block_texels, log_blk.m_weights1); + pWeight1_terms = &weight1_terms; + } + + // Precompute subset span and total pixels info + vec4F subset_spans[astc_helpers::MAX_PARTITIONS]; + uint32_t subset_pixels[astc_helpers::MAX_PARTITIONS]; + + for (uint32_t subset_index = 0; subset_index < key.m_num_subsets; subset_index++) + { + subset_spans[subset_index] = log_blk.m_endpoints[subset_index][1] - log_blk.m_endpoints[subset_index][0]; + + uint32_t total_subset_pixels = p.m_total_block_pixels; + if (key.m_num_subsets > 1) + total_subset_pixels = pPart_data->m_partition_pat_histograms[key.m_subset_unique_index].m_hist[subset_index]; + + subset_pixels[subset_index] = total_subset_pixels; + } + + // Loop through all trial modes in this sueprbucket. TODO: Sort by endpoint levels? + for (uint32_t k = 0; k < val.m_trial_mode_list.size(); k++) + { + const uint32_t trial_mode_index = val.m_trial_mode_list[k]; + assert(trial_mode_index < p.m_num_trial_modes); + + const basist::astc_ldr_t::trial_mode& tm = p.m_pTrial_modes[trial_mode_index]; + + assert(tm.m_cem == key.m_cem_index); + assert(tm.m_ccs_index == key.m_ccs_index); + assert(tm.m_num_parts == key.m_num_subsets); + + const basist::astc_ldr_t::astc_block_grid_data* pGrid_data = basist::astc_ldr_t::find_astc_block_grid_data(p.m_block_width, p.m_block_height, tm.m_grid_width, tm.m_grid_height); + + const uint32_t total_endpoint_levels = astc_helpers::get_ise_levels(tm.m_endpoint_ise_range); + const uint32_t total_weight_levels = astc_helpers::get_ise_levels(tm.m_weight_ise_range); + + const uint32_t num_effective_e_levels = can_use_base_ofs ? 
minimum(total_endpoint_levels * 2, 256) : total_endpoint_levels; + float qe0 = compute_quantized_channel_endpoint_mse_estimate(num_effective_e_levels); + const float qe1 = (key.m_ccs_index >= 0) ? (qe0 * pWeight1_terms->m_endpoint_factor) : 0.0f; + qe0 *= pWeight0_terms->m_endpoint_factor; + + float total_e_quant_wsse = 0.0f; + + for (uint32_t subset_index = 0; subset_index < key.m_num_subsets; subset_index++) + { + const vec4F& subset_chan_spans = subset_spans[subset_index]; + const uint32_t total_subset_pixels = subset_pixels[subset_index]; + + for (uint32_t c = 0; c < 4; c++) + { + float span_size = fabs(subset_chan_spans[c]); + + if ((span_size == 0.0f) && ((log_blk.m_endpoints[subset_index][1][c] == 0.0f) || (log_blk.m_endpoints[subset_index][1][c] == 1.0f))) + continue; + + // Scale channel MSE by chan weight and the # of subset pixels to get weighted SSE + const float chan_N = (float)p.m_pEnc_params->m_comp_weights[c] * (float)total_subset_pixels; + + total_e_quant_wsse += ((key.m_ccs_index == (int)c) ? 
qe1 : qe0) * chan_N; + + } // chan_index + } + + if ((tm.m_cem == astc_helpers::CEM_LDR_RGB_DIRECT) || (tm.m_cem == astc_helpers::CEM_LDR_RGBA_DIRECT)) + total_e_quant_wsse *= BLUE_CONTRACTION_ENDPOINT_QUANT_DISCOUNT; + + float total_wsse_so_far = (SLAM_TO_LINE_WEIGHT * slam_to_line_wsse) + (QUANT_ERROR_WEIGHT * total_e_quant_wsse); + if (total_wsse_so_far >= worst_wsse_found_so_far) + continue; + + float lost_weight_energy0 = compute_lost_dct_energy(p.m_block_width, p.m_block_height, weight0_energy, tm.m_grid_width, tm.m_grid_height) * inv_total_block_texels; + + float lost_weight_energy1 = 0; + if (key.m_ccs_index >= 0) + lost_weight_energy1 = compute_lost_dct_energy(p.m_block_width, p.m_block_height, weight1_energy, tm.m_grid_width, tm.m_grid_height) * inv_total_block_texels; + + // Add up: + // slam to line error WSSE (weighted sum of squared errors) + // weight quant error WSSE + // endpoint quant error WSSE + // weight grid rescale error WSSE (scaled by span^2) + float total_scale_wsse = 0.0f; + + for (uint32_t subset_index = 0; subset_index < key.m_num_subsets; subset_index++) + { + const vec4F& subset_chan_spans = subset_spans[subset_index]; + const uint32_t total_subset_pixels = subset_pixels[subset_index]; + + for (uint32_t c = 0; c < 4; c++) + { + float span_size = fabs(subset_chan_spans[c]); + + if ((span_size == 0.0f) && ((log_blk.m_endpoints[subset_index][1][c] == 0.0f) || (log_blk.m_endpoints[subset_index][1][c] == 1.0f))) + { + // Won't have any E/W quant err at extremes (0.0 or 1.0 are always perfectly represented), no weight downsample error either. 
+ //chan_mse.m_ep = 0.0f; + //chan_mse.m_wp = 0.0f; + } + else + { + // Scale channel MSE by chan weight and the # of subset pixels to get weighted SSE + const float chan_N = (float)p.m_pEnc_params->m_comp_weights[c] * (float)total_subset_pixels; + + // sum in the plane's lost weight energy, scaled by span_size^2 * chan_weight * num_texels_covered + if (key.m_ccs_index == (int)c) + total_scale_wsse += lost_weight_energy1 * square(span_size) * chan_N; + else + total_scale_wsse += lost_weight_energy0 * square(span_size) * chan_N; + } + + } // chan_index + } + + total_wsse_so_far += (SCALE_ERROR_WEIGHT * total_scale_wsse); + if (total_wsse_so_far >= worst_wsse_found_so_far) + continue; + + float total_w_quant_wsse = 0.0f; + for (uint32_t subset_index = 0; subset_index < key.m_num_subsets; subset_index++) + { + const vec4F& subset_chan_spans = subset_spans[subset_index]; + const uint32_t total_subset_pixels = subset_pixels[subset_index]; + + for (uint32_t c = 0; c < 4; c++) + { + float span_size = fabs(subset_chan_spans[c]); + + if ((span_size == 0.0f) && ((log_blk.m_endpoints[subset_index][1][c] == 0.0f) || (log_blk.m_endpoints[subset_index][1][c] == 1.0f))) + { + // Won't have any E/W quant err at extremes (0.0 or 1.0 are always perfectly represented), no weight downsample error either. + //chan_mse.m_ep = 0.0f; + //chan_mse.m_wp = 0.0f; + } + else + { + // span_size != 0 here - estimate weight/endpoint quantization errors + float chan_w_mse = compute_quantized_channel_weight_mse_estimate( + total_weight_levels, span_size, + pGrid_data->m_weight_gamma, (key.m_ccs_index == (int)c) ? 
pWeight1_terms : pWeight0_terms); + + // Scale channel MSE by chan weight and the # of subset pixels to get weighted SSE + const float chan_N = (float)p.m_pEnc_params->m_comp_weights[c] * (float)total_subset_pixels; + + total_w_quant_wsse += chan_w_mse * chan_N; + } + + } // chan_index + + } // subset_index + + const float total_wsse = total_wsse_so_far + (QUANT_ERROR_WEIGHT * total_w_quant_wsse); + + if (m_trial_mode_estimate_priority_queue.size() >= max_priority_queue_size) + { + if (total_wsse < m_trial_mode_estimate_priority_queue.top().m_wsse) + { + m_trial_mode_estimate_priority_queue.pop(); + + trial_mode_estimate est; + est.m_superbucket_key = key; + est.m_trial_mode_index = trial_mode_index; + est.m_wsse = total_wsse; + + m_trial_mode_estimate_priority_queue.push(est); + } + } + else + { + trial_mode_estimate est; + est.m_superbucket_key = key; + est.m_trial_mode_index = trial_mode_index; + est.m_wsse = total_wsse; + + m_trial_mode_estimate_priority_queue.push(est); + } + + } // k + + } // superbucket_iter + + stats.m_total_superbuckets_created += m_superbucket_hash.size_u32(); + + const uint32_t total_estimates_to_retain = (uint32_t)m_trial_mode_estimate_priority_queue.size(); + assert(total_estimates_to_retain); + + for (uint32_t i = 0; i < total_estimates_to_retain; i++) + { + const trial_mode_estimate &est = m_trial_mode_estimate_priority_queue.top(); + + const trial_mode_estimate_superbucket_key& key = est.m_superbucket_key; + const uint32_t trial_mode_iter = est.m_trial_mode_index; + + assert(trial_mode_iter < p.m_num_trial_modes); + const basist::astc_ldr_t::trial_mode& tm = p.m_pTrial_modes[trial_mode_iter]; + + assert(tm.m_cem == key.m_cem_index); + assert(tm.m_ccs_index == key.m_ccs_index); + assert(tm.m_num_parts == key.m_num_subsets); + + const uint32_t part_unique_index = key.m_subset_unique_index; + + auto ins_res = shortlist_buckets.insert(shortlist_bucket(tm.m_grid_width, tm.m_grid_height, tm.m_cem, tm.m_ccs_index, tm.m_num_parts, 
part_unique_index)); + + ins_res.first->second.push_back(safe_cast_uint16(trial_mode_iter)); + + m_trial_mode_estimate_priority_queue.pop(); + } + } + else + { + for (uint32_t j = 0; j < m_trial_modes_to_estimate.size(); j++) + { + const uint32_t trial_mode_iter = m_trial_modes_to_estimate[j]; + + assert(trial_mode_iter < p.m_num_trial_modes); + const basist::astc_ldr_t::trial_mode& tm = p.m_pTrial_modes[trial_mode_iter]; + + if (tm.m_num_parts > 1) + { + //const astc_ldr::partitions_data* pPart_data = (tm.m_num_parts == 2) ? p.m_pPart_data_p2 : p.m_pPart_data_p3; + + const uint32_t s = astc_helpers::cem_is_ldr_base_scale(tm.m_cem) ? 1 : 0; + const uint32_t num_est_parts_to_try = (tm.m_num_parts == 2) ? m_num_est_parts2[s] : m_num_est_parts3[s]; + + for (uint32_t est_part_iter = 0; est_part_iter < num_est_parts_to_try; est_part_iter++) + { + const uint32_t part_unique_index = (tm.m_num_parts == 2) ? m_best_parts2[s][est_part_iter] : m_best_parts3[s][est_part_iter]; + + auto ins_res = shortlist_buckets.insert(shortlist_bucket(tm.m_grid_width, tm.m_grid_height, tm.m_cem, tm.m_ccs_index, tm.m_num_parts, part_unique_index)); + + ins_res.first->second.push_back(safe_cast_uint16(trial_mode_iter)); + + } // est_part_iter + + } + else + { + auto ins_res = shortlist_buckets.insert(shortlist_bucket(tm.m_grid_width, tm.m_grid_height, tm.m_cem, tm.m_ccs_index, 1, 0)); + ins_res.first->second.push_back(safe_cast_uint16(trial_mode_iter)); + + } + } + } + + stats.m_total_buckets_created += (uint32_t)shortlist_buckets.size(); + +#if 0 + // TEMP + uint32_t max_bucket_tm_indices = 0; + for (auto it = shortlist_buckets.begin(); it != shortlist_buckets.end(); ++it) + { + shortlist_bucket& bucket = it->first; + trial_mode_index_vec& trial_mode_indices = it->second; + max_bucket_tm_indices = maximum(max_bucket_tm_indices, trial_mode_indices.size_u32()); + } + + fmt_debug_printf("max_bucket_tm_indices: {}\n", max_bucket_tm_indices); +#endif + + return true; + } + + bool 
surrogate_encode_shortlist_bucket_representatives( + const ldr_astc_lowlevel_block_encoder_params& p, + const astc_ldr::pixel_stats_t& pixel_stats, + basisu::vector& out_blocks, + uint32_t blur_id, + encode_block_stats& stats) + { + BASISU_NOTE_UNUSED(blur_id); + BASISU_NOTE_UNUSED(out_blocks); + + shortlist_bucket_hash_t& shortlist_buckets = m_shortlist_hash0; + + // Surrogate encode a representative for each bucket. + for (auto it = shortlist_buckets.begin(); it != shortlist_buckets.end(); ++it) + { + shortlist_bucket& bucket = it->first; + //const uint_vec& trial_mode_indices = it->second; + const trial_mode_index_vec& trial_mode_indices = it->second; + + // Choose bucket's largest endpoint/weight ise ranges (finest quant levels) - anything in the bucket will quite likely encode to worse SSE, which we can rapidly estimate. + uint32_t max_endpoint_ise_range = 0, max_weight_ise_range = 0; + for (uint32_t i = 0; i < trial_mode_indices.size(); i++) + { + const basist::astc_ldr_t::trial_mode& tm = p.m_pTrial_modes[trial_mode_indices[i]]; + + max_endpoint_ise_range = maximum(max_endpoint_ise_range, tm.m_endpoint_ise_range); + max_weight_ise_range = maximum(max_weight_ise_range, tm.m_weight_ise_range); + } + + log_surrogate_astc_blk& log_block = bucket.m_surrogate_log_blk; + + if (bucket.m_num_parts == 1) + { + bucket.m_sse = encode_surrogate_trial( + p.m_block_width, p.m_block_height, + pixel_stats, + bucket.m_cem_index, + bucket.m_ccs_index, + max_endpoint_ise_range, max_weight_ise_range, + bucket.m_grid_width, bucket.m_grid_height, + log_block, + *p.m_pEnc_params, 0); + + stats.m_total_surrogate_encodes++; + } + else + { + const astc_ldr::partitions_data* pPart_data = (bucket.m_num_parts == 2) ? 
p.m_pPart_data_p2 : p.m_pPart_data_p3; + + const uint32_t part_seed_index = pPart_data->m_unique_index_to_part_seed[bucket.m_unique_seed_index]; + + const astc_ldr::partition_pattern_vec* pPat = &pPart_data->m_partition_pats[bucket.m_unique_seed_index]; + + bucket.m_sse = encode_surrogate_trial_subsets( + p.m_block_width, p.m_block_height, + pixel_stats, + bucket.m_cem_index, bucket.m_num_parts, part_seed_index, pPat, + max_endpoint_ise_range, max_weight_ise_range, + bucket.m_grid_width, bucket.m_grid_height, + log_block, + *p.m_pEnc_params, 0); + + stats.m_total_surrogate_encodes++; + } + + if ((bucket.m_cem_index == astc_helpers::CEM_LDR_RGB_DIRECT) || (bucket.m_cem_index == astc_helpers::CEM_LDR_RGBA_DIRECT)) + { + // blue contraction/base+offset discount + bucket.m_sse *= BLUE_CONTRACTION_BASE_OFS_DISCOUNT; + } + + } // it + + return true; + } + + /* When p.m_bucket_pruning_passes is nonzero, prunes the buckets in m_shortlist_hash0 over three passes, using m_shortlist_hash1 as the destination table that is swapped back after each pass. Pass 0 keeps only the lowest-SSE CCS variant of each dual plane group, pass 1 compares a WxH grid against its HxW counterpart, and pass 2 compares RGB or RGBA direct against the matching base scale CEM. The surviving buckets are appended to m_ranked_buckets, which is then sorted with std::sort. pixel_stats, stats, blur_id and out_blocks are unused. Always returns true. */ bool prune_shortlist_buckets( + const ldr_astc_lowlevel_block_encoder_params& p, + const astc_ldr::pixel_stats_t& pixel_stats, + basisu::vector& out_blocks, + uint32_t blur_id, + encode_block_stats& stats) + { + BASISU_NOTE_UNUSED(pixel_stats); + BASISU_NOTE_UNUSED(stats); + BASISU_NOTE_UNUSED(blur_id); + BASISU_NOTE_UNUSED(out_blocks); + + shortlist_bucket_hash_t& shortlist_buckets = m_shortlist_hash0; + + if (p.m_bucket_pruning_passes) + { + shortlist_bucket_hash_t& new_shortlist_buckets = m_shortlist_hash1; + + if (m_shortlist_hash1.get_table_size() != EXPECTED_SHORTLIST_HASH_SIZE) + { + const bool was_allocated = m_shortlist_hash1.get_table_size() > 0; + + m_shortlist_hash1.clear(); + m_shortlist_hash1.reserve(EXPECTED_SHORTLIST_HASH_SIZE / 2); + + if ((g_devel_messages) && (was_allocated)) + fmt_debug_printf("shortlist hash1 thrash\n"); + } + else + { + m_shortlist_hash1.reset(); + } + + const uint32_t NUM_PRUNE_PASSES = 3; + for (uint32_t prune_pass = 0; prune_pass < NUM_PRUNE_PASSES; prune_pass++) + { + for (auto it = shortlist_buckets.begin(); it != shortlist_buckets.end(); ++it) + it->first.m_examined_flag = 
false; + + new_shortlist_buckets.reset(); + + for (auto it = shortlist_buckets.begin(); it != shortlist_buckets.end(); ++it) + { + shortlist_bucket& bucket = it->first; + + if (bucket.m_examined_flag) + continue; + + if (prune_pass == 0) + { + // Prune pass 0: Dual plane groups: only accept best CCS index + if (bucket.m_ccs_index >= 0) + { + shortlist_bucket_hash_t::iterator ccs_buckets[4]; + + int best_ccs_index = -1; + float best_ccs_err = BIG_FLOAT_VAL; + + bool skip_bucket = false; /* NOTE(review): skip_bucket is never set to true in this function, so the check after the loop always passes */ + for (uint32_t c = 0; c < 4; c++) + { + auto ccs_res_it = shortlist_buckets.find(shortlist_bucket(bucket.m_grid_width, bucket.m_grid_height, bucket.m_cem_index, c, bucket.m_num_parts, bucket.m_unique_seed_index)); + ccs_buckets[c] = ccs_res_it; + + if (ccs_res_it == shortlist_buckets.end()) + continue; + + assert(!ccs_res_it->first.m_examined_flag); + + ccs_res_it->first.m_examined_flag = true; + + float ccs_sse_err = ccs_res_it->first.m_sse; + if (ccs_sse_err < best_ccs_err) + { + best_ccs_err = ccs_sse_err; + best_ccs_index = c; + } + } // c + + if (!skip_bucket) + { + assert(best_ccs_index >= 0); + + shortlist_bucket_hash_t::iterator best_ccs_it = ccs_buckets[best_ccs_index]; + assert(best_ccs_it != shortlist_buckets.end()); + + new_shortlist_buckets.insert(best_ccs_it->first, best_ccs_it->second); + } + } + else + { + new_shortlist_buckets.insert(it->first, it->second); + } + } + else if (prune_pass == 1) + { + // Prune pass 1: Same # of weight samples, compare WxH vs. HxW + if (bucket.m_grid_width != bucket.m_grid_height) + { + auto alt_res_it = shortlist_buckets.find(shortlist_bucket(bucket.m_grid_height, bucket.m_grid_width, bucket.m_cem_index, bucket.m_ccs_index, bucket.m_num_parts, bucket.m_unique_seed_index)); + if (alt_res_it == shortlist_buckets.end()) + { + new_shortlist_buckets.insert(it->first, it->second); + } + else + { + assert(!alt_res_it->first.m_examined_flag); + alt_res_it->first.m_examined_flag = true; + + const float fract = (bucket.m_sse > 0.0f) ? 
(alt_res_it->first.m_sse / bucket.m_sse) : 0.0f; /* NOTE(review): when bucket.m_sse is 0 this yields fract 0, so only the alternate bucket is inserted below and this bucket is dropped - confirm that is intended */ + + const float ALT_RES_SSE_THRESH = .2f; + if (fract < (1.0f - ALT_RES_SSE_THRESH)) + new_shortlist_buckets.insert(alt_res_it->first, alt_res_it->second); + else if (fract > (1.0f + ALT_RES_SSE_THRESH)) + new_shortlist_buckets.insert(it->first, it->second); + else + { + new_shortlist_buckets.insert(alt_res_it->first, alt_res_it->second); + new_shortlist_buckets.insert(it->first, it->second); + } + } + } + else + { + new_shortlist_buckets.insert(it->first, it->second); + } + + } + else if (prune_pass == 2) + { + // Prune pass 2: RGB Direct vs. Scale bucket groups + + if ((bucket.m_cem_index == astc_helpers::CEM_LDR_RGB_DIRECT) || (bucket.m_cem_index == astc_helpers::CEM_LDR_RGB_BASE_SCALE) || + (bucket.m_cem_index == astc_helpers::CEM_LDR_RGBA_DIRECT) || (bucket.m_cem_index == astc_helpers::CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A)) + { + uint32_t alt_cem_index_to_find = astc_helpers::CEM_LDR_RGB_BASE_SCALE; + + // Check for pairs: CEM_LDR_RGB_DIRECT vs. CEM_LDR_RGB_BASE_SCALE, or CEM_LDR_RGBA_DIRECT vs. CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A. 
+ switch (bucket.m_cem_index) + { + case astc_helpers::CEM_LDR_RGB_DIRECT: + alt_cem_index_to_find = astc_helpers::CEM_LDR_RGB_BASE_SCALE; + break; + case astc_helpers::CEM_LDR_RGB_BASE_SCALE: + alt_cem_index_to_find = astc_helpers::CEM_LDR_RGB_DIRECT; + break; + case astc_helpers::CEM_LDR_RGBA_DIRECT: + alt_cem_index_to_find = astc_helpers::CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A; + break; + case astc_helpers::CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A: + alt_cem_index_to_find = astc_helpers::CEM_LDR_RGBA_DIRECT; + break; + default: + assert(0); + break; + } + + auto alt_res_it = shortlist_buckets.find(shortlist_bucket(bucket.m_grid_width, bucket.m_grid_height, alt_cem_index_to_find, bucket.m_ccs_index, bucket.m_num_parts, bucket.m_unique_seed_index)); + + if (alt_res_it == shortlist_buckets.end()) + { + new_shortlist_buckets.insert(it->first, it->second); + } + else + { + assert(!alt_res_it->first.m_examined_flag); + + alt_res_it->first.m_examined_flag = true; + + // Compare the two buckets, decide if one or another can be tossed as not worth it. + const float fract = (bucket.m_sse > 0.0f) ? 
(alt_res_it->first.m_sse / bucket.m_sse) : 0.0f; + + const float ALT_RES_SSE_THRESH = .1f; + if (fract < (1.0f - ALT_RES_SSE_THRESH)) + new_shortlist_buckets.insert(alt_res_it->first, alt_res_it->second); + else if (fract > (1.0f + ALT_RES_SSE_THRESH)) + new_shortlist_buckets.insert(it->first, it->second); + else + { + new_shortlist_buckets.insert(alt_res_it->first, alt_res_it->second); + new_shortlist_buckets.insert(it->first, it->second); + } + } + } + else + { + new_shortlist_buckets.insert(it->first, it->second); + } + + } // if (prune_pass + + it->first.m_examined_flag = true; + } + + new_shortlist_buckets.swap(shortlist_buckets); + } // prune_pass + } // if (g_bucket_pruning_passes) + + assert(shortlist_buckets.size()); + + if (m_ranked_buckets.capacity() < shortlist_buckets.size()) + m_ranked_buckets.reserve(shortlist_buckets.size()); + + for (auto it = shortlist_buckets.begin(); it != shortlist_buckets.end(); ++it) + { + shortlist_bucket& bucket = it->first; + const trial_mode_index_vec& trial_mode_indices = it->second; + + ranked_shortlist_bucket* pDst = m_ranked_buckets.enlarge(1); + pDst->m_bucket = bucket; + pDst->m_trial_mode_indices = trial_mode_indices; + } + + assert(m_ranked_buckets.size()); + + // Sort the buckets by their surrogate encoded SSE to rank them. + std::sort(m_ranked_buckets.begin(), m_ranked_buckets.end()); + + return true; + } + + /* Builds the surrogate shortlist in m_trial_surrogates from the leading entries of m_ranked_buckets; the number examined is derived from the p.m_shortlist_buckets_to_examine_* settings and clamped to the bucket count. A bucket is skipped when its SSE exceeds the best SSE seen so far times SKIP_IF_BUCKET_WORSE_MULTIPLIER. A bucket holding a single trial mode reuses the surrogate result already stored on the bucket. For larger buckets a subset of modes is selected (ordered by analytic SSE estimates when not every mode is kept) and each selected mode is surrogate encoded. Returns false if nothing was shortlisted; otherwise sorts the shortlist, updates stats and returns true. blur_id and out_blocks are unused. */ bool rank_and_sort_shortlist_buckets( + const ldr_astc_lowlevel_block_encoder_params& p, + const astc_ldr::pixel_stats_t& pixel_stats, + basisu::vector& out_blocks, + uint32_t blur_id, + encode_block_stats& stats) + { + BASISU_NOTE_UNUSED(blur_id); + BASISU_NOTE_UNUSED(out_blocks); + + basisu::vector& shortlist_trials = m_trial_surrogates; + + // TODO: Tune this further. Memory here adds up across all encoding threads. + { + //const float reserve_factor = (sizeof(void*) > 4) ? 
.5f : .25f; + const uint32_t reserve_size = 64;// maximum(256, (int)(p.m_num_trial_modes * reserve_factor)); + + if (shortlist_trials.capacity() < reserve_size) + shortlist_trials.reserve(reserve_size); + + shortlist_trials.resize(0); + } + + uint32_t num_buckets_to_examine = fast_roundf_int((float)m_ranked_buckets.size_u32() * p.m_shortlist_buckets_to_examine_fract); + num_buckets_to_examine = clamp(num_buckets_to_examine, p.m_shortlist_buckets_to_examine_min, p.m_shortlist_buckets_to_examine_max); + + num_buckets_to_examine = clamp(num_buckets_to_examine, 1, m_ranked_buckets.size_u32()); + + float best_err_so_far = BIG_FLOAT_VAL; + + for (uint32_t bucket_index = 0; bucket_index < num_buckets_to_examine; bucket_index++) + { + const shortlist_bucket& bucket = m_ranked_buckets[bucket_index].m_bucket; + const trial_mode_index_vec& bucket_trial_mode_indices = m_ranked_buckets[bucket_index].m_trial_mode_indices; + + if (best_err_so_far != BIG_FLOAT_VAL) + { + if (bucket.m_sse > best_err_so_far * SKIP_IF_BUCKET_WORSE_MULTIPLIER) + continue; + } + best_err_so_far = minimum(best_err_so_far, bucket.m_sse); + + if (bucket_trial_mode_indices.size() == 1) + { + // Bucket only contains 1 mode, so we've already encoded its surrogate. + trial_surrogate& s = *shortlist_trials.try_enlarge(1); /* NOTE(review): the pointer returned by try_enlarge is dereferenced without a check - confirm it cannot be null here */ + + s.m_trial_mode_index = bucket_trial_mode_indices[0]; + s.m_err = bucket.m_sse; + s.m_log_blk = bucket.m_surrogate_log_blk; + continue; + } + + //----- + // We have a bucket sharing all config except for ISE weight/endpoint levels. Decide how many to place on the shortlist using analytic weighted MSE/SSE estimates. 
+ + const uint32_t num_modes_in_bucket = bucket_trial_mode_indices.size_u32(); + + uint32_t num_modes_in_bucket_to_shortlist = fast_roundf_pos_int(num_modes_in_bucket * p.m_num_similar_modes_in_bucket_to_shortlist_fract); + + num_modes_in_bucket_to_shortlist = clamp(num_modes_in_bucket_to_shortlist, p.m_num_similar_modes_in_bucket_to_shortlist_fract_min, p.m_num_similar_modes_in_bucket_to_shortlist_fract_max); + + num_modes_in_bucket_to_shortlist = clamp(num_modes_in_bucket_to_shortlist, 1, num_modes_in_bucket); + + basisu::vector bucket_indices(num_modes_in_bucket); + for (uint32_t i = 0; i < num_modes_in_bucket; i++) + bucket_indices[i] = i; + + if (num_modes_in_bucket_to_shortlist < num_modes_in_bucket) + { + basisu::vector sse_estimates(num_modes_in_bucket); + + const uint32_t bucket_surrogate_endpoint_levels = bucket.m_surrogate_log_blk.m_num_endpoint_levels; + const uint32_t bucket_surrogate_weight_levels = bucket.m_surrogate_log_blk.m_num_weight_levels; + const float bucket_surrogate_base_sse = bucket.m_sse; + + const basist::astc_ldr_t::astc_block_grid_data* pGrid_data = basist::astc_ldr_t::find_astc_block_grid_data(p.m_block_width, p.m_block_height, bucket.m_grid_width, bucket.m_grid_height); + const astc_ldr::partitions_data* pBucket_part_data = (bucket.m_num_parts == 1) ? nullptr : ((bucket.m_num_parts == 2) ? 
p.m_pPart_data_p2 : p.m_pPart_data_p3); + + bool can_use_base_ofs = false; + if ((bucket.m_cem_index == astc_helpers::CEM_LDR_RGB_DIRECT) || (bucket.m_cem_index == astc_helpers::CEM_LDR_RGBA_DIRECT)) + { + float max_span_size = 0.0f; + for (uint32_t part_iter = 0; part_iter < bucket.m_num_parts; part_iter++) + { + for (uint32_t c = 0; c < 4; c++) + { + float span_size = fabs(bucket.m_surrogate_log_blk.m_endpoints[part_iter][1][c] - bucket.m_surrogate_log_blk.m_endpoints[part_iter][0][c]); + max_span_size = maximum(max_span_size, span_size); + } + } + + can_use_base_ofs = max_span_size < .25f; + } + + chan_mse_est bucket_sse_est(0.0f, 0.0f); + for (uint32_t part_iter = 0; part_iter < bucket.m_num_parts; part_iter++) + { + uint32_t total_texels_in_part = p.m_block_width * p.m_block_height; + if (bucket.m_num_parts > 1) + { + total_texels_in_part = pBucket_part_data->m_partition_pat_histograms[bucket.m_unique_seed_index].m_hist[part_iter]; + assert(total_texels_in_part && total_texels_in_part < p.m_block_width * p.m_block_height); + } + + for (uint32_t c = 0; c < 4; c++) + { + float span_size = fabs(bucket.m_surrogate_log_blk.m_endpoints[part_iter][1][c] - bucket.m_surrogate_log_blk.m_endpoints[part_iter][0][c]); + + chan_mse_est chan_mse_est(compute_quantized_channel_mse_estimates( + can_use_base_ofs ? 
minimum(bucket_surrogate_endpoint_levels * 2, 256) : bucket_surrogate_endpoint_levels, + bucket_surrogate_weight_levels, + span_size, pGrid_data->m_weight_gamma)); + + if (span_size == 0.0f) + { + if ((bucket.m_surrogate_log_blk.m_endpoints[part_iter][1][c] == 1.0f) || (bucket.m_surrogate_log_blk.m_endpoints[part_iter][1][c] == 0.0f)) + { + chan_mse_est.m_ep = 0.0f; + chan_mse_est.m_wp = 0.0f; + } + } + + bucket_sse_est.m_ep += chan_mse_est.m_ep * (float)p.m_pEnc_params->m_comp_weights[c] * total_texels_in_part; + bucket_sse_est.m_wp += chan_mse_est.m_wp * (float)p.m_pEnc_params->m_comp_weights[c] * total_texels_in_part; + } // c + + } // part_iter + +#if 0 + fmt_debug_printf("----------------\n"); + + fmt_debug_printf("bucket endpoint levels: {}, weight levels: {}, surrogate sse: {}, ep_est: {}, wp_est: {}, avg RGB subset0 span: {}\n", + bucket_surrogate_endpoint_levels, bucket_surrogate_weight_levels, + bucket.m_sse, + bucket_sse_est.m_ep, bucket_sse_est.m_wp, + (fabs(bucket.m_surrogate_log_blk.m_endpoints[0][1][0] - bucket.m_surrogate_log_blk.m_endpoints[0][0][0]) + + fabs(bucket.m_surrogate_log_blk.m_endpoints[0][1][1] - bucket.m_surrogate_log_blk.m_endpoints[0][0][1]) + + fabs(bucket.m_surrogate_log_blk.m_endpoints[0][1][2] - bucket.m_surrogate_log_blk.m_endpoints[0][0][2])) / 3.0f); +#endif + + for (uint32_t j = 0; j < bucket_trial_mode_indices.size(); j++) + { + const uint32_t trial_mode_index = bucket_trial_mode_indices[j]; + const basist::astc_ldr_t::trial_mode& tm = p.m_pTrial_modes[trial_mode_index]; + + const uint32_t trial_mode_endpoint_levels = astc_helpers::get_ise_levels(tm.m_endpoint_ise_range); + const uint32_t trial_mode_weight_levels = astc_helpers::get_ise_levels(tm.m_weight_ise_range); + + assert(trial_mode_endpoint_levels <= bucket_surrogate_endpoint_levels); + assert(trial_mode_weight_levels <= bucket_surrogate_weight_levels); + + chan_mse_est mode_sse_est(0.0f, 0.0f); + for (uint32_t part_iter = 0; part_iter < bucket.m_num_parts; 
part_iter++) + { + uint32_t total_texels_in_part = p.m_block_width * p.m_block_height; + if (bucket.m_num_parts > 1) + { + total_texels_in_part = pBucket_part_data->m_partition_pat_histograms[bucket.m_unique_seed_index].m_hist[part_iter]; + assert(total_texels_in_part && total_texels_in_part < p.m_block_width * p.m_block_height); + } + + for (uint32_t c = 0; c < 4; c++) + { + float span_size = fabs(bucket.m_surrogate_log_blk.m_endpoints[part_iter][1][c] - bucket.m_surrogate_log_blk.m_endpoints[part_iter][0][c]); + + chan_mse_est chan_mse_est(compute_quantized_channel_mse_estimates( + can_use_base_ofs ? minimum(trial_mode_endpoint_levels * 2, 256) : trial_mode_endpoint_levels, + trial_mode_weight_levels, + span_size, pGrid_data->m_weight_gamma)); + + if (span_size == 0.0f) + { + if ((bucket.m_surrogate_log_blk.m_endpoints[part_iter][1][c] == 1.0f) || (bucket.m_surrogate_log_blk.m_endpoints[part_iter][1][c] == 0.0f)) + { + chan_mse_est.m_ep = 0.0f; + chan_mse_est.m_wp = 0.0f; + } + } + + mode_sse_est.m_ep += chan_mse_est.m_ep * (float)p.m_pEnc_params->m_comp_weights[c] * total_texels_in_part; + mode_sse_est.m_wp += chan_mse_est.m_wp * (float)p.m_pEnc_params->m_comp_weights[c] * total_texels_in_part; + } // c + + } // part_iter + + // Remove the bucket's base estimated endpoint/weight quant + if (trial_mode_endpoint_levels == bucket_surrogate_endpoint_levels) + { + mode_sse_est.m_ep = 0.0f; + } + else + { + mode_sse_est.m_ep -= bucket_sse_est.m_ep; + + if (mode_sse_est.m_ep < 0.0f) + mode_sse_est.m_ep = 0.0f; + } + + if (trial_mode_weight_levels == bucket_surrogate_weight_levels) + { + mode_sse_est.m_wp = 0.0f; + } + else + { + mode_sse_est.m_wp -= bucket_sse_est.m_wp; + + if (mode_sse_est.m_wp < 0.0f) + mode_sse_est.m_wp = 0.0f; + } + + float mode_total_sse_est = bucket_surrogate_base_sse + mode_sse_est.m_ep + mode_sse_est.m_wp; + + sse_estimates[j] = mode_total_sse_est; + +#if 0 + // TEMP comparison code + float actual_sse = 0.0f; + + { + log_surrogate_astc_blk 
temp_surrogate_log_blk; + if (bucket.m_num_parts == 1) + { + actual_sse = encode_surrogate_trial( + p.m_block_width, p.m_block_height, + pixel_stats, + bucket.m_cem_index, + bucket.m_ccs_index, + tm.m_endpoint_ise_range, tm.m_weight_ise_range, + bucket.m_grid_width, bucket.m_grid_height, + temp_surrogate_log_blk, + *p.m_pEnc_params); + } + else + { + const astc_ldr::partitions_data* pPart_data = (bucket.m_num_parts == 2) ? p.m_pPart_data_p2 : p.m_pPart_data_p3; + + const uint32_t part_seed_index = pPart_data->m_unique_index_to_part_seed[bucket.m_unique_seed_index]; + + const astc_ldr::partition_pattern_vec* pPat = &pPart_data->m_partition_pats[bucket.m_unique_seed_index]; + + actual_sse = encode_surrogate_trial_subsets( + p.m_block_width, p.m_block_height, + pixel_stats, + bucket.m_cem_index, bucket.m_num_parts, part_seed_index, pPat, + tm.m_endpoint_ise_range, tm.m_weight_ise_range, + bucket.m_grid_width, bucket.m_grid_height, + temp_surrogate_log_blk, + *p.m_pEnc_params, 0); + } + + stats.m_total_surrogate_encodes++; + } + + fmt_debug_printf("sse: {}, actual sse: {}, endpoint levels: {} weight levels: {}\n", sse_estimates[j], actual_sse, trial_mode_endpoint_levels, trial_mode_weight_levels); +#endif + + } // j + +#if 0 + fmt_debug_printf("\n"); +#endif + + indirect_sort(num_modes_in_bucket, bucket_indices.get_ptr(), sse_estimates.get_ptr()); + + } // if (num_modes_in_bucket_to_shortlist < num_modes_in_bucket) + + // Surrogate encode the best looking buckets after factoring in estimate SSE errors. 
+ + for (uint32_t q = 0; q < num_modes_in_bucket_to_shortlist; q++) + { + const uint32_t j = bucket_indices[q]; + + trial_surrogate& s = *shortlist_trials.try_enlarge(1); + + const uint32_t trial_mode_index = bucket_trial_mode_indices[j]; + const basist::astc_ldr_t::trial_mode& tm = p.m_pTrial_modes[trial_mode_index]; + + s.m_trial_mode_index = trial_mode_index; + + if (bucket.m_num_parts == 1) + { + s.m_err = encode_surrogate_trial( + p.m_block_width, p.m_block_height, + pixel_stats, + bucket.m_cem_index, + bucket.m_ccs_index, + tm.m_endpoint_ise_range, tm.m_weight_ise_range, + bucket.m_grid_width, bucket.m_grid_height, + s.m_log_blk, + *p.m_pEnc_params, 0); + + stats.m_total_surrogate_encodes++; + } + else + { + const astc_ldr::partitions_data* pPart_data = (bucket.m_num_parts == 2) ? p.m_pPart_data_p2 : p.m_pPart_data_p3; + + const uint32_t part_seed_index = pPart_data->m_unique_index_to_part_seed[bucket.m_unique_seed_index]; + + const astc_ldr::partition_pattern_vec* pPat = &pPart_data->m_partition_pats[bucket.m_unique_seed_index]; + + s.m_err = encode_surrogate_trial_subsets( + p.m_block_width, p.m_block_height, + pixel_stats, + bucket.m_cem_index, bucket.m_num_parts, part_seed_index, pPat, + tm.m_endpoint_ise_range, tm.m_weight_ise_range, + bucket.m_grid_width, bucket.m_grid_height, + s.m_log_blk, + *p.m_pEnc_params, 0); + + stats.m_total_surrogate_encodes++; + } + + if ((bucket.m_cem_index == astc_helpers::CEM_LDR_RGB_DIRECT) || (bucket.m_cem_index == astc_helpers::CEM_LDR_RGBA_DIRECT)) + { + // blue contraction/base+offset discount + s.m_err *= BLUE_CONTRACTION_BASE_OFS_DISCOUNT; + } + + } // j + + } // bucket_index + + if (!shortlist_trials.size()) + return false; + + shortlist_trials.sort(); + + stats.m_total_shortlist_candidates += shortlist_trials.size_u32(); + + return true; + } + + /* Runs the full encoder on the leading entries of the sorted surrogate shortlist (m_trial_surrogates) and appends each result, with its SSE and blur_id, to out_blocks. The number of entries tried comes from the p.m_final_shortlist_* settings indexed by m_block_complexity_index, clamped to at least 1 and at most the shortlist size. For RGB and RGBA direct modes, when p.m_final_encode_try_base_ofs is set, a base plus offset variant is encoded first; the direct encode then runs if p.m_final_encode_always_try_rgb_direct is set or the variant failed or reported clamping. The loop stops early once a result reaches p.m_early_stop_wpsnr, or p.m_early_stop2_wpsnr from shortlist index EARLY_STOP2_SHORTLIST_ITER_INDEX onward. Returns false only when a direct encode fails. */ bool final_polish_encode_from_shortlist( + const ldr_astc_lowlevel_block_encoder_params& p, + const astc_ldr::pixel_stats_t& pixel_stats, + basisu::vector& out_blocks, + 
uint32_t blur_id, + encode_block_stats& stats) + { + basisu::vector& shortlist_trials = m_trial_surrogates; + + // TODO: Diversity selection + const float shortlist_fract = p.m_final_shortlist_fraction[m_block_complexity_index]; + + uint32_t max_shortlist_trials = (uint32_t)std::roundf((float)shortlist_trials.size_u32() * shortlist_fract); + + max_shortlist_trials = clamp(max_shortlist_trials, p.m_final_shortlist_min_size[m_block_complexity_index], p.m_final_shortlist_max_size[m_block_complexity_index]); + + uint32_t total_shortlist_trials = clamp(max_shortlist_trials, 1, shortlist_trials.size_u32()); + + const uint32_t EARLY_STOP2_SHORTLIST_ITER_INDEX = 5; + + // Now do the real encodes on the top surrogate shortlist trials. + for (uint32_t shortlist_iter = 0; shortlist_iter < total_shortlist_trials; shortlist_iter++) + { + const uint32_t trial_mode_index = shortlist_trials[shortlist_iter].m_trial_mode_index; + const basist::astc_ldr_t::trial_mode& tm = p.m_pTrial_modes[trial_mode_index]; + + astc_helpers::log_astc_block log_astc_blk; + + bool base_ofs_succeeded_flag = false; + + if ((p.m_final_encode_try_base_ofs) && ((tm.m_cem == astc_helpers::CEM_LDR_RGB_DIRECT) || (tm.m_cem == astc_helpers::CEM_LDR_RGBA_DIRECT))) + { + // Add RGB/RGBA BASE PLUS OFFSET variant. + astc_helpers::log_astc_block log_astc_blk_alt; + + const uint32_t base_ofs_cem_index = (tm.m_cem == astc_helpers::CEM_LDR_RGB_DIRECT) ? astc_helpers::CEM_LDR_RGB_BASE_PLUS_OFFSET : astc_helpers::CEM_LDR_RGBA_BASE_PLUS_OFFSET; + + bool base_ofs_clamped_flag = false; + + bool alt_enc_trial_status; + if (tm.m_num_parts > 1) + { + const astc_ldr::partitions_data* pPart_data = (tm.m_num_parts == 2) ? 
p.m_pPart_data_p2 : p.m_pPart_data_p3; + + const uint32_t part_seed_index = shortlist_trials[shortlist_iter].m_log_blk.m_seed_index; + const uint32_t part_unique_index = pPart_data->m_part_seed_to_unique_index[part_seed_index]; + const astc_ldr::partition_pattern_vec* pPat = &pPart_data->m_partition_pats[part_unique_index]; + + alt_enc_trial_status = encode_trial_subsets( + p.m_block_width, p.m_block_height, pixel_stats, base_ofs_cem_index, tm.m_num_parts, + part_seed_index, pPat, + tm.m_endpoint_ise_range, tm.m_weight_ise_range, + tm.m_grid_width, tm.m_grid_height, log_astc_blk_alt, *p.m_pEnc_params, false, + p.m_gradient_descent_flag, p.m_polish_weights_flag, p.m_qcd_enabled_flag, + p.m_use_blue_contraction, &base_ofs_clamped_flag); + } + else + { + alt_enc_trial_status = encode_trial( + p.m_block_width, p.m_block_height, pixel_stats, base_ofs_cem_index, + tm.m_ccs_index != -1, tm.m_ccs_index, + tm.m_endpoint_ise_range, tm.m_weight_ise_range, + tm.m_grid_width, tm.m_grid_height, log_astc_blk_alt, *p.m_pEnc_params, + p.m_gradient_descent_flag, p.m_polish_weights_flag, p.m_qcd_enabled_flag, + p.m_use_blue_contraction, &base_ofs_clamped_flag); + } + + assert(alt_enc_trial_status); + + if (alt_enc_trial_status) + { + stats.m_total_full_encodes++; + + encode_block_output* pOut_block2 = out_blocks.enlarge(1); + pOut_block2->clear(); + pOut_block2->m_trial_mode_index = safe_cast_int16(trial_mode_index); + pOut_block2->m_log_blk = log_astc_blk_alt; + pOut_block2->m_blur_id = safe_cast_uint16(blur_id); + pOut_block2->m_sse = eval_error(p.m_block_width, p.m_block_height, log_astc_blk_alt, pixel_stats, *p.m_pEnc_params); + + if ((p.m_early_stop_wpsnr) || (p.m_early_stop2_wpsnr)) + { + const float wpsnr = compute_psnr_from_wsse(p.m_block_width, p.m_block_height, pOut_block2->m_sse, p.m_pEnc_params->get_total_comp_weights()); + + if ((p.m_early_stop_wpsnr) && (wpsnr >= p.m_early_stop_wpsnr)) + break; + + if (shortlist_iter >= EARLY_STOP2_SHORTLIST_ITER_INDEX) + { + if 
((p.m_early_stop2_wpsnr) && (wpsnr >= p.m_early_stop2_wpsnr)) + break; + } + } + + base_ofs_succeeded_flag = !base_ofs_clamped_flag; + } + + } // (p.m_final_encode_try_base_ofs) + + if ((p.m_final_encode_always_try_rgb_direct) || (!base_ofs_succeeded_flag)) + { + bool enc_trial_status; + + if (tm.m_num_parts > 1) + { + const astc_ldr::partitions_data* pPart_data = (tm.m_num_parts == 2) ? p.m_pPart_data_p2 : p.m_pPart_data_p3; + + const uint32_t part_seed_index = shortlist_trials[shortlist_iter].m_log_blk.m_seed_index; + const uint32_t part_unique_index = pPart_data->m_part_seed_to_unique_index[part_seed_index]; + assert(part_unique_index < astc_helpers::NUM_PARTITION_PATTERNS); + const astc_ldr::partition_pattern_vec* pPat = &pPart_data->m_partition_pats[part_unique_index]; + + enc_trial_status = encode_trial_subsets( + p.m_block_width, p.m_block_height, pixel_stats, tm.m_cem, tm.m_num_parts, + part_seed_index, pPat, + tm.m_endpoint_ise_range, tm.m_weight_ise_range, + tm.m_grid_width, tm.m_grid_height, log_astc_blk, *p.m_pEnc_params, false, + p.m_gradient_descent_flag, p.m_polish_weights_flag, p.m_qcd_enabled_flag, + p.m_use_blue_contraction); + } + else + { + enc_trial_status = encode_trial( + p.m_block_width, p.m_block_height, pixel_stats, tm.m_cem, + tm.m_ccs_index != -1, tm.m_ccs_index, + tm.m_endpoint_ise_range, tm.m_weight_ise_range, + tm.m_grid_width, tm.m_grid_height, log_astc_blk, *p.m_pEnc_params, + p.m_gradient_descent_flag, p.m_polish_weights_flag, p.m_qcd_enabled_flag, + p.m_use_blue_contraction); + } + + assert(enc_trial_status); + + if (!enc_trial_status) + return false; + + stats.m_total_full_encodes++; + + { + encode_block_output* pOut_block1 = out_blocks.enlarge(1); + pOut_block1->clear(); + pOut_block1->m_trial_mode_index = safe_cast_int16(trial_mode_index); + pOut_block1->m_log_blk = log_astc_blk; + pOut_block1->m_blur_id = safe_cast_uint16(blur_id); + pOut_block1->m_sse = eval_error(p.m_block_width, p.m_block_height, log_astc_blk, pixel_stats, 
*p.m_pEnc_params); + + if ((p.m_early_stop_wpsnr) || (p.m_early_stop2_wpsnr)) + { + const float wpsnr = compute_psnr_from_wsse(p.m_block_width, p.m_block_height, pOut_block1->m_sse, p.m_pEnc_params->get_total_comp_weights()); + + if ((p.m_early_stop_wpsnr) && (wpsnr >= p.m_early_stop_wpsnr)) + break; + + if (shortlist_iter >= EARLY_STOP2_SHORTLIST_ITER_INDEX) + { + if ((p.m_early_stop2_wpsnr) && (wpsnr >= p.m_early_stop2_wpsnr)) + break; + } + } + } + + } // if (!skip_encode_flag) + + } // shortlist_iter + + return true; + } + + /* Runs the whole per-block pipeline in order: clear, init, partition_triage, trivial_triage, analytic_triage, surrogate_encode_shortlist_bucket_representatives, prune_shortlist_buckets, rank_and_sort_shortlist_buckets and final_polish_encode_from_shortlist. Returns false as soon as any stage returns false, otherwise true. */ bool full_encode(const ldr_astc_lowlevel_block_encoder_params& p, + const astc_ldr::pixel_stats_t& pixel_stats, + basisu::vector& out_blocks, + uint32_t blur_id, + encode_block_stats& stats) + { + clear(); + + if (!init(p, pixel_stats, out_blocks, blur_id, stats)) + return false; + + if (!partition_triage(p, pixel_stats, out_blocks, blur_id, stats)) + return false; + + if (!trivial_triage(p, pixel_stats, out_blocks, blur_id, stats)) + return false; + + if (!analytic_triage(p, pixel_stats, out_blocks, blur_id, stats)) + return false; + + if (!surrogate_encode_shortlist_bucket_representatives(p, pixel_stats, out_blocks, blur_id, stats)) + return false; + + if (!prune_shortlist_buckets(p, pixel_stats, out_blocks, blur_id, stats)) + return false; + + if (!rank_and_sort_shortlist_buckets(p, pixel_stats, out_blocks, blur_id, stats)) + return false; + + if (!final_polish_encode_from_shortlist(p, pixel_stats, out_blocks, blur_id, stats)) + return false; + + return true; + } +}; + +/* Pool of ldr_astc_lowlevel_block_encoder objects, sized by init(total_threads). Each entry carries an m_used_flag, and every method locks m_mutex for its duration. */ class ldr_astc_lowlevel_block_encoder_pool +{ +public: + ldr_astc_lowlevel_block_encoder_pool() + { + } + + void init(uint32_t total_threads) + { + std::lock_guard g(m_mutex); + + m_pool.resize(total_threads); + + for (uint32_t i = 0; i < total_threads; i++) + m_pool[i].m_used_flag = false; + } + + void deinit() + { + std::lock_guard g(m_mutex); + + for (uint32_t i = 0; i < m_pool.size(); i++) + { + if (m_pool[i].m_used_flag) + { + assert(0); + 
debug_printf("ldr_astc_lowlevel_block_encoder_pool::deinit: Pool entry still marked as used\n"); + } + + m_pool[i].m_used_flag = false; + } + + m_pool.resize(0); + } + + /* Marks the first entry whose m_used_flag is clear as used and returns it. Returns nullptr (after an assert) when every entry is already in use. */ ldr_astc_lowlevel_block_encoder* acquire() + { + std::lock_guard g(m_mutex); + + assert(m_pool.size()); + + ldr_astc_lowlevel_block_encoder* pRes = nullptr; + + for (uint32_t i = 0; i < m_pool.size(); i++) + { + if (!m_pool[i].m_used_flag) + { + pRes = &m_pool[i]; + pRes->m_used_flag = true; + + break; + } + } + + assert(pRes); + + return pRes; + } + + /* Clears the m_used_flag of the entry pTemps points to. Returns false (after an assert) when pTemps does not point into the pool, true otherwise. */ bool release(ldr_astc_lowlevel_block_encoder* pTemps) + { + std::lock_guard g(m_mutex); + + assert(m_pool.size()); + + if ((pTemps < m_pool.begin()) || (pTemps >= m_pool.end())) + { + assert(0); + return false; + } + + size_t idx = pTemps - m_pool.begin(); + if (idx >= m_pool.size()) + { + assert(0); + return false; + } + + m_pool[idx].m_used_flag = false; + + return true; + } + +private: + std::mutex m_mutex; + basisu::vector m_pool; +}; + +/* Acquires an encoder from the given pool on construction and releases it back on destruction. get_ptr returns whatever acquire returned, which is nullptr if the pool had no free entry. NOTE(review): no copy constructor is deleted here, so a copy would release the same entry twice - confirm instances are never copied. */ class scoped_ldr_astc_lowlevel_block_encoder +{ +public: + scoped_ldr_astc_lowlevel_block_encoder(ldr_astc_lowlevel_block_encoder_pool& pool) : + m_pool(pool) + { + m_pTemps = pool.acquire(); + } + + ~scoped_ldr_astc_lowlevel_block_encoder() + { + m_pool.release(m_pTemps); + } + + ldr_astc_lowlevel_block_encoder_pool& get_pool() const + { + return m_pool; + } + + ldr_astc_lowlevel_block_encoder* get_ptr() + { + return m_pTemps; + } + +private: + ldr_astc_lowlevel_block_encoder_pool& m_pool; + ldr_astc_lowlevel_block_encoder* m_pTemps; +}; + + +//------------------------------------------------------------------- + +#pragma pack(push, 1) +struct trial_mode_desc +{ + uint8_t m_unique_cem_index; // LDR base CEM's, 0-5 + uint8_t m_ccs; // 0 if SP, 1-4 for DP + uint8_t m_subsets; // 1-3 + uint8_t m_eise; // endpoint ise range, 4-20 + uint8_t m_wise; // weight ise range, 0-11 + uint8_t m_grid_w, m_grid_h; // grid resolution, 4-12 +}; +#pragma pack(pop) + +static const int s_astc_cem_to_unique_ldr_index[16] = +{ + 0, // 
CEM_LDR_LUM_DIRECT + -1, // CEM_LDR_LUM_BASE_PLUS_OFS + -1, // CEM_HDR_LUM_LARGE_RANGE + -1, // CEM_HDR_LUM_SMALL_RANGE + 1, // CEM_LDR_LUM_ALPHA_DIRECT + -1, // CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS + 2, // CEM_LDR_RGB_BASE_SCALE + -1, // CEM_HDR_RGB_BASE_SCALE + 3, // CEM_LDR_RGB_DIRECT + -1, // CEM_LDR_RGB_BASE_PLUS_OFFSET + 4, // CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A + -1, // CEM_HDR_RGB + 5, // CEM_LDR_RGBA_DIRECT + -1, // CEM_LDR_RGBA_BASE_PLUS_OFFSET + -1, // CEM_HDR_RGB_LDR_ALPHA + -1, // CEM_HDR_RGB_HDR_ALPHA +}; + +#if 0 +static const int s_unique_ldr_index_to_astc_cem[6] = +{ + astc_helpers::CEM_LDR_LUM_DIRECT, + astc_helpers::CEM_LDR_LUM_ALPHA_DIRECT, + astc_helpers::CEM_LDR_RGB_BASE_SCALE, + astc_helpers::CEM_LDR_RGB_DIRECT, + astc_helpers::CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A, + astc_helpers::CEM_LDR_RGBA_DIRECT +}; +#endif + +/* Packs a trial mode configuration into a 24-bit id. Fields are packed from the low bits upward in this order: endpoint ISE range rebased to FIRST_VALID_ENDPOINT_ISE_RANGE, weight ISE range, CCS index (0 for single plane, 1-4 for dual plane), subset count minus one, unique LDR CEM index from s_astc_cem_to_unique_ldr_index, and finally (grid_width - 2) * 11 + (grid_height - 2). Field widths come from the basist::astc_ldr_t::CFG_PACK_*_BITS constants. Inputs are validated with asserts only. */ static uint32_t pack_tm_desc( + uint32_t grid_width, uint32_t grid_height, + uint32_t cem_index, uint32_t ccs_index, uint32_t num_subsets, + uint32_t endpoint_ise_range, uint32_t weight_ise_range) +{ + assert((grid_width >= 2) && (grid_width <= 12)); + assert((grid_height >= 2) && (grid_height <= 12)); + assert((cem_index < 16) && astc_helpers::is_cem_ldr(cem_index)); + assert((num_subsets >= 1) && (num_subsets <= 3)); + assert(ccs_index <= 4); // 0 for SP, 1-4 for DP + assert((endpoint_ise_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (endpoint_ise_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE)); + assert((weight_ise_range >= astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE) && (weight_ise_range <= astc_helpers::LAST_VALID_WEIGHT_ISE_RANGE)); + + grid_width -= 2; + grid_height -= 2; + assert((grid_width <= 10) && (grid_height <= 10)); + + const int unique_cem_index = s_astc_cem_to_unique_ldr_index[cem_index]; + assert((unique_cem_index >= 0) && (unique_cem_index <= 5)); + assert(basist::astc_ldr_t::s_unique_ldr_index_to_astc_cem[unique_cem_index] == (int)cem_index); + + num_subsets--; + + endpoint_ise_range -= 
astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE; + + uint32_t cur_bit_ofs = 0; + +#define BU_PACK_FIELD(val, bits) do { uint32_t v = (uint32_t)(val); assert(v < (1u << bits)); packed_id |= (v << cur_bit_ofs); cur_bit_ofs += (bits); } while(0) + + uint32_t packed_id = 0; + BU_PACK_FIELD(endpoint_ise_range, basist::astc_ldr_t::CFG_PACK_EISE_BITS); + BU_PACK_FIELD(weight_ise_range, basist::astc_ldr_t::CFG_PACK_WISE_BITS); + BU_PACK_FIELD(ccs_index, basist::astc_ldr_t::CFG_PACK_CCS_BITS); + BU_PACK_FIELD(num_subsets, basist::astc_ldr_t::CFG_PACK_SUBSETS_BITS); + BU_PACK_FIELD(unique_cem_index, basist::astc_ldr_t::CFG_PACK_CEM_BITS); + // must be at the top + BU_PACK_FIELD(grid_width * 11 + grid_height, basist::astc_ldr_t::CFG_PACK_GRID_BITS); +#undef BU_PACK_FIELD + + assert(cur_bit_ofs == 24); + + return packed_id; +} + +void create_encoder_trial_modes_full_eval(uint32_t block_width, uint32_t block_height, + basisu::vector& encoder_trial_modes, basist::astc_ldr_t::grouped_trial_modes& grouped_encoder_trial_modes, + bool print_debug_info = true, bool print_modes = false) +{ + interval_timer itm; + itm.start(); + + encoder_trial_modes.resize(0); + grouped_encoder_trial_modes.clear(); + + uint32_t max_grid_width = 0, max_grid_height = 0; + uint32_t total_evals = 0, total_partial_evals = 0, total_evals_succeeded = 0; + uint32_t mode_index = 0; + uint_vec packed_mode_ids; + + for (uint32_t alpha_iter = 0; alpha_iter < 2; alpha_iter++) + { + if (print_modes) + { + if (alpha_iter) + fmt_debug_printf("ALPHA TRIAL MODES\n"); + else + fmt_debug_printf("RGB TRIAL MODES\n"); + } + + astc_helpers::astc_block phys_block; + + for (uint32_t cem_mode_iter = 0; cem_mode_iter < 3; cem_mode_iter++) + { + const uint32_t s_rgb_cems[3] = { astc_helpers::CEM_LDR_LUM_DIRECT, astc_helpers::CEM_LDR_RGB_BASE_SCALE, astc_helpers::CEM_LDR_RGB_DIRECT }; + const uint32_t s_alpha_cems[3] = { astc_helpers::CEM_LDR_LUM_ALPHA_DIRECT, astc_helpers::CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A, 
astc_helpers::CEM_LDR_RGBA_DIRECT }; + + const uint32_t cem_index = alpha_iter ? s_alpha_cems[cem_mode_iter] : s_rgb_cems[cem_mode_iter]; + + uint32_t num_dp_chans = 0; + bool cem_supports_dual_plane = false; + bool cem_supports_subsets = false; + + // base+ofs variants are automatically used later as alternates to RGB/RGBA direct modes + switch (cem_index) + { + case astc_helpers::CEM_LDR_LUM_DIRECT: + num_dp_chans = 0; // only a single component, so only a single plane + cem_supports_dual_plane = false; + cem_supports_subsets = true; + break; + case astc_helpers::CEM_LDR_LUM_ALPHA_DIRECT: + num_dp_chans = 1; // CCS can only be 3 + cem_supports_dual_plane = true; + cem_supports_subsets = true; + break; + case astc_helpers::CEM_LDR_RGB_DIRECT: + num_dp_chans = 3; + cem_supports_dual_plane = true; + cem_supports_subsets = true; + break; + case astc_helpers::CEM_LDR_RGB_BASE_SCALE: + num_dp_chans = 3; + cem_supports_dual_plane = true; + cem_supports_subsets = true; + break; + case astc_helpers::CEM_LDR_RGBA_DIRECT: + num_dp_chans = 4; + cem_supports_dual_plane = true; + cem_supports_subsets = true; + break; + case astc_helpers::CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A: + num_dp_chans = 4; + cem_supports_dual_plane = true; + cem_supports_subsets = true; + break; + default: + assert(0); + break; + } + + for (int dp = 0; dp < (cem_supports_dual_plane ? 2 : 1); dp++) + { + const bool use_subsets = !dp && cem_supports_subsets; + + for (int subsets = 1; subsets <= (use_subsets ? 3 : 1); subsets++) + { + for (uint32_t grid_height = 2; grid_height <= block_height; grid_height++) + { + for (uint32_t grid_width = 2; grid_width <= block_width; grid_width++) + { + for (uint32_t dp_chan_index = 0; dp_chan_index < (dp ? 
num_dp_chans : 1); dp_chan_index++) + { + astc_helpers::log_astc_block log_block; + log_block.clear(); + + log_block.m_grid_width = (uint8_t)grid_width; + log_block.m_grid_height = (uint8_t)grid_height; + + log_block.m_num_partitions = (uint8_t)subsets; + + for (int i = 0; i < subsets; i++) + log_block.m_color_endpoint_modes[i] = (uint8_t)cem_index; + + log_block.m_dual_plane = dp > 0; + + if (log_block.m_dual_plane) + { + uint32_t ccs_index = dp_chan_index; + + if (cem_index == astc_helpers::CEM_LDR_LUM_ALPHA_DIRECT) + { + // must be 3 for LA if DP is enabled + ccs_index = 3; + } + + log_block.m_color_component_selector = (uint8_t)ccs_index; + } + + for (uint32_t weight_ise_range = astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE; weight_ise_range <= astc_helpers::LAST_VALID_WEIGHT_ISE_RANGE; weight_ise_range++) + { + log_block.m_weight_ise_range = (uint8_t)weight_ise_range; + log_block.m_endpoint_ise_range = astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE; // dummy value + + total_partial_evals++; + + bool success = astc_helpers::pack_astc_block(phys_block, log_block, nullptr, nullptr, astc_helpers::cValidateEarlyOutAtEndpointISEChecks); + if (!success) + continue; + + // in reality only 1 endpoint ISE range is valid here + for (uint32_t endpoint_ise_range = astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE; endpoint_ise_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE; endpoint_ise_range++) + { + log_block.m_endpoint_ise_range = (uint8_t)endpoint_ise_range; + + total_evals++; + + success = astc_helpers::pack_astc_block(phys_block, log_block, nullptr, nullptr, astc_helpers::cValidateSkipFinalEndpointWeightPacking); + if (!success) + continue; + + total_evals_succeeded++; + + if (print_modes) + { + fmt_debug_printf("{}: CEM: {} DP: {}, CCS: {}, SUBSETS: {}, GRID: {}x{}, ENDPOINTS: {}, WEIGHTS: {}\n", + mode_index, + log_block.m_color_endpoint_modes[0], + log_block.m_dual_plane, + log_block.m_color_component_selector, + log_block.m_num_partitions, + log_block.m_grid_width, 
log_block.m_grid_height, + astc_helpers::get_ise_levels(log_block.m_endpoint_ise_range), + astc_helpers::get_ise_levels(log_block.m_weight_ise_range)); + } + + basist::astc_ldr_t::trial_mode m; + m.m_ccs_index = log_block.m_dual_plane ? log_block.m_color_component_selector : -1; + m.m_cem = log_block.m_color_endpoint_modes[0]; + m.m_endpoint_ise_range = log_block.m_endpoint_ise_range; + m.m_weight_ise_range = log_block.m_weight_ise_range; + m.m_grid_width = grid_width; + m.m_grid_height = grid_height; + m.m_num_parts = log_block.m_num_partitions; + + uint32_t packed_index = pack_tm_desc( + log_block.m_grid_width, log_block.m_grid_height, + log_block.m_color_endpoint_modes[0], log_block.m_dual_plane ? (log_block.m_color_component_selector + 1) : 0, log_block.m_num_partitions, + log_block.m_endpoint_ise_range, log_block.m_weight_ise_range); + + assert(packed_index <= 0xFFFFFF); + packed_mode_ids.push_back(packed_index); + + grouped_encoder_trial_modes.add(block_width, block_height, m, encoder_trial_modes.size_u32()); + + encoder_trial_modes.push_back(m); + + max_grid_width = maximum(max_grid_width, grid_width); + max_grid_height = maximum(max_grid_height, grid_height); + + ++mode_index; + + } // weight_ise_range + } // endpoint_ise_range + + } // ccs_index + + } // grid_width + + } // grid_height + + } // subsets + + } // dp + + } // cem_mode_iter + + } // alpha_iter + +#if 0 + packed_mode_ids.sort(); + + for (uint32_t i = 0; i < packed_mode_ids.size(); i++) + { + uint32_t packed_index = packed_mode_ids[i]; + + fmt_debug_printf("{},{},{},", packed_index & 0xFF, (packed_index >> 8) & 0xFF, (packed_index >> 16) & 0xFF); + if ((i & 15) == 15) + fmt_debug_printf("\n"); + } +#endif + + if (print_debug_info) + { + fmt_debug_printf("create_encoder_trial_modes_full_eval() time: {} secs\n", itm.get_elapsed_secs()); + + fmt_debug_printf("create_encoder_trial_modes_full_eval() - ASTC {}x{} modes\n", block_width, block_height); + fmt_debug_printf("total_evals: {}, 
total_partial_evals: {}, total_evals_succeeded: {}\n", total_evals, total_partial_evals, total_evals_succeeded); + fmt_debug_printf("Total trial modes: {}\n", (uint32_t)encoder_trial_modes.size()); + fmt_debug_printf("Total used trial mode groups: {}\n", grouped_encoder_trial_modes.count_used_groups()); + fmt_debug_printf("Max ever grid dimensions: {}x{}\n", max_grid_width, max_grid_height); + } + + // sanity check + assert(encoder_trial_modes.size() < 11000); +} + +const uint32_t TOTAL_RGBA_CHAN_PAIRS = 6; +//const uint32_t TOTAL_RGB_CHAN_PAIRS = 3; +static const uint8_t g_rgba_chan_pairs[TOTAL_RGBA_CHAN_PAIRS][2] = +{ + { 0, 1 }, + { 0, 2 }, + { 1, 2 }, + { 0, 3 }, + { 1, 3 }, + { 2, 3 } +}; + +bool encoder_trial_mode_test() +{ + for (uint32_t w = 4; w <= 12; w++) + { + for (uint32_t h = 4; h <= 12; h++) + { + if (!astc_helpers::is_valid_block_size(w, h)) + continue; + + basisu::vector encoder_trial_modes_orig; + basist::astc_ldr_t::grouped_trial_modes grouped_encoder_trial_modes_orig; + + create_encoder_trial_modes_full_eval(w, h, + encoder_trial_modes_orig, grouped_encoder_trial_modes_orig, + false, false); + + fmt_debug_printf("Testing block size {}x{}, {} total modes\n", w, h, encoder_trial_modes_orig.size_u32()); + + basisu::hash_map trial_mode_hash; + for (uint32_t i = 0; i < encoder_trial_modes_orig.size(); i++) + { + trial_mode_hash.insert(encoder_trial_modes_orig[i]); + } + + basisu::vector encoder_trial_modes_new; + basist::astc_ldr_t::grouped_trial_modes grouped_encoder_trial_modes_new; + + basist::astc_ldr_t::create_encoder_trial_modes_table(w, h, + encoder_trial_modes_new, grouped_encoder_trial_modes_new, + false, false); + + if (encoder_trial_modes_new.size() != encoder_trial_modes_orig.size()) + { + fmt_error_printf("trial mode test failed!\n"); + + assert(0); + return false; + } + + for (uint32_t i = 0; i < encoder_trial_modes_new.size(); i++) + { + const basist::astc_ldr_t::trial_mode& tm = encoder_trial_modes_new[i]; + if 
(trial_mode_hash.find(tm) == trial_mode_hash.end()) + { + fmt_error_printf("trial mode test failed!\n"); + + assert(0); + return false; + } + } + + } // h + } // w + + fmt_debug_printf("trial mode test succeeded\n"); + return true; +} + +//---------------------------------------------------------------------------------- + +struct ldr_astc_block_encode_image_high_level_config +{ + uint32_t m_block_width = 6; + uint32_t m_block_height = 6; + + bool m_second_superpass_refinement = true; + float m_second_superpass_fract_to_recompress = .075f; + + bool m_third_superpass_try_neighbors = true; + + float m_base_q = 75.0f; + bool m_use_dct = false; + + bool m_subsets_enabled = true; + bool m_subsets_edge_filtering = true; + + bool m_filter_by_pca_angles_flag = true; + float m_use_direct_angle_thresh = 2.0f; + float m_use_base_scale_angle_thresh = 7.0f; + + bool m_force_all_dual_plane_chan_evals = false; // much slower, test on base + bool m_disable_rgb_dual_plane = false; // DP can be on alpha only, if block has alpha + float m_strong_dp_decorr_thresh_rgb = .998f; + + bool m_use_base_ofs = true; + bool m_use_blue_contraction = true; + + bool m_grid_hv_filtering = true; + bool m_low_freq_block_filtering = true; + + uint32_t m_superbucket_max_to_retain[3] = { 4, 8, 16 }; + + float m_final_shortlist_fraction[3] = { .25f, .33f, .5f }; + uint32_t m_final_shortlist_min_size[3] = { 1, 1, 1 }; + uint32_t m_final_shortlist_max_size[3] = { 4096, 4096, 4096 }; + + uint32_t m_part2_fraction_to_keep = 2; + uint32_t m_part3_fraction_to_keep = 2; + uint32_t m_base_parts2 = 32; + uint32_t m_base_parts3 = 32; + + float m_early_stop_wpsnr = 0.0f; + float m_early_stop2_wpsnr = 0.0f; + + bool m_blurring_enabled = false; + bool m_blurring_enabled_p2 = false; + + bool m_gradient_descent_flag = true; + bool m_polish_weights_flag = true; + bool m_qcd_enabled_flag = true; // gradient descent must be enabled too + bool m_bucket_pruning_passes = true; + + // 2nd superpass options + uint32_t 
m_base_parts2_p2 = 64; + uint32_t m_base_parts3_p2 = 64; + uint32_t m_superbucket_max_to_retain_p2[3] = { 16, 32, 256 }; + uint32_t m_final_shortlist_max_size_p2[3] = { 4096, 4096, 4096 }; + uint32_t m_second_pass_total_weight_refine_passes = astc_ldr::WEIGHT_REFINER_MAX_PASSES; + bool m_second_pass_force_subsets_enabled = true; + bool m_force_all_dp_chans_p2 = false; + bool m_final_encode_always_try_rgb_direct = false; + bool m_filter_by_pca_angles_flag_p2 = true; + + // only store the single best result per block + //bool m_save_single_result = false; + + bool m_debug_images = false; + bool m_debug_output = false; + + std::string m_debug_file_prefix; + + job_pool* m_pJob_pool; + + //saliency_map m_saliency_map; + + astc_ldr::cem_encode_params m_cem_enc_params; +}; + +struct ldr_astc_block_encode_image_output +{ + ldr_astc_block_encode_image_output() + { + } + + ~ldr_astc_block_encode_image_output() + { + interval_timer itm; + itm.start(); + + const int num_blocks_x = m_image_block_info.get_width(); + const int num_blocks_y = m_image_block_info.get_height(); + + for (int y = num_blocks_y - 1; y >= 0; --y) + { + for (int x = num_blocks_x - 1; x >= 0; --x) + { + auto& out_blocks = m_image_block_info(x, y).m_out_blocks; + out_blocks.clear(); + } + } // y + + //fmt_debug_printf("Cleared enc_out image block info: {3.3} secs\n", itm.get_elapsed_secs()); + } + + astc_ldr::partitions_data m_part_data_p2; + astc_ldr::partitions_data m_part_data_p3; + + basisu::vector m_encoder_trial_modes; + basist::astc_ldr_t::grouped_trial_modes m_grouped_encoder_trial_modes; + + vector2D m_packed_phys_blocks; + + struct block_info + { + block_info() + { + m_pixel_stats.clear(); + } + + astc_ldr::pixel_stats_t m_pixel_stats; // of original/input block + + basisu::vector m_out_blocks; + + uint32_t m_packed_out_block_index = 0; // index of best out block by WSSE + + bool m_low_freq_block_flag = false; + bool m_super_strong_edges = false; + bool m_very_strong_edges = false; + bool 
m_strong_edges = false; + }; + + vector2D m_image_block_info; + + struct block_info_superpass1 + { + int m_config_reuse_neighbor_out_block_indices[basist::astc_ldr_t::cMaxConfigReuseNeighbors] = { cInvalidIndex, cInvalidIndex, cInvalidIndex }; + + bool m_config_reuse_new_neighbor_out_block_flags[basist::astc_ldr_t::cMaxConfigReuseNeighbors] = { false, false, false }; + + basisu::vector m_new_out_config_reuse_blocks; + basisu::vector m_new_out_config_endpoint_reuse_blocks; + }; + + vector2D m_image_block_info_superpass2; + +private: + ldr_astc_block_encode_image_output(const ldr_astc_block_encode_image_output&); + ldr_astc_block_encode_image_output& operator= (const ldr_astc_block_encode_image_output&); +}; + +constexpr bool selective_blurring = true; + +bool ldr_astc_block_encode_image( + const image& orig_img, + const ldr_astc_block_encode_image_high_level_config& enc_cfg, + ldr_astc_block_encode_image_output& enc_out) +{ + if (enc_cfg.m_debug_output) + fmt_debug_printf("ldr_astc_block_encode_image:\n"); + + const uint32_t block_width = enc_cfg.m_block_width, block_height = enc_cfg.m_block_height; + const uint32_t width = orig_img.get_width(), height = orig_img.get_height(); + const uint32_t total_pixels = width * height; + const uint32_t total_block_pixels = enc_cfg.m_block_width * enc_cfg.m_block_height; + const uint32_t num_blocks_x = orig_img.get_block_width(enc_cfg.m_block_width); + const uint32_t num_blocks_y = orig_img.get_block_height(enc_cfg.m_block_height); + const uint32_t total_blocks = num_blocks_x * num_blocks_y; + + if (enc_cfg.m_debug_output) + { + fmt_debug_printf("ASTC base bitrate: {3.3} bpp\n", 128.0f / (float)(enc_cfg.m_block_width * enc_cfg.m_block_height)); + + fmt_debug_printf("ASTC block size: {}x{}\n", enc_cfg.m_block_width, enc_cfg.m_block_height); + } + + if (enc_cfg.m_debug_output) + fmt_debug_printf("Image has alpha: {}\n", orig_img.has_alpha()); + + astc_ldr::partitions_data* pPart_data_p2 = &enc_out.m_part_data_p2; + 
pPart_data_p2->init(2, enc_cfg.m_block_width, enc_cfg.m_block_height); + + astc_ldr::partitions_data* pPart_data_p3 = &enc_out.m_part_data_p3; + pPart_data_p3->init(3, enc_cfg.m_block_width, enc_cfg.m_block_height); + + // blurring coefficients + const float bw0 = 1.15f; + const float bw1 = 1.25f, bw1_a = 1.0f; + const float bw2 = 1.25f; + + // TODO: Make this optional/tune this, add only 2 level blurring support + image orig_img_blurred2, orig_img_blurred3, orig_img_blurred4, orig_img_blurred5; + + if ((enc_cfg.m_blurring_enabled) || (enc_cfg.m_blurring_enabled_p2)) + { + orig_img_blurred2.resize(orig_img.get_width(), orig_img.get_height()); + orig_img_blurred3.resize(orig_img.get_width(), orig_img.get_height()); + orig_img_blurred4.resize(orig_img.get_width(), orig_img.get_height()); + orig_img_blurred5.resize(orig_img.get_width(), orig_img.get_height()); + + image_resample(orig_img, orig_img_blurred2, true, "gaussian", bw0); + image_resample(orig_img, orig_img_blurred3, true, "gaussian", bw1, false, 0, 4, bw1_a); + image_resample(orig_img, orig_img_blurred4, true, "gaussian", bw1_a, false, 0, 4, bw1); + image_resample(orig_img, orig_img_blurred5, true, "gaussian", bw2, false); + } + + if (enc_cfg.m_debug_images) + { + save_png(enc_cfg.m_debug_file_prefix + "dbg_astc_ldr_orig_img.png", orig_img); + + if ((enc_cfg.m_blurring_enabled) || (enc_cfg.m_blurring_enabled_p2)) + { + save_png(enc_cfg.m_debug_file_prefix + "vis_orig_blurred2.png", orig_img_blurred2); + save_png(enc_cfg.m_debug_file_prefix + "vis_orig_blurred3.png", orig_img_blurred3); + save_png(enc_cfg.m_debug_file_prefix + "vis_orig_blurred4.png", orig_img_blurred4); + save_png(enc_cfg.m_debug_file_prefix + "vis_orig_blurred5.png", orig_img_blurred5); + } + } + + if (enc_cfg.m_debug_output) + fmt_debug_printf("Dimensions: {}x{}, Blocks: {}x{}, Total blocks: {}\n", width, height, num_blocks_x, num_blocks_y, total_blocks); + + image orig_img_sobel_x, orig_img_sobel_y; + compute_sobel(orig_img, 
orig_img_sobel_x, &g_sobel_x[0][0]); + compute_sobel(orig_img, orig_img_sobel_y, &g_sobel_y[0][0]); + + if (enc_cfg.m_debug_images) + { + save_png(enc_cfg.m_debug_file_prefix + "vis_orig_sobel_x.png", orig_img_sobel_x); + save_png(enc_cfg.m_debug_file_prefix + "vis_orig_sobel_y.png", orig_img_sobel_y); + } + + image orig_img_sobel_xy(width, height); + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + const color_rgba& sx = orig_img_sobel_x(x, y); + const color_rgba& sy = orig_img_sobel_y(x, y); + + orig_img_sobel_xy(x, y).set( + iabs((int)sx.r - 128) + iabs((int)sy.r - 128), + iabs((int)sx.g - 128) + iabs((int)sy.g - 128), + iabs((int)sx.b - 128) + iabs((int)sy.b - 128), + iabs((int)sx.a - 128) + iabs((int)sy.a - 128)); + } + } + + if (enc_cfg.m_debug_images) + save_png(enc_cfg.m_debug_file_prefix + "vis_orig_sobel_xy.png", orig_img_sobel_xy); + + vector2D& packed_blocks = enc_out.m_packed_phys_blocks; + packed_blocks.resize(num_blocks_x, num_blocks_y); + memset(packed_blocks.get_ptr(), 0, packed_blocks.size_in_bytes()); + + assert(enc_cfg.m_pJob_pool); + job_pool& job_pool = *enc_cfg.m_pJob_pool; + + std::atomic encoder_failed_flag; + encoder_failed_flag.store(false); + + std::mutex global_mutex; + + basisu::vector& encoder_trial_modes = enc_out.m_encoder_trial_modes; + encoder_trial_modes.reserve(4096); + + basist::astc_ldr_t::grouped_trial_modes& grouped_encoder_trial_modes = enc_out.m_grouped_encoder_trial_modes; + basist::astc_ldr_t::create_encoder_trial_modes_table(block_width, block_height, encoder_trial_modes, grouped_encoder_trial_modes, enc_cfg.m_debug_output, false); + + if (enc_cfg.m_debug_output) + { + uint32_t total_actual_modes = encoder_trial_modes.size_u32(); + + if (enc_cfg.m_use_base_ofs) + { + for (uint32_t i = 0; i < encoder_trial_modes.size(); i++) + { + const auto& tm = encoder_trial_modes[i]; + + switch (tm.m_cem) + { + case astc_helpers::CEM_LDR_RGBA_DIRECT: + case astc_helpers::CEM_LDR_RGB_DIRECT: + // 
add base+ofs variant + total_actual_modes++; + break; + default: + break; + } + } // i + } + + fmt_debug_printf("Base encoder trial modes: {}, grand total including base+ofs CEM's: {}\n", encoder_trial_modes.size_u32(), total_actual_modes); + } + + uint_vec used_rgb_direct_count; + used_rgb_direct_count.resize(encoder_trial_modes.size()); + + uint_vec used_base_offset_count; + used_base_offset_count.resize(encoder_trial_modes.size()); + + uint32_t total_void_extent_blocks_skipped = 0; + + uint32_t total_superbuckets_created = 0; + uint32_t total_buckets_created = 0; + uint32_t total_surrogate_encodes = 0; + uint32_t total_full_encodes = 0; + uint32_t total_shortlist_candidates = 0; + uint32_t total_full_encodes_pass1 = 0; + uint32_t total_full_encodes_pass2 = 0; + + uint32_t total_blur_encodes = 0; + uint32_t total_blurred_blocks1 = 0; + uint32_t total_blurred_blocks2 = 0; + uint32_t total_blurred_blocks3 = 0; + uint32_t total_blurred_blocks4 = 0; + + basist::astc_ldr_t::dct2f dct; + dct.init(enc_cfg.m_block_height, enc_cfg.m_block_width); + + image vis_part_usage_img, vis_part_pat_img, vis_strong_edge, vis_dct_low_freq_block, vis_dp_img, vis_base_ofs_img; + if (enc_cfg.m_debug_images) + { + vis_part_usage_img.resize(block_width * num_blocks_x, block_height * num_blocks_y); + vis_part_pat_img.resize(block_width * num_blocks_x, block_height * num_blocks_y); + vis_strong_edge.resize(block_width * num_blocks_x, block_height * num_blocks_y); + vis_dct_low_freq_block.resize(block_width * num_blocks_x, block_height * num_blocks_y); + vis_dp_img.resize(block_width * num_blocks_x, block_height * num_blocks_y); + vis_base_ofs_img.resize(block_width * num_blocks_x, block_height * num_blocks_y); + } + + ldr_astc_lowlevel_block_encoder_pool encoder_pool; + assert(job_pool.get_total_threads()); + encoder_pool.init((uint32_t)job_pool.get_total_threads()); + + basist::astc_ldr_t::grid_weight_dct grid_coder; + grid_coder.init(block_width, block_height); + + struct 
output_block_devel_desc + { + const basist::astc_ldr_t::trial_mode* m_pTrial_modes; + int m_trial_mode_index; // this is the index of the mode it tried to encode, but the actual output/enc block could have used base+ofs + bool m_had_alpha; + + bool m_low_freq_block_flag; + bool m_super_strong_edges; + bool m_very_strong_edges; + bool m_strong_edges; + + void clear() + { + clear_obj(*this); + } + }; + + enc_out.m_image_block_info.resize(0, 0); + enc_out.m_image_block_info.resize(num_blocks_x, num_blocks_y); + +#if 0 + for (uint32_t y = 0; y < num_blocks_y; y++) + { + for (uint32_t x = 0; x < num_blocks_x; x++) + { + auto& out_blocks = enc_out.m_image_block_info(x, y).m_out_blocks; + out_blocks.reserve(16); + out_blocks.resize(0); + } + } // y +#endif + + vector2D superpass2_recompress_block_flags; + + if (enc_cfg.m_second_superpass_refinement) + superpass2_recompress_block_flags.resize(num_blocks_x, num_blocks_y); + + if (enc_cfg.m_third_superpass_try_neighbors) + enc_out.m_image_block_info_superpass2.resize(num_blocks_x, num_blocks_y); + + interval_timer itm; + itm.start(); + + //-------------------------------------------------------------------------------------- + // ASTC compression loop + + vector2D output_block_devel_info(num_blocks_x, num_blocks_y); + + uint32_t total_superpasses = 1; + if (enc_cfg.m_third_superpass_try_neighbors) + total_superpasses = 3; + else if (enc_cfg.m_second_superpass_refinement) + total_superpasses = 2; + + uint32_t total_blocks_to_recompress = 0; + + for (uint32_t superpass_index = 0; superpass_index < total_superpasses; superpass_index++) + { + if (superpass_index == 1) + { + if (!enc_cfg.m_second_superpass_refinement) + continue; + if (!total_blocks_to_recompress) + continue; + } + + if (enc_cfg.m_debug_output) + fmt_debug_printf("ASTC packing superpass: {}\n", 1 + superpass_index); + + uint32_t total_blocks_done = 0; + float last_printed_progress_val = -100.0f; + + for (uint32_t by = 0; by < num_blocks_y; by++) + { + for 
(uint32_t bx = 0; bx < num_blocks_x; bx++) + { + job_pool.add_job([superpass_index, + //width, height, + bx, by, + //num_blocks_x, num_blocks_y, + total_blocks, block_width, block_height, total_block_pixels, &packed_blocks, &global_mutex, + &orig_img, &orig_img_sobel_xy, &orig_img_blurred2, &orig_img_blurred3, &orig_img_blurred4, &orig_img_blurred5, + &enc_cfg, &encoder_failed_flag, pPart_data_p2, pPart_data_p3, + &total_blocks_done, &total_superbuckets_created, &total_buckets_created, &total_surrogate_encodes, &total_full_encodes, &total_shortlist_candidates, + &encoder_trial_modes, + &total_blur_encodes, &total_blurred_blocks1, + &total_full_encodes_pass1, &total_full_encodes_pass2, + &dct, &vis_dct_low_freq_block, + &encoder_pool, &grid_coder, &grouped_encoder_trial_modes, + &enc_out, &output_block_devel_info, &total_void_extent_blocks_skipped, &superpass2_recompress_block_flags, &total_blocks_to_recompress, &last_printed_progress_val] + { + if (encoder_failed_flag) + return; + + //const uint32_t base_x = bx * block_width, base_y = by * block_height; + + color_rgba block_pixels[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + orig_img.extract_block_clamped(block_pixels, bx * block_width, by * block_height, block_width, block_height); + + if (superpass_index == 2) + { + // Superpass 2: Encode to best neighbor configurations + const ldr_astc_block_encode_image_output::block_info& out_block_info = enc_out.m_image_block_info(bx, by); + + ldr_astc_block_encode_image_output::block_info_superpass1& out_block_info_superpass1 = enc_out.m_image_block_info_superpass2(bx, by); + + const astc_ldr::pixel_stats_t& pixel_stats = out_block_info.m_pixel_stats; + + const bool is_purely_solid_block = (pixel_stats.m_min == pixel_stats.m_max); + + // if void extent, just skip + if (is_purely_solid_block) + return; + + //const basisu::vector& out_blocks = out_block_info.m_out_blocks; + + for (uint32_t neighbor_index = 0; neighbor_index < basist::astc_ldr_t::cMaxConfigReuseNeighbors; 
neighbor_index++) + { + const ldr_astc_block_encode_image_output::block_info* pNeighbor_out_block_info = nullptr; + + if (neighbor_index == 0) + { + // Left + if (bx) + pNeighbor_out_block_info = &enc_out.m_image_block_info(bx - 1, by); + } + else if (neighbor_index == 1) + { + // Up + if (by) + pNeighbor_out_block_info = &enc_out.m_image_block_info(bx, by - 1); + } + else + { + assert(neighbor_index == 2); + + // Diagonal + if ((bx) && (by)) + pNeighbor_out_block_info = &enc_out.m_image_block_info(bx - 1, by - 1); + } + + if (!pNeighbor_out_block_info) + continue; + + const encode_block_output& neighbor_output = pNeighbor_out_block_info->m_out_blocks[pNeighbor_out_block_info->m_packed_out_block_index]; + + // Best neighbor was solid, skip it (TODO: reusing it is possible) + if (neighbor_output.m_log_blk.m_solid_color_flag_ldr) + continue; + + const uint32_t neighbor_tm_index = neighbor_output.m_trial_mode_index; + assert(neighbor_tm_index < encoder_trial_modes.size()); + + //const trial_mode& neighbor_tm = encoder_trial_modes[neighbor_tm_index]; // do not use the tm's cem, it may be base+ofs, use the log blk instead + + const astc_helpers::log_astc_block& neighbor_log_blk = neighbor_output.m_log_blk; + assert(!neighbor_log_blk.m_solid_color_flag_ldr); + + const uint32_t neighbor_actual_cem = neighbor_log_blk.m_color_endpoint_modes[0]; + const uint32_t neighbor_partition_id = neighbor_log_blk.m_partition_id; + + // See if we've already encoded this full config + int already_existing_out_block_index = cInvalidIndex; + for (uint32_t i = 0; i < out_block_info.m_out_blocks.size(); i++) + { + if ((out_block_info.m_out_blocks[i].m_trial_mode_index == (int)neighbor_tm_index) && + (out_block_info.m_out_blocks[i].m_log_blk.m_color_endpoint_modes[0] == neighbor_actual_cem) && + (out_block_info.m_out_blocks[i].m_log_blk.m_partition_id == neighbor_partition_id)) + { + already_existing_out_block_index = i; + break; + } + } + + if (already_existing_out_block_index != 
cInvalidIndex) + { + // We already have an output block using this neighbor trial mode, skip + out_block_info_superpass1.m_config_reuse_neighbor_out_block_indices[neighbor_index] = (uint32_t)already_existing_out_block_index; + out_block_info_superpass1.m_config_reuse_new_neighbor_out_block_flags[neighbor_index] = false; + } + else + { + // Re-encode using the neighbor's full config (tm, base+ofs, partition ID) + astc_helpers::log_astc_block new_log_block; + + bool status = false; + + if (neighbor_log_blk.m_num_partitions > 1) + { + const astc_ldr::partitions_data* pPart_data = (neighbor_log_blk.m_num_partitions == 2) ? pPart_data_p2 : pPart_data_p3; + + const uint32_t part_seed_index = neighbor_log_blk.m_partition_id; + const uint32_t part_unique_index = pPart_data->m_part_seed_to_unique_index[part_seed_index]; + + assert(part_unique_index < astc_helpers::NUM_PARTITION_PATTERNS); + const astc_ldr::partition_pattern_vec* pPat = &pPart_data->m_partition_pats[part_unique_index]; + + bool refine_only_flag = false; + + status = encode_trial_subsets( + block_width, block_height, + pixel_stats, + neighbor_log_blk.m_color_endpoint_modes[0], neighbor_log_blk.m_num_partitions, neighbor_log_blk.m_partition_id, pPat, + neighbor_log_blk.m_endpoint_ise_range, neighbor_log_blk.m_weight_ise_range, + neighbor_log_blk.m_grid_width, neighbor_log_blk.m_grid_height, + new_log_block, + enc_cfg.m_cem_enc_params, + refine_only_flag, + enc_cfg.m_gradient_descent_flag, enc_cfg.m_polish_weights_flag, enc_cfg.m_qcd_enabled_flag, + enc_cfg.m_use_blue_contraction); + } + else + { + status = encode_trial( + block_width, block_height, + pixel_stats, + neighbor_log_blk.m_color_endpoint_modes[0], + neighbor_log_blk.m_dual_plane, neighbor_log_blk.m_dual_plane ? 
neighbor_log_blk.m_color_component_selector : -1, + neighbor_log_blk.m_endpoint_ise_range, neighbor_log_blk.m_weight_ise_range, + neighbor_log_blk.m_grid_width, neighbor_log_blk.m_grid_height, + new_log_block, + enc_cfg.m_cem_enc_params, + enc_cfg.m_gradient_descent_flag, enc_cfg.m_polish_weights_flag, enc_cfg.m_qcd_enabled_flag, + enc_cfg.m_use_blue_contraction); + } + + if (!status) + { + fmt_debug_printf("encode_trial/encode_trial_subsets failed in superpass 1!\n"); + encoder_failed_flag.store(true); + return; + } + + out_block_info_superpass1.m_config_reuse_neighbor_out_block_indices[neighbor_index] = out_block_info_superpass1.m_new_out_config_reuse_blocks.size_u32(); + out_block_info_superpass1.m_config_reuse_new_neighbor_out_block_flags[neighbor_index] = true; + + encode_block_output& new_output_blk = *out_block_info_superpass1.m_new_out_config_reuse_blocks.enlarge(1); + + new_output_blk.clear(); + + if (enc_cfg.m_use_dct) + { + const basist::astc_ldr_t::astc_block_grid_data* pGrid_data = basist::astc_ldr_t::find_astc_block_grid_data(block_width, block_height, new_log_block.m_grid_width, new_log_block.m_grid_height); + + const uint32_t num_planes = (new_log_block.m_dual_plane ? 
2 : 1); + + for (uint32_t plane_index = 0; plane_index < num_planes; plane_index++) + { + bitwise_coder c; + basist::astc_ldr_t::dct_syms syms; + code_block_weights(grid_coder, enc_cfg.m_base_q, plane_index, new_log_block, pGrid_data, c, syms); + + new_output_blk.m_packed_dct_plane_data[plane_index] = syms; + + c.flush(); + + basist::bitwise_decoder d; + d.init(c.get_bytes().data(), c.get_bytes().size_u32()); + + // ensure existing weights get blown away + for (uint32_t i = 0; i < (uint32_t)(new_log_block.m_grid_width * new_log_block.m_grid_height); i++) + new_log_block.m_weights[i * num_planes + plane_index] = 0; + + basist::astc_ldr_t::fvec dct_temp; + bool dec_status = grid_coder.decode_block_weights(enc_cfg.m_base_q, plane_index, new_log_block, &d, pGrid_data, nullptr, dct_temp, nullptr); + + assert(dec_status); + if (!dec_status) + { + error_printf("grid_coder.decode_block_weights() failed!\n"); + + encoder_failed_flag.store(true); + return; + } + } + } // if (enc_cfg.m_use_dct) + + new_output_blk.m_trial_mode_index = safe_cast_int16(neighbor_tm_index); + new_output_blk.m_log_blk = new_log_block; + //new_output_blk.m_trial_surrogate.clear(); + + new_output_blk.m_sse = eval_error(block_width, block_height, new_log_block, pixel_stats, enc_cfg.m_cem_enc_params); + + { + std::lock_guard g(global_mutex); + + total_full_encodes_pass2++; + } + } // if (already_existing_out_block_index != cInvalidIndex) + + { + // Re-encode using the neighbor's full config (tm, base+ofs, partition ID) AND its endpoints + astc_helpers::log_astc_block new_log_block(neighbor_log_blk); + + // Start with fresh 0 weights, then polish them. + clear_obj(new_log_block.m_weights); + + //const bool use_blue_contraction = enc_cfg.m_use_blue_contraction; + + bool improved_flag = false; + + const astc_ldr::partition_pattern_vec* pPat = nullptr; + if (neighbor_log_blk.m_num_partitions > 1) + { + const astc_ldr::partitions_data* pPart_data = (neighbor_log_blk.m_num_partitions == 2) ? 
pPart_data_p2 : pPart_data_p3; + + const uint32_t part_seed_index = neighbor_log_blk.m_partition_id; + const uint32_t part_unique_index = pPart_data->m_part_seed_to_unique_index[part_seed_index]; + + assert(part_unique_index < astc_helpers::NUM_PARTITION_PATTERNS); + pPat = &pPart_data->m_partition_pats[part_unique_index]; + } + + bool status = polish_block_weights( + block_width, block_height, + pixel_stats, + new_log_block, + enc_cfg.m_cem_enc_params, pPat, improved_flag, + enc_cfg.m_gradient_descent_flag, enc_cfg.m_polish_weights_flag, enc_cfg.m_qcd_enabled_flag); + + if (!status) + { + fmt_error_printf("polish_block_weights failed in superpass 1!\n"); + encoder_failed_flag.store(true); + return; + } + + encode_block_output& new_output_blk = *out_block_info_superpass1.m_new_out_config_endpoint_reuse_blocks.enlarge(1); + + new_output_blk.clear(); + + if (enc_cfg.m_use_dct) + { + const basist::astc_ldr_t::astc_block_grid_data* pGrid_data = basist::astc_ldr_t::find_astc_block_grid_data(block_width, block_height, new_log_block.m_grid_width, new_log_block.m_grid_height); + + const uint32_t num_planes = (new_log_block.m_dual_plane ? 
2 : 1); + + for (uint32_t plane_index = 0; plane_index < num_planes; plane_index++) + { + bitwise_coder c; + basist::astc_ldr_t::dct_syms syms; + code_block_weights(grid_coder, enc_cfg.m_base_q, plane_index, new_log_block, pGrid_data, c, syms); + + new_output_blk.m_packed_dct_plane_data[plane_index] = syms; + + c.flush(); + + basist::bitwise_decoder d; + d.init(c.get_bytes().data(), c.get_bytes().size_u32()); + + // ensure existing weights get blown away + for (uint32_t i = 0; i < (uint32_t)(new_log_block.m_grid_width * new_log_block.m_grid_height); i++) + new_log_block.m_weights[i * num_planes + plane_index] = 0; + + basist::astc_ldr_t::fvec dct_temp; + bool dec_status = grid_coder.decode_block_weights(enc_cfg.m_base_q, plane_index, new_log_block, &d, pGrid_data, nullptr, dct_temp, nullptr); + + assert(dec_status); + if (!dec_status) + { + error_printf("grid_coder.decode_block_weights() failed!\n"); + + encoder_failed_flag.store(true); + return; + } + } + } // if (enc_cfg.m_use_dct) + + new_output_blk.m_trial_mode_index = safe_cast_int16(neighbor_tm_index); + new_output_blk.m_log_blk = new_log_block; + //new_output_blk.m_trial_surrogate.clear(); + + new_output_blk.m_sse = eval_error(block_width, block_height, new_log_block, pixel_stats, enc_cfg.m_cem_enc_params); + + { + std::lock_guard g(global_mutex); + + total_full_encodes_pass2++; + } + } + + } // neighbor_index + } + else + { + if (superpass_index == 1) + { + if (!superpass2_recompress_block_flags(bx, by)) + return; + } + + // Superpass 0/2: core ASTC encoding + basisu::vector& out_blocks = enc_out.m_image_block_info(bx, by).m_out_blocks; + out_blocks.resize(0); + + astc_ldr::pixel_stats_t& pixel_stats = enc_out.m_image_block_info(bx, by).m_pixel_stats; + + if (superpass_index == 0) + pixel_stats.init(total_block_pixels, block_pixels); + + const bool is_purely_solid_block = (pixel_stats.m_min == pixel_stats.m_max); + + // early out on totally solid blocks + if (is_purely_solid_block) + { + 
encode_block_output* pOut = out_blocks.enlarge(1); + pOut->clear(); + + astc_helpers::log_astc_block& log_blk = pOut->m_log_blk; + + log_blk.clear(); + log_blk.m_solid_color_flag_ldr = true; + + for (uint32_t c = 0; c < 4; c++) + log_blk.m_solid_color[c] = pixel_stats.m_min[c]; + + // Expand each component to 16-bits + for (uint32_t c = 0; c < 4; c++) + log_blk.m_solid_color[c] |= (uint16_t)(log_blk.m_solid_color[c]) << 8u; + + pOut->m_sse = eval_error(block_width, block_height, log_blk, pixel_stats, enc_cfg.m_cem_enc_params); + + ldr_astc_block_encode_image_output::block_info& block_info_out = enc_out.m_image_block_info(bx, by); + + block_info_out.m_low_freq_block_flag = true; + block_info_out.m_super_strong_edges = false; + block_info_out.m_very_strong_edges = false; + block_info_out.m_strong_edges = false; + block_info_out.m_packed_out_block_index = 0; + + // Create packed ASTC block + astc_helpers::astc_block& best_phys_block = packed_blocks(bx, by); + bool pack_success = astc_helpers::pack_astc_block(best_phys_block, log_blk); + if (!pack_success) + { + encoder_failed_flag.store(true); + return; + } + + output_block_devel_desc& out_devel_desc = output_block_devel_info(bx, by); + out_devel_desc.m_low_freq_block_flag = true; + out_devel_desc.m_super_strong_edges = false; + out_devel_desc.m_very_strong_edges = false; + out_devel_desc.m_strong_edges = false; + + { + std::lock_guard g(global_mutex); + + total_void_extent_blocks_skipped++; + + total_blocks_done++; + } + + return; + } + + float max_std_dev = 0.0f; + for (uint32_t i = 0; i < 4; i++) + max_std_dev = maximum(max_std_dev, pixel_stats.m_rgba_stats[i].m_std_dev); + + bool is_lum_only = true; + + for (uint32_t y = 0; y < block_height; y++) + { + for (uint32_t x = 0; x < block_width; x++) + { + const color_rgba& c = pixel_stats.m_pixels[x + y * block_width]; + bool is_lum_texel = (c.r == c.g) && (c.r == c.b); + if (!is_lum_texel) + { + is_lum_only = false; + break; + } + } + if (is_lum_only) + break; + } + + 
basisu::vector block_dct_energy(total_block_pixels); + + bool filter_horizontally_flag = false; + bool low_freq_block_flag = 0; + + { + basisu::vector block_floats(total_block_pixels); + basisu::vector block_dct(total_block_pixels); + basist::astc_ldr_t::fvec work; + + for (uint32_t c = 0; c < 4; c++) + { + for (uint32_t i = 0; i < total_block_pixels; i++) + block_floats[i] = pixel_stats.m_pixels_f[i][c]; + + dct.forward(block_floats.data(), block_dct.data(), work); + + for (uint32_t y = 0; y < block_height; y++) + for (uint32_t x = 0; x < block_width; x++) + block_dct_energy[x + y * block_width] += (float)enc_cfg.m_cem_enc_params.m_comp_weights[c] * squaref(block_dct[x + y * block_width]); + + } // c + + // Wipe DC + block_dct_energy[0] = 0.0f; + + float tot_energy = compute_preserved_dct_energy(block_width, block_height, block_dct_energy.get_ptr(), block_width, block_height); + + float h_energy_lost = compute_lost_dct_energy(block_width, block_height, block_dct_energy.get_ptr(), block_width / 2, block_height); + float v_energy_lost = compute_lost_dct_energy(block_width, block_height, block_dct_energy.get_ptr(), block_width, block_height / 2); + + filter_horizontally_flag = h_energy_lost < v_energy_lost; + + float hv2_lost_energy_fract = compute_lost_dct_energy(block_width, block_height, block_dct_energy.get_ptr(), 2, 2); + if (tot_energy) + hv2_lost_energy_fract /= tot_energy; + + if ((hv2_lost_energy_fract < .03f) || (max_std_dev < (1.0f / 255.0f))) + low_freq_block_flag = true; + } + + if (enc_cfg.m_debug_images) + vis_dct_low_freq_block.fill_box(bx * block_width, by * block_height, block_width, block_height, low_freq_block_flag ? color_rgba(255, 0, 0, 255) : g_black_color); + + bool active_chan_flags[4] = { }; + + // The number of channels with non-zero spans + uint32_t total_active_chans = 0; + // The indices of the channels with non-zero spans. 
+ //uint32_t active_chan_list[4] = { 0 }; + + for (uint32_t i = 0; i < 4; i++) + { + if (pixel_stats.m_rgba_stats[i].m_range > 0.0f) + { + assert(pixel_stats.m_max[i] != pixel_stats.m_min[i]); + + active_chan_flags[i] = true; + + //active_chan_list[total_active_chans] = i; + total_active_chans++; + } + else + { + assert(pixel_stats.m_max[i] == pixel_stats.m_min[i]); + } + } + + basisu::comparative_stats cross_chan_stats[TOTAL_RGBA_CHAN_PAIRS]; + + // def=max correlation for each channel pair (or 1 if one of the channels is inactive) + float chan_pair_correlations[6] = { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f }; + // 0=0, 1 + // 1=0, 2 + // 2=1, 2 + // 3=0, 3 + // 4=1, 3 + // 5=2, 3 + + float min_corr = 1.0f, max_corr = 0.0f; + + for (uint32_t pair_index = 0; pair_index < TOTAL_RGBA_CHAN_PAIRS; pair_index++) + { + const uint32_t chanA = g_rgba_chan_pairs[pair_index][0]; + const uint32_t chanB = g_rgba_chan_pairs[pair_index][1]; + + // If both channels were active, we've got usable correlation statistics. + if (active_chan_flags[chanA] && active_chan_flags[chanB]) + { + // TODO: This can be directly derived from the 3D/4D covariance matrix entries. 
+ cross_chan_stats[pair_index].calc_pearson(total_block_pixels, + &pixel_stats.m_pixels_f[0][chanA], + &pixel_stats.m_pixels_f[0][chanB], + 4, 4, + &pixel_stats.m_rgba_stats[chanA], + &pixel_stats.m_rgba_stats[chanB]); + + chan_pair_correlations[pair_index] = fabsf(cross_chan_stats[pair_index].m_pearson); + + const float c = fabsf((float)cross_chan_stats[pair_index].m_pearson); + min_corr = minimum(min_corr, c); + max_corr = maximum(max_corr, c); + } + } + + // min_cor will be 1.0f if all channels inactive (solid) + + // Pixel the trial modes the encoder will use: RGB or RGBA (we don't currently support trying both) + + const bool used_alpha_encoder_modes = pixel_stats.m_has_alpha; + + float sobel_energy = 0.0f; + for (uint32_t y = 0; y < block_height; y++) + { + for (uint32_t x = 0; x < block_width; x++) + { + const color_rgba& s = orig_img_sobel_xy.get_clamped(bx * block_width + x, by * block_height + y); + sobel_energy += s[0] * s[0] + s[1] * s[1] + s[2] * s[2] + s[3] * s[3]; + } // x + } // y + + sobel_energy /= (float)total_block_pixels; + + // Configure low-level block encoder. 
+ ldr_astc_lowlevel_block_encoder_params enc_blk_params; + + enc_blk_params.m_block_width = block_width; + enc_blk_params.m_block_height = block_height; + enc_blk_params.m_total_block_pixels = total_block_pixels; + enc_blk_params.m_bx = bx; + enc_blk_params.m_by = by; + + enc_blk_params.m_pOrig_img_sobel_xy_t = &orig_img_sobel_xy; + + enc_blk_params.m_num_trial_modes = encoder_trial_modes.size_u32(); + enc_blk_params.m_pTrial_modes = encoder_trial_modes.get_ptr(); + enc_blk_params.m_pGrouped_trial_modes = &grouped_encoder_trial_modes; + + enc_blk_params.m_pPart_data_p2 = pPart_data_p2; + enc_blk_params.m_pPart_data_p3 = pPart_data_p3; + enc_blk_params.m_pEnc_params = &enc_cfg.m_cem_enc_params; + + float ang_dot = saturate(pixel_stats.m_zero_rel_axis3.dot3(pixel_stats.m_mean_rel_axis3)); + const float pca_axis_angles = acosf(ang_dot) * (180.0f / (float)cPiD); + + enc_blk_params.m_use_alpha_or_opaque_modes = used_alpha_encoder_modes; + enc_blk_params.m_use_lum_direct_modes = is_lum_only; + + const bool filter_by_pca_angles_flag = (superpass_index == 1) ? enc_cfg.m_filter_by_pca_angles_flag_p2 : enc_cfg.m_filter_by_pca_angles_flag; + if (!filter_by_pca_angles_flag) + { + enc_blk_params.m_use_direct_modes = true; + enc_blk_params.m_use_base_scale_modes = true; + } + else + { + // TODO: Make selective based off edge blocks? 
+ enc_blk_params.m_use_direct_modes = (!total_active_chans) || (pca_axis_angles > enc_cfg.m_use_direct_angle_thresh); + enc_blk_params.m_use_base_scale_modes = (pca_axis_angles <= enc_cfg.m_use_base_scale_angle_thresh); + } + + enc_blk_params.m_grid_hv_filtering = enc_cfg.m_grid_hv_filtering; + enc_blk_params.m_filter_horizontally_flag = filter_horizontally_flag; + + enc_blk_params.m_use_small_grids_only = low_freq_block_flag && enc_cfg.m_low_freq_block_filtering; + + enc_blk_params.m_subsets_enabled = enc_cfg.m_subsets_enabled && (!low_freq_block_flag || !enc_cfg.m_subsets_edge_filtering); + + enc_blk_params.m_subsets_edge_filtering = enc_cfg.m_subsets_edge_filtering; + + enc_blk_params.m_use_blue_contraction = enc_cfg.m_use_blue_contraction; + enc_blk_params.m_final_encode_try_base_ofs = enc_cfg.m_use_base_ofs; + + memcpy(enc_blk_params.m_superbucket_max_to_retain, enc_cfg.m_superbucket_max_to_retain, sizeof(enc_cfg.m_superbucket_max_to_retain)); + + memcpy(enc_blk_params.m_final_shortlist_fraction, enc_cfg.m_final_shortlist_fraction, sizeof(enc_blk_params.m_final_shortlist_fraction)); + memcpy(enc_blk_params.m_final_shortlist_min_size, enc_cfg.m_final_shortlist_min_size, sizeof(enc_cfg.m_final_shortlist_min_size)); + memcpy(enc_blk_params.m_final_shortlist_max_size, enc_cfg.m_final_shortlist_max_size, sizeof(enc_blk_params.m_final_shortlist_max_size)); + + enc_blk_params.m_part2_fraction_to_keep = enc_cfg.m_part2_fraction_to_keep; + enc_blk_params.m_part3_fraction_to_keep = enc_cfg.m_part3_fraction_to_keep; + enc_blk_params.m_base_parts2 = enc_cfg.m_base_parts2; + enc_blk_params.m_base_parts3 = enc_cfg.m_base_parts3; + enc_blk_params.m_gradient_descent_flag = enc_cfg.m_gradient_descent_flag; + enc_blk_params.m_polish_weights_flag = enc_cfg.m_polish_weights_flag; + enc_blk_params.m_qcd_enabled_flag = enc_cfg.m_qcd_enabled_flag; + enc_blk_params.m_bucket_pruning_passes = enc_cfg.m_bucket_pruning_passes; + + enc_blk_params.m_alpha_cems = used_alpha_encoder_modes; + 
+ enc_blk_params.m_early_stop_wpsnr = enc_cfg.m_early_stop_wpsnr; + enc_blk_params.m_early_stop2_wpsnr = enc_cfg.m_early_stop2_wpsnr; + + enc_blk_params.m_final_encode_always_try_rgb_direct = enc_cfg.m_final_encode_always_try_rgb_direct; + + enc_blk_params.m_pDCT2F = &dct; + + // Determine DP usage + if (enc_cfg.m_force_all_dual_plane_chan_evals) + { + for (uint32_t i = 0; i < 4; i++) + enc_blk_params.m_dp_active_chans[i] = active_chan_flags[i]; + } + else + { + for (uint32_t i = 0; i < 3; i++) + enc_blk_params.m_dp_active_chans[i] = false; + + // Being very conservative with alpha here - always let the analytical evaluator consider it. + enc_blk_params.m_dp_active_chans[3] = pixel_stats.m_has_alpha; + + if (!enc_cfg.m_disable_rgb_dual_plane) + { + const float rg_corr = chan_pair_correlations[0]; + const float rb_corr = chan_pair_correlations[1]; + const float gb_corr = chan_pair_correlations[2]; + + int desired_dp_chan_rgb = -1; + + float min_p = minimum(rg_corr, rb_corr, gb_corr); + + if (min_p < enc_cfg.m_strong_dp_decorr_thresh_rgb) + { + const bool has_r = active_chan_flags[0], has_g = active_chan_flags[1]; + //const bool has_b = active_chan_flags[2]; + + uint32_t total_active_chans_rgb = 0; + for (uint32_t i = 0; i < 3; i++) + total_active_chans_rgb += active_chan_flags[i]; + + if (total_active_chans_rgb == 2) + { + if (!has_r) + desired_dp_chan_rgb = 1; + else if (!has_g) + desired_dp_chan_rgb = 0; + else + desired_dp_chan_rgb = 0; + } + else if (total_active_chans_rgb == 3) + { + // see if rg/rb is weakly correlated vs. gb + if ((rg_corr < gb_corr) && (rb_corr < gb_corr)) + desired_dp_chan_rgb = 0; + // see if gr/gb is weakly correlated vs. 
rb + else if ((rg_corr < rb_corr) && (gb_corr < rb_corr)) + desired_dp_chan_rgb = 1; + // assume b is weakest + else + desired_dp_chan_rgb = 2; + } + } + + if (desired_dp_chan_rgb != -1) + { + assert(active_chan_flags[desired_dp_chan_rgb]); + enc_blk_params.m_dp_active_chans[desired_dp_chan_rgb] = true; + } + } + } + + if (!enc_blk_params.m_dp_active_chans[0] && !enc_blk_params.m_dp_active_chans[1] && !enc_blk_params.m_dp_active_chans[2] && !enc_blk_params.m_dp_active_chans[3]) + { + enc_blk_params.m_use_dual_planes = false; + } + + astc_ldr::cem_encode_params temp_cem_enc_params; + if (superpass_index == 1) + { + enc_blk_params.m_base_parts2 = enc_cfg.m_base_parts2_p2; + enc_blk_params.m_base_parts3 = enc_cfg.m_base_parts3_p2; + enc_blk_params.m_part2_fraction_to_keep = 1; + enc_blk_params.m_part3_fraction_to_keep = 1; + + memcpy(enc_blk_params.m_superbucket_max_to_retain, enc_cfg.m_superbucket_max_to_retain_p2, sizeof(enc_cfg.m_superbucket_max_to_retain_p2)); + memcpy(enc_blk_params.m_final_shortlist_max_size, enc_cfg.m_final_shortlist_max_size_p2, sizeof(enc_cfg.m_final_shortlist_max_size_p2)); + + if (enc_cfg.m_second_pass_force_subsets_enabled) + enc_blk_params.m_subsets_enabled = true; + enc_blk_params.m_subsets_edge_filtering = false; + + if (enc_cfg.m_force_all_dp_chans_p2) + { + enc_blk_params.m_dp_active_chans[0] = active_chan_flags[0]; + enc_blk_params.m_dp_active_chans[1] = active_chan_flags[1]; + enc_blk_params.m_dp_active_chans[2] = active_chan_flags[2]; + enc_blk_params.m_dp_active_chans[3] = active_chan_flags[3]; + enc_blk_params.m_use_dual_planes = true; + + if (!enc_blk_params.m_dp_active_chans[0] && !enc_blk_params.m_dp_active_chans[1] && !enc_blk_params.m_dp_active_chans[2] && !enc_blk_params.m_dp_active_chans[3]) + { + enc_blk_params.m_use_dual_planes = false; + } + } + + enc_blk_params.m_gradient_descent_flag = true; + enc_blk_params.m_polish_weights_flag = true; + + enc_blk_params.m_use_direct_modes = true; + 
enc_blk_params.m_use_base_scale_modes = true; + + enc_blk_params.m_early_stop_wpsnr = enc_cfg.m_early_stop_wpsnr + 2.0f; + enc_blk_params.m_early_stop2_wpsnr = enc_cfg.m_early_stop2_wpsnr + 2.0f; + + if (enc_cfg.m_second_pass_total_weight_refine_passes) + { + temp_cem_enc_params = enc_cfg.m_cem_enc_params; + enc_blk_params.m_pEnc_params = &temp_cem_enc_params; + + temp_cem_enc_params.m_total_weight_refine_passes = enc_cfg.m_second_pass_total_weight_refine_passes; + temp_cem_enc_params.m_worst_weight_nudging_flag = true; + temp_cem_enc_params.m_endpoint_refinement_flag = true; + } + } + + scoped_ldr_astc_lowlevel_block_encoder scoped_block_encoder(encoder_pool); + if (scoped_block_encoder.get_ptr() == nullptr) + { + error_printf("Failed allocating thread local encode block temps\n"); + encoder_failed_flag.store(true); + return; + } + + // solid color + { + encode_block_output* pOut = out_blocks.enlarge(1); + pOut->clear(); + + astc_helpers::log_astc_block& log_blk = pOut->m_log_blk; + + log_blk.clear(); + log_blk.m_solid_color_flag_ldr = true; + + for (uint32_t c = 0; c < 4; c++) + log_blk.m_solid_color[c] = (uint16_t)clamp((int)std::round(pixel_stats.m_mean_f[c] * 255.0f), 0, 255); + + // Expand each component to 16-bits + for (uint32_t c = 0; c < 4; c++) + log_blk.m_solid_color[c] |= (uint16_t)(log_blk.m_solid_color[c]) << 8u; + + pOut->m_sse = eval_error(block_width, block_height, log_blk, pixel_stats, enc_cfg.m_cem_enc_params); + } + + encode_block_stats enc_block_stats; + + bool enc_status = scoped_block_encoder.get_ptr()->full_encode(enc_blk_params, pixel_stats, out_blocks, 0, enc_block_stats); + if (!enc_status) + { + encoder_failed_flag.store(true); + return; + } + +#if 1 + // --------------------- BLOCK BLURRING + // TODO - very slow, needs more configuration and tuning, experimental + const float BLUR_STD_DEV_THRESH = (15.0f / 255.0f); + const float BLUR_SOBEL_ENERGY_THRESH = 15000.0f; + + const bool use_blurs = (enc_cfg.m_blurring_enabled && 
(!selective_blurring || ((max_std_dev > BLUR_STD_DEV_THRESH) && (sobel_energy > BLUR_SOBEL_ENERGY_THRESH)))) || + (enc_cfg.m_blurring_enabled_p2 && (superpass_index == 1)); + + if (use_blurs) + { + { + assert(orig_img_blurred2.get_width()); + + color_rgba block_pixels_blurred2[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + orig_img_blurred2.extract_block_clamped(block_pixels_blurred2, bx * block_width, by * block_height, block_width, block_height); + + astc_ldr::pixel_stats_t pixel_stats_blurred2; + pixel_stats_blurred2.init(total_block_pixels, block_pixels_blurred2); + + enc_status = scoped_block_encoder.get_ptr()->full_encode(enc_blk_params, pixel_stats_blurred2, out_blocks, 1, enc_block_stats); + if (!enc_status) + { + encoder_failed_flag.store(true); + return; + } + } + + { + assert(orig_img_blurred3.get_width()); + + color_rgba block_pixels_blurred3[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + orig_img_blurred3.extract_block_clamped(block_pixels_blurred3, bx * block_width, by * block_height, block_width, block_height); + + astc_ldr::pixel_stats_t pixel_stats_blurred3; + pixel_stats_blurred3.init(total_block_pixels, block_pixels_blurred3); + + enc_status = scoped_block_encoder.get_ptr()->full_encode(enc_blk_params, pixel_stats_blurred3, out_blocks, 2, enc_block_stats); + if (!enc_status) + { + encoder_failed_flag.store(true); + return; + } + } + + { + assert(orig_img_blurred4.get_width()); + + color_rgba block_pixels_blurred4[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + orig_img_blurred4.extract_block_clamped(block_pixels_blurred4, bx * block_width, by * block_height, block_width, block_height); + + astc_ldr::pixel_stats_t pixel_stats_blurred4; + pixel_stats_blurred4.init(total_block_pixels, block_pixels_blurred4); + + enc_status = scoped_block_encoder.get_ptr()->full_encode(enc_blk_params, pixel_stats_blurred4, out_blocks, 3, enc_block_stats); + if (!enc_status) + { + encoder_failed_flag.store(true); + return; + } + } + + { + assert(orig_img_blurred5.get_width()); + + 
color_rgba block_pixels_blurred5[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + orig_img_blurred5.extract_block_clamped(block_pixels_blurred5, bx * block_width, by * block_height, block_width, block_height); + + astc_ldr::pixel_stats_t pixel_stats_blurred5; + pixel_stats_blurred5.init(total_block_pixels, block_pixels_blurred5); + + enc_status = scoped_block_encoder.get_ptr()->full_encode(enc_blk_params, pixel_stats_blurred5, out_blocks, 4, enc_block_stats); + if (!enc_status) + { + encoder_failed_flag.store(true); + return; + } + } + } +#endif + + // --------------------- WEIGHT GRID DCT CODING + if (enc_cfg.m_use_dct) + { + // apply DCT to weights + for (uint32_t out_block_iter = 0; out_block_iter < out_blocks.size_u32(); out_block_iter++) + { + if (out_blocks[out_block_iter].m_trial_mode_index < 0) + continue; + + astc_helpers::log_astc_block& log_astc_blk = out_blocks[out_block_iter].m_log_blk; + + const basist::astc_ldr_t::astc_block_grid_data* pGrid_data = basist::astc_ldr_t::find_astc_block_grid_data(block_width, block_height, log_astc_blk.m_grid_width, log_astc_blk.m_grid_height); + + const uint32_t num_planes = (log_astc_blk.m_dual_plane ? 
2 : 1); + for (uint32_t plane_index = 0; plane_index < num_planes; plane_index++) + { + bitwise_coder c; + basist::astc_ldr_t::dct_syms syms; + code_block_weights(grid_coder, enc_cfg.m_base_q, plane_index, log_astc_blk, pGrid_data, c, syms); + + out_blocks[out_block_iter].m_packed_dct_plane_data[plane_index] = syms; + + c.flush(); + + basist::bitwise_decoder d; + d.init(c.get_bytes().data(), c.get_bytes().size_u32()); + + // ensure existing weights get blown away + for (uint32_t i = 0; i < (uint32_t)(log_astc_blk.m_grid_width * log_astc_blk.m_grid_height); i++) + log_astc_blk.m_weights[i * num_planes + plane_index] = 0; + + basist::astc_ldr_t::fvec dct_temp; + bool status = grid_coder.decode_block_weights(enc_cfg.m_base_q, plane_index, log_astc_blk, &d, pGrid_data, nullptr, dct_temp, nullptr); + + assert(status); + if (!status) + { + error_printf("grid_coder.decode_block_weights() failed!\n"); + + encoder_failed_flag.store(true); + return; + } + +#if 0 + { + astc_helpers::log_astc_block alt_log_astc_blk(log_astc_blk); + + for (uint32_t i = 0; i < (uint32_t)(log_astc_blk.m_grid_width * log_astc_blk.m_grid_height); i++) + alt_log_astc_blk.m_weights[i * num_planes + plane_index] = 0; + + status = grid_coder.decode_block_weights(q, plane_index, alt_log_astc_blk, nullptr, pGrid_data, &out_block_dct_stats[out_block_iter], &syms); + assert(status); + + for (uint32_t i = 0; i < (uint32_t)(log_astc_blk.m_grid_width * log_astc_blk.m_grid_height); i++) + { + assert(log_astc_blk.m_weights[i * num_planes + plane_index] == alt_log_astc_blk.m_weights[i * num_planes + plane_index]); + } + + } +#endif + // TODO: in theory, endpoints can be refined if they don't change the DCT span. 
+ } + + out_blocks[out_block_iter].m_sse = eval_error(block_width, block_height, log_astc_blk, pixel_stats, enc_cfg.m_cem_enc_params); + + } // for + + } // use_dct + + // Find best output block + uint64_t best_out_blocks_err = UINT64_MAX; + uint32_t best_out_blocks_index = 0; + astc_helpers::log_astc_block best_out_blocks_log_astc_blk; + + for (uint32_t out_block_iter = 0; out_block_iter < out_blocks.size_u32(); out_block_iter++) + { + const astc_helpers::log_astc_block& log_astc_blk = out_blocks[out_block_iter].m_log_blk; + + color_rgba dec_pixels[astc_helpers::MAX_BLOCK_DIM * astc_helpers::MAX_BLOCK_DIM]; + bool dec_status = astc_helpers::decode_block(log_astc_blk, dec_pixels, block_width, block_height, enc_cfg.m_cem_enc_params.m_decode_mode_srgb ? astc_helpers::cDecodeModeSRGB8 : astc_helpers::cDecodeModeLDR8); + + assert(dec_status); + if (!dec_status) + { + encoder_failed_flag.store(true); + return; + } + + uint64_t total_err = 0; + for (uint32_t i = 0; i < total_block_pixels; i++) + total_err += weighted_color_error(block_pixels[i], dec_pixels[i], enc_cfg.m_cem_enc_params); + + // if not blurred + if (out_blocks[out_block_iter].m_blur_id == 0) + { + if (out_blocks[out_block_iter].m_sse != total_err) + { + assert(0); + fmt_error_printf("output block SSE invalid\n"); + encoder_failed_flag.store(true); + return; + } + } + + // Replace m_sse with the actual WSSE vs. 
the original source block (in case it was blurred) + out_blocks[out_block_iter].m_sse = total_err; + + if (total_err < best_out_blocks_err) + { + best_out_blocks_err = total_err; + best_out_blocks_log_astc_blk = log_astc_blk; + best_out_blocks_index = out_block_iter; + } + } // out_block_iter + +#if 0 + // TODO: Save memory, only minimally tested + if (enc_cfg.m_save_single_result) + { + basisu::vector new_out_blocks(1); + new_out_blocks[0] = out_blocks[best_out_blocks_index]; + + std::swap(out_blocks, new_out_blocks); + + best_out_blocks_index = 0; + } +#endif + + ldr_astc_block_encode_image_output::block_info& block_info_out = enc_out.m_image_block_info(bx, by); + + block_info_out.m_low_freq_block_flag = low_freq_block_flag; + block_info_out.m_super_strong_edges = scoped_block_encoder.get_ptr()->m_super_strong_edges; + block_info_out.m_very_strong_edges = scoped_block_encoder.get_ptr()->m_very_strong_edges; + block_info_out.m_strong_edges = scoped_block_encoder.get_ptr()->m_strong_edges; + block_info_out.m_packed_out_block_index = best_out_blocks_index; + + // Create packed ASTC block + astc_helpers::astc_block& best_phys_block = packed_blocks(bx, by); + bool pack_success = astc_helpers::pack_astc_block(best_phys_block, best_out_blocks_log_astc_blk); + if (!pack_success) + { + encoder_failed_flag.store(true); + return; + } + + output_block_devel_desc& out_devel_desc = output_block_devel_info(bx, by); + out_devel_desc.m_low_freq_block_flag = low_freq_block_flag; + out_devel_desc.m_super_strong_edges = scoped_block_encoder.get_ptr()->m_super_strong_edges; + out_devel_desc.m_very_strong_edges = scoped_block_encoder.get_ptr()->m_very_strong_edges; + out_devel_desc.m_strong_edges = scoped_block_encoder.get_ptr()->m_strong_edges; + + // Critical Section + { + std::lock_guard g(global_mutex); + + if (use_blurs) + total_blur_encodes++; + + if (out_blocks[best_out_blocks_index].m_blur_id) + total_blurred_blocks1++; + + if (superpass_index == 0) + { + // TODO: Add 2nd pass 
statistics + total_superbuckets_created += enc_block_stats.m_total_superbuckets_created; + total_buckets_created += enc_block_stats.m_total_buckets_created; + total_surrogate_encodes += enc_block_stats.m_total_surrogate_encodes; + total_full_encodes += enc_block_stats.m_total_full_encodes; + total_shortlist_candidates += enc_block_stats.m_total_shortlist_candidates; + } + else if (superpass_index == 1) + { + total_full_encodes_pass1 += enc_block_stats.m_total_full_encodes; + } + + total_blocks_done++; + if (enc_cfg.m_debug_output) + { + if (superpass_index == 1) + { + if ((total_blocks_done & 63) == 63) + { + float new_val = ((float)total_blocks_done * 100.0f) / (float)total_blocks_to_recompress; + if ((new_val - last_printed_progress_val) >= 5.0f) + { + last_printed_progress_val = new_val; + fmt_printf("{3.2}%\n", new_val); + } + } + } + else if ((total_blocks_done & 255) == 255) + { + float new_val = ((float)total_blocks_done * 100.0f) / (float)total_blocks; + if ((new_val - last_printed_progress_val) >= 5.0f) + { + last_printed_progress_val = new_val; + fmt_printf("{3.2}%\n", new_val); + } + } + } + + } // lock_guard (global_mutex) + + } // if (superpass_index == ...) 
+ + }); + + if (encoder_failed_flag) + break; + + } // bx + + if (encoder_failed_flag) + break; + + } // by + + if (encoder_failed_flag) + { + fmt_error_printf("Main compressor block loop failed!\n"); + return false; + } + + job_pool.wait_for_all(); + + if (encoder_failed_flag) + { + fmt_error_printf("Main compressor block loop failed!\n"); + return false; + } + + if ((superpass_index == 0) && (enc_cfg.m_second_superpass_refinement) && (enc_cfg.m_second_superpass_fract_to_recompress > 0.0f)) + { + uint_vec block_wsse_indices(total_blocks); + + float_vec block_wsses(total_blocks); + for (uint32_t by = 0; by < num_blocks_y; by++) + { + for (uint32_t bx = 0; bx < num_blocks_x; bx++) + { + ldr_astc_block_encode_image_output::block_info& out_block_info = enc_out.m_image_block_info(bx, by); + + float wsse = (float)out_block_info.m_out_blocks[out_block_info.m_packed_out_block_index].m_sse; + + block_wsses[bx + by * num_blocks_x] = wsse; + } // bx + } // by + + indirect_sort(total_blocks, block_wsse_indices.data(), block_wsses.data()); + + if (block_wsses[block_wsse_indices[total_blocks - 1]] > 0.0f) + { + total_blocks_to_recompress = clamp((uint32_t)std::round((float)total_blocks * enc_cfg.m_second_superpass_fract_to_recompress), 0, total_blocks); + + image vis_recomp_img; + if (enc_cfg.m_debug_images) + vis_recomp_img.resize(width, height); + + for (uint32_t i = 0; i < total_blocks_to_recompress; i++) + { + const uint32_t block_index = block_wsse_indices[total_blocks - 1 - i]; + + const uint32_t block_x = block_index % num_blocks_x; + const uint32_t block_y = block_index / num_blocks_x; + + superpass2_recompress_block_flags(block_x, block_y) = true; + + if (enc_cfg.m_debug_images) + vis_recomp_img.fill_box(block_x * block_width, block_y * block_height, block_width, block_height, color_rgba(255, 255, 255, 255)); + } + + if (enc_cfg.m_debug_images) + save_png(enc_cfg.m_debug_file_prefix + "vis_recomp_img.png", vis_recomp_img); + } + } + + } // superpass_index + + if 
(enc_cfg.m_third_superpass_try_neighbors) + { + uint32_t total_superpass1_improved_blocks1 = 0; + uint32_t total_superpass1_improved_blocks2 = 0; + + // Merge pass 2's output into pass 0's/1's output, which can be done safely now. + for (uint32_t by = 0; by < num_blocks_y; by++) + { + for (uint32_t bx = 0; bx < num_blocks_x; bx++) + { + ldr_astc_block_encode_image_output::block_info& out_block_info = enc_out.m_image_block_info(bx, by); + + const ldr_astc_block_encode_image_output::block_info_superpass1& out_block_info_superpass1 = enc_out.m_image_block_info_superpass2(bx, by); + + for (uint32_t neighbor_index = 0; neighbor_index < basist::astc_ldr_t::cMaxConfigReuseNeighbors; neighbor_index++) + { + const int new_neighbor_index = out_block_info_superpass1.m_config_reuse_neighbor_out_block_indices[neighbor_index]; + + if (new_neighbor_index == cInvalidIndex) + { + // Can't reuse neighbor's best output block + continue; + } + + if (!out_block_info_superpass1.m_config_reuse_new_neighbor_out_block_flags[neighbor_index]) + { + // Reuses an existing, already encoded output block which matches the neighbor + assert((size_t)new_neighbor_index < out_block_info.m_out_blocks.size()); + continue; + } + + const uint32_t new_out_block_index = out_block_info.m_out_blocks.size_u32(); + + const encode_block_output& new_output_blk = out_block_info_superpass1.m_new_out_config_reuse_blocks[new_neighbor_index]; + + out_block_info.m_out_blocks.push_back(new_output_blk); + +#define BU_CHECK_NEIGHBOR_BEST (1) + +#if BU_CHECK_NEIGHBOR_BEST + // See if the solution has improved + if (new_output_blk.m_sse < out_block_info.m_out_blocks[out_block_info.m_packed_out_block_index].m_sse) + { + total_superpass1_improved_blocks1++; + + // Warning: This invalidate the neighbor indices + out_block_info.m_packed_out_block_index = new_out_block_index; + + //astc_helpers::astc_block& packed_block = enc_out.m_packed_phys_blocks(bx, by); + + bool pack_success = 
astc_helpers::pack_astc_block((astc_helpers::astc_block&)packed_blocks(bx, by), new_output_blk.m_log_blk); + if (!pack_success) + { + fmt_error_printf("astc_helpers::pack_astc_block failed\n"); + + return false; + } + } +#endif + + } // neighbor_index + + for (uint32_t j = 0; j < out_block_info_superpass1.m_new_out_config_endpoint_reuse_blocks.size(); j++) + { + const uint32_t new_out_block_index = out_block_info.m_out_blocks.size_u32(); + + const encode_block_output& new_output_blk = out_block_info_superpass1.m_new_out_config_endpoint_reuse_blocks[j]; + + out_block_info.m_out_blocks.push_back(new_output_blk); + +#define BU_CHECK_NEIGHBOR_BEST (1) + +#if BU_CHECK_NEIGHBOR_BEST + // See if the solution has improved + if (new_output_blk.m_sse < out_block_info.m_out_blocks[out_block_info.m_packed_out_block_index].m_sse) + { + total_superpass1_improved_blocks2++; + + // Warning: This invalidate the neighbor indices + out_block_info.m_packed_out_block_index = new_out_block_index; + + //astc_helpers::astc_block& packed_block = enc_out.m_packed_phys_blocks(bx, by); + + bool pack_success = astc_helpers::pack_astc_block((astc_helpers::astc_block&)packed_blocks(bx, by), new_output_blk.m_log_blk); + if (!pack_success) + { + fmt_error_printf("astc_helpers::pack_astc_block failed\n"); + + return false; + } + } +#endif + + } // j + + } // bx + } // by + + if (enc_cfg.m_debug_output) + { + fmt_debug_printf("Total superpass 1 improved blocks 1: {} {3.2}%\n", total_superpass1_improved_blocks1, ((float)total_superpass1_improved_blocks1 * 100.0f) / (float)(total_blocks)); + fmt_debug_printf("Total superpass 1 improved blocks 2: {} {3.2}%\n", total_superpass1_improved_blocks2, ((float)total_superpass1_improved_blocks2 * 100.0f) / (float)(total_blocks)); + } + } + + if (ASTC_LDR_CONSISTENCY_CHECKING) + { + if (enc_cfg.m_debug_output) + fmt_debug_printf("consistency checking\n"); + + // Consistency/sanity cross checking + //uint32_t total_blocks_using_neighbor_config = 0; + for 
(uint32_t by = 0; by < num_blocks_y; by++) + { + for (uint32_t bx = 0; bx < num_blocks_x; bx++) + { + const ldr_astc_block_encode_image_output::block_info& out_block_info = enc_out.m_image_block_info(bx, by); + +#if BU_CHECK_NEIGHBOR_BEST + uint64_t best_sse = UINT64_MAX; + uint32_t best_out_block_index = 0; + + for (uint32_t i = 0; i < out_block_info.m_out_blocks.size(); i++) + { + if (out_block_info.m_out_blocks[i].m_sse < best_sse) + { + best_sse = out_block_info.m_out_blocks[i].m_sse; + best_out_block_index = i; + } + } // i + + if (best_out_block_index != out_block_info.m_packed_out_block_index) + { + fmt_error_printf("consistency check failed\n"); + assert(0); + return false; + } +#endif + + if (out_block_info.m_out_blocks[out_block_info.m_packed_out_block_index].m_sse != + eval_error(block_width, block_height, out_block_info.m_out_blocks[out_block_info.m_packed_out_block_index].m_log_blk, out_block_info.m_pixel_stats, enc_cfg.m_cem_enc_params)) + { + fmt_error_printf("consistency check failed\n"); + assert(0); + return false; + } + + // Ensure packed output block matches the expected best WSSE block. 
+ astc_helpers::astc_block packed_block; + bool pack_success = astc_helpers::pack_astc_block(packed_block, out_block_info.m_out_blocks[out_block_info.m_packed_out_block_index].m_log_blk); + if (!pack_success) + { + fmt_error_printf("astc_helpers::pack_astc_block failed\n"); + return false; + } + + if (memcmp(&packed_block, &enc_out.m_packed_phys_blocks(bx, by), sizeof(astc_helpers::astc_block)) != 0) + { + fmt_error_printf("consistency check failed\n"); + assert(0); + return false; + } + + // DCT check + if ((enc_cfg.m_use_dct) && (out_block_info.m_out_blocks[out_block_info.m_packed_out_block_index].m_trial_mode_index >= 0)) + { + const auto& best_log_blk = out_block_info.m_out_blocks[out_block_info.m_packed_out_block_index].m_log_blk; + if (best_log_blk.m_solid_color_flag_ldr) + { + fmt_error_printf("consistency check failed\n"); + assert(0); + return false; + } + + const basist::astc_ldr_t::astc_block_grid_data* pGrid_data = basist::astc_ldr_t::find_astc_block_grid_data(block_width, block_height, best_log_blk.m_grid_width, best_log_blk.m_grid_height); + const uint32_t total_planes = best_log_blk.m_num_partitions ? (best_log_blk.m_dual_plane ? 
2 : 1) : 0; + + astc_helpers::log_astc_block verify_log_blk(best_log_blk); + + for (uint32_t plane_index = 0; plane_index < total_planes; plane_index++) + { + if (!out_block_info.m_out_blocks[out_block_info.m_packed_out_block_index].m_packed_dct_plane_data[plane_index].m_coeffs.size()) + { + fmt_error_printf("consistency check failed\n"); + assert(0); + return false; + } + + basist::astc_ldr_t::fvec dct_temp; + bool dec_status = grid_coder.decode_block_weights(enc_cfg.m_base_q, plane_index, verify_log_blk, nullptr, pGrid_data, nullptr, dct_temp, + &out_block_info.m_out_blocks[out_block_info.m_packed_out_block_index].m_packed_dct_plane_data[plane_index]); + + if (!dec_status) + { + fmt_error_printf("consistency check failed\n"); + assert(0); + return false; + } + + for (uint32_t i = 0; i < (uint32_t)(best_log_blk.m_grid_width * best_log_blk.m_grid_height); i++) + { + if (best_log_blk.m_weights[i * total_planes + plane_index] != verify_log_blk.m_weights[i * total_planes + plane_index]) + { + fmt_error_printf("consistency check failed\n"); + assert(0); + return false; + } + } + + } // plane_index + } + + } // bx + } // by + + if (enc_cfg.m_debug_output) + fmt_debug_printf("consistency checking PASSED\n"); + } + + //fmt_debug_printf("Total blocks using neighbor config: {} {3.2}%\n", total_blocks_using_neighbor_config, ((float)total_blocks_using_neighbor_config * 100.0f) / (float)(total_blocks)); + + // Debug output + uint_vec trial_mode_hist; + trial_mode_hist.resize(encoder_trial_modes.size()); + uint32_t total_alpha_blocks = 0; + + for (uint32_t by = 0; by < num_blocks_y; by++) + { + for (uint32_t bx = 0; bx < num_blocks_x; bx++) + { + const ldr_astc_block_encode_image_output::block_info& out_block_info = enc_out.m_image_block_info(bx, by); + const astc_ldr::pixel_stats_t& pixel_stats = out_block_info.m_pixel_stats; + + const encode_block_output& best_out_block = out_block_info.m_out_blocks[out_block_info.m_packed_out_block_index]; + const 
astc_helpers::log_astc_block& best_out_blocks_log_astc_blk = best_out_block.m_log_blk; + + if (pixel_stats.m_has_alpha) + total_alpha_blocks++; + + output_block_devel_desc& out_devel_desc = output_block_devel_info(bx, by); + out_devel_desc.m_had_alpha = pixel_stats.m_has_alpha; + out_devel_desc.m_trial_mode_index = best_out_block.m_trial_mode_index; + out_devel_desc.m_pTrial_modes = encoder_trial_modes.data(); + + if (out_devel_desc.m_trial_mode_index >= 0) + trial_mode_hist[out_devel_desc.m_trial_mode_index]++; + + //const float total_astc_weight_bits = log2f((float)astc_helpers::get_ise_levels(best_out_block.m_log_blk.m_weight_ise_range)) * + // best_out_block.m_log_blk.m_grid_width * best_out_block.m_log_blk.m_grid_height * (best_out_block.m_log_blk.m_dual_plane ? 2 : 1); + + //bool used_blue_contraction = astc_ldr::used_blue_contraction(best_out_blocks_log_astc_blk.m_color_endpoint_modes[0], best_out_blocks_log_astc_blk.m_endpoints, best_out_blocks_log_astc_blk.m_endpoint_ise_range); + + if (enc_cfg.m_debug_images) + { + color_rgba vis_col(g_black_color); + color_rgba vis2_col(g_black_color); + color_rgba dp_vis(g_black_color); + color_rgba base_ofs_vis(g_black_color); + //color_rgba dct_bits_abs_vis(g_black_color); + //color_rgba dct_bits_vs_astc_vis(g_black_color); + + const astc_ldr::partition_pattern_vec* pPat = nullptr; + + if (best_out_blocks_log_astc_blk.m_num_partitions == 2) + { + vis_col.set(0, 255, 0, 255); + + const astc_ldr::partitions_data* pPart_data = pPart_data_p2; + + const uint32_t part_seed_index = best_out_blocks_log_astc_blk.m_partition_id; + const uint32_t part_unique_index = pPart_data->m_part_seed_to_unique_index[part_seed_index]; + + pPat = &pPart_data->m_partition_pats[part_unique_index]; + } + else if (best_out_blocks_log_astc_blk.m_num_partitions == 3) + { + vis_col.set(0, 0, 255, 255); + + const astc_ldr::partitions_data* pPart_data = pPart_data_p3; + + const uint32_t part_seed_index = best_out_blocks_log_astc_blk.m_partition_id; + 
const uint32_t part_unique_index = pPart_data->m_part_seed_to_unique_index[part_seed_index]; + + pPat = &pPart_data->m_partition_pats[part_unique_index]; + } + + // vis_col.r = enc_blk_params.m_use_base_scale_modes ? 255 : 0; + // vis_col.g = enc_blk_params.m_use_direct_modes ? 255 : 0; + + if (!out_devel_desc.m_low_freq_block_flag) + { + if (out_devel_desc.m_super_strong_edges) + vis2_col.set(255, 0, 255, 255); + else if (out_devel_desc.m_very_strong_edges) + vis2_col.set(255, 0, 0, 255); + else if (out_devel_desc.m_strong_edges) + vis2_col.set(0, 255, 0, 255); + } + + if (pPat) + { + for (uint32_t y = 0; y < block_height; y++) + { + for (uint32_t x = 0; x < block_width; x++) + { + const uint32_t subset_idx = (*pPat)(x, y); + + color_rgba c(g_black_color); + + if (best_out_blocks_log_astc_blk.m_num_partitions == 2) + { + assert(subset_idx < 2); + c = subset_idx ? color_rgba(255, 0, 0, 255) : color_rgba(0, 255, 0, 255); + } + else + { + assert(best_out_blocks_log_astc_blk.m_num_partitions == 3); + assert(subset_idx < 3); + + if (subset_idx == 2) + c = color_rgba(0, 0, 255, 255); + else if (subset_idx == 1) + c = color_rgba(32, 0, 190, 255); + else + c = color_rgba(64, 0, 64, 255); + } + + vis_part_pat_img.set_clipped(bx * block_width + x, by * block_height + y, c); + } + } + } + + if (best_out_blocks_log_astc_blk.m_dual_plane) + dp_vis.g = 255; + + if ((best_out_blocks_log_astc_blk.m_color_endpoint_modes[0] == astc_helpers::CEM_LDR_RGB_BASE_PLUS_OFFSET) || + (best_out_blocks_log_astc_blk.m_color_endpoint_modes[0] == astc_helpers::CEM_LDR_RGBA_BASE_PLUS_OFFSET)) + { + base_ofs_vis.b = 255; + } + + vis_part_usage_img.fill_box(bx * block_width, by * block_height, block_width, block_height, vis_col); + vis_strong_edge.fill_box(bx * block_width, by * block_height, block_width, block_height, vis2_col); + vis_dp_img.fill_box(bx * block_width, by * block_height, block_width, block_height, dp_vis); + vis_base_ofs_img.fill_box(bx * block_width, by * block_height, 
block_width, block_height, base_ofs_vis); + } + + } // bx + + } // by + + const double total_enc_time = itm.get_elapsed_secs(); + + if (enc_cfg.m_debug_output) + fmt_debug_printf("ASTC packing complete\n"); + + image unpacked_img(width, height); + + // Unpack packed image, validate ASTC data with several decoders. + for (uint32_t by = 0; by < num_blocks_y; by++) + { + for (uint32_t bx = 0; bx < num_blocks_x; bx++) + { + const astc_helpers::astc_block* pPhys_block = &packed_blocks(bx, by); + + astc_helpers::log_astc_block log_blk; + bool status = astc_helpers::unpack_block(pPhys_block, log_blk, block_width, block_height); + if (!status) + { + fmt_error_printf("unpack_block() failed\n"); + return false; + } + + // Decode with our generic ASTC decoder. + color_rgba block_pixels[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + status = astc_helpers::decode_block(log_blk, block_pixels, block_width, block_height, enc_cfg.m_cem_enc_params.m_decode_mode_srgb ? astc_helpers::cDecodeModeSRGB8 : astc_helpers::cDecodeModeLDR8); + if (!status) + { + fmt_error_printf("decode_block() failed\n"); + return false; + } + + unpacked_img.set_block_clipped(block_pixels, bx * block_width, by * block_height, block_width, block_height); + + // Decode with the Android testing framework ASTC decoder + { + uint8_t dec_pixels_android[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS * 4]; + + bool android_success = basisu_astc::astc::decompress_ldr(dec_pixels_android, (const uint8_t*)pPhys_block, enc_cfg.m_cem_enc_params.m_decode_mode_srgb, block_width, block_height); + if (!android_success) + { + fmt_error_printf("Android ASTC decoder failed!\n"); + return false; + } + + if (memcmp(dec_pixels_android, block_pixels, total_block_pixels * 4) != 0) + { + fmt_error_printf("Android ASTC decoder mismatch!\n"); + return false; + } + } + + // Decode with our optimized XUASTC LDR decoder + { + color_rgba block_pixels_alt[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + status = astc_helpers::decode_block_xuastc_ldr(log_blk, 
block_pixels_alt, block_width, block_height, enc_cfg.m_cem_enc_params.m_decode_mode_srgb ? astc_helpers::cDecodeModeSRGB8 : astc_helpers::cDecodeModeLDR8); + if (!status) + { + fmt_error_printf("decode_block_xuastc_ldr() failed\n"); + return false; + } + + if (memcmp(block_pixels, block_pixels_alt, total_block_pixels * 4) != 0) + { + fmt_error_printf("XUASTC LDR ASTC decoder mismatch!\n"); + return false; + } + } + + } // bx + } // by + + if (enc_cfg.m_debug_images) + { + save_png(enc_cfg.m_debug_file_prefix + "dbg_astc_ldr_unpacked_img.png", unpacked_img); + + if (vis_part_usage_img.is_valid()) + save_png(enc_cfg.m_debug_file_prefix + "vis_part_usage.png", vis_part_usage_img); + + if (vis_part_pat_img.is_valid()) + save_png(enc_cfg.m_debug_file_prefix + "vis_part_pat_img.png", vis_part_pat_img); + + if (vis_strong_edge.is_valid()) + save_png(enc_cfg.m_debug_file_prefix + "vis_strong_edge.png", vis_strong_edge); + + if (vis_dct_low_freq_block.is_valid()) + save_png(enc_cfg.m_debug_file_prefix + "vis_dct_low_freq_block.png", vis_dct_low_freq_block); + + if (vis_dp_img.is_valid()) + save_png(enc_cfg.m_debug_file_prefix + "vis_dp.png", vis_dp_img); + + if (vis_base_ofs_img.is_valid()) + save_png(enc_cfg.m_debug_file_prefix + "vis_base_ofs.png", vis_base_ofs_img); + } + + if (enc_cfg.m_debug_output) + { + uint32_t cem_used_hist[16] = { 0 }; + uint32_t cem_used_bc[16] = { 0 }; + uint32_t cem_used_subsets[16] = { 0 }; + uint32_t cem_used_dp[16] = { 0 }; + uint32_t total_dp = 0, total_base_ofs = 0; + uint32_t subset_used_hist[4] = { 0 }; + uint32_t grid_usage_hist[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS * astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS + 1] = { 0 }; + + uint32_t total_header_bits = 0; + uint32_t total_weight_bits = 0; + uint32_t total_endpoint_bits = 0; + + uint32_t total_void_extent = 0; + + uint32_t used_endpoint_levels_hist[astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE - astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE + 1] = { 0 }; + uint32_t 
used_weight_levels_hist[astc_helpers::LAST_VALID_WEIGHT_ISE_RANGE - astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE + 1] = { 0 }; + + uint32_t total_blocks_using_subsets = 0; + + uint32_t total_used_bc = 0; + + for (uint32_t by = 0; by < num_blocks_y; by++) + { + for (uint32_t bx = 0; bx < num_blocks_x; bx++) + { + const output_block_devel_desc& desc = output_block_devel_info(bx, by); + + const astc_helpers::astc_block* pPhys_block = &packed_blocks(bx, by); + + astc_helpers::log_astc_block log_blk; + bool status = astc_helpers::unpack_block(pPhys_block, log_blk, block_width, block_height); + if (!status) + { + fmt_error_printf("unpack_block() failed\n"); + return false; + } + + if (desc.m_trial_mode_index < 0) + { + total_void_extent++; + continue; + } + else + { + const basist::astc_ldr_t::trial_mode& tm = desc.m_pTrial_modes[desc.m_trial_mode_index]; + + const uint32_t actual_cem = log_blk.m_color_endpoint_modes[0]; + //assert(tm.m_cem == log_blk.m_color_endpoint_modes[0]); // may differ due to base+ofs usage + + assert((tm.m_ccs_index >= 0) == log_blk.m_dual_plane); + assert((!log_blk.m_dual_plane) || (tm.m_ccs_index == log_blk.m_color_component_selector)); + assert(tm.m_endpoint_ise_range == log_blk.m_endpoint_ise_range); + assert(tm.m_weight_ise_range == log_blk.m_weight_ise_range); + assert(tm.m_grid_width == log_blk.m_grid_width); + assert(tm.m_grid_height == log_blk.m_grid_height); + assert(tm.m_num_parts == log_blk.m_num_partitions); + + used_weight_levels_hist[open_range_check(tm.m_weight_ise_range - astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE, std::size(used_weight_levels_hist))]++; + used_endpoint_levels_hist[open_range_check(tm.m_endpoint_ise_range - astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE, std::size(used_endpoint_levels_hist))]++; + + cem_used_hist[actual_cem]++; + if (log_blk.m_dual_plane) + total_dp++; + + subset_used_hist[open_range_check(log_blk.m_num_partitions - 1, std::size(subset_used_hist))]++; + + bool used_bc = false; + for (uint32_t i = 0; 
i < tm.m_num_parts; i++) + { + if (astc_helpers::used_blue_contraction(actual_cem, log_blk.m_endpoints + i * astc_helpers::get_num_cem_values(actual_cem), log_blk.m_endpoint_ise_range)) + { + used_bc = true; + } + } + + if (used_bc) + { + cem_used_bc[actual_cem]++; + total_used_bc++; + } + + if (tm.m_num_parts > 1) + cem_used_subsets[actual_cem]++; + + // TODO: add CCS index histogram per CEM + if (log_blk.m_dual_plane) + cem_used_dp[actual_cem]++; + + if ((actual_cem == astc_helpers::CEM_LDR_RGB_BASE_PLUS_OFFSET) || + (actual_cem == astc_helpers::CEM_LDR_RGBA_BASE_PLUS_OFFSET)) + { + total_base_ofs++; + } + + grid_usage_hist[open_range_check(log_blk.m_grid_width * log_blk.m_grid_height, std::size(grid_usage_hist))]++; + + if (tm.m_num_parts > 1) + total_blocks_using_subsets++; + } + + astc_helpers::pack_stats pack_stats; + pack_stats.clear(); + + astc_helpers::astc_block temp_phys_block; + int expected_endpoint_range = 0; + status = astc_helpers::pack_astc_block(temp_phys_block, log_blk, &expected_endpoint_range, &pack_stats); + assert(status); + + total_header_bits += pack_stats.m_header_bits; + total_weight_bits += pack_stats.m_weight_bits; + total_endpoint_bits += pack_stats.m_endpoint_bits; + + } // bx + } // by + + uint32_t total_used_modes = 0; + + fmt_debug_printf("--------------------- Trial Modes:\n"); + + for (uint32_t i = 0; i < trial_mode_hist.size(); i++) + { + if (!trial_mode_hist[i]) + continue; + + if (trial_mode_hist[i]) + total_used_modes++; + +#if 0 + const uint32_t total_mode_blocks = trial_mode_hist[i]; + + const uint32_t num_subsets = encoder_trial_modes[i].m_num_parts; + const uint32_t cem_index = encoder_trial_modes[i].m_cem; + + fmt_debug_printf("{}: {} {3.2}%: cem: {}, grid {}x{}, e: {} w: {}, ccs: {}, parts: {}, total base+ofs: {}, total direct: {}\n", i, total_mode_blocks, (float)total_mode_blocks * 100.0f / (float)total_blocks, + encoder_trial_modes[i].m_cem, + encoder_trial_modes[i].m_grid_width, encoder_trial_modes[i].m_grid_height, 
+ astc_helpers::get_ise_levels(encoder_trial_modes[i].m_endpoint_ise_range), astc_helpers::get_ise_levels(encoder_trial_modes[i].m_weight_ise_range), + encoder_trial_modes[i].m_ccs_index, + encoder_trial_modes[i].m_num_parts, + used_base_offset_count[i], + used_rgb_direct_count[i]); +#endif + } + + fmt_debug_printf("\n"); + + fmt_debug_printf("Used endpoint ISE levels:\n"); + for (uint32_t i = 0; i < std::size(used_endpoint_levels_hist); i++) + fmt_debug_printf("{} levels: {}\n", astc_helpers::get_ise_levels(astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE + i), used_endpoint_levels_hist[i]); + + fmt_debug_printf("\nUsed weight ISE levels:\n"); + for (uint32_t i = 0; i < std::size(used_weight_levels_hist); i++) + fmt_debug_printf("{} levels: {}\n", astc_helpers::get_ise_levels(astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE + i), used_weight_levels_hist[i]); + + const uint32_t total_blocks_excluding_void_extent = total_blocks - total_void_extent; + + fmt_debug_printf("\nTotal blocks: {}, excluding void extent: {}\n", total_blocks, total_blocks_excluding_void_extent); + fmt_debug_printf("Total void extent blocks skipped by compressor: {}\n", total_void_extent_blocks_skipped); + fmt_debug_printf("Total final void extent blocks: {}\n", total_void_extent); + fmt_debug_printf("Total input blocks with alpha: {} {3.1}%\n", total_alpha_blocks, (float)total_alpha_blocks * 100.0f / (float)total_blocks); + + fmt_debug_printf("\nASTC phys avg block stats (including void extent):\n"); + fmt_debug_printf("Total header bits: {}, {} per block, {} per pixel\n", total_header_bits, (float)total_header_bits / (float)total_blocks, (float)total_header_bits / (float)(total_pixels)); + fmt_debug_printf("Total weight bits: {}, {} per block, {} per pixel\n", total_weight_bits, (float)total_weight_bits / (float)total_blocks, (float)total_weight_bits / (float)(total_pixels)); + fmt_debug_printf("Total endpoint bits: {}, {} per block, {} per pixel\n", total_endpoint_bits, (float)total_endpoint_bits / 
(float)total_blocks, (float)total_endpoint_bits / (float)(total_pixels)); + fmt_debug_printf("Total header+endpoint bits: {}, {} per block, {} per pixel\n", total_header_bits + total_endpoint_bits, + (float)(total_header_bits + total_endpoint_bits) / (float)total_blocks, (float)(total_header_bits + total_endpoint_bits) / (float)(total_pixels)); + fmt_debug_printf("Total header+endpoint+weight bits: {}, {} per block, {} per pixel\n", total_header_bits + total_endpoint_bits + total_weight_bits, + (float)(total_header_bits + total_endpoint_bits + total_weight_bits) / (float)total_blocks, (float)(total_header_bits + total_endpoint_bits + total_weight_bits) / (float)(total_pixels)); + + fmt_debug_printf("\nEncoder stats:\n"); + fmt_debug_printf("Total utilized encoder trial modes: {} {3.2}%\n", total_used_modes, (float)total_used_modes * 100.0f / (float)encoder_trial_modes.size()); + + const uint32_t total_blurred_blocks = total_blurred_blocks1 + total_blurred_blocks2 + total_blurred_blocks3 + total_blurred_blocks4; + + fmt_debug_printf("\nTotal blur encodes: {} ({3.2}%)\n", total_blur_encodes, (float)total_blur_encodes * 100.0f / (float)total_blocks); + fmt_debug_printf("Total blurred blocks: {} ({3.2}%)\n", total_blurred_blocks, (float)total_blurred_blocks * 100.0f / (float)total_blocks); + fmt_debug_printf("Total blurred1 blocks: {} ({3.2}%)\n", total_blurred_blocks1, (float)total_blurred_blocks1 * 100.0f / (float)total_blocks); + fmt_debug_printf("Total blurred2 blocks: {} ({3.2}%)\n", total_blurred_blocks2, (float)total_blurred_blocks2 * 100.0f / (float)total_blocks); + fmt_debug_printf("Total blurred3 blocks: {} ({3.2}%)\n", total_blurred_blocks3, (float)total_blurred_blocks3 * 100.0f / (float)total_blocks); + fmt_debug_printf("Total blurred4 blocks: {} ({3.2}%)\n", total_blurred_blocks4, (float)total_blurred_blocks4 * 100.0f / (float)total_blocks); + + fmt_debug_printf("\nTotal superbuckets created: {} ({4.1} per block)\n", total_superbuckets_created, 
(float)total_superbuckets_created / (float)total_blocks); + fmt_debug_printf("Total shortlist buckets created: {} ({4.1} per block)\n", total_buckets_created, (float)total_buckets_created / (float)total_blocks); + fmt_debug_printf("Total surrogate encodes: {} ({4.1} per block)\n", total_surrogate_encodes, (float)total_surrogate_encodes / (float)total_blocks); + fmt_debug_printf("Total shortlist candidates (before full encoding): {} ({4.1} per block)\n", total_shortlist_candidates, (float)total_shortlist_candidates / (float)total_blocks); + fmt_debug_printf("Total full encodes on superpass 0: {} ({4.1} per block)\n", total_full_encodes, (float)total_full_encodes / (float)total_blocks); + fmt_debug_printf("Total full encodes on superpass 1: {} ({4.1} per block)\n", total_full_encodes_pass1, (float)total_full_encodes_pass1 / (float)total_blocks); + fmt_debug_printf("Total full encodes on superpass 2: {} ({4.1} per block)\n", total_full_encodes_pass2, (float)total_full_encodes_pass2 / (float)total_blocks); + + debug_printf("\nTotal final encoded ASTC blocks using blue contraction: %u (%.2f%%)\n", total_used_bc, 100.0f * (float)total_used_bc / (float)total_blocks); + + fmt_debug_printf("Total final encoded ASTC blocks using dual planes: {} {3.2}%\n", total_dp, (float)total_dp * 100.0f / (float)total_blocks); + fmt_debug_printf("Total final encoded ASTC blocks using base+ofs: {} {3.2}%\n", total_base_ofs, (float)total_base_ofs * 100.0f / (float)total_blocks); + fmt_debug_printf("Total final encoded ASTC blocks using subsets: {} {3.2}%\n", total_blocks_using_subsets, (float)total_blocks_using_subsets * 100.0f / (float)total_blocks); + + debug_printf("\nSubset usage histogram:\n"); + for (uint32_t i = 0; i < 4; i++) + fmt_debug_printf("{} subsets: {} {3.2}%\n", i + 1, subset_used_hist[i], (float)subset_used_hist[i] * 100.0f / (float)total_blocks); + debug_printf("\n"); + + debug_printf("CEM usage histogram:\n"); + for (uint32_t i = 0; i < 16; i++) + { + if 
(astc_helpers::is_cem_hdr(i)) + continue; + + std::string n(astc_helpers::get_cem_name(i)); + while (n.size() < 40) + n.push_back(' '); + + fmt_debug_printf("{}: {} {3.2}%, Used BC: {3.2}%, Used subsets: {3.2}%, Used DP: {3.2}%\n", + n, + cem_used_hist[i], + (float)cem_used_hist[i] * 100.0f / (float)total_blocks, + (float)cem_used_bc[i] * 100.0f / (float)total_blocks, + (float)cem_used_subsets[i] * 100.0f / (float)total_blocks, + (float)cem_used_dp[i] * 100.0f / (float)total_blocks); + } + debug_printf("\n"); + + debug_printf("Grid samples histogram:\n"); + for (uint32_t i = 1; i <= block_width * block_height; i++) + { + if (grid_usage_hist[i]) + fmt_debug_printf("{} samples: {} {3.2}%\n", i, grid_usage_hist[i], (float)grid_usage_hist[i] * 100.0f / (float)total_blocks); + } + debug_printf("\n"); + + fmt_debug_printf("orig vs. ASTC compressed:\n"); + print_image_metrics(orig_img, unpacked_img); + + fmt_debug_printf("Total encode time: {.3} secs, {.3} ms per block, {.1} blocks/sec\n", total_enc_time, total_enc_time * 1000.0f / total_blocks, total_blocks / total_enc_time); + + fmt_debug_printf("OK\n"); + } + + return true; +} + +//const uint32_t rice_zero_run_m = 3, rice_dct_coeff_m = 2; + +const uint_vec& separate_tm_index(uint32_t block_width, uint32_t block_height, const basist::astc_ldr_t::grouped_trial_modes& grouped_enc_trial_modes, const basist::astc_ldr_t::trial_mode& tm, + uint32_t& cem_index, uint32_t& subset_index, uint32_t& ccs_index, uint32_t& grid_size, uint32_t& grid_aniso) +{ + cem_index = tm.m_cem; + assert(cem_index < basist::astc_ldr_t::OTM_NUM_CEMS); + + subset_index = tm.m_num_parts - 1; + assert(subset_index < basist::astc_ldr_t::OTM_NUM_SUBSETS); + + ccs_index = tm.m_ccs_index + 1; + assert(ccs_index < basist::astc_ldr_t::OTM_NUM_CCS); + + grid_size = (tm.m_grid_width >= (block_width - 1)) && (tm.m_grid_height >= (block_height - 1)); + grid_aniso = basist::astc_ldr_t::calc_grid_aniso_val(tm.m_grid_width, tm.m_grid_height, block_width, 
block_height); + + const uint_vec& modes = grouped_enc_trial_modes.m_tm_groups[cem_index][subset_index][ccs_index][grid_size][grid_aniso]; + return modes; +} + +static bool compare_log_block_configs(const astc_helpers::log_astc_block& trial_log_blk, const astc_helpers::log_astc_block& neighbor_log_blk) +{ + assert(!trial_log_blk.m_solid_color_flag_ldr); + + if (neighbor_log_blk.m_solid_color_flag_ldr) + return false; + + if ((trial_log_blk.m_color_endpoint_modes[0] == neighbor_log_blk.m_color_endpoint_modes[0]) && + (trial_log_blk.m_dual_plane == neighbor_log_blk.m_dual_plane) && (trial_log_blk.m_color_component_selector == neighbor_log_blk.m_color_component_selector) && + (trial_log_blk.m_num_partitions == neighbor_log_blk.m_num_partitions) && (trial_log_blk.m_partition_id == neighbor_log_blk.m_partition_id) && + (trial_log_blk.m_grid_width == neighbor_log_blk.m_grid_width) && (trial_log_blk.m_grid_height == neighbor_log_blk.m_grid_height) && + (trial_log_blk.m_endpoint_ise_range == neighbor_log_blk.m_endpoint_ise_range) && (trial_log_blk.m_weight_ise_range == neighbor_log_blk.m_weight_ise_range)) + { + return true; + } + + return false; +} + +static bool compare_log_block_configs_and_endpoints(const astc_helpers::log_astc_block& trial_log_blk, const astc_helpers::log_astc_block& neighbor_log_blk) +{ + if (!compare_log_block_configs(trial_log_blk, neighbor_log_blk)) + return false; + + const uint32_t total_endpoint_vals = trial_log_blk.m_num_partitions * astc_helpers::get_num_cem_values(trial_log_blk.m_color_endpoint_modes[0]); + if (memcmp(trial_log_blk.m_endpoints, neighbor_log_blk.m_endpoints, total_endpoint_vals) == 0) + return true; + + return false; +} + +static bool compare_log_blocks_for_equality(const astc_helpers::log_astc_block& trial_log_blk, const astc_helpers::log_astc_block& neighbor_log_blk) +{ + if (trial_log_blk.m_solid_color_flag_ldr) + { + if (!neighbor_log_blk.m_solid_color_flag_ldr) + return false; + + for (uint32_t i = 0; i < 4; i++) + if 
(trial_log_blk.m_solid_color[i] != neighbor_log_blk.m_solid_color[i]) + return false; + + return true; + } + else if (neighbor_log_blk.m_solid_color_flag_ldr) + { + return false; + } + + assert(!trial_log_blk.m_solid_color_flag_ldr && !neighbor_log_blk.m_solid_color_flag_ldr); + + if ((trial_log_blk.m_color_endpoint_modes[0] == neighbor_log_blk.m_color_endpoint_modes[0]) && + (trial_log_blk.m_dual_plane == neighbor_log_blk.m_dual_plane) && (trial_log_blk.m_color_component_selector == neighbor_log_blk.m_color_component_selector) && + (trial_log_blk.m_num_partitions == neighbor_log_blk.m_num_partitions) && (trial_log_blk.m_partition_id == neighbor_log_blk.m_partition_id) && + (trial_log_blk.m_grid_width == neighbor_log_blk.m_grid_width) && (trial_log_blk.m_grid_height == neighbor_log_blk.m_grid_height) && + (trial_log_blk.m_endpoint_ise_range == neighbor_log_blk.m_endpoint_ise_range) && (trial_log_blk.m_weight_ise_range == neighbor_log_blk.m_weight_ise_range)) + { + const uint32_t total_endpoint_vals = trial_log_blk.m_num_partitions * astc_helpers::get_num_cem_values(trial_log_blk.m_color_endpoint_modes[0]); + if (memcmp(trial_log_blk.m_endpoints, neighbor_log_blk.m_endpoints, total_endpoint_vals) == 0) + { + const uint32_t total_weights = (trial_log_blk.m_dual_plane ? 
2 : 1) * (trial_log_blk.m_grid_width * trial_log_blk.m_grid_height); + return memcmp(trial_log_blk.m_weights, neighbor_log_blk.m_weights, total_weights) == 0; + } + } + + return false; +} + +void configure_encoder_effort_level(int level, ldr_astc_block_encode_image_high_level_config& cfg) +{ + switch (level) + { + case 10: + { + cfg.m_second_superpass_refinement = true; + cfg.m_third_superpass_try_neighbors = true; + + cfg.m_subsets_enabled = true; + cfg.m_use_blue_contraction = true; + cfg.m_use_base_ofs = true; + + cfg.m_force_all_dual_plane_chan_evals = true; + cfg.m_filter_by_pca_angles_flag = false; + + cfg.m_superbucket_max_to_retain[0] = 256; + cfg.m_superbucket_max_to_retain[1] = 256; + cfg.m_superbucket_max_to_retain[2] = 256; + + cfg.m_base_parts2 = 128; + cfg.m_base_parts3 = 128; + cfg.m_part2_fraction_to_keep = 1; + cfg.m_part3_fraction_to_keep = 1; + + cfg.m_final_shortlist_fraction[0] = 1.0f; + cfg.m_final_shortlist_fraction[1] = 1.0f; + cfg.m_final_shortlist_fraction[2] = 1.0f; + + cfg.m_final_shortlist_max_size[0] = 128; + cfg.m_final_shortlist_max_size[1] = 128; + cfg.m_final_shortlist_max_size[2] = 128; + + // Second superpass + cfg.m_second_superpass_fract_to_recompress = .075f; + cfg.m_superbucket_max_to_retain_p2[0] = 1024; + cfg.m_superbucket_max_to_retain_p2[1] = 1024; + cfg.m_superbucket_max_to_retain_p2[2] = 1024; + cfg.m_final_shortlist_max_size_p2[0] = 256; + cfg.m_final_shortlist_max_size_p2[1] = 256; + cfg.m_final_shortlist_max_size_p2[2] = 256; + cfg.m_base_parts2_p2 = 128; + cfg.m_base_parts3_p2 = 128; + cfg.m_force_all_dp_chans_p2 = true; + cfg.m_filter_by_pca_angles_flag_p2 = false; + + cfg.m_final_encode_always_try_rgb_direct = true; + + cfg.m_early_stop_wpsnr = 90.0f; + cfg.m_early_stop2_wpsnr = 90.0f; + cfg.m_grid_hv_filtering = false; + cfg.m_low_freq_block_filtering = false; + + break; + } + case 9: + { + cfg.m_second_superpass_refinement = true; + cfg.m_third_superpass_try_neighbors = true; + + cfg.m_subsets_enabled = true; + 
cfg.m_use_blue_contraction = true; + cfg.m_use_base_ofs = true; + + cfg.m_force_all_dual_plane_chan_evals = false; + cfg.m_filter_by_pca_angles_flag = true; + + cfg.m_superbucket_max_to_retain[0] = 8; + cfg.m_superbucket_max_to_retain[1] = 16; + cfg.m_superbucket_max_to_retain[2] = 32; + + cfg.m_base_parts2 = 32; + cfg.m_base_parts3 = 32; + cfg.m_part2_fraction_to_keep = 2; + cfg.m_part3_fraction_to_keep = 2; + + cfg.m_final_shortlist_fraction[0] = 1.0f; + cfg.m_final_shortlist_fraction[1] = 1.0f; + cfg.m_final_shortlist_fraction[2] = 1.0f; + + cfg.m_final_shortlist_max_size[0] = 4; + cfg.m_final_shortlist_max_size[1] = 12; + cfg.m_final_shortlist_max_size[2] = 24; + + // Second superpass + cfg.m_second_superpass_fract_to_recompress = .075f; + cfg.m_superbucket_max_to_retain_p2[0] = 16; + cfg.m_superbucket_max_to_retain_p2[1] = 64; + cfg.m_superbucket_max_to_retain_p2[2] = 256; + cfg.m_final_shortlist_max_size_p2[0] = 8; + cfg.m_final_shortlist_max_size_p2[1] = 16; + cfg.m_final_shortlist_max_size_p2[2] = 32; + cfg.m_base_parts2_p2 = 64; + cfg.m_base_parts3_p2 = 64; + cfg.m_force_all_dp_chans_p2 = false; + cfg.m_filter_by_pca_angles_flag_p2 = false; + + cfg.m_final_encode_always_try_rgb_direct = false; + + cfg.m_early_stop_wpsnr = 75.0f; + cfg.m_early_stop2_wpsnr = 70.0f; + + break; + } + case 8: + { + cfg.m_second_superpass_refinement = true; + cfg.m_third_superpass_try_neighbors = true; + + cfg.m_subsets_enabled = true; + cfg.m_use_blue_contraction = true; + cfg.m_use_base_ofs = true; + + cfg.m_force_all_dual_plane_chan_evals = false; + cfg.m_filter_by_pca_angles_flag = true; + + cfg.m_superbucket_max_to_retain[0] = 4; + cfg.m_superbucket_max_to_retain[1] = 8; + cfg.m_superbucket_max_to_retain[2] = 16; + + cfg.m_base_parts2 = 16; + cfg.m_base_parts3 = 16; + cfg.m_part2_fraction_to_keep = 2; + cfg.m_part3_fraction_to_keep = 2; + + cfg.m_final_shortlist_fraction[0] = 1.0f; + cfg.m_final_shortlist_fraction[1] = 1.0f; + cfg.m_final_shortlist_fraction[2] = 1.0f; + + 
cfg.m_final_shortlist_max_size[0] = 3; + cfg.m_final_shortlist_max_size[1] = 8; + cfg.m_final_shortlist_max_size[2] = 12; + + // Second superpass + cfg.m_second_superpass_fract_to_recompress = .075f; + cfg.m_superbucket_max_to_retain_p2[0] = 16; + cfg.m_superbucket_max_to_retain_p2[1] = 64; + cfg.m_superbucket_max_to_retain_p2[2] = 256; + cfg.m_final_shortlist_max_size_p2[0] = 8; + cfg.m_final_shortlist_max_size_p2[1] = 16; + cfg.m_final_shortlist_max_size_p2[2] = 32; + cfg.m_base_parts2_p2 = 64; + cfg.m_base_parts3_p2 = 64; + cfg.m_force_all_dp_chans_p2 = false; + cfg.m_filter_by_pca_angles_flag_p2 = false; + + cfg.m_final_encode_always_try_rgb_direct = false; + + cfg.m_early_stop_wpsnr = 75.0f; + cfg.m_early_stop2_wpsnr = 70.0f; + break; + } + case 7: + { + cfg.m_second_superpass_refinement = true; + cfg.m_third_superpass_try_neighbors = true; + + cfg.m_subsets_enabled = true; + cfg.m_use_blue_contraction = true; + cfg.m_use_base_ofs = true; + + cfg.m_disable_rgb_dual_plane = false; + cfg.m_strong_dp_decorr_thresh_rgb = .9f; + + cfg.m_force_all_dual_plane_chan_evals = false; + cfg.m_filter_by_pca_angles_flag = true; + + cfg.m_superbucket_max_to_retain[0] = 3; + cfg.m_superbucket_max_to_retain[1] = 7; + cfg.m_superbucket_max_to_retain[2] = 12; + + cfg.m_base_parts2 = 12; + cfg.m_base_parts3 = 12; + cfg.m_part2_fraction_to_keep = 2; + cfg.m_part3_fraction_to_keep = 2; + + cfg.m_final_shortlist_fraction[0] = 1.0f; + cfg.m_final_shortlist_fraction[1] = 1.0f; + cfg.m_final_shortlist_fraction[2] = 1.0f; + + cfg.m_final_shortlist_max_size[0] = 2; + cfg.m_final_shortlist_max_size[1] = 4; + cfg.m_final_shortlist_max_size[2] = 8; + + cfg.m_gradient_descent_flag = true; + cfg.m_polish_weights_flag = true; + cfg.m_qcd_enabled_flag = true; + + cfg.m_bucket_pruning_passes = false; + cfg.m_cem_enc_params.m_max_ls_passes = 1; + + // Second superpass + cfg.m_second_superpass_fract_to_recompress = .075f; + cfg.m_superbucket_max_to_retain_p2[0] = 4; + 
cfg.m_superbucket_max_to_retain_p2[1] = 16; + cfg.m_superbucket_max_to_retain_p2[2] = 32; + cfg.m_final_shortlist_max_size_p2[0] = 4; + cfg.m_final_shortlist_max_size_p2[1] = 16; + cfg.m_final_shortlist_max_size_p2[2] = 32; + cfg.m_base_parts2_p2 = 32; + cfg.m_base_parts3_p2 = 8; + cfg.m_force_all_dp_chans_p2 = false; + cfg.m_filter_by_pca_angles_flag_p2 = true; + + cfg.m_early_stop_wpsnr = 65.0f; + cfg.m_early_stop2_wpsnr = 60.0f; + break; + } + case 6: + { + cfg.m_second_superpass_refinement = true; + cfg.m_third_superpass_try_neighbors = true; + + cfg.m_subsets_enabled = true; + cfg.m_use_blue_contraction = true; + cfg.m_use_base_ofs = true; + + cfg.m_disable_rgb_dual_plane = false; + cfg.m_strong_dp_decorr_thresh_rgb = .75f; + + cfg.m_force_all_dual_plane_chan_evals = false; + cfg.m_filter_by_pca_angles_flag = true; + + cfg.m_superbucket_max_to_retain[0] = 2; + cfg.m_superbucket_max_to_retain[1] = 5; + cfg.m_superbucket_max_to_retain[2] = 10; + + cfg.m_base_parts2 = 12; + cfg.m_base_parts3 = 10; + cfg.m_part2_fraction_to_keep = 2; + cfg.m_part3_fraction_to_keep = 2; + + cfg.m_final_shortlist_fraction[0] = 1.0f; + cfg.m_final_shortlist_fraction[1] = 1.0f; + cfg.m_final_shortlist_fraction[2] = 1.0f; + + cfg.m_final_shortlist_max_size[0] = 1; + cfg.m_final_shortlist_max_size[1] = 4; + cfg.m_final_shortlist_max_size[2] = 8; + + cfg.m_gradient_descent_flag = true; + cfg.m_polish_weights_flag = true; + cfg.m_qcd_enabled_flag = true; + + cfg.m_bucket_pruning_passes = false; + cfg.m_cem_enc_params.m_max_ls_passes = 1; + + // Second superpass + cfg.m_second_superpass_fract_to_recompress = .075f; + cfg.m_superbucket_max_to_retain_p2[0] = 2; + cfg.m_superbucket_max_to_retain_p2[1] = 8; + cfg.m_superbucket_max_to_retain_p2[2] = 16; + cfg.m_final_shortlist_max_size_p2[0] = 2; + cfg.m_final_shortlist_max_size_p2[1] = 8; + cfg.m_final_shortlist_max_size_p2[2] = 16; + cfg.m_base_parts2_p2 = 32; + cfg.m_base_parts3_p2 = 8; + cfg.m_force_all_dp_chans_p2 = false; + 
cfg.m_filter_by_pca_angles_flag_p2 = true; + + cfg.m_early_stop_wpsnr = 65.0f; + cfg.m_early_stop2_wpsnr = 60.0f; + break; + } + case 5: + { + cfg.m_second_superpass_refinement = true; + cfg.m_third_superpass_try_neighbors = true; + + cfg.m_subsets_enabled = true; + cfg.m_use_blue_contraction = true; + cfg.m_use_base_ofs = true; + + cfg.m_disable_rgb_dual_plane = false; + cfg.m_strong_dp_decorr_thresh_rgb = .75f; + + cfg.m_force_all_dual_plane_chan_evals = false; + cfg.m_filter_by_pca_angles_flag = true; + + cfg.m_superbucket_max_to_retain[0] = 1; + cfg.m_superbucket_max_to_retain[1] = 4; + cfg.m_superbucket_max_to_retain[2] = 8; + + cfg.m_base_parts2 = 12; + cfg.m_base_parts3 = 8; + cfg.m_part2_fraction_to_keep = 2; + cfg.m_part3_fraction_to_keep = 2; + + cfg.m_final_shortlist_fraction[0] = 1.0f; + cfg.m_final_shortlist_fraction[1] = 1.0f; + cfg.m_final_shortlist_fraction[2] = 1.0f; + + cfg.m_final_shortlist_max_size[0] = 1; + cfg.m_final_shortlist_max_size[1] = 4; + cfg.m_final_shortlist_max_size[2] = 8; + + cfg.m_gradient_descent_flag = true; + cfg.m_polish_weights_flag = true; + cfg.m_qcd_enabled_flag = false; + + cfg.m_bucket_pruning_passes = false; + cfg.m_cem_enc_params.m_max_ls_passes = 1; + + // Second superpass + cfg.m_second_superpass_fract_to_recompress = .075f; + cfg.m_superbucket_max_to_retain_p2[0] = 2; + cfg.m_superbucket_max_to_retain_p2[1] = 8; + cfg.m_superbucket_max_to_retain_p2[2] = 16; + cfg.m_final_shortlist_max_size_p2[0] = 2; + cfg.m_final_shortlist_max_size_p2[1] = 8; + cfg.m_final_shortlist_max_size_p2[2] = 16; + cfg.m_base_parts2_p2 = 32; + cfg.m_base_parts3_p2 = 8; + cfg.m_force_all_dp_chans_p2 = false; + cfg.m_filter_by_pca_angles_flag_p2 = true; + + cfg.m_early_stop_wpsnr = 65.0f; + cfg.m_early_stop2_wpsnr = 60.0f; + break; + } + case 4: + { + cfg.m_second_superpass_refinement = true; + cfg.m_third_superpass_try_neighbors = true; + + cfg.m_subsets_enabled = true; + cfg.m_use_blue_contraction = true; + cfg.m_use_base_ofs = true; + + 
cfg.m_disable_rgb_dual_plane = false; + cfg.m_strong_dp_decorr_thresh_rgb = .75f; + + cfg.m_force_all_dual_plane_chan_evals = false; + cfg.m_filter_by_pca_angles_flag = true; + + cfg.m_superbucket_max_to_retain[0] = 1; + cfg.m_superbucket_max_to_retain[1] = 4; + cfg.m_superbucket_max_to_retain[2] = 8; + + cfg.m_base_parts2 = 8; + cfg.m_base_parts3 = 4; + cfg.m_part2_fraction_to_keep = 2; + cfg.m_part3_fraction_to_keep = 2; + + cfg.m_final_shortlist_fraction[0] = 1.0f; + cfg.m_final_shortlist_fraction[1] = 1.0f; + cfg.m_final_shortlist_fraction[2] = 1.0f; + + cfg.m_final_shortlist_max_size[0] = 1; + cfg.m_final_shortlist_max_size[1] = 4; + cfg.m_final_shortlist_max_size[2] = 8; + + cfg.m_gradient_descent_flag = true; + cfg.m_polish_weights_flag = true; + cfg.m_qcd_enabled_flag = false; + + cfg.m_bucket_pruning_passes = false; + cfg.m_cem_enc_params.m_max_ls_passes = 1; + + // Second superpass + cfg.m_second_superpass_fract_to_recompress = .075f; + cfg.m_superbucket_max_to_retain_p2[0] = 2; + cfg.m_superbucket_max_to_retain_p2[1] = 8; + cfg.m_superbucket_max_to_retain_p2[2] = 16; + cfg.m_final_shortlist_max_size_p2[0] = 2; + cfg.m_final_shortlist_max_size_p2[1] = 8; + cfg.m_final_shortlist_max_size_p2[2] = 16; + cfg.m_base_parts2_p2 = 32; + cfg.m_base_parts3_p2 = 8; + cfg.m_force_all_dp_chans_p2 = false; + cfg.m_filter_by_pca_angles_flag_p2 = true; + + cfg.m_early_stop_wpsnr = 65.0f; + cfg.m_early_stop2_wpsnr = 60.0f; + break; + } + default: + case 3: + { + cfg.m_second_superpass_refinement = true; + cfg.m_third_superpass_try_neighbors = true; + + cfg.m_subsets_enabled = true; + cfg.m_use_blue_contraction = true; + cfg.m_use_base_ofs = false; + + cfg.m_disable_rgb_dual_plane = false; + cfg.m_strong_dp_decorr_thresh_rgb = .75f; + + cfg.m_force_all_dual_plane_chan_evals = false; + cfg.m_filter_by_pca_angles_flag = true; + + cfg.m_superbucket_max_to_retain[0] = 1; + cfg.m_superbucket_max_to_retain[1] = 4; + cfg.m_superbucket_max_to_retain[2] = 8; + + cfg.m_base_parts2 = 
4; + cfg.m_base_parts3 = 2; + cfg.m_part2_fraction_to_keep = 2; + cfg.m_part3_fraction_to_keep = 2; + + cfg.m_final_shortlist_fraction[0] = 1.0f; + cfg.m_final_shortlist_fraction[1] = 1.0f; + cfg.m_final_shortlist_fraction[2] = 1.0f; + + cfg.m_final_shortlist_max_size[0] = 1; + cfg.m_final_shortlist_max_size[1] = 4; + cfg.m_final_shortlist_max_size[2] = 8; + + cfg.m_gradient_descent_flag = true; + cfg.m_polish_weights_flag = true; + cfg.m_qcd_enabled_flag = false; + + cfg.m_bucket_pruning_passes = false; + cfg.m_cem_enc_params.m_max_ls_passes = 1; + + // Second superpass + cfg.m_second_superpass_fract_to_recompress = .075f; + cfg.m_superbucket_max_to_retain_p2[0] = 2; + cfg.m_superbucket_max_to_retain_p2[1] = 8; + cfg.m_superbucket_max_to_retain_p2[2] = 16; + cfg.m_final_shortlist_max_size_p2[0] = 2; + cfg.m_final_shortlist_max_size_p2[1] = 8; + cfg.m_final_shortlist_max_size_p2[2] = 16; + cfg.m_base_parts2_p2 = 32; + cfg.m_base_parts3_p2 = 8; + cfg.m_force_all_dp_chans_p2 = false; + cfg.m_filter_by_pca_angles_flag_p2 = true; + + cfg.m_early_stop_wpsnr = 65.0f; + cfg.m_early_stop2_wpsnr = 60.0f; + break; + } + case 2: + { + // Level 2+ have subsets and RGB dual-plane enabled + cfg.m_second_superpass_refinement = false; + cfg.m_third_superpass_try_neighbors = true; + + cfg.m_subsets_enabled = true; + cfg.m_use_blue_contraction = true; + cfg.m_use_base_ofs = false; + cfg.m_disable_rgb_dual_plane = false; + + cfg.m_force_all_dual_plane_chan_evals = false; + cfg.m_filter_by_pca_angles_flag = true; + + cfg.m_superbucket_max_to_retain[0] = 1; + cfg.m_superbucket_max_to_retain[1] = 2; + cfg.m_superbucket_max_to_retain[2] = 3; + + cfg.m_base_parts2 = 1; + cfg.m_base_parts3 = 0; + cfg.m_part2_fraction_to_keep = 1; + cfg.m_part3_fraction_to_keep = 1; + + cfg.m_final_shortlist_fraction[0] = 1.0f; + cfg.m_final_shortlist_fraction[1] = 1.0f; + cfg.m_final_shortlist_fraction[2] = 1.0f; + + cfg.m_final_shortlist_max_size[0] = 1; + cfg.m_final_shortlist_max_size[1] = 2; + 
cfg.m_final_shortlist_max_size[2] = 3; + + cfg.m_gradient_descent_flag = false; + cfg.m_polish_weights_flag = true; + cfg.m_qcd_enabled_flag = false; + + cfg.m_bucket_pruning_passes = false; + cfg.m_cem_enc_params.m_max_ls_passes = 1; + + // Second superpass + cfg.m_second_superpass_fract_to_recompress = .04f; + cfg.m_second_pass_force_subsets_enabled = true; + cfg.m_superbucket_max_to_retain_p2[0] = 1; + cfg.m_superbucket_max_to_retain_p2[1] = 2; + cfg.m_superbucket_max_to_retain_p2[2] = 8; + cfg.m_final_shortlist_max_size_p2[0] = 1; + cfg.m_final_shortlist_max_size_p2[1] = 2; + cfg.m_final_shortlist_max_size_p2[2] = 8; + cfg.m_base_parts2_p2 = 16; + cfg.m_base_parts3_p2 = 0; + cfg.m_force_all_dp_chans_p2 = false; + cfg.m_filter_by_pca_angles_flag_p2 = true; + + cfg.m_early_stop_wpsnr = 45.0f; + cfg.m_early_stop2_wpsnr = 40.0f; + break; + } + case 1: + { + cfg.m_second_superpass_refinement = false; + cfg.m_third_superpass_try_neighbors = false; + + cfg.m_subsets_enabled = false; + cfg.m_use_blue_contraction = true; + cfg.m_use_base_ofs = false; + cfg.m_disable_rgb_dual_plane = true; + + cfg.m_force_all_dual_plane_chan_evals = false; + cfg.m_filter_by_pca_angles_flag = true; + + cfg.m_superbucket_max_to_retain[0] = 1; + cfg.m_superbucket_max_to_retain[1] = 1; + cfg.m_superbucket_max_to_retain[2] = 1; + + cfg.m_base_parts2 = 0; + cfg.m_base_parts3 = 0; + cfg.m_part2_fraction_to_keep = 1; + cfg.m_part3_fraction_to_keep = 1; + + cfg.m_final_shortlist_fraction[0] = 1.0f; + cfg.m_final_shortlist_fraction[1] = 1.0f; + cfg.m_final_shortlist_fraction[2] = 1.0f; + + cfg.m_final_shortlist_max_size[0] = 1; + cfg.m_final_shortlist_max_size[1] = 1; + cfg.m_final_shortlist_max_size[2] = 1; + + cfg.m_gradient_descent_flag = false; + cfg.m_polish_weights_flag = true; + cfg.m_qcd_enabled_flag = false; + + cfg.m_bucket_pruning_passes = false; + cfg.m_cem_enc_params.m_max_ls_passes = 1; + + cfg.m_early_stop_wpsnr = 45.0f; + cfg.m_early_stop2_wpsnr = 40.0f; + break; + } + case 0: + { 
+ cfg.m_second_superpass_refinement = false; + cfg.m_third_superpass_try_neighbors = false; + + cfg.m_subsets_enabled = false; + cfg.m_use_blue_contraction = true; + cfg.m_use_base_ofs = false; + cfg.m_disable_rgb_dual_plane = true; + + cfg.m_force_all_dual_plane_chan_evals = false; + cfg.m_filter_by_pca_angles_flag = true; + + cfg.m_superbucket_max_to_retain[0] = 1; + cfg.m_superbucket_max_to_retain[1] = 1; + cfg.m_superbucket_max_to_retain[2] = 1; + + cfg.m_base_parts2 = 0; + cfg.m_base_parts3 = 0; + cfg.m_part2_fraction_to_keep = 1; + cfg.m_part3_fraction_to_keep = 1; + + cfg.m_final_shortlist_fraction[0] = 1.0f; + cfg.m_final_shortlist_fraction[1] = 1.0f; + cfg.m_final_shortlist_fraction[2] = 1.0f; + + cfg.m_final_shortlist_max_size[0] = 1; + cfg.m_final_shortlist_max_size[1] = 1; + cfg.m_final_shortlist_max_size[2] = 1; + + cfg.m_gradient_descent_flag = false; + cfg.m_polish_weights_flag = false; + cfg.m_qcd_enabled_flag = false; + + cfg.m_bucket_pruning_passes = false; + cfg.m_cem_enc_params.m_max_ls_passes = 1; + + cfg.m_early_stop_wpsnr = 45.0f; + cfg.m_early_stop2_wpsnr = 40.0f; + break; + } + } +} + +#if BASISD_SUPPORT_KTX2_ZSTD +static bool zstd_compress(const uint8_t* pData, size_t data_len, uint8_vec& comp_data, int zstd_level) +{ + if (!data_len) + { + comp_data.resize(0); + return true; + } + + assert(pData); + + comp_data.resize(ZSTD_compressBound(data_len)); + + size_t result = ZSTD_compress(comp_data.data(), comp_data.size(), pData, data_len, zstd_level); + + if (ZSTD_isError(result)) + { + comp_data.resize(0); + return false; + } + + if (result > UINT32_MAX) + { + comp_data.resize(0); + return false; + } + + comp_data.resize(result); + return true; +} + +static bool zstd_compress(const bitwise_coder& coder, uint8_vec& comp_data, int zstd_level) +{ + return zstd_compress(coder.get_bytes().data(), coder.get_bytes().size(), comp_data, zstd_level); +} + +static bool zstd_compress(const uint8_vec& vec, uint8_vec& comp_data, int zstd_level) +{ + return 
zstd_compress(vec.data(), vec.size(), comp_data, zstd_level); +} + +static uint32_t encode_values(bitwise_coder& coder, uint32_t total_values, const uint8_t* pVals, uint32_t endpoint_range) +{ + const uint32_t MAX_VALS = 64; + uint32_t bit_values[MAX_VALS], tq_values[(MAX_VALS + 2) / 3]; + uint32_t total_tq_values = 0, tq_accum = 0, tq_mul = 1; + + assert((total_values) && (total_values <= MAX_VALS)); + + const uint32_t ep_bits = astc_helpers::g_ise_range_table[endpoint_range][0]; + const uint32_t ep_trits = astc_helpers::g_ise_range_table[endpoint_range][1]; + const uint32_t ep_quints = astc_helpers::g_ise_range_table[endpoint_range][2]; + + for (uint32_t i = 0; i < total_values; i++) + { + uint32_t val = pVals[i]; + + uint32_t bits = val & ((1 << ep_bits) - 1); + uint32_t tq = val >> ep_bits; + + bit_values[i] = bits; + + if (ep_trits) + { + assert(tq < 3); + tq_accum += tq * tq_mul; + tq_mul *= 3; + if (tq_mul == 243) + { + assert(total_tq_values < BASISU_ARRAY_SIZE(tq_values)); + tq_values[total_tq_values++] = tq_accum; + tq_accum = 0; + tq_mul = 1; + } + } + else if (ep_quints) + { + assert(tq < 5); + tq_accum += tq * tq_mul; + tq_mul *= 5; + if (tq_mul == 125) + { + assert(total_tq_values < BASISU_ARRAY_SIZE(tq_values)); + tq_values[total_tq_values++] = tq_accum; + tq_accum = 0; + tq_mul = 1; + } + } + } + + uint32_t total_bits_output = 0; + + for (uint32_t i = 0; i < total_tq_values; i++) + { + const uint32_t num_bits = ep_trits ? 
8 : 7; + coder.put_bits(tq_values[i], num_bits); + total_bits_output += num_bits; + } + + if (tq_mul > 1) + { + uint32_t num_bits; + if (ep_trits) + { + if (tq_mul == 3) + num_bits = 2; + else if (tq_mul == 9) + num_bits = 4; + else if (tq_mul == 27) + num_bits = 5; + else //if (tq_mul == 81) + num_bits = 7; + } + else + { + if (tq_mul == 5) + num_bits = 3; + else //if (tq_mul == 25) + num_bits = 5; + } + coder.put_bits(tq_accum, num_bits); + total_bits_output += num_bits; + } + + for (uint32_t i = 0; i < total_values; i++) + { + coder.put_bits(bit_values[i], ep_bits); + total_bits_output += ep_bits; + } + + return total_bits_output; +} + +static bool compress_image_full_zstd( + const image& orig_img, uint8_vec& comp_data, vector2D& coded_blocks, + const astc_ldr_encode_config& global_cfg, + job_pool& job_pool, + ldr_astc_block_encode_image_high_level_config& enc_cfg, const ldr_astc_block_encode_image_output& enc_out) +{ + BASISU_NOTE_UNUSED(job_pool); + + const uint32_t width = orig_img.get_width(), height = orig_img.get_height(); + + const uint32_t block_width = global_cfg.m_astc_block_width; + const uint32_t block_height = global_cfg.m_astc_block_height; + const uint32_t total_block_pixels = block_width * block_height; + + const uint32_t total_pixels = width * height; + const uint32_t num_blocks_x = (width + block_width - 1) / block_width; + const uint32_t num_blocks_y = (height + block_height - 1) / block_height; + const uint32_t total_blocks = num_blocks_x * num_blocks_y; + const bool has_alpha = orig_img.has_alpha(); + + // Mode + uint8_vec mode_bytes; + mode_bytes.reserve(8192); + + bitwise_coder raw_bits; + raw_bits.init(8192); + + uint8_vec solid_dpcm_bytes; + solid_dpcm_bytes.reserve(8192); + + // Endpoints + uint8_vec endpoint_dpcm_reuse_indices; + endpoint_dpcm_reuse_indices.reserve(8192); + + bitwise_coder use_bc_bits; + use_bc_bits.init(1024); + + bitwise_coder endpoint_dpcm_3bit; + endpoint_dpcm_3bit.init(1024); + + bitwise_coder endpoint_dpcm_4bit; 
+ endpoint_dpcm_4bit.init(1024); + + uint8_vec endpoint_dpcm_5bit; + endpoint_dpcm_5bit.reserve(8192); + + uint8_vec endpoint_dpcm_6bit; + endpoint_dpcm_6bit.reserve(8192); + + uint8_vec endpoint_dpcm_7bit; + endpoint_dpcm_7bit.reserve(8192); + + uint8_vec endpoint_dpcm_8bit; + endpoint_dpcm_8bit.reserve(8192); + + // Weights + bitwise_coder mean0_bits; + uint8_vec mean1_bytes; + uint8_vec run_bytes; + uint8_vec coeff_bytes; + bitwise_coder sign_bits; + bitwise_coder weight2_bits; + bitwise_coder weight3_bits; + bitwise_coder weight4_bits; + uint8_vec weight8_bits; + + mean0_bits.init(1024); + mean1_bytes.reserve(1024); + run_bytes.reserve(8192); + coeff_bytes.reserve(8192); + sign_bits.init(1024); + weight2_bits.init(1024); + weight3_bits.init(1024); + weight4_bits.init(1024); + weight8_bits.reserve(8192); + + const float replacement_min_psnr = has_alpha ? global_cfg.m_replacement_min_psnr_alpha : global_cfg.m_replacement_min_psnr; + const float psnr_trial_diff_thresh = has_alpha ? global_cfg.m_psnr_trial_diff_thresh_alpha : global_cfg.m_psnr_trial_diff_thresh; + const float psnr_trial_diff_thresh_edge = has_alpha ? 
global_cfg.m_psnr_trial_diff_thresh_edge_alpha : global_cfg.m_psnr_trial_diff_thresh_edge; + const float total_comp_weights = enc_cfg.m_cem_enc_params.get_total_comp_weights(); + + basist::astc_ldr_t::grid_weight_dct grid_dct; + grid_dct.init(block_width, block_height); + + coded_blocks.resize(num_blocks_x, num_blocks_y); + for (uint32_t y = 0; y < num_blocks_y; y++) + for (uint32_t x = 0; x < num_blocks_x; x++) + coded_blocks(x, y).clear(); + + vector2D prev_block_states(num_blocks_x, num_blocks_y); + + int part2_hash[basist::astc_ldr_t::PART_HASH_SIZE]; + std::fill(part2_hash, part2_hash + basist::astc_ldr_t::PART_HASH_SIZE, -1); + + int part3_hash[basist::astc_ldr_t::PART_HASH_SIZE]; + std::fill(part3_hash, part3_hash + basist::astc_ldr_t::PART_HASH_SIZE, -1); + + int tm_hash[basist::astc_ldr_t::TM_HASH_SIZE]; + std::fill(tm_hash, tm_hash + basist::astc_ldr_t::TM_HASH_SIZE, -1); + + const bool use_run_commands_global_enable = true; + const bool endpoint_dpcm_global_enable = true; + + uint32_t cur_run_len = 0; + + uint32_t total_runs = 0, total_run_blocks = 0, total_nonrun_blocks = 0; + uint32_t total_lossy_replacements = 0; + uint32_t total_solid_blocks = 0; + uint32_t total_full_reuse_commands = 0; + uint32_t total_raw_commands = 0; + uint32_t total_reuse_full_cfg_emitted = 0; + uint32_t total_full_cfg_emitted = 0; + uint32_t num_part_hash_probes = 0; + uint32_t num_part_hash_hits = 0; + uint32_t total_used_endpoint_dpcm = 0; + uint32_t total_used_endpoint_raw = 0; + uint32_t total_used_dct = 0; + uint32_t total_used_weight_dpcm = 0; + uint32_t num_tm_hash_hits = 0, num_tm_hash_probes = 0; + + raw_bits.put_bits(basist::astc_ldr_t::FULL_ZSTD_HEADER_MARKER, basist::astc_ldr_t::FULL_ZSTD_HEADER_MARKER_BITS); + + const int block_dim_index = astc_helpers::find_astc_block_size_index(block_width, block_height); + assert((block_dim_index >= 0) && (block_dim_index < (int)astc_helpers::NUM_ASTC_BLOCK_SIZES)); + + raw_bits.put_bits(block_dim_index, 4); + + 
raw_bits.put_bits(enc_cfg.m_cem_enc_params.m_decode_mode_srgb, 1); + + raw_bits.put_bits(width, 16); + raw_bits.put_bits(height, 16); + + raw_bits.put_bits(has_alpha, 1); + + raw_bits.put_bits(enc_cfg.m_use_dct, 1); + if (enc_cfg.m_use_dct) + { + const int int_q = clamp((int)std::round(global_cfg.m_dct_quality * 2.0f), 0, 200); + raw_bits.put_bits(int_q, 8); + } + + const uint32_t FULL_ZSTD_MAX_RUN_LEN = 64; + + for (uint32_t by = 0; by < num_blocks_y; by++) + { + //const uint32_t base_y = by * block_height; + + for (uint32_t bx = 0; bx < num_blocks_x; bx++) + { + //const uint32_t base_x = bx * block_width; + //raw_bits.put_bits(0xA1, 8); + + basist::astc_ldr_t::prev_block_state_full_zstd& prev_state = prev_block_states(bx, by); + + const basist::astc_ldr_t::prev_block_state_full_zstd* pLeft_state = bx ? &prev_block_states(bx - 1, by) : nullptr; + const basist::astc_ldr_t::prev_block_state_full_zstd* pUpper_state = by ? &prev_block_states(bx, by - 1) : nullptr; + const basist::astc_ldr_t::prev_block_state_full_zstd* pDiag_state = (bx && by) ? &prev_block_states(bx - 1, by - 1) : nullptr; + + const ldr_astc_block_encode_image_output::block_info& blk_info = enc_out.m_image_block_info(bx, by); + + uint32_t best_packed_out_block_index = blk_info.m_packed_out_block_index; + + // check for run + if ((use_run_commands_global_enable) && (bx || by)) + { + const encode_block_output& blk_out = blk_info.m_out_blocks[best_packed_out_block_index]; + const astc_helpers::log_astc_block& cur_log_blk = blk_out.m_log_blk; + + const astc_helpers::log_astc_block& prev_log_blk = bx ? coded_blocks(bx - 1, by) : coded_blocks(0, by - 1); + const basist::astc_ldr_t::prev_block_state_full_zstd* pPrev_block_state = bx ? pLeft_state : pUpper_state; + + assert(pPrev_block_state); + + if (compare_log_blocks_for_equality(cur_log_blk, prev_log_blk)) + { + // Left or upper is exactly the same logical block, so expand the run. 
+ cur_run_len++; + + // Accept the previous block (left or upper) as if it's been coded normally. + + coded_blocks(bx, by) = prev_log_blk; + + //prev_state.m_was_solid_color = pPrev_block_state->m_was_solid_color; + prev_state.m_tm_index = pPrev_block_state->m_tm_index; + //prev_state.m_base_cem_index = pPrev_block_state->m_base_cem_index; + + if (cur_run_len == FULL_ZSTD_MAX_RUN_LEN) + { + total_runs++; + total_run_blocks += cur_run_len; + mode_bytes.push_back((uint8_t)((uint32_t)basist::astc_ldr_t::xuastc_zstd_mode::cMODE_RUN | ((cur_run_len - 1) << 2))); + cur_run_len = 0; + } + + continue; + } + } + + if (cur_run_len) + { + assert(cur_run_len <= FULL_ZSTD_MAX_RUN_LEN); + + total_runs++; + total_run_blocks += cur_run_len; + mode_bytes.push_back((uint8_t)((uint32_t)basist::astc_ldr_t::xuastc_zstd_mode::cMODE_RUN | ((cur_run_len - 1) << 2))); + cur_run_len = 0; + } + + total_nonrun_blocks++; + + // TODO: Move this to a prepass that's shared between arith/zstd + const float ref_wmse = (float)blk_info.m_out_blocks[best_packed_out_block_index].m_sse / (total_comp_weights * (float)total_block_pixels); + const float ref_wpsnr = (ref_wmse > 1e-5f) ? 20.0f * log10f(255.0f / sqrtf(ref_wmse)) : 10000.0f; + + if ((global_cfg.m_lossy_supercompression) && (ref_wpsnr >= replacement_min_psnr) && + (!blk_info.m_out_blocks[blk_info.m_packed_out_block_index].m_log_blk.m_solid_color_flag_ldr)) + { + const float psnr_thresh = blk_info.m_strong_edges ? 
psnr_trial_diff_thresh_edge : psnr_trial_diff_thresh; + + float best_alt_wpsnr = 0.0f; + bool found_alternative = false; + + // Pass: 0 consider full config+part ID endpoint reuse + // Pass: 1 fall back to just full config+part ID reuse (no endpoints) + for (uint32_t pass = 0; pass < 2; pass++) + { + // Iterate through all available alternative candidates + for (uint32_t out_block_iter = 0; out_block_iter < blk_info.m_out_blocks.size(); out_block_iter++) + { + if (out_block_iter == blk_info.m_packed_out_block_index) + continue; + + const float trial_wmse = (float)blk_info.m_out_blocks[out_block_iter].m_sse / (total_comp_weights * (float)total_block_pixels); + const float trial_wpsnr = (trial_wmse > 1e-5f) ? 20.0f * log10f(255.0f / sqrtf(trial_wmse)) : 10000.0f; + + // Reject if PSNR too low + if (trial_wpsnr < (ref_wpsnr - psnr_thresh)) + continue; + + // Reject if inferior than best found so far + if (trial_wpsnr < best_alt_wpsnr) + continue; + + const astc_helpers::log_astc_block& trial_log_blk = blk_info.m_out_blocks[out_block_iter].m_log_blk; + + if (trial_log_blk.m_solid_color_flag_ldr) + continue; + + // Examine nearby neighbors + for (uint32_t i = 0; i < basist::astc_ldr_t::cMaxConfigReuseNeighbors; i++) + { + int dx = 0, dy = 0; + switch (i) + { + case 0: dx = -1; break; + case 1: dy = -1; break; + case 2: dx = -1; dy = -1; break; + default: assert(0); break; + } + + const int n_bx = bx + dx, n_by = by + dy; + if ((n_bx < 0) || (n_by < 0)) + continue; + + astc_helpers::log_astc_block& neighbor_log_blk = coded_blocks(n_bx, n_by); + + if (neighbor_log_blk.m_solid_color_flag_ldr) + continue; + + bool accept_flag = false; + if (pass == 0) + { + // prefer full config+endpoint equality first + accept_flag = compare_log_block_configs_and_endpoints(trial_log_blk, neighbor_log_blk); + } + else + { + // next check for just config equality + accept_flag = compare_log_block_configs(trial_log_blk, neighbor_log_blk); + } + + if (accept_flag) + { + best_alt_wpsnr = 
trial_wpsnr; + best_packed_out_block_index = out_block_iter; + found_alternative = true; + break; + } + + } // i + + } // out_block_iter + + if (found_alternative) + break; + + } // pass + + if (best_packed_out_block_index != blk_info.m_packed_out_block_index) + total_lossy_replacements++; + + } // global_cfg.m_lossy_supercompression + + const encode_block_output& blk_out = blk_info.m_out_blocks[best_packed_out_block_index]; + + astc_helpers::log_astc_block& cur_log_blk = coded_blocks(bx, by); + + cur_log_blk = blk_out.m_log_blk; + + // Solid color/void extent + if (blk_out.m_trial_mode_index < 0) + { + assert(cur_log_blk.m_solid_color_flag_ldr); + + total_solid_blocks++; + + mode_bytes.push_back((uint8_t)basist::astc_ldr_t::xuastc_zstd_mode::cMODE_SOLID); + + uint32_t cur_solid_color[4]; + for (uint32_t i = 0; i < 4; i++) + cur_solid_color[i] = blk_out.m_log_blk.m_solid_color[i] >> 8; + + uint32_t prev_solid_color[4] = { 0 }; + + const uint32_t num_comps = has_alpha ? 4 : 3; + + astc_helpers::log_astc_block* pPrev_log_blk = bx ? &coded_blocks(bx - 1, by) : (by ? &coded_blocks(bx, by - 1) : nullptr); + if (pPrev_log_blk) + { + if (pPrev_log_blk->m_solid_color_flag_ldr) + { + prev_solid_color[0] = pPrev_log_blk->m_solid_color[0] >> 8; + prev_solid_color[1] = pPrev_log_blk->m_solid_color[1] >> 8; + prev_solid_color[2] = pPrev_log_blk->m_solid_color[2] >> 8; + prev_solid_color[3] = pPrev_log_blk->m_solid_color[3] >> 8; + } + else + { + // Decode previous block's first CEM, use the halfway point as the predictor. 
+ color_rgba prev_l, prev_h; + decode_endpoints(pPrev_log_blk->m_color_endpoint_modes[0], pPrev_log_blk->m_endpoints, pPrev_log_blk->m_endpoint_ise_range, prev_l, prev_h); + + prev_solid_color[0] = (prev_l[0] + prev_h[0] + 1) >> 1; + prev_solid_color[1] = (prev_l[1] + prev_h[1] + 1) >> 1; + prev_solid_color[2] = (prev_l[2] + prev_h[2] + 1) >> 1; + prev_solid_color[3] = (prev_l[3] + prev_h[3] + 1) >> 1; + } + } + + for (uint32_t i = 0; i < num_comps; i++) + { + const uint32_t delta = (cur_solid_color[i] - prev_solid_color[i]) & 0xFF; + solid_dpcm_bytes.push_back((uint8_t)delta); + } + + //prev_state.m_was_solid_color = true; + prev_state.m_tm_index = -1; + //prev_state.m_base_cem_index = astc_helpers::CEM_LDR_RGB_DIRECT; + + continue; + } + + assert(!cur_log_blk.m_solid_color_flag_ldr); + + int full_cfg_endpoint_reuse_index = -1; + + for (uint32_t i = 0; i < basist::astc_ldr_t::cMaxConfigReuseNeighbors; i++) + { + int dx = 0, dy = 0; + switch (i) + { + case 0: dx = -1; break; + case 1: dy = -1; break; + case 2: dx = -1; dy = -1; break; + default: assert(0); break; + } + + const int n_bx = bx + dx, n_by = by + dy; + if ((n_bx < 0) || (n_by < 0)) + continue; + + astc_helpers::log_astc_block& neighbor_log_blk = coded_blocks(n_bx, n_by); + + if (neighbor_log_blk.m_solid_color_flag_ldr) + continue; + + if (compare_log_block_configs_and_endpoints(cur_log_blk, neighbor_log_blk)) + { + full_cfg_endpoint_reuse_index = i; + break; + } + } // i + + if (full_cfg_endpoint_reuse_index >= 0) + { + // Reused full config, part ID and endpoint values from an immediate neighbor + mode_bytes.push_back((uint8_t)((uint32_t)basist::astc_ldr_t::xuastc_zstd_mode::cMODE_REUSE_CFG_ENDPOINTS_LEFT + (full_cfg_endpoint_reuse_index << 2))); + + total_full_reuse_commands++; + + const basist::astc_ldr_t::prev_block_state_full_zstd* pReused_cfg_state = nullptr; + + switch (full_cfg_endpoint_reuse_index) + { + case 0: pReused_cfg_state = pLeft_state; break; + case 1: pReused_cfg_state = pUpper_state; 
break; + case 2: pReused_cfg_state = pDiag_state; break; + default: assert(0); break; + } + + if (!pReused_cfg_state) + { + assert(0); + fmt_error_printf("encoding internal failure\n"); + return false; + } + + assert(pReused_cfg_state->m_tm_index == blk_out.m_trial_mode_index); + + prev_state.m_tm_index = blk_out.m_trial_mode_index; + } + else + { + // No nearby full config+part ID+endpoint reuse, so send raw command + // Must send endpoints too. + total_raw_commands++; + + // Format of mode byte (UD bit used in modes other than raw) + // 7 6 5 4 3 2 1 0 + // UD C ED HH BO I I M + + // MMM=mode + // II=neighbor reuse index [0,3], 3=no reuse + // BO=base offset flag + // HH=partition hash hit flag + // ED=endpoint DPCM flag + // C=config hash table hit + // UD=use DCT flag + + mode_bytes.push_back((uint8_t)basist::astc_ldr_t::xuastc_zstd_mode::cMODE_RAW); + + const uint32_t cur_actual_cem = cur_log_blk.m_color_endpoint_modes[0]; + const uint32_t total_endpoint_vals = astc_helpers::get_num_cem_values(cur_actual_cem); + + // DO NOT use tm.m_cem because the encoder may have selected a base+ofs variant instead. Use cur_actual_cem. 
+ const basist::astc_ldr_t::trial_mode& tm = enc_out.m_encoder_trial_modes[blk_out.m_trial_mode_index]; + + // Check for config+part ID neighbor reuse (partial refuse) + int neighbor_cfg_match_index = -1; + for (uint32_t i = 0; i < basist::astc_ldr_t::cMaxConfigReuseNeighbors; i++) + { + const basist::astc_ldr_t::prev_block_state_full_zstd* pNeighbor_state = nullptr; + + int dx = 0, dy = 0; + switch (i) + { + case 0: dx = -1; pNeighbor_state = pLeft_state; break; + case 1: dy = -1; pNeighbor_state = pUpper_state; break; + case 2: dx = -1; dy = -1; pNeighbor_state = pDiag_state; break; + default: assert(0); break; + } + + if (!pNeighbor_state) + continue; + + const int n_bx = bx + dx, n_by = by + dy; + assert((n_bx >= 0) && (n_by >= 0)); + + astc_helpers::log_astc_block& neighbor_log_blk = coded_blocks(n_bx, n_by); + + if (pNeighbor_state->m_tm_index != blk_out.m_trial_mode_index) + continue; + + if (neighbor_log_blk.m_color_endpoint_modes[0] != cur_log_blk.m_color_endpoint_modes[0]) + continue; + + if (neighbor_log_blk.m_partition_id != cur_log_blk.m_partition_id) + continue; + + assert(neighbor_log_blk.m_dual_plane == cur_log_blk.m_dual_plane); + assert(neighbor_log_blk.m_color_component_selector == cur_log_blk.m_color_component_selector); + assert(neighbor_log_blk.m_num_partitions == cur_log_blk.m_num_partitions); + assert(neighbor_log_blk.m_grid_width == cur_log_blk.m_grid_width); + assert(neighbor_log_blk.m_grid_height == cur_log_blk.m_grid_height); + assert(neighbor_log_blk.m_endpoint_ise_range == cur_log_blk.m_endpoint_ise_range); + assert(neighbor_log_blk.m_weight_ise_range == cur_log_blk.m_weight_ise_range); + + neighbor_cfg_match_index = i; + break; + } + + if (neighbor_cfg_match_index >= 0) + { + // Partial reuse (config+partition ID, but not endpoints). 
+ // OR 2-bits into the mode byte + mode_bytes.back() |= (uint8_t)(neighbor_cfg_match_index << 1); + + const basist::astc_ldr_t::prev_block_state_full_zstd* pReused_cfg_state = nullptr; + + switch (neighbor_cfg_match_index) + { + case 0: pReused_cfg_state = pLeft_state; break; + case 1: pReused_cfg_state = pUpper_state; break; + case 2: pReused_cfg_state = pDiag_state; break; + default: assert(0); break; + } + + if (!pReused_cfg_state) + { + assert(0); + fmt_error_printf("encoding internal failure\n"); + return false; + } + + assert(pReused_cfg_state->m_tm_index == blk_out.m_trial_mode_index); + + prev_state.m_tm_index = blk_out.m_trial_mode_index; + + total_reuse_full_cfg_emitted++; + } + else + { + // No reuse - must send config, so pack it. Then send endpoints. + total_full_cfg_emitted++; + + // OR 2-bits into the mode byte (so now 5 bits total) + mode_bytes.back() |= (uint8_t)(((uint32_t)basist::astc_ldr_t::cMaxConfigReuseNeighbors) << 1); + + // Pack tm index (ASTC base config) + { + num_tm_hash_probes++; + + uint32_t tm_h = basist::astc_ldr_t::tm_hash_index(blk_out.m_trial_mode_index); + + if (tm_hash[tm_h] == blk_out.m_trial_mode_index) + { + num_tm_hash_hits++; + + mode_bytes.back() |= (uint8_t)basist::astc_ldr_t::XUASTC_LDR_MODE_BYTE_TM_HASH_HIT_FLAG; // tm hash hit flag + + raw_bits.put_bits(tm_h, basist::astc_ldr_t::TM_HASH_BITS); + } + else + { + raw_bits.put_truncated_binary(blk_out.m_trial_mode_index, (uint32_t)enc_out.m_encoder_trial_modes.size()); + + tm_hash[tm_h] = blk_out.m_trial_mode_index; + } + } + + prev_state.m_tm_index = blk_out.m_trial_mode_index; + + // Send base_ofs bit if the tm is direct + if ((tm.m_cem == astc_helpers::CEM_LDR_RGB_DIRECT) || (tm.m_cem == astc_helpers::CEM_LDR_RGBA_DIRECT)) + { + const bool is_base_ofs = (cur_log_blk.m_color_endpoint_modes[0] == astc_helpers::CEM_LDR_RGB_BASE_PLUS_OFFSET) || + (cur_log_blk.m_color_endpoint_modes[0] == astc_helpers::CEM_LDR_RGBA_BASE_PLUS_OFFSET); + + if (is_base_ofs) + 
mode_bytes.back() |= basist::astc_ldr_t::XUASTC_LDR_MODE_BYTE_IS_BASE_OFS_FLAG; // base_ofs bit + } + + if (tm.m_num_parts > 1) + { + // Send unique part pattern ID + const astc_ldr::partitions_data* pPart_data = (tm.m_num_parts == 2) ? &enc_out.m_part_data_p2 : &enc_out.m_part_data_p3; + + const uint32_t astc_pat_index = cur_log_blk.m_partition_id; + const uint32_t unique_pat_index = pPart_data->m_part_seed_to_unique_index[astc_pat_index]; + const uint32_t total_unique_indices = pPart_data->m_total_unique_patterns; + assert(unique_pat_index < total_unique_indices); + + num_part_hash_probes++; + + int* pPart_hash = (tm.m_num_parts == 2) ? part2_hash : part3_hash; + + const uint32_t h = basist::astc_ldr_t::part_hash_index(unique_pat_index); + + if (pPart_hash[h] != (int)unique_pat_index) + { +#if defined(_DEBUG) || defined(DEBUG) + // sanity + for (uint32_t i = 0; i < basist::astc_ldr_t::PART_HASH_SIZE; i++) + { + assert(pPart_hash[i] != (int)unique_pat_index); + } +#endif + + raw_bits.put_truncated_binary(unique_pat_index, total_unique_indices); + } + else + { + num_part_hash_hits++; + + mode_bytes.back() |= basist::astc_ldr_t::XUASTC_LDR_MODE_BYTE_PART_HASH_HIT; // hash pat_index hit bit + raw_bits.put_bits(h, basist::astc_ldr_t::PART_HASH_BITS); + } + + pPart_hash[basist::astc_ldr_t::part_hash_index(unique_pat_index)] = unique_pat_index; + } + } + + // Send endpoints + const int num_endpoint_levels = astc_helpers::get_ise_levels(cur_log_blk.m_endpoint_ise_range); + const auto& endpoint_ise_to_rank = astc_helpers::g_dequant_tables.get_endpoint_tab(cur_log_blk.m_endpoint_ise_range).m_ISE_to_rank; + + bool endpoints_use_bc[astc_helpers::MAX_PARTITIONS] = { false }; + + if (astc_helpers::cem_supports_bc(cur_actual_cem)) + { + for (uint32_t part_iter = 0; part_iter < tm.m_num_parts; part_iter++) + { + const bool cur_uses_bc = astc_helpers::used_blue_contraction(cur_actual_cem, cur_log_blk.m_endpoints + part_iter * total_endpoint_vals, 
cur_log_blk.m_endpoint_ise_range); + + endpoints_use_bc[part_iter] = cur_uses_bc; + + } // part_iter + } + + int best_reuse_bx = -1, best_reuse_by = -1; + uint32_t best_reuse_index = 0; + const astc_helpers::log_astc_block* pEndpoint_pred_log_blk = nullptr; + + if (endpoint_dpcm_global_enable) + { + int64_t best_trial_delta2 = INT64_MAX; + float best_trial_bits = BIG_FLOAT_VAL; + + // TODO: Decide if DPCM is even worth it. + const float N = (float)(total_endpoint_vals * tm.m_num_parts); + + for (uint32_t reuse_index = 0; reuse_index < basist::astc_6x6_hdr::NUM_REUSE_XY_DELTAS; reuse_index++) + { + const int rx = (int)bx + basist::astc_6x6_hdr::g_reuse_xy_deltas[reuse_index].m_x; + const int ry = (int)by + basist::astc_6x6_hdr::g_reuse_xy_deltas[reuse_index].m_y; + if ((rx < 0) || (ry < 0) || (rx >= (int)num_blocks_x) || (ry >= (int)num_blocks_y)) + continue; + + const astc_helpers::log_astc_block* pTrial_log_blk = &coded_blocks(rx, ry); + if (pTrial_log_blk->m_solid_color_flag_ldr) + continue; + + uint8_t trial_predicted_endpoints[astc_helpers::MAX_PARTITIONS][astc_helpers::MAX_CEM_ENDPOINT_VALS] = { }; + + uint32_t part_iter; + for (part_iter = 0; part_iter < tm.m_num_parts; part_iter++) + { + const bool always_repack_flag = false; + bool blue_contraction_clamped_flag = false, base_ofs_clamped_flag = false; + + bool conv_status = basist::astc_ldr_t::convert_endpoints_across_cems( + pTrial_log_blk->m_color_endpoint_modes[0], pTrial_log_blk->m_endpoint_ise_range, pTrial_log_blk->m_endpoints, + cur_actual_cem, cur_log_blk.m_endpoint_ise_range, trial_predicted_endpoints[part_iter], + always_repack_flag, + endpoints_use_bc[part_iter], false, + blue_contraction_clamped_flag, base_ofs_clamped_flag); + + if (!conv_status) + break; + } // part_iter + + if (part_iter < tm.m_num_parts) + continue; // failed + + int64_t trial_endpoint_delta2 = 0; + for (part_iter = 0; part_iter < tm.m_num_parts; part_iter++) + { + for (uint32_t val_iter = 0; val_iter < total_endpoint_vals; 
val_iter++) + { + int cur_e_rank = endpoint_ise_to_rank[cur_log_blk.m_endpoints[part_iter * total_endpoint_vals + val_iter]]; + int prev_e_rank = endpoint_ise_to_rank[trial_predicted_endpoints[part_iter][val_iter]]; + + int e_delta = cur_e_rank - prev_e_rank; + + trial_endpoint_delta2 += e_delta * e_delta; + + } // val_iter + + } // part_iter + + const float mse = (float)trial_endpoint_delta2 / N; + + // Gaussian entropy estimate - precomputed 0.5 * log2(2*pi*e) = ~2.0470956f + const float k_const = 2.0470956f; + + float bits_per_sym = 0.5f * log2f(basisu::maximum(mse, 1e-9f)) + k_const; + + bits_per_sym = clamp(bits_per_sym, 0.05f, 8.0f); + + // total est bits for this block’s endpoints + float total_est_bits = bits_per_sym * N; + + if (total_est_bits < best_trial_bits) + { + best_trial_delta2 = trial_endpoint_delta2; + best_trial_bits = total_est_bits; + + best_reuse_bx = rx; + best_reuse_by = ry; + best_reuse_index = reuse_index; + + if (!best_trial_delta2) + break; + } + + } // reuse_index + + if (best_reuse_bx >= 0) + { + pEndpoint_pred_log_blk = &coded_blocks(best_reuse_bx, best_reuse_by); + + assert(!pEndpoint_pred_log_blk->m_solid_color_flag_ldr); + } + + } // if (endpoint_dpcm_global_enable) + + uint8_t predicted_endpoints[astc_helpers::MAX_PARTITIONS][astc_helpers::MAX_CEM_ENDPOINT_VALS] = { }; + + bool use_dpcm_endpoints = false; + + if (pEndpoint_pred_log_blk) + { + use_dpcm_endpoints = true; + + assert(cur_log_blk.m_num_partitions == tm.m_num_parts); + + for (uint32_t part_iter = 0; part_iter < tm.m_num_parts; part_iter++) + { + const bool always_repack_flag = false; + bool blue_contraction_clamped_flag = false, base_ofs_clamped_flag = false; + + bool conv_status = basist::astc_ldr_t::convert_endpoints_across_cems( + pEndpoint_pred_log_blk->m_color_endpoint_modes[0], pEndpoint_pred_log_blk->m_endpoint_ise_range, pEndpoint_pred_log_blk->m_endpoints, + cur_actual_cem, cur_log_blk.m_endpoint_ise_range, predicted_endpoints[part_iter], + always_repack_flag, 
+ endpoints_use_bc[part_iter], false, + blue_contraction_clamped_flag, base_ofs_clamped_flag); + + if (!conv_status) + { + // In practice, should never happen + use_dpcm_endpoints = false; + break; + } + } + } + + // TODO: Decide what is cheaper, endpoint DPCM vs. raw + + if (use_dpcm_endpoints) + { + // DPCM flag bit + mode_bytes.back() |= basist::astc_ldr_t::XUASTC_LDR_MODE_BYTE_DPCM_ENDPOINTS_FLAG; + + endpoint_dpcm_reuse_indices.push_back((uint8_t)best_reuse_index); + + if (astc_helpers::cem_supports_bc(cur_actual_cem)) + { + for (uint32_t part_iter = 0; part_iter < tm.m_num_parts; part_iter++) + { + use_bc_bits.put_bits(endpoints_use_bc[part_iter], 1); + + } // part_iter + } + + for (uint32_t part_iter = 0; part_iter < tm.m_num_parts; part_iter++) + { + for (uint32_t val_iter = 0; val_iter < total_endpoint_vals; val_iter++) + { + int cur_e_rank = endpoint_ise_to_rank[cur_log_blk.m_endpoints[part_iter * total_endpoint_vals + val_iter]]; + int prev_e_rank = endpoint_ise_to_rank[predicted_endpoints[part_iter][val_iter]]; + + int e_val = imod(cur_e_rank - prev_e_rank, num_endpoint_levels); + + if (num_endpoint_levels <= 8) + endpoint_dpcm_3bit.put_bits(e_val, 4); + else if (num_endpoint_levels <= 16) + endpoint_dpcm_4bit.put_bits(e_val, 4); + else if (num_endpoint_levels <= 32) + endpoint_dpcm_5bit.push_back((uint8_t)e_val); + else if (num_endpoint_levels <= 64) + endpoint_dpcm_6bit.push_back((uint8_t)e_val); + else if (num_endpoint_levels <= 128) + endpoint_dpcm_7bit.push_back((uint8_t)e_val); + else if (num_endpoint_levels <= 256) + endpoint_dpcm_8bit.push_back((uint8_t)e_val); + + } // val_iter + + } // part_iter + + total_used_endpoint_dpcm++; + } + else + { + encode_values(raw_bits, tm.m_num_parts * total_endpoint_vals, cur_log_blk.m_endpoints, cur_log_blk.m_endpoint_ise_range); + + total_used_endpoint_raw++; + } // if (use_dpcm_endpoints) + + } // if (full_cfg_endpoint_reuse_index >= 0) + + // ------------------------------------ Send weights + + const 
uint32_t total_planes = cur_log_blk.m_dual_plane ? 2 : 1; + const uint32_t total_weights = cur_log_blk.m_grid_width * cur_log_blk.m_grid_height; + + const int num_weight_levels = astc_helpers::get_ise_levels(cur_log_blk.m_weight_ise_range); + const auto& weight_ise_to_rank = astc_helpers::g_dequant_tables.get_weight_tab(cur_log_blk.m_weight_ise_range).m_ISE_to_rank; + + bool use_dct = enc_cfg.m_use_dct; + + // TODO - tune this threshold + const uint32_t SWITCH_TO_DPCM_NUM_COEFF_THRESH = (cur_log_blk.m_grid_width * cur_log_blk.m_grid_height * 45 + 64) >> 7; + + if (use_dct) + { + for (uint32_t plane_iter = 0; plane_iter < total_planes; plane_iter++) + { + const basist::astc_ldr_t::dct_syms& syms = blk_out.m_packed_dct_plane_data[plane_iter]; + if (syms.m_max_coeff_mag > basist::astc_ldr_t::DCT_MAX_ARITH_COEFF_MAG) + { + use_dct = false; + break; + } + + if (syms.m_coeffs.size() > SWITCH_TO_DPCM_NUM_COEFF_THRESH) + { + use_dct = false; + break; + } + } + } + + // MSB of mode byte=use DCT + if (enc_cfg.m_use_dct) + { + assert((mode_bytes.back() & basist::astc_ldr_t::XUASTC_LDR_MODE_BYTE_USE_DCT) == 0); + + if (use_dct) + mode_bytes.back() |= basist::astc_ldr_t::XUASTC_LDR_MODE_BYTE_USE_DCT; + } + + if (use_dct) + { + total_used_dct++; + + if (total_planes > 1) + { + assert(blk_out.m_packed_dct_plane_data[0].m_num_dc_levels == blk_out.m_packed_dct_plane_data[1].m_num_dc_levels); + } + + for (uint32_t plane_iter = 0; plane_iter < total_planes; plane_iter++) + { + const basist::astc_ldr_t::dct_syms& syms = blk_out.m_packed_dct_plane_data[plane_iter]; + + if (syms.m_num_dc_levels == basist::astc_ldr_t::DCT_MEAN_LEVELS1) + mean1_bytes.push_back((uint8_t)syms.m_dc_sym); + else + { + assert(syms.m_num_dc_levels == basist::astc_ldr_t::DCT_MEAN_LEVELS0); + mean0_bits.put_bits(syms.m_dc_sym, 4); + } + + for (uint32_t i = 0; i < syms.m_coeffs.size(); i++) + { + if (syms.m_coeffs[i].m_coeff == INT16_MAX) + { + run_bytes.push_back(basist::astc_ldr_t::DCT_RUN_LEN_EOB_SYM_INDEX); + 
} + else + { + run_bytes.push_back((uint8_t)syms.m_coeffs[i].m_num_zeros); + + sign_bits.put_bits(syms.m_coeffs[i].m_coeff < 0, 1); + + assert((syms.m_coeffs[i].m_coeff != 0) && (iabs(syms.m_coeffs[i].m_coeff) <= 255)); + + coeff_bytes.push_back((uint8_t)(iabs(syms.m_coeffs[i].m_coeff) - 1)); + } + } + + } // plane_iter + } + else + { + total_used_weight_dpcm++; + + for (uint32_t plane_iter = 0; plane_iter < total_planes; plane_iter++) + { + int prev_w = num_weight_levels / 2; + + for (uint32_t weight_iter = 0; weight_iter < total_weights; weight_iter++) + { + int ise_w = cur_log_blk.m_weights[plane_iter + weight_iter * total_planes]; + int w = weight_ise_to_rank[ise_w]; + + int w_to_code = w; + w_to_code = imod(w - prev_w, num_weight_levels); + + prev_w = w; + + if (num_weight_levels <= 4) + weight2_bits.put_bits((uint8_t)w_to_code, 2); + else if (num_weight_levels <= 8) + weight3_bits.put_bits((uint8_t)w_to_code, 4); + else if (num_weight_levels <= 16) + weight4_bits.put_bits((uint8_t)w_to_code, 4); + else + weight8_bits.push_back((uint8_t)w_to_code); + + } // weight_iter + + } // plane_iter + } + + } // bx + + if (cur_run_len) + { + assert(cur_run_len <= FULL_ZSTD_MAX_RUN_LEN); + + total_runs++; + total_run_blocks += cur_run_len; + mode_bytes.push_back((uint8_t)((uint32_t)basist::astc_ldr_t::xuastc_zstd_mode::cMODE_RUN | ((cur_run_len - 1) << 2))); + cur_run_len = 0; + } + + } // by + + raw_bits.put_bits(basist::astc_ldr_t::FINAL_SYNC_MARKER, basist::astc_ldr_t::FINAL_SYNC_MARKER_BITS); + + raw_bits.flush(); + endpoint_dpcm_3bit.flush(); + endpoint_dpcm_4bit.flush(); + use_bc_bits.flush(); + + mean0_bits.flush(); + sign_bits.flush(); + weight2_bits.flush(); + weight3_bits.flush(); + weight4_bits.flush(); + + const uint32_t zstd_level = 9; + + uint8_vec comp_mode, comp_solid_dpcm, comp_endpoint_dpcm_reuse_indices; + uint8_vec comp_use_bc_bits, comp_endpoint_dpcm_3bit, comp_endpoint_dpcm_4bit, comp_endpoint_dpcm_5bit, comp_endpoint_dpcm_6bit, 
comp_endpoint_dpcm_7bit, comp_endpoint_dpcm_8bit; + + // Mode + if (!zstd_compress(mode_bytes, comp_mode, zstd_level)) return false; + if (!zstd_compress(solid_dpcm_bytes, comp_solid_dpcm, zstd_level)) return false; + + // Endpoints + if (!zstd_compress(endpoint_dpcm_reuse_indices, comp_endpoint_dpcm_reuse_indices, zstd_level)) return false; + if (!zstd_compress(use_bc_bits, comp_use_bc_bits, zstd_level)) return false; + if (!zstd_compress(endpoint_dpcm_3bit, comp_endpoint_dpcm_3bit, zstd_level)) return false; + if (!zstd_compress(endpoint_dpcm_4bit, comp_endpoint_dpcm_4bit, zstd_level)) return false; + if (!zstd_compress(endpoint_dpcm_5bit, comp_endpoint_dpcm_5bit, zstd_level)) return false; + if (!zstd_compress(endpoint_dpcm_6bit, comp_endpoint_dpcm_6bit, zstd_level)) return false; + if (!zstd_compress(endpoint_dpcm_7bit, comp_endpoint_dpcm_7bit, zstd_level)) return false; + if (!zstd_compress(endpoint_dpcm_8bit, comp_endpoint_dpcm_8bit, zstd_level)) return false; + + // Weights + uint8_vec comp_mean0, comp_mean1, comp_run, comp_coeff, comp_weight2, comp_weight3, comp_weight4, comp_weight8; + + if (!zstd_compress(mean0_bits, comp_mean0, zstd_level)) return false; + if (!zstd_compress(mean1_bytes, comp_mean1, zstd_level)) return false; + if (!zstd_compress(run_bytes, comp_run, zstd_level)) return false; + if (!zstd_compress(coeff_bytes, comp_coeff, zstd_level)) return false; + if (!zstd_compress(weight2_bits, comp_weight2, zstd_level)) return false; + if (!zstd_compress(weight3_bits, comp_weight3, zstd_level)) return false; + if (!zstd_compress(weight4_bits, comp_weight4, zstd_level)) return false; + if (!zstd_compress(weight8_bits, comp_weight8, zstd_level)) return false; + + basist::astc_ldr_t::xuastc_ldr_full_zstd_header hdr; + clear_obj(hdr); + + hdr.m_flags = (uint8_t)basist::astc_ldr_t::xuastc_ldr_syntax::cFullZStd; + + hdr.m_raw_bits_len = (uint32_t)raw_bits.get_bytes().size(); + hdr.m_mode_bytes_len = (uint32_t)comp_mode.size(); + 
hdr.m_solid_dpcm_bytes_len = (uint32_t)comp_solid_dpcm.size(); + + hdr.m_endpoint_dpcm_reuse_indices_len = (uint32_t)comp_endpoint_dpcm_reuse_indices.size(); + hdr.m_use_bc_bits_len = (uint32_t)comp_use_bc_bits.size(); + hdr.m_endpoint_dpcm_3bit_len = (uint32_t)comp_endpoint_dpcm_3bit.size(); + hdr.m_endpoint_dpcm_4bit_len = (uint32_t)comp_endpoint_dpcm_4bit.size(); + hdr.m_endpoint_dpcm_5bit_len = (uint32_t)comp_endpoint_dpcm_5bit.size(); + hdr.m_endpoint_dpcm_6bit_len = (uint32_t)comp_endpoint_dpcm_6bit.size(); + hdr.m_endpoint_dpcm_7bit_len = (uint32_t)comp_endpoint_dpcm_7bit.size(); + hdr.m_endpoint_dpcm_8bit_len = (uint32_t)comp_endpoint_dpcm_8bit.size(); + + hdr.m_mean0_bits_len = (uint32_t)comp_mean0.size(); + hdr.m_mean1_bytes_len = (uint32_t)comp_mean1.size(); + hdr.m_run_bytes_len = (uint32_t)comp_run.size(); + hdr.m_coeff_bytes_len = (uint32_t)comp_coeff.size(); + hdr.m_sign_bits_len = (uint32_t)sign_bits.get_bytes().size(); + hdr.m_weight2_bits_len = (uint32_t)comp_weight2.size(); + hdr.m_weight3_bits_len = (uint32_t)comp_weight3.size(); + hdr.m_weight4_bits_len = (uint32_t)comp_weight4.size(); + hdr.m_weight8_bytes_len = (uint32_t)comp_weight8.size(); + + comp_data.reserve(8192); + + comp_data.resize(sizeof(hdr)); + memcpy(comp_data.data(), &hdr, sizeof(hdr)); + + comp_data.append(raw_bits.get_bytes()); + comp_data.append(comp_mode); + comp_data.append(comp_solid_dpcm); + + comp_data.append(comp_endpoint_dpcm_reuse_indices); + comp_data.append(comp_use_bc_bits); + comp_data.append(comp_endpoint_dpcm_3bit); + comp_data.append(comp_endpoint_dpcm_4bit); + comp_data.append(comp_endpoint_dpcm_5bit); + comp_data.append(comp_endpoint_dpcm_6bit); + comp_data.append(comp_endpoint_dpcm_7bit); + comp_data.append(comp_endpoint_dpcm_8bit); + + comp_data.append(comp_mean0); + comp_data.append(comp_mean1); + comp_data.append(comp_run); + comp_data.append(comp_coeff); + comp_data.append(sign_bits.get_bytes()); + comp_data.append(comp_weight2); + 
comp_data.append(comp_weight3); + comp_data.append(comp_weight4); + comp_data.append(comp_weight8); + + if (comp_data.size() > UINT32_MAX) + return false; + + if ((global_cfg.m_debug_images) || (global_cfg.m_debug_output)) + { + image coded_img(width, height); + + vector2D phys_blocks(num_blocks_x, num_blocks_y); + + for (uint32_t by = 0; by < num_blocks_y; by++) + { + for (uint32_t bx = 0; bx < num_blocks_x; bx++) + { + const astc_helpers::log_astc_block& log_blk = coded_blocks(bx, by); + + color_rgba block_pixels[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + + bool status = astc_helpers::decode_block(log_blk, block_pixels, block_width, block_height, enc_cfg.m_cem_enc_params.m_decode_mode_srgb ? astc_helpers::cDecodeModeSRGB8 : astc_helpers::cDecodeModeLDR8); + if (!status) + { + fmt_error_printf("astc_helpers::decode_block() failed\n"); + return false; + } + + // Be positive the logical block can be unpacked correctly as XUASTC LDR. + color_rgba block_pixels_alt[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + bool status_alt = astc_helpers::decode_block_xuastc_ldr(log_blk, block_pixels_alt, block_width, block_height, enc_cfg.m_cem_enc_params.m_decode_mode_srgb ? astc_helpers::cDecodeModeSRGB8 : astc_helpers::cDecodeModeLDR8); + if (!status_alt) + { + fmt_error_printf("astc_helpers::decode_block_xuastc_ldr() failed\n"); + return false; + } + + if (memcmp(block_pixels, block_pixels_alt, sizeof(color_rgba) * block_width * block_height) != 0) + { + fmt_error_printf("astc_helpers::decode_block_xuastc_ldr() decode pixel mismatch\n"); + return false; + } + + coded_img.set_block_clipped(block_pixels, bx * block_width, by * block_height, block_width, block_height); + + } // bx + + } //by + + if (global_cfg.m_debug_images) + save_png(global_cfg.m_debug_file_prefix + "coded_img.png", coded_img); + + if (global_cfg.m_debug_output) + { + debug_printf("Orig image vs. 
coded img:\n"); + print_image_metrics(orig_img, coded_img); + } + } + + if (global_cfg.m_debug_output) + { + fmt_debug_printf("Zstd compressed sizes:\n"); + + fmt_debug_printf(" Raw bytes: {}\n", (uint64_t)raw_bits.get_bytes().size()); + fmt_debug_printf(" Mode bytes: {}, comp size: {}\n", (uint64_t)mode_bytes.size(), (uint64_t)comp_mode.size()); + fmt_debug_printf(" Solid DPCM bytes: {}, comp size: {}\n", (uint64_t)solid_dpcm_bytes.size(), (uint64_t)comp_solid_dpcm.size()); + + fmt_debug_printf(" \n Endpoint DPCM Reuse Bytes: {}, comp size: {}\n", (uint64_t)endpoint_dpcm_reuse_indices.size(), (uint64_t)comp_endpoint_dpcm_reuse_indices.size()); + fmt_debug_printf(" Use BC bits bytes: {}, comp_size: {}\n", (uint64_t)use_bc_bits.get_bytes().size(), (uint64_t)comp_use_bc_bits.size()); + fmt_debug_printf(" Endpoint DPCM 3 bits: {}, comp size: {}\n", (uint64_t)endpoint_dpcm_3bit.get_bytes().size(), (uint64_t)comp_endpoint_dpcm_3bit.size()); + fmt_debug_printf(" Endpoint DPCM 4 bits: {}, comp size: {}\n", (uint64_t)endpoint_dpcm_4bit.get_bytes().size(), (uint64_t)comp_endpoint_dpcm_4bit.size()); + fmt_debug_printf(" Endpoint DPCM 5 bits: {}, comp size: {}\n", (uint64_t)endpoint_dpcm_5bit.size(), (uint64_t)comp_endpoint_dpcm_5bit.size()); + fmt_debug_printf(" Endpoint DPCM 6 bits: {}, comp size: {}\n", (uint64_t)endpoint_dpcm_6bit.size(), (uint64_t)comp_endpoint_dpcm_6bit.size()); + fmt_debug_printf(" Endpoint DPCM 7 bits: {}, comp size: {}\n", (uint64_t)endpoint_dpcm_7bit.size(), (uint64_t)comp_endpoint_dpcm_7bit.size()); + fmt_debug_printf(" Endpoint DPCM 8 bits: {}, comp size: {}\n", (uint64_t)endpoint_dpcm_8bit.size(), (uint64_t)comp_endpoint_dpcm_8bit.size()); + + fmt_debug_printf(" \n Mean0 bytes: {} comp size: {}\n", (uint64_t)mean0_bits.get_bytes().size(), (uint64_t)comp_mean0.size()); + fmt_debug_printf(" Mean1 bytes: {} comp size: {}\n", (uint64_t)mean1_bytes.size(), (uint64_t)comp_mean1.size()); + fmt_debug_printf(" Run bytes: {} comp size: {}\n", 
(uint64_t)run_bytes.size(), (uint64_t)comp_run.size()); + fmt_debug_printf(" Coeff bytes: {} comp size: {}\n", (uint64_t)coeff_bytes.size(), (uint64_t)comp_coeff.size()); + fmt_debug_printf(" Sign bytes: {}\n", (uint64_t)sign_bits.get_bytes().size()); + fmt_debug_printf(" Weight2 bytes: {} comp size: {}\n", (uint64_t)weight2_bits.get_bytes().size(), (uint64_t)comp_weight2.size()); + fmt_debug_printf(" Weight3 bytes: {} comp size: {}\n", (uint64_t)weight3_bits.get_bytes().size(), (uint64_t)comp_weight3.size()); + fmt_debug_printf(" Weight4 bytes: {} comp size: {}\n", (uint64_t)weight4_bits.get_bytes().size(), (uint64_t)comp_weight4.size()); + fmt_debug_printf(" Weight8 bytes: {} comp size: {}\n", (uint64_t)weight8_bits.size(), (uint64_t)comp_weight8.size()); + + fmt_debug_printf("\nTotal blocks: {}\n", total_blocks); + fmt_debug_printf("Total runs: {}, run blocks: {}, non-run blocks: {}\n", total_runs, total_run_blocks, total_nonrun_blocks); + fmt_debug_printf("Total lossy replacements: {}\n", total_lossy_replacements); + fmt_debug_printf("Total solid blocks: {}\n", total_solid_blocks); + fmt_debug_printf("Total full reuse commands: {}\n", total_full_reuse_commands); + fmt_debug_printf("Total raw commands: {}\n", total_raw_commands); + fmt_debug_printf("Total reuse full cfg emitted: {}\n", total_reuse_full_cfg_emitted); + fmt_debug_printf("Total full cfg emitted: {}\n", total_full_cfg_emitted); + fmt_debug_printf("Num part hash probes: {}, num part hash hits: {}\n", num_part_hash_probes, num_part_hash_hits); + fmt_debug_printf("Total used endpoint dpcm: {}, total used endpoint raw: {}\n", total_used_endpoint_dpcm, total_used_endpoint_raw); + fmt_debug_printf("Total used weight DCT: {}, total used weight DPCM: {}\n", total_used_dct, total_used_weight_dpcm); + fmt_debug_printf("Total tm hash probes: {}, total tm hash_hits: {}\n", num_tm_hash_probes, num_tm_hash_hits); + + fmt_debug_printf("\nCompressed to {} bytes, {3.3}bpp\n\n", comp_data.size_u32(), 
((float)comp_data.size() * 8.0f) / (float)total_pixels); + } + + return true; +} +#endif + +bool compress_image( + const image& orig_img, uint8_vec& comp_data, vector2D& coded_blocks, + const astc_ldr_encode_config& global_cfg, + job_pool& job_pool) +{ + assert(g_initialized); + + if (global_cfg.m_debug_output) + { + fmt_debug_printf("\n------------------- astc_ldr::compress_image\n"); + + fmt_debug_printf("\nglobal_cfg:\n"); + global_cfg.debug_print(); + fmt_debug_printf("\n"); + } + + comp_data.resize(0); + + if (!g_initialized) + return false; + + const uint32_t width = orig_img.get_width(), height = orig_img.get_height(); + + if (!is_in_range(width, 1, (int)MAX_WIDTH) || !is_in_range(height, 1, (int)MAX_HEIGHT)) + return false; + + if (!astc_helpers::is_valid_block_size(global_cfg.m_astc_block_width, global_cfg.m_astc_block_height)) + return false; + + const uint32_t block_width = global_cfg.m_astc_block_width; + const uint32_t block_height = global_cfg.m_astc_block_height; + const uint32_t total_block_pixels = block_width * block_height; + + const uint32_t total_pixels = width * height; + const uint32_t num_blocks_x = (width + block_width - 1) / block_width; + const uint32_t num_blocks_y = (height + block_height - 1) / block_height; + const uint32_t total_blocks = num_blocks_x * num_blocks_y; + const bool has_alpha = orig_img.has_alpha(); + + if (global_cfg.m_debug_output) + fmt_debug_printf("Encoding image dimensions {}x{}, has alpha: {}\n", orig_img.get_width(), orig_img.get_height(), has_alpha); + + ldr_astc_block_encode_image_high_level_config enc_cfg; + + enc_cfg.m_block_width = block_width; + enc_cfg.m_block_height = block_height; + enc_cfg.m_pJob_pool = &job_pool; + + enc_cfg.m_use_dct = global_cfg.m_use_dct; + + if (!is_in_range(global_cfg.m_dct_quality, 1.0f, 100.0f)) + return false; + + const int int_q = clamp((int)std::round(global_cfg.m_dct_quality * 2.0f), 0, 200); + enc_cfg.m_base_q = (float)int_q / 2.0f; + + if (global_cfg.m_debug_output) + 
fmt_debug_printf("Use DCT: {}, base q: {}, lossy supercompression: {}\n", enc_cfg.m_use_dct, enc_cfg.m_base_q, global_cfg.m_lossy_supercompression); + + const float replacement_min_psnr = has_alpha ? global_cfg.m_replacement_min_psnr_alpha : global_cfg.m_replacement_min_psnr; + const float psnr_trial_diff_thresh = has_alpha ? global_cfg.m_psnr_trial_diff_thresh_alpha : global_cfg.m_psnr_trial_diff_thresh; + const float psnr_trial_diff_thresh_edge = has_alpha ? global_cfg.m_psnr_trial_diff_thresh_edge_alpha : global_cfg.m_psnr_trial_diff_thresh_edge; + + enc_cfg.m_blurring_enabled = global_cfg.m_block_blurring_p1; + enc_cfg.m_blurring_enabled_p2 = global_cfg.m_block_blurring_p2; + + for (uint32_t i = 0; i < 4; i++) + { + enc_cfg.m_cem_enc_params.m_comp_weights[i] = global_cfg.m_comp_weights[i]; + + if (!is_in_range(global_cfg.m_comp_weights[i], 1, 256)) + return false; + } + + int cfg_effort_level = global_cfg.m_effort_level; + if (global_cfg.m_debug_output) + fmt_debug_printf("Using cfg effort level: {}\n", cfg_effort_level); + + configure_encoder_effort_level(cfg_effort_level, enc_cfg); + + if (global_cfg.m_force_disable_subsets) + { + enc_cfg.m_subsets_enabled = false; + enc_cfg.m_second_pass_force_subsets_enabled = false; + } + + if (global_cfg.m_force_disable_rgb_dual_plane) + { + enc_cfg.m_disable_rgb_dual_plane = true; + enc_cfg.m_force_all_dp_chans_p2 = false; + } + + enc_cfg.m_cem_enc_params.m_decode_mode_srgb = global_cfg.m_astc_decode_mode_srgb; + + enc_cfg.m_debug_output = global_cfg.m_debug_output; + enc_cfg.m_debug_images = global_cfg.m_debug_images; + enc_cfg.m_debug_file_prefix = global_cfg.m_debug_file_prefix; + + ldr_astc_block_encode_image_output enc_out; + + const bool enc_status = ldr_astc_block_encode_image(orig_img, enc_cfg, enc_out); + + if (global_cfg.m_debug_output) + fmt_debug_printf("ldr_astc_block_encode_image: {}\n", enc_status); + + if (!enc_status) + return false; + + basist::astc_ldr_t::xuastc_ldr_syntax syntax = 
global_cfg.m_compressed_syntax; + + if (syntax >= basist::astc_ldr_t::xuastc_ldr_syntax::cTotal) + { + assert(0); + return false; + } + + // Switch to full adaptive arithmetic coding on the smallest mipmaps to avoid ZStd overhead. + const uint32_t DISABLE_FASTER_FORMAT_TOTAL_BLOCKS_THRESH = 64; + if (total_blocks <= DISABLE_FASTER_FORMAT_TOTAL_BLOCKS_THRESH) + syntax = basist::astc_ldr_t::xuastc_ldr_syntax::cFullArith; + + if (syntax == basist::astc_ldr_t::xuastc_ldr_syntax::cFullZStd) + { +#if BASISD_SUPPORT_KTX2_ZSTD + // Full ZStd syntax is so different we'll move that to another function. + return compress_image_full_zstd( + orig_img, comp_data, coded_blocks, + global_cfg, + job_pool, + enc_cfg, enc_out); +#else + fmt_error_printf("Full ZStd syntax not supported in this build (set BASISD_SUPPORT_KTX2_ZSTD to 1)\n"); + return false; +#endif + } + + const bool use_faster_format = (syntax == basist::astc_ldr_t::xuastc_ldr_syntax::cHybridArithZStd); + +#if !BASISD_SUPPORT_KTX2_ZSTD + if (use_faster_format) + { + fmt_error_printf("Full ZStd syntax not supported in this build (set BASISD_SUPPORT_KTX2_ZSTD to 1)\n"); + return false; + } +#endif + + // Either full arithmetic, or hybrid arithmetic+ZStd for weight symbols. 
+ basist::astc_ldr_t::xuastc_ldr_arith_header hdr; + clear_obj(hdr); + + bitwise_coder mean0_bits; + uint8_vec mean1_bytes; + uint8_vec run_bytes; + uint8_vec coeff_bytes; + bitwise_coder sign_bits; + bitwise_coder weight2_bits; + bitwise_coder weight3_bits; + bitwise_coder weight4_bits; + uint8_vec weight8_bits; + + if (use_faster_format) + { + mean0_bits.init(1024); + mean1_bytes.reserve(1024); + run_bytes.reserve(8192); + coeff_bytes.reserve(8192); + sign_bits.init(1024); + weight2_bits.init(1024); + weight3_bits.init(1024); + weight4_bits.init(1024); + weight8_bits.reserve(8192); + } + + interval_timer itm; + itm.start(); + + basist::arith::arith_enc enc; + enc.init(1024 * 1024); + + enc.put_bits(basist::astc_ldr_t::ARITH_HEADER_MARKER, basist::astc_ldr_t::ARITH_HEADER_MARKER_BITS); + + const int block_dim_index = astc_helpers::find_astc_block_size_index(block_width, block_height); + assert((block_dim_index >= 0) && (block_dim_index < (int)astc_helpers::NUM_ASTC_BLOCK_SIZES)); + + enc.put_bits(block_dim_index, 4); + + enc.put_bit(enc_cfg.m_cem_enc_params.m_decode_mode_srgb); + + enc.put_bits(width, 16); + enc.put_bits(height, 16); + + enc.put_bit(has_alpha); + + enc.put_bits(enc_cfg.m_use_dct, 1); + if (enc_cfg.m_use_dct) + enc.put_bits(int_q, 8); + + basist::arith::arith_data_model mode_model((uint32_t)basist::astc_ldr_t::xuastc_mode::cMODE_TOTAL); + + basist::arith::arith_data_model solid_color_dpcm_model[4]; + for (uint32_t i = 0; i < 4; i++) + solid_color_dpcm_model[i].init(256, true); + + basist::arith::arith_data_model raw_endpoint_models[astc_helpers::TOTAL_ENDPOINT_ISE_RANGES]; + for (uint32_t i = 0; i < astc_helpers::TOTAL_ENDPOINT_ISE_RANGES; i++) + raw_endpoint_models[i].init(astc_helpers::get_ise_levels(astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE + i)); + + basist::arith::arith_data_model dpcm_endpoint_models[astc_helpers::TOTAL_ENDPOINT_ISE_RANGES]; + for (uint32_t i = 0; i < astc_helpers::TOTAL_ENDPOINT_ISE_RANGES; i++) + 
dpcm_endpoint_models[i].init(astc_helpers::get_ise_levels(astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE + i)); + + basist::arith::arith_data_model raw_weight_models[astc_helpers::TOTAL_WEIGHT_ISE_RANGES]; + for (uint32_t i = 0; i < astc_helpers::TOTAL_WEIGHT_ISE_RANGES; i++) + raw_weight_models[i].init(astc_helpers::get_ise_levels(astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE + i)); + + basist::arith::arith_bit_model is_base_ofs_model; + basist::arith::arith_bit_model use_dct_model[4]; + basist::arith::arith_bit_model use_dpcm_endpoints_model; + + basist::arith::arith_data_model cem_index_model[8]; + for (uint32_t i = 0; i < 8; i++) + cem_index_model[i].init(basist::astc_ldr_t::OTM_NUM_CEMS); + + basist::arith::arith_data_model subset_index_model[basist::astc_ldr_t::OTM_NUM_SUBSETS]; + for (uint32_t i = 0; i < basist::astc_ldr_t::OTM_NUM_SUBSETS; i++) + subset_index_model[i].init(basist::astc_ldr_t::OTM_NUM_SUBSETS); + + basist::arith::arith_data_model ccs_index_model[basist::astc_ldr_t::OTM_NUM_CCS]; + for (uint32_t i = 0; i < basist::astc_ldr_t::OTM_NUM_CCS; i++) + ccs_index_model[i].init(basist::astc_ldr_t::OTM_NUM_CCS); + + basist::arith::arith_data_model grid_size_model[basist::astc_ldr_t::OTM_NUM_GRID_SIZES]; + for (uint32_t i = 0; i < basist::astc_ldr_t::OTM_NUM_GRID_SIZES; i++) + grid_size_model[i].init(basist::astc_ldr_t::OTM_NUM_GRID_SIZES); + + basist::arith::arith_data_model grid_aniso_model[basist::astc_ldr_t::OTM_NUM_GRID_ANISOS]; + for (uint32_t i = 0; i < basist::astc_ldr_t::OTM_NUM_GRID_ANISOS; i++) + grid_aniso_model[i].init(basist::astc_ldr_t::OTM_NUM_GRID_ANISOS); + + basist::arith::arith_data_model dct_run_len_model(65); // [0,63] or 64=EOB + basist::arith::arith_data_model dct_coeff_mag(255); // [1,255] (blocks with larger mags go DPCM) + + double total_header_bits = 0.0f, total_weight_bits = 0.0f, total_endpoint_bits = 0.0f; + + uint32_t total_solid_blocks = 0, total_used_dct = 0, total_used_weight_dpcm = 0; + + 
basist::astc_ldr_t::grid_weight_dct grid_dct; + grid_dct.init(block_width, block_height); + + vector2D prev_block_states(num_blocks_x, num_blocks_y); + + coded_blocks.resize(num_blocks_x, num_blocks_y); + for (uint32_t y = 0; y < num_blocks_y; y++) + for (uint32_t x = 0; x < num_blocks_x; x++) + coded_blocks(x, y).clear(); + + const bool endpoint_dpcm_global_enable = true; + uint32_t total_used_endpoint_dpcm = 0, total_used_endpoint_raw = 0; + + basist::arith::arith_data_model submode_models[basist::astc_ldr_t::OTM_NUM_CEMS][basist::astc_ldr_t::OTM_NUM_SUBSETS][basist::astc_ldr_t::OTM_NUM_CCS][basist::astc_ldr_t::OTM_NUM_GRID_SIZES][basist::astc_ldr_t::OTM_NUM_GRID_ANISOS]; + + basist::arith::arith_bit_model endpoints_use_bc_models[4]; + + basist::arith::arith_data_model endpoint_reuse_delta_model(basist::astc_6x6_hdr::NUM_REUSE_XY_DELTAS); + + basist::arith::arith_data_model weight_mean_models[2]; + weight_mean_models[0].init(basist::astc_ldr_t::DCT_MEAN_LEVELS0); + weight_mean_models[1].init(basist::astc_ldr_t::DCT_MEAN_LEVELS1); + + basist::arith::arith_data_model config_reuse_model[4]; + for (uint32_t i = 0; i < 4; i++) + config_reuse_model[i].init(basist::astc_ldr_t::cMaxConfigReuseNeighbors + 1); + + uint32_t total_reuse_full_cfg_emitted = 0, total_full_cfg_emitted = 0; + + // TODO: check weights for >= 0 + const float total_comp_weights = enc_cfg.m_cem_enc_params.get_total_comp_weights(); + + uint32_t total_lossy_replacements = 0; + uint32_t total_full_reuse_commands = 0; + uint32_t total_raw_commands = 0; + + if (global_cfg.m_debug_output) + fmt_debug_printf("Supercompressor init time: {} secs\n", itm.get_elapsed_secs()); + + uint32_t total_runs = 0, total_run_blocks = 0; + uint32_t cur_run_len = 0; + const bool use_run_commands = true; + uint32_t total_nonrun_blocks = 0; + + int part2_hash[basist::astc_ldr_t::PART_HASH_SIZE]; + std::fill(part2_hash, part2_hash + basist::astc_ldr_t::PART_HASH_SIZE, -1); + + int 
part3_hash[basist::astc_ldr_t::PART_HASH_SIZE]; + std::fill(part3_hash, part3_hash + basist::astc_ldr_t::PART_HASH_SIZE, -1); + + basist::arith::arith_bit_model use_part_hash_model[4]; + basist::arith::arith_data_model part2_hash_index_model(basist::astc_ldr_t::PART_HASH_SIZE, true); + basist::arith::arith_data_model part3_hash_index_model(basist::astc_ldr_t::PART_HASH_SIZE, true); + + uint32_t num_part_hash_probes = 0, num_part_hash_hits = 0; + uint32_t total_dct_syms = 0, total_dpcm_syms = 0; + + basist::arith::arith_gamma_contexts m_run_len_contexts; + + image vis_img; + if (global_cfg.m_debug_images) + { + vis_img.resize(width, height); + } + + itm.start(); + + for (uint32_t by = 0; by < num_blocks_y; by++) + { + const uint32_t base_y = by * block_height; + + for (uint32_t bx = 0; bx < num_blocks_x; bx++) + { + const uint32_t base_x = bx * block_width; + + basist::astc_ldr_t::prev_block_state& prev_state = prev_block_states(bx, by); + const basist::astc_ldr_t::prev_block_state* pLeft_state = bx ? &prev_block_states(bx - 1, by) : nullptr; + const basist::astc_ldr_t::prev_block_state* pUpper_state = by ? &prev_block_states(bx, by - 1) : nullptr; + const basist::astc_ldr_t::prev_block_state* pDiag_state = (bx && by) ? &prev_block_states(bx - 1, by - 1) : nullptr; + const basist::astc_ldr_t::prev_block_state* pPred_state = pLeft_state ? pLeft_state : pUpper_state; // left or upper, or nullptr on first block + + const ldr_astc_block_encode_image_output::block_info& blk_info = enc_out.m_image_block_info(bx, by); + + uint32_t best_packed_out_block_index = blk_info.m_packed_out_block_index; + + // check for run + if ((use_run_commands) && (bx || by)) + { + const encode_block_output& blk_out = blk_info.m_out_blocks[best_packed_out_block_index]; + const astc_helpers::log_astc_block& cur_log_blk = blk_out.m_log_blk; + + const astc_helpers::log_astc_block& prev_log_blk = bx ? 
coded_blocks(bx - 1, by) : coded_blocks(0, by - 1); + const basist::astc_ldr_t::prev_block_state* pPrev_block_state = bx ? pLeft_state : pUpper_state; + + assert(pPrev_block_state); + + if (compare_log_blocks_for_equality(cur_log_blk, prev_log_blk)) + { + // Left or upper is exactly the same logical block, so expand the run. + cur_run_len++; + + // Accept the previous block (left or upper) as if it's been coded normally. + + coded_blocks(bx, by) = prev_log_blk; + + prev_state.m_was_solid_color = pPrev_block_state->m_was_solid_color; + prev_state.m_used_weight_dct = pPrev_block_state->m_used_weight_dct; + prev_state.m_first_endpoint_uses_bc = pPrev_block_state->m_first_endpoint_uses_bc; + prev_state.m_reused_full_cfg = true; + prev_state.m_used_part_hash = pPrev_block_state->m_used_part_hash; + prev_state.m_tm_index = pPrev_block_state->m_tm_index; + prev_state.m_base_cem_index = pPrev_block_state->m_base_cem_index; + prev_state.m_subset_index = pPrev_block_state->m_subset_index; + prev_state.m_ccs_index = pPrev_block_state->m_ccs_index; + prev_state.m_grid_size = pPrev_block_state->m_grid_size; + prev_state.m_grid_aniso = pPrev_block_state->m_grid_aniso; + + continue; + } + } + + if (cur_run_len) + { + total_runs++; + total_run_blocks += cur_run_len; + + total_header_bits += enc.encode_and_return_price((uint32_t)basist::astc_ldr_t::xuastc_mode::cMODE_RUN, mode_model); + total_header_bits += enc.put_gamma_and_return_price(cur_run_len, m_run_len_contexts); + cur_run_len = 0; + } + + total_nonrun_blocks++; + + const float ref_wmse = (float)blk_info.m_out_blocks[best_packed_out_block_index].m_sse / (total_comp_weights * (float)total_block_pixels); + const float ref_wpsnr = (ref_wmse > 1e-5f) ? 
20.0f * log10f(255.0f / sqrtf(ref_wmse)) : 10000.0f; + + if ((global_cfg.m_lossy_supercompression) && (ref_wpsnr >= replacement_min_psnr) && + (!blk_info.m_out_blocks[blk_info.m_packed_out_block_index].m_log_blk.m_solid_color_flag_ldr)) + { + const float psnr_thresh = blk_info.m_strong_edges ? psnr_trial_diff_thresh_edge : psnr_trial_diff_thresh; + + float best_alt_wpsnr = 0.0f; + bool found_alternative = false; + + // Pass: 0 consider full config+part ID endpoint reuse + // Pass: 1 fall back to just full config+part ID reuse (no endpoints) + for (uint32_t pass = 0; pass < 2; pass++) + { + // Iterate through all available alternative candidates + for (uint32_t out_block_iter = 0; out_block_iter < blk_info.m_out_blocks.size(); out_block_iter++) + { + if (out_block_iter == blk_info.m_packed_out_block_index) + continue; + + const float trial_wmse = (float)blk_info.m_out_blocks[out_block_iter].m_sse / (total_comp_weights * (float)total_block_pixels); + const float trial_wpsnr = (trial_wmse > 1e-5f) ? 
20.0f * log10f(255.0f / sqrtf(trial_wmse)) : 10000.0f; + + // Reject if PSNR too low + if (trial_wpsnr < (ref_wpsnr - psnr_thresh)) + continue; + + // Reject if inferior than best found so far + if (trial_wpsnr < best_alt_wpsnr) + continue; + + const astc_helpers::log_astc_block& trial_log_blk = blk_info.m_out_blocks[out_block_iter].m_log_blk; + + if (trial_log_blk.m_solid_color_flag_ldr) + continue; + + // Examine nearby neighbors + for (uint32_t i = 0; i < basist::astc_ldr_t::cMaxConfigReuseNeighbors; i++) + { + int dx = 0, dy = 0; + switch (i) + { + case 0: dx = -1; break; + case 1: dy = -1; break; + case 2: dx = -1; dy = -1; break; + default: assert(0); break; + } + + const int n_bx = bx + dx, n_by = by + dy; + if ((n_bx < 0) || (n_by < 0)) + continue; + + astc_helpers::log_astc_block& neighbor_log_blk = coded_blocks(n_bx, n_by); + + if (neighbor_log_blk.m_solid_color_flag_ldr) + continue; + + bool accept_flag = false; + if (pass == 0) + { + // prefer full config+endpoint equality first + accept_flag = compare_log_block_configs_and_endpoints(trial_log_blk, neighbor_log_blk); + } + else + { + // next check for just config equality + accept_flag = compare_log_block_configs(trial_log_blk, neighbor_log_blk); + } + + if (accept_flag) + { + best_alt_wpsnr = trial_wpsnr; + best_packed_out_block_index = out_block_iter; + found_alternative = true; + break; + } + + } // i + + } // out_block_iter + + if (found_alternative) + break; + + } // pass + + if (best_packed_out_block_index != blk_info.m_packed_out_block_index) + total_lossy_replacements++; + + } // global_cfg.m_lossy_supercompression + + const encode_block_output& blk_out = blk_info.m_out_blocks[best_packed_out_block_index]; + + astc_helpers::log_astc_block& cur_log_blk = coded_blocks(bx, by); + + cur_log_blk = blk_out.m_log_blk; + + // TODO: Add mode model context + + if (blk_out.m_trial_mode_index < 0) + { + assert(cur_log_blk.m_solid_color_flag_ldr); + + total_solid_blocks++; + + //total_header_bits += 
mode_model.get_price(cMODE_SOLID) + (float)(8 * (has_alpha ? 4 : 3)); + total_header_bits += mode_model.get_price((uint32_t)basist::astc_ldr_t::xuastc_mode::cMODE_SOLID); + enc.encode((uint32_t)basist::astc_ldr_t::xuastc_mode::cMODE_SOLID, mode_model); + + uint32_t cur_solid_color[4]; + for (uint32_t i = 0; i < 4; i++) + cur_solid_color[i] = blk_out.m_log_blk.m_solid_color[i] >> 8; + + uint32_t prev_solid_color[4] = { 0 }; + + const uint32_t num_comps = has_alpha ? 4 : 3; + + astc_helpers::log_astc_block* pPrev_log_blk = bx ? &coded_blocks(bx - 1, by) : (by ? &coded_blocks(bx, by - 1) : nullptr); + if (pPrev_log_blk) + { + if (pPrev_log_blk->m_solid_color_flag_ldr) + { + prev_solid_color[0] = pPrev_log_blk->m_solid_color[0] >> 8; + prev_solid_color[1] = pPrev_log_blk->m_solid_color[1] >> 8; + prev_solid_color[2] = pPrev_log_blk->m_solid_color[2] >> 8; + prev_solid_color[3] = pPrev_log_blk->m_solid_color[3] >> 8; + } + else + { +#if 0 + color_rgba prev_block_pixels[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + bool dec_status = astc_helpers::decode_block(*pPrev_log_blk, prev_block_pixels, block_width, block_height, enc_cfg.m_cem_enc_params.m_decode_mode_srgb ? astc_helpers::cDecodeModeSRGB8 : astc_helpers::cDecodeModeLDR8); + if (!dec_status) + { + fmt_error_printf("decode_block() failed\n"); + return false; + } + + for (uint32_t i = 0; i < total_block_pixels; i++) + { + for (uint32_t j = 0; j < num_comps; j++) + prev_solid_color[j] += prev_block_pixels[i][j]; + } + + for (uint32_t j = 0; j < num_comps; j++) + prev_solid_color[j] = (prev_solid_color[j] + (total_block_pixels / 2)) / total_block_pixels; +#endif + // Decode previous block's first CEM, use the halfway point as the predictor. 
+ color_rgba prev_l, prev_h; + decode_endpoints(pPrev_log_blk->m_color_endpoint_modes[0], pPrev_log_blk->m_endpoints, pPrev_log_blk->m_endpoint_ise_range, prev_l, prev_h); + + prev_solid_color[0] = (prev_l[0] + prev_h[0] + 1) >> 1; + prev_solid_color[1] = (prev_l[1] + prev_h[1] + 1) >> 1; + prev_solid_color[2] = (prev_l[2] + prev_h[2] + 1) >> 1; + prev_solid_color[3] = (prev_l[3] + prev_h[3] + 1) >> 1; + } + } + + for (uint32_t i = 0; i < num_comps; i++) + { + const uint32_t delta = (cur_solid_color[i] - prev_solid_color[i]) & 0xFF; + + total_header_bits += enc.encode_and_return_price(delta, solid_color_dpcm_model[i]); + } + + // Bias the statistics towards using DCT (most common case). + prev_state.m_was_solid_color = true; + prev_state.m_used_weight_dct = enc_cfg.m_use_dct; + prev_state.m_first_endpoint_uses_bc = true; + prev_state.m_tm_index = -1; + prev_state.m_base_cem_index = astc_helpers::CEM_LDR_RGB_DIRECT; + prev_state.m_subset_index = 0; + prev_state.m_ccs_index = 0; + prev_state.m_grid_size = 0; + prev_state.m_grid_aniso = 0; + prev_state.m_reused_full_cfg = false; + prev_state.m_used_part_hash = true; // bias to true + + continue; + } + + //-------------------------------------------- + // for (uint32_t out_block_iter = 0; out_block_iter < blk_info.m_out_blocks.size(); out_block_iter++) + int full_cfg_endpoint_reuse_index = -1; + + for (uint32_t i = 0; i < basist::astc_ldr_t::cMaxConfigReuseNeighbors; i++) + { + int dx = 0, dy = 0; + switch (i) + { + case 0: dx = -1; break; + case 1: dy = -1; break; + case 2: dx = -1; dy = -1; break; + default: assert(0); break; + } + + const int n_bx = bx + dx, n_by = by + dy; + if ((n_bx < 0) || (n_by < 0)) + continue; + + astc_helpers::log_astc_block& neighbor_log_blk = coded_blocks(n_bx, n_by); + + if (neighbor_log_blk.m_solid_color_flag_ldr) + continue; + + if (compare_log_block_configs_and_endpoints(cur_log_blk, neighbor_log_blk)) + { + full_cfg_endpoint_reuse_index = i; + break; + } + } // i + 
//-------------------------------------------- + + if (full_cfg_endpoint_reuse_index >= 0) + { + // Reused full config, part ID and endpoint values from an immediate neighbor + total_header_bits += enc.encode_and_return_price((uint32_t)basist::astc_ldr_t::xuastc_mode::cMODE_REUSE_CFG_ENDPOINTS_LEFT + full_cfg_endpoint_reuse_index, mode_model); + + total_full_reuse_commands++; + + const basist::astc_ldr_t::prev_block_state* pReused_cfg_state = nullptr; + + switch (full_cfg_endpoint_reuse_index) + { + case 0: pReused_cfg_state = pLeft_state; break; + case 1: pReused_cfg_state = pUpper_state; break; + case 2: pReused_cfg_state = pDiag_state; break; + default: assert(0); break; + } + + if (!pReused_cfg_state) + { + assert(0); + fmt_error_printf("encoding internal failure\n"); + return false; + } + + assert(pReused_cfg_state->m_tm_index == blk_out.m_trial_mode_index); + + prev_state.m_tm_index = blk_out.m_trial_mode_index; + prev_state.m_base_cem_index = pReused_cfg_state->m_base_cem_index; + prev_state.m_subset_index = pReused_cfg_state->m_subset_index; + prev_state.m_ccs_index = pReused_cfg_state->m_ccs_index; + prev_state.m_grid_size = pReused_cfg_state->m_grid_size; + prev_state.m_grid_aniso = pReused_cfg_state->m_grid_aniso; + prev_state.m_used_part_hash = pReused_cfg_state->m_used_part_hash; + prev_state.m_reused_full_cfg = true; + + const uint32_t cur_actual_cem = cur_log_blk.m_color_endpoint_modes[0]; + + if (astc_helpers::cem_supports_bc(cur_actual_cem)) + { + prev_state.m_first_endpoint_uses_bc = astc_helpers::used_blue_contraction(cur_actual_cem, cur_log_blk.m_endpoints, cur_log_blk.m_endpoint_ise_range); + assert(prev_state.m_first_endpoint_uses_bc == pReused_cfg_state->m_first_endpoint_uses_bc); + } + } + else + { + total_raw_commands++; + + // Send mode + total_header_bits += mode_model.get_price((uint32_t)basist::astc_ldr_t::xuastc_mode::cMODE_RAW); + enc.encode((uint32_t)basist::astc_ldr_t::xuastc_mode::cMODE_RAW, mode_model); + + const uint32_t 
cur_actual_cem = cur_log_blk.m_color_endpoint_modes[0]; + //const bool actual_cem_supports_bc = astc_helpers::cem_supports_bc(cur_actual_cem); + const uint32_t total_endpoint_vals = astc_helpers::get_num_cem_values(cur_actual_cem); + + // DO NOT use tm.m_cem because the encoder may have selected a base+ofs variant instead. Use cur_actual_cem. + const basist::astc_ldr_t::trial_mode& tm = enc_out.m_encoder_trial_modes[blk_out.m_trial_mode_index]; + + // Check for config+part ID neighbor reuse + int neighbor_cfg_match_index = -1; + for (uint32_t i = 0; i < basist::astc_ldr_t::cMaxConfigReuseNeighbors; i++) + { + const basist::astc_ldr_t::prev_block_state* pNeighbor_state = nullptr; + + int dx = 0, dy = 0; + switch (i) + { + case 0: dx = -1; pNeighbor_state = pLeft_state; break; + case 1: dy = -1; pNeighbor_state = pUpper_state; break; + case 2: dx = -1; dy = -1; pNeighbor_state = pDiag_state; break; + default: assert(0); break; + } + + if (!pNeighbor_state) + continue; + + const int n_bx = bx + dx, n_by = by + dy; + assert((n_bx >= 0) && (n_by >= 0)); + + astc_helpers::log_astc_block& neighbor_log_blk = coded_blocks(n_bx, n_by); + + if (pNeighbor_state->m_tm_index != blk_out.m_trial_mode_index) + continue; + + if (neighbor_log_blk.m_color_endpoint_modes[0] != cur_log_blk.m_color_endpoint_modes[0]) + continue; + + if (neighbor_log_blk.m_partition_id != cur_log_blk.m_partition_id) + continue; + + assert(neighbor_log_blk.m_dual_plane == cur_log_blk.m_dual_plane); + assert(neighbor_log_blk.m_color_component_selector == cur_log_blk.m_color_component_selector); + assert(neighbor_log_blk.m_num_partitions == cur_log_blk.m_num_partitions); + assert(neighbor_log_blk.m_grid_width == cur_log_blk.m_grid_width); + assert(neighbor_log_blk.m_grid_height == cur_log_blk.m_grid_height); + assert(neighbor_log_blk.m_endpoint_ise_range == cur_log_blk.m_endpoint_ise_range); + assert(neighbor_log_blk.m_weight_ise_range == cur_log_blk.m_weight_ise_range); + + neighbor_cfg_match_index = i; + 
break; + } + + uint32_t reuse_full_cfg_model_index = 0; + if (pLeft_state) + reuse_full_cfg_model_index = pLeft_state->m_reused_full_cfg; + else + reuse_full_cfg_model_index = 1; + + if (pUpper_state) + reuse_full_cfg_model_index |= pUpper_state->m_reused_full_cfg ? 2 : 0; + else + reuse_full_cfg_model_index |= 2; + + if (neighbor_cfg_match_index >= 0) + { + total_header_bits += enc.encode_and_return_price(neighbor_cfg_match_index, config_reuse_model[reuse_full_cfg_model_index]); + + const basist::astc_ldr_t::prev_block_state* pReused_cfg_state = nullptr; + + switch (neighbor_cfg_match_index) + { + case 0: pReused_cfg_state = pLeft_state; break; + case 1: pReused_cfg_state = pUpper_state; break; + case 2: pReused_cfg_state = pDiag_state; break; + default: assert(0); break; + } + + if (!pReused_cfg_state) + { + assert(0); + fmt_error_printf("encoding internal failure\n"); + return false; + } + + assert(pReused_cfg_state->m_tm_index == blk_out.m_trial_mode_index); + + prev_state.m_tm_index = blk_out.m_trial_mode_index; + prev_state.m_base_cem_index = pReused_cfg_state->m_base_cem_index; + prev_state.m_subset_index = pReused_cfg_state->m_subset_index; + prev_state.m_ccs_index = pReused_cfg_state->m_ccs_index; + prev_state.m_grid_size = pReused_cfg_state->m_grid_size; + prev_state.m_grid_aniso = pReused_cfg_state->m_grid_aniso; + prev_state.m_used_part_hash = pReused_cfg_state->m_used_part_hash; + prev_state.m_reused_full_cfg = true; + + total_reuse_full_cfg_emitted++; + } + else + { + total_full_cfg_emitted++; + + total_header_bits += enc.encode_and_return_price(basist::astc_ldr_t::cMaxConfigReuseNeighbors, config_reuse_model[reuse_full_cfg_model_index]); + + // ------------------------------------------- Set TM index + { + uint32_t cem_index, subset_index, ccs_index, grid_size, grid_aniso; + + const uint_vec& submodes = separate_tm_index(block_width, block_height, enc_out.m_grouped_encoder_trial_modes, tm, + cem_index, subset_index, ccs_index, grid_size, grid_aniso); 
+ + // TODO: sort this + uint32_t submode_index; + for (submode_index = 0; submode_index < submodes.size(); submode_index++) + if (submodes[submode_index] == (uint32_t)blk_out.m_trial_mode_index) + break; + + if (submode_index == submodes.size_u32()) + { + assert(0); + fmt_error_printf("Failed finding mode\n"); + return false; + } + + uint32_t prev_cem_index = astc_helpers::CEM_LDR_RGB_DIRECT; + uint32_t prev_subset_index = 0; + uint32_t prev_ccs_index = 0; + uint32_t prev_grid_size = 0; + uint32_t prev_grid_aniso = 0; + + if (pPred_state) + { + prev_cem_index = pPred_state->m_base_cem_index; + prev_subset_index = pPred_state->m_subset_index; + prev_ccs_index = pPred_state->m_ccs_index; + prev_grid_size = pPred_state->m_grid_size; + prev_grid_aniso = pPred_state->m_grid_aniso; + } + + const uint32_t ldrcem_index = basist::astc_ldr_t::cem_to_ldrcem_index(prev_cem_index); + + total_header_bits += cem_index_model[ldrcem_index].get_price(cem_index); + enc.encode(cem_index, cem_index_model[ldrcem_index]); + + total_header_bits += subset_index_model[prev_subset_index].get_price(subset_index); + enc.encode(subset_index, subset_index_model[prev_subset_index]); + + total_header_bits += ccs_index_model[prev_ccs_index].get_price(ccs_index); + enc.encode(ccs_index, ccs_index_model[prev_ccs_index]); + + total_header_bits += grid_size_model[prev_grid_size].get_price(grid_size); + enc.encode(grid_size, grid_size_model[prev_grid_size]); + + total_header_bits += grid_aniso_model[prev_grid_aniso].get_price(grid_aniso); + enc.encode(grid_aniso, grid_aniso_model[prev_grid_aniso]); + + if (submodes.size() > 1) + { + basist::arith::arith_data_model& submode_model = submode_models[cem_index][subset_index][ccs_index][grid_size][grid_aniso]; + if (!submode_model.get_num_data_syms()) + submode_model.init(submodes.size_u32(), true); + + total_header_bits += submode_model.get_price(submode_index); + enc.encode(submode_index, submode_model); + } + + prev_state.m_tm_index = 
blk_out.m_trial_mode_index; + prev_state.m_base_cem_index = cem_index; + prev_state.m_subset_index = subset_index; + prev_state.m_ccs_index = ccs_index; + prev_state.m_grid_size = grid_size; + prev_state.m_grid_aniso = grid_aniso; + prev_state.m_reused_full_cfg = false; + } + + // Send base_ofs bit if the tm is direct + if ((tm.m_cem == astc_helpers::CEM_LDR_RGB_DIRECT) || (tm.m_cem == astc_helpers::CEM_LDR_RGBA_DIRECT)) + { + const bool is_base_ofs = (cur_log_blk.m_color_endpoint_modes[0] == astc_helpers::CEM_LDR_RGB_BASE_PLUS_OFFSET) || + (cur_log_blk.m_color_endpoint_modes[0] == astc_helpers::CEM_LDR_RGBA_BASE_PLUS_OFFSET); + + total_header_bits += is_base_ofs_model.get_price(is_base_ofs); + enc.encode(is_base_ofs, is_base_ofs_model); + } + + if (tm.m_num_parts > 1) + { + // Send unique part pattern ID + astc_ldr::partitions_data* pPart_data = (tm.m_num_parts == 2) ? &enc_out.m_part_data_p2 : &enc_out.m_part_data_p3; + + const uint32_t astc_pat_index = cur_log_blk.m_partition_id; + const uint32_t unique_pat_index = pPart_data->m_part_seed_to_unique_index[astc_pat_index]; + const uint32_t total_unique_indices = pPart_data->m_total_unique_patterns; + assert(unique_pat_index < total_unique_indices); + + num_part_hash_probes++; + + uint32_t use_part_model_index = 0; + if (pLeft_state) + use_part_model_index = pLeft_state->m_used_part_hash; + else + use_part_model_index = 1; + if (pUpper_state) + use_part_model_index |= pUpper_state->m_used_part_hash ? 2 : 0; + else + use_part_model_index |= 2; + + int* pPart_hash = (tm.m_num_parts == 2) ? 
part2_hash : part3_hash; + + const uint32_t h = basist::astc_ldr_t::part_hash_index(unique_pat_index); + + if (pPart_hash[h] != (int)unique_pat_index) + { +#if defined(_DEBUG) || defined(DEBUG) + // sanity + for (uint32_t i = 0; i < basist::astc_ldr_t::PART_HASH_SIZE; i++) + { + assert(pPart_hash[i] != (int)unique_pat_index); + } +#endif + + total_header_bits += enc.encode_and_return_price(0, use_part_hash_model[use_part_model_index]); + total_header_bits += enc.put_truncated_binary(unique_pat_index, total_unique_indices); + + if (global_cfg.m_debug_images) + { + vis_img.fill_box(base_x, base_y, block_width, block_height, color_rgba(0, 0, 255, 255)); + } + + prev_state.m_used_part_hash = false; + } + else + { + num_part_hash_hits++; + + if (global_cfg.m_debug_images) + { + vis_img.fill_box(base_x, base_y, block_width, block_height, color_rgba(255, 0, 0, 255)); + } + + total_header_bits += enc.encode_and_return_price(1, use_part_hash_model[use_part_model_index]); + total_header_bits += enc.encode_and_return_price(h, (tm.m_num_parts == 2) ? part2_hash_index_model : part3_hash_index_model); + + prev_state.m_used_part_hash = true; + } + + pPart_hash[basist::astc_ldr_t::part_hash_index(unique_pat_index)] = unique_pat_index; + } + else + { + prev_state.m_used_part_hash = true; // bias to true + } + + } // if (neighbor_cfg_match_index >= 0) + + // ----------------------------------------- Send endpoints + const int num_endpoint_levels = astc_helpers::get_ise_levels(cur_log_blk.m_endpoint_ise_range); + const auto& endpoint_ise_to_rank = astc_helpers::g_dequant_tables.get_endpoint_tab(cur_log_blk.m_endpoint_ise_range).m_ISE_to_rank; + + uint32_t bc_model_index = 0; + if (pLeft_state) + bc_model_index = pLeft_state->m_first_endpoint_uses_bc; + else + bc_model_index = 1; + + if (pUpper_state) + bc_model_index |= pUpper_state->m_first_endpoint_uses_bc ? 
2 : 0; + else + bc_model_index |= 2; + + bool endpoints_use_bc[astc_helpers::MAX_PARTITIONS] = { false }; + + if (astc_helpers::cem_supports_bc(cur_actual_cem)) + { + for (uint32_t part_iter = 0; part_iter < tm.m_num_parts; part_iter++) + { + const bool cur_uses_bc = astc_helpers::used_blue_contraction(cur_actual_cem, cur_log_blk.m_endpoints + part_iter * total_endpoint_vals, cur_log_blk.m_endpoint_ise_range); + + endpoints_use_bc[part_iter] = cur_uses_bc; + + } // part_iter + + prev_state.m_first_endpoint_uses_bc = endpoints_use_bc[0]; + } + + int best_reuse_bx = -1, best_reuse_by = -1; + uint32_t best_reuse_index = 0; + const astc_helpers::log_astc_block* pEndpoint_pred_log_blk = nullptr; + + if (endpoint_dpcm_global_enable) + { + int64_t best_trial_delta2 = INT64_MAX; + float best_trial_bits = BIG_FLOAT_VAL; + + //auto& trial_dpcm_model = dpcm_endpoint_models[cur_log_blk.m_endpoint_ise_range - astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE]; + + for (uint32_t reuse_index = 0; reuse_index < basist::astc_6x6_hdr::NUM_REUSE_XY_DELTAS; reuse_index++) + { + const int rx = (int)bx + basist::astc_6x6_hdr::g_reuse_xy_deltas[reuse_index].m_x; + const int ry = (int)by + basist::astc_6x6_hdr::g_reuse_xy_deltas[reuse_index].m_y; + if ((rx < 0) || (ry < 0) || (rx >= (int)num_blocks_x) || (ry >= (int)num_blocks_y)) + continue; + + const astc_helpers::log_astc_block* pTrial_log_blk = &coded_blocks(rx, ry); + if (pTrial_log_blk->m_solid_color_flag_ldr) + continue; + + uint8_t trial_predicted_endpoints[astc_helpers::MAX_PARTITIONS][astc_helpers::MAX_CEM_ENDPOINT_VALS] = { }; + + uint32_t part_iter; + for (part_iter = 0; part_iter < tm.m_num_parts; part_iter++) + { + const bool always_repack_flag = false; + bool blue_contraction_clamped_flag = false, base_ofs_clamped_flag = false; + + bool conv_status = basist::astc_ldr_t::convert_endpoints_across_cems( + pTrial_log_blk->m_color_endpoint_modes[0], pTrial_log_blk->m_endpoint_ise_range, pTrial_log_blk->m_endpoints, + cur_actual_cem, 
cur_log_blk.m_endpoint_ise_range, trial_predicted_endpoints[part_iter], + always_repack_flag, + endpoints_use_bc[part_iter], false, + blue_contraction_clamped_flag, base_ofs_clamped_flag); + + if (!conv_status) + break; + } // part_iter + + if (part_iter < tm.m_num_parts) + continue; // failed + + int64_t trial_endpoint_delta2 = 0; + for (part_iter = 0; part_iter < tm.m_num_parts; part_iter++) + { + for (uint32_t val_iter = 0; val_iter < total_endpoint_vals; val_iter++) + { + int cur_e_rank = endpoint_ise_to_rank[cur_log_blk.m_endpoints[part_iter * total_endpoint_vals + val_iter]]; + int prev_e_rank = endpoint_ise_to_rank[trial_predicted_endpoints[part_iter][val_iter]]; + + int e_delta = cur_e_rank - prev_e_rank; + + trial_endpoint_delta2 += e_delta * e_delta; + + } // val_iter + + } // part_iter + + const float N = (float)(total_endpoint_vals * tm.m_num_parts); + const float mse = (float)trial_endpoint_delta2 / N; + + // Gaussian entropy estimate - precomputed 0.5 * log2(2*pi*e) = ~2.0470956f + const float k_const = 2.0470956f; + + float bits_per_sym = 0.5f * log2f(basisu::maximum(mse, 1e-9f)) + k_const; + + bits_per_sym = clamp(bits_per_sym, 0.05f, 8.0f); + + // total est bits for this block’s endpoints + float total_est_bits = bits_per_sym * N; + + total_est_bits += endpoint_reuse_delta_model.get_price(reuse_index); + + if (total_est_bits < best_trial_bits) + { + best_trial_delta2 = trial_endpoint_delta2; + best_trial_bits = total_est_bits; + + best_reuse_bx = rx; + best_reuse_by = ry; + best_reuse_index = reuse_index; + + if (!best_trial_delta2) + break; + } + + } // reuse_index + + if (best_reuse_bx >= 0) + { + pEndpoint_pred_log_blk = &coded_blocks(best_reuse_bx, best_reuse_by); + + assert(!pEndpoint_pred_log_blk->m_solid_color_flag_ldr); + } + + } // if (endpoint_dpcm_global_enable) + + uint8_t predicted_endpoints[astc_helpers::MAX_PARTITIONS][astc_helpers::MAX_CEM_ENDPOINT_VALS] = { }; + + bool use_dpcm_endpoints = false; + + if (pEndpoint_pred_log_blk) + { 
+ use_dpcm_endpoints = true; + + assert(cur_log_blk.m_num_partitions == tm.m_num_parts); + + for (uint32_t part_iter = 0; part_iter < tm.m_num_parts; part_iter++) + { + const bool always_repack_flag = false; + bool blue_contraction_clamped_flag = false, base_ofs_clamped_flag = false; + + bool conv_status = basist::astc_ldr_t::convert_endpoints_across_cems( + pEndpoint_pred_log_blk->m_color_endpoint_modes[0], pEndpoint_pred_log_blk->m_endpoint_ise_range, pEndpoint_pred_log_blk->m_endpoints, + cur_actual_cem, cur_log_blk.m_endpoint_ise_range, predicted_endpoints[part_iter], + always_repack_flag, + endpoints_use_bc[part_iter], false, + blue_contraction_clamped_flag, base_ofs_clamped_flag); + + if (!conv_status) + { + // In practice, should never happen + use_dpcm_endpoints = false; + break; + } + } + } + + // TODO: Decide what is cheaper, endpoint DPCM vs. raw + + if (use_dpcm_endpoints) + { + total_endpoint_bits += enc.encode_and_return_price(1, use_dpcm_endpoints_model); + + total_endpoint_bits += enc.encode_and_return_price(best_reuse_index, endpoint_reuse_delta_model); + + if (astc_helpers::cem_supports_bc(cur_actual_cem)) + { + for (uint32_t part_iter = 0; part_iter < tm.m_num_parts; part_iter++) + { + total_endpoint_bits += enc.encode_and_return_price(endpoints_use_bc[part_iter], endpoints_use_bc_models[bc_model_index]); + + } // part_iter + } + + // TODO: Perhaps separate DPCM models by CEM, entry index + auto& dpcm_model = dpcm_endpoint_models[cur_log_blk.m_endpoint_ise_range - astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE]; + + for (uint32_t part_iter = 0; part_iter < tm.m_num_parts; part_iter++) + { + for (uint32_t val_iter = 0; val_iter < total_endpoint_vals; val_iter++) + { + int cur_e_rank = endpoint_ise_to_rank[cur_log_blk.m_endpoints[part_iter * total_endpoint_vals + val_iter]]; + int prev_e_rank = endpoint_ise_to_rank[predicted_endpoints[part_iter][val_iter]]; + + int e_val = imod(cur_e_rank - prev_e_rank, num_endpoint_levels); + + total_endpoint_bits 
+= dpcm_model.get_price(e_val); + enc.encode(e_val, dpcm_model); + + } // val_iter + + } // part_iter + + total_used_endpoint_dpcm++; + } + else + { + total_endpoint_bits += enc.encode_and_return_price(0, use_dpcm_endpoints_model); + + for (uint32_t part_iter = 0; part_iter < tm.m_num_parts; part_iter++) + { + for (uint32_t val_iter = 0; val_iter < total_endpoint_vals; val_iter++) + { + auto& model = raw_endpoint_models[cur_log_blk.m_endpoint_ise_range - astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE]; + uint32_t e_val = cur_log_blk.m_endpoints[part_iter * total_endpoint_vals + val_iter]; + + total_endpoint_bits += model.get_price(e_val); + enc.encode(e_val, model); + + } // val_iter + + } // part_iter + + total_used_endpoint_raw++; + } + + } // if (full_cfg_endpoint_reuse_index >= 0) + + // ------------------------------------ Send weights + const uint32_t total_planes = cur_log_blk.m_dual_plane ? 2 : 1; + const uint32_t total_weights = cur_log_blk.m_grid_width * cur_log_blk.m_grid_height; + + const int num_weight_levels = astc_helpers::get_ise_levels(cur_log_blk.m_weight_ise_range); + const auto& weight_ise_to_rank = astc_helpers::g_dequant_tables.get_weight_tab(cur_log_blk.m_weight_ise_range).m_ISE_to_rank; + + uint32_t use_dct_model_index = 0; + + if (enc_cfg.m_use_dct) + { + if (pLeft_state) + use_dct_model_index = pLeft_state->m_used_weight_dct; + else + use_dct_model_index = 1; + + if (pUpper_state) + use_dct_model_index |= pUpper_state->m_used_weight_dct ? 
2 : 0; + else + use_dct_model_index |= 2; + } + + if (use_faster_format) + { + bool use_dct = enc_cfg.m_use_dct; + + // TODO - tune this threshold + //const uint32_t SWITCH_TO_DPCM_NUM_COEFF_THRESH = (cur_log_blk.m_grid_width * cur_log_blk.m_grid_height * 102 + 64) >> 7; + const uint32_t SWITCH_TO_DPCM_NUM_COEFF_THRESH = (cur_log_blk.m_grid_width * cur_log_blk.m_grid_height * 45 + 64) >> 7; + + if (use_dct) + { + for (uint32_t plane_iter = 0; plane_iter < total_planes; plane_iter++) + { + const basist::astc_ldr_t::dct_syms& syms = blk_out.m_packed_dct_plane_data[plane_iter]; + if (syms.m_max_coeff_mag > basist::astc_ldr_t::DCT_MAX_ARITH_COEFF_MAG) + { + use_dct = false; + break; + } + + if (syms.m_coeffs.size() > SWITCH_TO_DPCM_NUM_COEFF_THRESH) + { + use_dct = false; + break; + } + } + } + + if (enc_cfg.m_use_dct) + { + total_weight_bits += use_dct_model[use_dct_model_index].get_price(use_dct); + enc.encode(use_dct, use_dct_model[use_dct_model_index]); + } + + if (use_dct) + { + prev_state.m_used_weight_dct = true; + + total_used_dct++; + + if (total_planes > 1) + { + assert(blk_out.m_packed_dct_plane_data[0].m_num_dc_levels == blk_out.m_packed_dct_plane_data[1].m_num_dc_levels); + } + + for (uint32_t plane_iter = 0; plane_iter < total_planes; plane_iter++) + { + const basist::astc_ldr_t::dct_syms& syms = blk_out.m_packed_dct_plane_data[plane_iter]; + + if (syms.m_num_dc_levels == basist::astc_ldr_t::DCT_MEAN_LEVELS1) + mean1_bytes.push_back((uint8_t)syms.m_dc_sym); + else + { + assert(syms.m_num_dc_levels == basist::astc_ldr_t::DCT_MEAN_LEVELS0); + mean0_bits.put_bits(syms.m_dc_sym, 4); + } + + for (uint32_t i = 0; i < syms.m_coeffs.size(); i++) + { + if (syms.m_coeffs[i].m_coeff == INT16_MAX) + { + run_bytes.push_back(basist::astc_ldr_t::DCT_RUN_LEN_EOB_SYM_INDEX); + } + else + { + run_bytes.push_back((uint8_t)syms.m_coeffs[i].m_num_zeros); + + sign_bits.put_bits(syms.m_coeffs[i].m_coeff < 0, 1); + + assert((syms.m_coeffs[i].m_coeff != 0) && 
(iabs(syms.m_coeffs[i].m_coeff) <= 255)); + + coeff_bytes.push_back((uint8_t)(iabs(syms.m_coeffs[i].m_coeff) - 1)); + } + } + + } // plane_iter + } + else + { + total_used_weight_dpcm++; + + for (uint32_t plane_iter = 0; plane_iter < total_planes; plane_iter++) + { + int prev_w = num_weight_levels / 2; + + for (uint32_t weight_iter = 0; weight_iter < total_weights; weight_iter++) + { + int ise_w = cur_log_blk.m_weights[plane_iter + weight_iter * total_planes]; + int w = weight_ise_to_rank[ise_w]; + + int w_to_code = w; + w_to_code = imod(w - prev_w, num_weight_levels); + + prev_w = w; + + if (num_weight_levels <= 4) + weight2_bits.put_bits((uint8_t)w_to_code, 2); + else if (num_weight_levels <= 8) + weight3_bits.put_bits((uint8_t)w_to_code, 4); + else if (num_weight_levels <= 16) + weight4_bits.put_bits((uint8_t)w_to_code, 4); + else + weight8_bits.push_back((uint8_t)w_to_code); + + } // weight_iter + + } // plane_iter + } + } + else + { + float total_dpcm_bits = 0.0f, total_dct_bits = 0.0f; + const float FORBID_DCT_BITS = 1e+8f; + + for (uint32_t plane_iter = 0; plane_iter < total_planes; plane_iter++) + { + int prev_w = num_weight_levels / 2; + + for (uint32_t weight_iter = 0; weight_iter < total_weights; weight_iter++) + { + const auto& model = raw_weight_models[cur_log_blk.m_weight_ise_range - astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE]; + + int ise_w = cur_log_blk.m_weights[plane_iter + weight_iter * total_planes]; + int w = weight_ise_to_rank[ise_w]; + + int w_to_code = w; + w_to_code = imod(w - prev_w, num_weight_levels); + + prev_w = w; + + total_dpcm_bits += model.get_price(w_to_code); + + } // weight_iter + + } // plane_iter + + if (enc_cfg.m_use_dct) + { + for (uint32_t plane_iter = 0; plane_iter < total_planes; plane_iter++) + { + const basist::astc_ldr_t::dct_syms& syms = blk_out.m_packed_dct_plane_data[plane_iter]; + if (syms.m_max_coeff_mag > basist::astc_ldr_t::DCT_MAX_ARITH_COEFF_MAG) + { + total_dct_bits = FORBID_DCT_BITS; + break; + } + } + + if 
(total_dct_bits < FORBID_DCT_BITS) + { + for (uint32_t plane_iter = 0; plane_iter < total_planes; plane_iter++) + { + const basist::astc_ldr_t::dct_syms& syms = blk_out.m_packed_dct_plane_data[plane_iter]; + + assert((syms.m_num_dc_levels == basist::astc_ldr_t::DCT_MEAN_LEVELS0) || (syms.m_num_dc_levels == basist::astc_ldr_t::DCT_MEAN_LEVELS1)); + + total_dct_bits += weight_mean_models[(syms.m_num_dc_levels == basist::astc_ldr_t::DCT_MEAN_LEVELS1) ? 1 : 0].get_price(syms.m_dc_sym); + + for (uint32_t i = 0; i < syms.m_coeffs.size(); i++) + { + if (syms.m_coeffs[i].m_coeff == INT16_MAX) + { + total_dct_bits += dct_run_len_model.get_price(basist::astc_ldr_t::DCT_RUN_LEN_EOB_SYM_INDEX); + } + else + { + assert(syms.m_coeffs[i].m_num_zeros < basist::astc_ldr_t::DCT_RUN_LEN_EOB_SYM_INDEX); + + total_dct_bits += dct_run_len_model.get_price(syms.m_coeffs[i].m_num_zeros); + + total_dct_bits += 1.0f; // sign bit + assert((syms.m_coeffs[i].m_coeff != 0) && (iabs(syms.m_coeffs[i].m_coeff) <= 255)); + total_dct_bits += dct_coeff_mag.get_price(iabs(syms.m_coeffs[i].m_coeff) - 1); + } + } // i + } // plane_iter + } + } + + // TODO: Check if any DCT coeff overflows 8-bit mags, switch to DPCM. (In practice, not needed.) 
+ bool use_dct = false; + if ((enc_cfg.m_use_dct) && + (total_dct_bits < FORBID_DCT_BITS) && + ((total_dct_bits + use_dct_model[use_dct_model_index].get_price(1)) <= (total_dpcm_bits + use_dct_model[use_dct_model_index].get_price(0)))) + { + use_dct = true; + } + + if (enc_cfg.m_use_dct) + { + total_weight_bits += use_dct_model[use_dct_model_index].get_price(use_dct); + enc.encode(use_dct, use_dct_model[use_dct_model_index]); + } + + if (use_dct) + { + prev_state.m_used_weight_dct = true; + + total_used_dct++; + + if (total_planes > 1) + { + assert(blk_out.m_packed_dct_plane_data[0].m_num_dc_levels == blk_out.m_packed_dct_plane_data[1].m_num_dc_levels); + } + + for (uint32_t plane_iter = 0; plane_iter < total_planes; plane_iter++) + { + const basist::astc_ldr_t::dct_syms& syms = blk_out.m_packed_dct_plane_data[plane_iter]; + + total_weight_bits += enc.encode_and_return_price(syms.m_dc_sym, weight_mean_models[(syms.m_num_dc_levels == basist::astc_ldr_t::DCT_MEAN_LEVELS1) ? 1 : 0]); + + for (uint32_t i = 0; i < syms.m_coeffs.size(); i++) + { + if (syms.m_coeffs[i].m_coeff == INT16_MAX) + { + total_weight_bits += enc.encode_and_return_price(basist::astc_ldr_t::DCT_RUN_LEN_EOB_SYM_INDEX, dct_run_len_model); + + total_dct_syms++; + } + else + { + total_weight_bits += enc.encode_and_return_price(syms.m_coeffs[i].m_num_zeros, dct_run_len_model); + + total_dct_syms++; + + enc.put_bit(syms.m_coeffs[i].m_coeff < 0); + total_weight_bits += 1.0f; + + assert((syms.m_coeffs[i].m_coeff != 0) && (iabs(syms.m_coeffs[i].m_coeff) <= 255)); + total_weight_bits += enc.encode_and_return_price(iabs(syms.m_coeffs[i].m_coeff) - 1, dct_coeff_mag); + + total_dct_syms++; + } + } + + } // plane_iter + } + else + { + total_used_weight_dpcm++; + auto& model = raw_weight_models[cur_log_blk.m_weight_ise_range - astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE]; + + for (uint32_t plane_iter = 0; plane_iter < total_planes; plane_iter++) + { + int prev_w = num_weight_levels / 2; + + for (uint32_t 
weight_iter = 0; weight_iter < total_weights; weight_iter++) + { + int ise_w = cur_log_blk.m_weights[plane_iter + weight_iter * total_planes]; + int w = weight_ise_to_rank[ise_w]; + + int w_to_code = w; + w_to_code = imod(w - prev_w, num_weight_levels); + + prev_w = w; + + total_weight_bits += model.get_price(w_to_code); + enc.encode(w_to_code, model); + + total_dpcm_syms++; + + } // weight_iter + + } // plane_iter + } + + } // use_faster_format + + } // bx + + if (cur_run_len) + { + total_runs++; + total_run_blocks += cur_run_len; + + total_header_bits += enc.encode_and_return_price((uint32_t)basist::astc_ldr_t::xuastc_mode::cMODE_RUN, mode_model); + total_header_bits += enc.put_gamma_and_return_price(cur_run_len, m_run_len_contexts); + cur_run_len = 0; + } + + } // by + + enc.put_bits(basist::astc_ldr_t::FINAL_SYNC_MARKER, basist::astc_ldr_t::FINAL_SYNC_MARKER_BITS); + + enc.flush(); + + if (global_cfg.m_debug_output) + { + fmt_debug_printf("Encoding time: {} secs\n", itm.get_elapsed_secs()); + } + + if (global_cfg.m_debug_images) + { + save_png(global_cfg.m_debug_file_prefix + "vis_img.png", vis_img); + } + + if ((global_cfg.m_debug_images) || (global_cfg.m_debug_output)) + { + image coded_img(width, height); + + vector2D phys_blocks(num_blocks_x, num_blocks_y); + + for (uint32_t by = 0; by < num_blocks_y; by++) + { + for (uint32_t bx = 0; bx < num_blocks_x; bx++) + { + const astc_helpers::log_astc_block& log_blk = coded_blocks(bx, by); + + color_rgba block_pixels[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + + bool status = astc_helpers::decode_block(log_blk, block_pixels, block_width, block_height, enc_cfg.m_cem_enc_params.m_decode_mode_srgb ? astc_helpers::cDecodeModeSRGB8 : astc_helpers::cDecodeModeLDR8); + if (!status) + { + fmt_error_printf("astc_helpers::decode_block() failed\n"); + return false; + } + + // Be positive the logical block can be unpacked correctly as XUASTC LDR. 
+ color_rgba block_pixels_alt[astc_ldr::ASTC_LDR_MAX_BLOCK_PIXELS]; + bool status_alt = astc_helpers::decode_block_xuastc_ldr(log_blk, block_pixels_alt, block_width, block_height, enc_cfg.m_cem_enc_params.m_decode_mode_srgb ? astc_helpers::cDecodeModeSRGB8 : astc_helpers::cDecodeModeLDR8); + if (!status_alt) + { + fmt_error_printf("astc_helpers::decode_block_xuastc_ldr() failed\n"); + return false; + } + + if (memcmp(block_pixels, block_pixels_alt, sizeof(color_rgba) * block_width * block_height) != 0) + { + fmt_error_printf("astc_helpers::decode_block_xuastc_ldr() decode pixel mismatch\n"); + return false; + } + + coded_img.set_block_clipped(block_pixels, bx * block_width, by * block_height, block_width, block_height); + + } // bx + + } //by + + if (global_cfg.m_debug_images) + save_png(global_cfg.m_debug_file_prefix + "coded_img.png", coded_img); + + if (global_cfg.m_debug_output) + { + debug_printf("Orig image vs. coded img:\n"); + print_image_metrics(orig_img, coded_img); + } + } + + const uint64_t comp_data_size = enc.get_data_buf().size(); + if (comp_data_size > UINT32_MAX) + return false; + + uint8_vec suffix_bytes; + + if (use_faster_format) + { +#if !BASISD_SUPPORT_KTX2_ZSTD + fmt_error_printf("Full ZStd syntax not supported in this build (set BASISD_SUPPORT_KTX2_ZSTD to 1)\n"); + return false; +#else + suffix_bytes.reserve(8192); + + mean0_bits.flush(); + sign_bits.flush(); + weight2_bits.flush(); + weight3_bits.flush(); + weight4_bits.flush(); + + const uint32_t zstd_level = 9; + + uint8_vec comp_mean0, comp_mean1, comp_run, comp_coeff, comp_weight2, comp_weight3, comp_weight4, comp_weight8; + + if (!zstd_compress(mean0_bits.get_bytes().data(), mean0_bits.get_bytes().size(), comp_mean0, zstd_level)) + return false; + if (!zstd_compress(mean1_bytes.data(), mean1_bytes.size(), comp_mean1, zstd_level)) + return false; + if (!zstd_compress(run_bytes.data(), run_bytes.size(), comp_run, zstd_level)) + return false; + if (!zstd_compress(coeff_bytes.data(), 
coeff_bytes.size(), comp_coeff, zstd_level)) + return false; + if (!zstd_compress(weight2_bits.get_bytes().data(), weight2_bits.get_bytes().size(), comp_weight2, zstd_level)) + return false; + if (!zstd_compress(weight3_bits.get_bytes().data(), weight3_bits.get_bytes().size(), comp_weight3, zstd_level)) + return false; + if (!zstd_compress(weight4_bits.get_bytes().data(), weight4_bits.get_bytes().size(), comp_weight4, zstd_level)) + return false; + if (!zstd_compress(weight8_bits.data(), weight8_bits.size(), comp_weight8, zstd_level)) + return false; + + hdr.m_flags = (uint8_t)basist::astc_ldr_t::xuastc_ldr_syntax::cHybridArithZStd; + + hdr.m_arith_bytes_len = (uint32_t)comp_data_size; + hdr.m_mean0_bits_len = (uint32_t)comp_mean0.size(); + hdr.m_mean1_bytes_len = (uint32_t)comp_mean1.size(); + hdr.m_run_bytes_len = (uint32_t)comp_run.size(); + hdr.m_coeff_bytes_len = (uint32_t)comp_coeff.size(); + hdr.m_sign_bits_len = (uint32_t)sign_bits.get_bytes().size(); + hdr.m_weight2_bits_len = (uint32_t)comp_weight2.size(); + hdr.m_weight3_bits_len = (uint32_t)comp_weight3.size(); + hdr.m_weight4_bits_len = (uint32_t)comp_weight4.size(); + hdr.m_weight8_bytes_len = (uint32_t)comp_weight8.size(); + + suffix_bytes.append(comp_mean0); + suffix_bytes.append(comp_mean1); + suffix_bytes.append(comp_run); + suffix_bytes.append(comp_coeff); + suffix_bytes.append(sign_bits.get_bytes()); + suffix_bytes.append(comp_weight2); + suffix_bytes.append(comp_weight3); + suffix_bytes.append(comp_weight4); + suffix_bytes.append(comp_weight8); + + if (global_cfg.m_debug_output) + { + fmt_debug_printf("Zstd compressed sizes:\n"); + fmt_debug_printf(" Mean0 bytes: {} comp size: {}\n", (uint64_t)mean0_bits.get_bytes().size(), (uint64_t)comp_mean0.size()); + fmt_debug_printf(" Mean1 bytes: {} comp size: {}\n", (uint64_t)mean1_bytes.size(), (uint64_t)comp_mean1.size()); + fmt_debug_printf(" Run bytes: {} comp size: {}\n", (uint64_t)run_bytes.size(), (uint64_t)comp_run.size()); + fmt_debug_printf(" 
Coeff bytes: {} comp size: {}\n", (uint64_t)coeff_bytes.size(), (uint64_t)comp_coeff.size()); + fmt_debug_printf(" Sign bytes: {}\n", (uint64_t)sign_bits.get_bytes().size()); + fmt_debug_printf(" Weight2 bytes: {} comp size: {}\n", (uint64_t)weight2_bits.get_bytes().size(), (uint64_t)comp_weight2.size()); + fmt_debug_printf(" Weight3 bytes: {} comp size: {}\n", (uint64_t)weight3_bits.get_bytes().size(), (uint64_t)comp_weight3.size()); + fmt_debug_printf(" Weight4 bytes: {} comp size: {}\n", (uint64_t)weight4_bits.get_bytes().size(), (uint64_t)comp_weight4.size()); + fmt_debug_printf(" Weight8 bytes: {} comp size: {}\n", (uint64_t)weight8_bits.size(), (uint64_t)comp_weight8.size()); + } +#endif + } + + assert(comp_data.size() == 0); + if (use_faster_format) + { + comp_data.resize(sizeof(hdr)); + memcpy(comp_data.data(), &hdr, sizeof(hdr)); + } + else + { + comp_data.push_back((uint8_t)basist::astc_ldr_t::xuastc_ldr_syntax::cFullArith); + } + + comp_data.append(enc.get_data_buf()); + + comp_data.append(suffix_bytes); + + if (comp_data.size() > UINT32_MAX) + return false; + + if (global_cfg.m_debug_output) + { + fmt_debug_printf("Total blocks: {}\n", total_blocks); + fmt_debug_printf("Total lossy replacements made by supercompression layer: {} {3.2}%\n", total_lossy_replacements, (float)total_lossy_replacements * 100.0f / (float)total_blocks); + fmt_debug_printf("Total runs: {}, total run blocks: {} {3.2}%\n", total_runs, total_run_blocks, (float)total_run_blocks * 100.0f / (float)total_blocks); + fmt_debug_printf("Total blocks coded (not inside runs): {} {3.2}%\n", total_nonrun_blocks, (float)total_nonrun_blocks * 100.0f / (float)total_blocks); + fmt_debug_printf("num_part_hash_probes: {}, num_part_hash_hits: {} {3.2}%\n", num_part_hash_probes, num_part_hash_hits, num_part_hash_probes ? 
((float)num_part_hash_hits * 100.0f / (float)num_part_hash_probes) : 0); + fmt_debug_printf("Total DCT syms: {}, DPCM syms: {}\n", total_dct_syms, total_dpcm_syms); + + const uint32_t total_non_void_extent_blocks = total_blocks - total_solid_blocks; + + fmt_debug_printf("Total blocks using void extent: {} {3.2}%\n", + total_solid_blocks, (float)total_solid_blocks * 100.0f / (float)total_blocks); + + fmt_debug_printf("Total non void-extent blocks: {} {3.2}%\n", + total_non_void_extent_blocks, (float)total_non_void_extent_blocks * 100.0f / (float)total_blocks); + + fmt_debug_printf("Total full cfg+part ID+endpoint reuse commands: {} {3.2}%\n", + total_full_reuse_commands, (float)total_full_reuse_commands * 100.0f / (float)total_blocks); + + fmt_debug_printf("Total raw commands: {} {3.2}%\n", + total_raw_commands, (float)total_raw_commands * 100.0f / (float)total_blocks); + + fmt_debug_printf("Total reuse cfg+part ID emitted: {} {3.2}%, Total full cfg emitted: {} {3.2}%\n", + total_reuse_full_cfg_emitted, (float)total_reuse_full_cfg_emitted * 100.0f / (float)total_blocks, + total_full_cfg_emitted, (float)total_full_cfg_emitted * 100.0f / (float)total_blocks); + + fmt_debug_printf("Total coded endpoints using DPCM: {} {3.2}%\n", + total_used_endpoint_dpcm, (float)total_used_endpoint_dpcm * 100.0f / (float)total_non_void_extent_blocks); + + fmt_debug_printf("Total coded endpoints using RAW: {} {3.2}%\n", + total_used_endpoint_raw, (float)total_used_endpoint_raw * 100.0f / (float)total_non_void_extent_blocks); + + fmt_debug_printf("Total coded blocks using weight DCT: {} {3.2}%, total blocks using weight DPCM: {} {3.2}%\n", + total_used_dct, (float)total_used_dct * 100.0f / total_non_void_extent_blocks, + total_used_weight_dpcm, (float)total_used_weight_dpcm * 100.0f / (float)total_non_void_extent_blocks); + + fmt_debug_printf("Total header bits: {} bytes: {}, bpp: {}, bits per non-void extent block: {}\nTotal endpoint bits: {}, bytes: {}, bpp: {}, bits per non-void 
extent block: {}\nTotal weight bits: {}, bytes: {}, bpp: {}, bits per non-void extent block: {}\nTotal_bits: {} bytes: {}, bpp {}, bits per non-void extent block: {}\n", + total_header_bits, total_header_bits / 8.0f, total_header_bits / (double)total_pixels, total_header_bits / (double)total_non_void_extent_blocks, + total_endpoint_bits, total_endpoint_bits / 8.0f, total_endpoint_bits / (double)total_pixels, total_endpoint_bits / (double)total_non_void_extent_blocks, + total_weight_bits, total_weight_bits / 8.0f, total_weight_bits / (double)total_pixels, total_weight_bits / (double)total_non_void_extent_blocks, + total_header_bits + total_endpoint_bits + total_weight_bits, + (total_header_bits + total_endpoint_bits + total_weight_bits) / 8.0f, + (total_header_bits + total_endpoint_bits + total_weight_bits) / (double)total_pixels, + (total_header_bits + total_endpoint_bits + total_weight_bits) / (double)total_non_void_extent_blocks); + + fmt_debug_printf("Compressed to {} bytes, {3.3}bpp\n\n", comp_data.size_u32(), ((float)comp_data.size() * 8.0f) / (float)total_pixels); + +#if 0 + for (uint32_t i = 0; i < 4; i++) + { + solid_color_dpcm_model[i].print_prices(fmt_string("solid_color_dpcm_model[{}]:\n\n", i).c_str()); + } +#endif + } + + return true; +} +
+// Marks the encoder as initialized. Safe to call repeatedly: once g_initialized
+// is set, later calls return without doing anything.
+void encoder_init()
+{
+	if (g_initialized)
+		return;
+
+	g_initialized = true;
+}
+
+// Softens block-boundary discontinuities in src_img and writes the filtered image to dst_img.
+//
+// Pass 1 walks every vertical boundary (x = filter_block_width, 2 * filter_block_width, ...) and
+// replaces the two pixels touching it (x - 1 and x) with a blend of the four pixels x - 2 .. x + 1.
+// It reads src_img and writes a private copy. Pass 2 does the same across horizontal boundaries
+// (multiples of filter_block_height), reading that copy and writing dst_img.
+//
+// stronger_filtering: use the (3, 2, 1) / 6 blend instead of the default (5, 2, 1) / 8.
+// SKIP_THRESH: a boundary position is left alone when any of the 4 channels differs by more than
+// this between the two pixels touching the boundary; values >= 256 turn the test off.
+//
+// A zero filter_block_width or filter_block_height would never advance the boundary loops, so
+// in that case the image is passed through unfiltered instead of hanging.
+void deblock_filter(uint32_t filter_block_width, uint32_t filter_block_height, const image& src_img, image& dst_img, bool stronger_filtering, int SKIP_THRESH)
+{
+	image temp_img(src_img);
+
+	if ((!filter_block_width) || (!filter_block_height))
+	{
+		dst_img = temp_img;
+		return;
+	}
+
+	for (int y = 0; y < (int)src_img.get_height(); y++)
+	{
+		for (int x = filter_block_width; x < (int)src_img.get_width(); x += filter_block_width)
+		{
+			color_rgba ll(src_img.get_clamped(x - 2, y));
+			color_rgba l(src_img.get_clamped(x - 1, y));
+			color_rgba r(src_img.get_clamped(x, y));
+			color_rgba rr(src_img.get_clamped(x + 1, y));
+
+			if (SKIP_THRESH < 256)
+			{
+				bool skip_flag = false;
+				for (uint32_t c = 0; c < 4; c++)
+				{
+					int delta = iabs((int)l[c] - (int)r[c]);
+					if (delta > SKIP_THRESH)
+					{
+						skip_flag = true;
+						break;
+					}
+				}
+
+				if (skip_flag)
+					continue;
+			}
+
+			color_rgba ml, mr;
+			for (uint32_t c = 0; c < 4; c++)
+			{
+				if (stronger_filtering)
+				{
+					ml[c] = (3 * l[c] + 2 * r[c] + ll[c] + 3) / 6;
+					mr[c] = (3 * r[c] + 2 * l[c] + rr[c] + 3) / 6;
+				}
+				else
+				{
+					ml[c] = (5 * l[c] + 2 * r[c] + ll[c] + 4) / 8;
+					mr[c] = (5 * r[c] + 2 * l[c] + rr[c] + 4) / 8;
+				}
+			}
+
+			temp_img.set_clipped(x - 1, y, ml);
+			temp_img.set_clipped(x, y, mr);
+
+		} // x
+
+	} // y
+
+	dst_img = temp_img;
+
+	for (int x = 0; x < (int)temp_img.get_width(); x++)
+	{
+		for (int y = filter_block_height; y < (int)temp_img.get_height(); y += filter_block_height)
+		{
+			color_rgba uu(temp_img.get_clamped(x, y - 2));
+			color_rgba u(temp_img.get_clamped(x, y - 1));
+			color_rgba d(temp_img.get_clamped(x, y));
+			color_rgba dd(temp_img.get_clamped(x, y + 1));
+
+			if (SKIP_THRESH < 256)
+			{
+				bool skip_flag = false;
+				for (uint32_t c = 0; c < 4; c++)
+				{
+					int delta = iabs((int)u[c] - (int)d[c]);
+					if (delta > SKIP_THRESH)
+					{
+						skip_flag = true;
+						break;
+					}
+				}
+
+				if (skip_flag)
+					continue;
+			}
+
+			color_rgba mu, md;
+			for (uint32_t c = 0; c < 4; c++)
+			{
+				if (stronger_filtering)
+				{
+					mu[c] = (3 * u[c] + 2 * d[c] + uu[c] + 3) / 6;
+					md[c] = (3 * d[c] + 2 * u[c] + dd[c] + 3) / 6;
+				}
+				else
+				{
+					mu[c] = (5 * u[c] + 2 * d[c] + uu[c] + 4) / 8;
+					md[c] = (5 * d[c] + 2 * u[c] + dd[c] + 4) / 8;
+				}
+			}
+
+			dst_img.set_clipped(x, y - 1, mu);
+			dst_img.set_clipped(x, y, md);
+
+		} // y
+
+	} // x
+}
+
+} // namespace astc_ldr
+} // namespace basisu
diff --git a/external/basis_universal/encoder/basisu_astc_ldr_encode.h b/external/basis_universal/encoder/basisu_astc_ldr_encode.h
new file mode 100644
index 0000000000..c9e0e836d0
--- /dev/null
+++ b/external/basis_universal/encoder/basisu_astc_ldr_encode.h
@@ -0,0 +1,126 @@
+// File: basisu_astc_ldr_encode.h
+#pragma once
+#include "basisu_enc.h"
+#include "../transcoder/basisu_astc_helpers.h"
+
+namespace basisu {
+namespace astc_ldr {
+
+	// One-time module initialization; calls after the first return immediately.
+	void encoder_init();
+
+	const int EFFORT_LEVEL_MIN = 0, EFFORT_LEVEL_MAX = 10, EFFORT_LEVEL_DEF = 3;
+	const int DCT_QUALITY_MIN = 1, DCT_QUALITY_MAX = 100;
+
+	// Settings passed to compress_image(). Every field has a default; clear() restores all of them.
+	struct astc_ldr_encode_config
+	{
+		astc_ldr_encode_config()
+		{
+		}
+
+		void clear()
+		{
+			*this = astc_ldr_encode_config();
+		}
+
+		// ASTC LDR block dimensions. Must be a valid ASTC block dimension. Any supported from 4x4-12x12, including unequal dimensions.
+		uint32_t m_astc_block_width = 6;
+		uint32_t m_astc_block_height = 6;
+
+		// If true, the encoder assumes all ASTC blocks will be decompressed using sRGB vs. LDR8 mode. This corresponds to astcenc's -cs vs. cl color profiles.
+		// This should match how the texture is later decoded by the GPU for maximum quality. This bit is stored into the output file.
+		bool m_astc_decode_mode_srgb = true;
+
+		// If true, trade off some compression (3-10%) for faster decompression.
+		// If false, favor highest compression, but slower decompression.
+		//bool m_use_faster_format = true;
+
+		// Bitstream syntax to emit. NOTE(review): appears to supersede the commented-out flag above - confirm.
+		basist::astc_ldr_t::xuastc_ldr_syntax m_compressed_syntax = basist::astc_ldr_t::xuastc_ldr_syntax::cFullArith;
+
+		// Encoder CPU effort vs. quality. [0,10], higher=better.
+		// 0=extremely fast but very brittle (no subsets)
+		// 1=first 2 subset effort level
+		// 10=extremely high CPU requirements.
+		uint32_t m_effort_level = 3;
+
+		// Weight grid DCT quality [1,100] - higher=better quality (JPEG-style).
+		float m_dct_quality = 85;
+
+		// true=use weight grid DCT, false=always use DPCM
+		bool m_use_dct = false;
+
+		// true=use lossy supercompression, false=supercompression stage is always lossless.
+		bool m_lossy_supercompression = false;
+
+		// Channel weights used to compute RGBA colorspace L2 errors. Must be >= 1.
+		uint32_t m_comp_weights[4] = { 1, 1, 1, 1 };
+
+		// Lossy supercompression stage parameters for RGB vs. RGBA image inputs.
+		// (Bounded RDO - explicitly not Lagrangian.)
+		float m_replacement_min_psnr = 35.0f; // if the block's base PSNR is less than this, it cannot be changed
+		float m_psnr_trial_diff_thresh = 1.5f; // reject candidates if their PSNR is lower than m_replacement_min_psnr-m_psnr_trial_diff_thresh
+		float m_psnr_trial_diff_thresh_edge = 1.0f; // edge variant
+
+		// Lossy supercompression settings - alpha texture variants
+		float m_replacement_min_psnr_alpha = 38.0f;
+		float m_psnr_trial_diff_thresh_alpha = .75f;
+		float m_psnr_trial_diff_thresh_edge_alpha = .5f;
+
+		// If true, try encoding blurred blocks, in addition to unblurred, for superpass 1 and 2.
+		// Higher quality, but massively slower and not yet tuned/refined.
+		bool m_block_blurring_p1 = false, m_block_blurring_p2 = false;
+
+		// If true, no matter what effort level subset usage will be disabled.
+		bool m_force_disable_subsets = false;
+
+		// If true, no matter what effort level RGB dual plane usage will be disabled.
+		bool m_force_disable_rgb_dual_plane = false;
+
+		bool m_debug_images = false;
+		bool m_debug_output = false;
+
+		std::string m_debug_file_prefix;
+
+		void debug_print() const
+		{
+			fmt_debug_printf("ASTC block dimensions: {}x{}\n", m_astc_block_width, m_astc_block_height);
+			fmt_debug_printf("ASTC decode profile mode sRGB: {}\n", m_astc_decode_mode_srgb);
+			fmt_debug_printf("Syntax: {}\n", (uint32_t)m_compressed_syntax);
+			fmt_debug_printf("Effort level: {}\n", m_effort_level);
+			fmt_debug_printf("Use DCT: {}\n", m_use_dct);
+			fmt_debug_printf("DCT quality level (1-100): {}\n", m_dct_quality);
+			fmt_debug_printf("Comp weights: {} {} {} {}\n", m_comp_weights[0], m_comp_weights[1], m_comp_weights[2], m_comp_weights[3]);
+			fmt_debug_printf("Block blurring: {} {}\n", m_block_blurring_p1, m_block_blurring_p2);
+			fmt_debug_printf("Force disable subsets: {}\n", m_force_disable_subsets);
+			fmt_debug_printf("Force disable RGB dual plane: {}\n", m_force_disable_rgb_dual_plane);
+
+			fmt_debug_printf("\nLossy supercompression: {}\n", m_lossy_supercompression);
+			fmt_debug_printf("m_replacement_min_psnr: {}\n", m_replacement_min_psnr);
+			fmt_debug_printf("m_psnr_trial_diff_thresh: {}\n", m_psnr_trial_diff_thresh);
+			fmt_debug_printf("m_psnr_trial_diff_thresh_edge: {}\n", m_psnr_trial_diff_thresh_edge);
+			fmt_debug_printf("m_replacement_min_psnr_alpha: {}\n", m_replacement_min_psnr_alpha);
+			fmt_debug_printf("m_psnr_trial_diff_thresh_alpha: {}\n", m_psnr_trial_diff_thresh_alpha);
+			fmt_debug_printf("m_psnr_trial_diff_thresh_edge_alpha: {}\n", m_psnr_trial_diff_thresh_edge_alpha);
+
+			fmt_debug_printf("m_debug_images: {}\n", m_debug_images);
+		}
+	};
+
+	// Compresses orig_img using the settings in global_cfg. Returns false on failure.
+	// comp_data must be empty on entry (asserted) and receives the compressed stream.
+	// coded_blocks holds one logical ASTC block per block position.
+	bool compress_image(
+		const image& orig_img, uint8_vec &comp_data, vector2D<astc_helpers::log_astc_block>& coded_blocks,
+		const astc_ldr_encode_config& global_cfg,
+		job_pool& job_pool);
+
+	// Smooths the two pixels on each side of every filter_block_width x filter_block_height block boundary.
+	// Boundary positions whose adjacent pixels differ by more than SKIP_THRESH in any channel are left untouched.
+	void deblock_filter(uint32_t filter_block_width, uint32_t filter_block_height, const image& src_img, image& dst_img, bool stronger_filtering = false, int SKIP_THRESH = 24);
+
+} // namespace astc_ldr
+} // namespace basisu
+
+
diff --git a/external/basis_universal/encoder/basisu_backend.cpp b/external/basis_universal/encoder/basisu_backend.cpp index 8096e25834..adc791eff7 100644 --- a/external/basis_universal/encoder/basisu_backend.cpp +++ b/external/basis_universal/encoder/basisu_backend.cpp @@ -1,5 +1,5 @@ // basisu_backend.cpp -// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2026 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License.
@@ -54,7 +54,7 @@ namespace basisu m_pFront_end = pFront_end; m_params = params; m_slices = slice_descs; - + debug_printf("basisu_backend::Init: Slices: %u, ETC1S: %u, EndpointRDOQualityThresh: %f, SelectorRDOQualityThresh: %f\n", m_slices.size(), params.m_etc1s, @@ -196,7 +196,7 @@ namespace basisu m_endpoint_remap_table_old_to_new = reorderer.get_remap_table(); } - // For endpoints, old_to_new[] may not be bijective! + // For endpoints, old_to_new[] may not be bijective! // Some "old" entries may be unused and don't get remapped into the "new" array. m_old_endpoint_was_used.clear(); @@ -220,13 +220,13 @@ namespace basisu } // slice_index debug_printf("basisu_backend::reoptimize_and_sort_endpoints_codebook: First old entry index: %u\n", first_old_entry_index); - + m_new_endpoint_was_used.clear(); m_new_endpoint_was_used.resize(r.get_total_endpoint_clusters()); m_endpoint_remap_table_new_to_old.clear(); m_endpoint_remap_table_new_to_old.resize(r.get_total_endpoint_clusters()); - + // Set unused entries in the new array to point to the first used entry in the old array. 
m_endpoint_remap_table_new_to_old.set_all(first_old_entry_index); @@ -235,7 +235,7 @@ namespace basisu if (m_old_endpoint_was_used[old_index]) { const uint32_t new_index = m_endpoint_remap_table_old_to_new[old_index]; - + m_new_endpoint_was_used[new_index] = true; m_endpoint_remap_table_new_to_old[new_index] = old_index; @@ -612,7 +612,7 @@ namespace basisu sort_selector_codebook(); check_for_valid_cr_blocks(); - + debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs()); } @@ -666,14 +666,14 @@ namespace basisu if (m_params.m_debug_images) { image gi_unpacked; - gi.unpack(gi_unpacked); + gi.unpack(gi_unpacked, false); char buf[256]; -#ifdef _WIN32 +#ifdef _WIN32 sprintf_s(buf, sizeof(buf), "basisu_backend_slice_%u.png", slice_index); #else snprintf(buf, sizeof(buf), "basisu_backend_slice_%u.png", slice_index); -#endif +#endif save_png(buf, gi_unpacked); } @@ -682,7 +682,7 @@ namespace basisu //uint32_t g_color_delta_hist[255 * 3 + 1]; //uint32_t g_color_delta_bad_hist[255 * 3 + 1]; - + // TODO: Split this into multiple methods. bool basisu_backend::encode_image() { @@ -718,7 +718,7 @@ namespace basisu const int COLOR_DELTA_THRESH = 8; const int SEL_DIFF_THRESHOLD = 11; - + for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) { //const int prev_frame_slice_index = is_video ? 
find_video_frame(slice_index, -1) : -1; @@ -764,7 +764,7 @@ namespace basisu } // block_x } // block_y - + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) { for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) @@ -842,7 +842,7 @@ namespace basisu const uint32_t cur_inten5 = etc_blk.get_inten_table(0); const etc1_endpoint_palette_entry& cur_endpoints = m_endpoint_palette[m.m_endpoint_index]; - + if (cur_err) { const float endpoint_remap_thresh = maximum(1.0f, m_params.m_endpoint_rdo_quality_thresh); @@ -858,7 +858,7 @@ namespace basisu int best_trial_idx = 0; etc_block trial_etc_blk(etc_blk); - + const int search_dist = minimum(iabs(endpoint_delta) - 1, MAX_ENDPOINT_SEARCH_DIST); for (int d = -search_dist; d < search_dist; d++) { @@ -876,7 +876,7 @@ namespace basisu continue; const etc1_endpoint_palette_entry& p = m_endpoint_palette[m_endpoint_remap_table_new_to_old[trial_idx]]; - + if (m_params.m_compression_level <= 1) { if (p.m_inten5 > cur_inten5) @@ -886,7 +886,7 @@ namespace basisu int delta_g = iabs(cur_endpoints.m_color5.g - p.m_color5.g); int delta_b = iabs(cur_endpoints.m_color5.b - p.m_color5.b); int color_delta = delta_r + delta_g + delta_b; - + if (color_delta > COLOR_DELTA_THRESH) continue; } @@ -924,7 +924,7 @@ namespace basisu const int64_t initial_best_trial_err = INT64_MAX; int64_t best_trial_err = initial_best_trial_err; int best_trial_idx = 0; - + const int search_dist = minimum(iabs(endpoint_delta) - 1, MAX_ENDPOINT_SEARCH_DIST); for (int d = -search_dist; d < search_dist; d++) { @@ -942,7 +942,7 @@ namespace basisu continue; const etc1_endpoint_palette_entry& p = m_endpoint_palette[m_endpoint_remap_table_new_to_old[trial_idx]]; - + if (m_params.m_compression_level <= 1) { if (p.m_inten5 > cur_inten5) @@ -952,7 +952,7 @@ namespace basisu int delta_g = iabs(cur_endpoints.m_color5.g - p.m_color5.g); int delta_b = iabs(cur_endpoints.m_color5.b - p.m_color5.b); int color_delta = delta_r + delta_g + delta_b; - + if (color_delta 
> COLOR_DELTA_THRESH) continue; } @@ -992,7 +992,7 @@ namespace basisu } #endif // BASISU_SUPPORT_SSE } // if (!g_cpu_supports_sse41) - + } // if (cur_err) } // if ((m_params.m_endpoint_rdo_quality_thresh > 1.0f) && (iabs(endpoint_delta) > 1) && (!block_endpoints_are_referenced(block_x, block_y))) @@ -1011,7 +1011,7 @@ namespace basisu if ((!is_video) || (m.m_endpoint_predictor != basist::CR_ENDPOINT_PRED_INDEX)) { int new_selector_index = m_selector_remap_table_old_to_new[m.m_selector_index]; - + const float selector_remap_thresh = maximum(1.0f, m_params.m_selector_rdo_quality_thresh); //2.5f; int selector_history_buf_index = -1; @@ -1060,7 +1060,7 @@ namespace basisu for (uint32_t p = 0; p < 16; p++) cur_err += color_distance(false, src_pixels.get_ptr()[p], block_colors[pCur_selectors[p]], false); } - + const uint64_t limit_err = (uint64_t)ceilf(cur_err * selector_remap_thresh); // Even if cur_err==limit_err, we still want to scan the history buffer because there may be equivalent entries that are cheaper to code. @@ -1091,7 +1091,7 @@ namespace basisu if (sel_diff >= SEL_DIFF_THRESHOLD) continue; } - + const uint64_t thresh_err = minimum(limit_err, best_trial_err); uint64_t trial_err = 0; @@ -1266,7 +1266,7 @@ namespace basisu //{ // printf("%u, %u, %f\n", g_color_delta_bad_hist[i], g_color_delta_hist[i], g_color_delta_hist[i] ? 
g_color_delta_bad_hist[i] / (float)g_color_delta_hist[i] : 0); //} - + double total_prep_time = tm.get_elapsed_secs(); debug_printf("basisu_backend::encode_image: Total prep time: %3.2f\n", total_prep_time); @@ -1521,7 +1521,7 @@ namespace basisu if (old_endpoint_was_used[old_endpoint_index]) { const uint32_t new_endpoint_index = m_endpoint_remap_table_old_to_new[old_endpoint_index]; - + new_endpoint_was_used[new_endpoint_index] = true; endpoint_remap_table_new_to_old[new_endpoint_index] = old_endpoint_index; @@ -1660,7 +1660,7 @@ namespace basisu bool basisu_backend::encode_selector_palette() { const basisu_frontend& r = *m_pFront_end; - + histogram delta_selector_pal_histogram(256); for (uint32_t q = 0; q < r.get_total_selector_clusters(); q++) diff --git a/external/basis_universal/encoder/basisu_comp.cpp b/external/basis_universal/encoder/basisu_comp.cpp index 9a52f54fe4..acbedc31fd 100644 --- a/external/basis_universal/encoder/basisu_comp.cpp +++ b/external/basis_universal/encoder/basisu_comp.cpp @@ -1,5 +1,5 @@ // basisu_comp.cpp -// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2026 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -25,6 +25,7 @@ #include "basisu_miniz.h" #include "basisu_opencl.h" +#include "basisu_astc_ldr_encode.h" #include "../transcoder/basisu_astc_hdr_core.h" @@ -50,11 +51,228 @@ using namespace buminiz; namespace basisu {
+	// Maps a normalized quality q (clamped to [0,1]) to a UASTC LDR 4x4 RDO lambda:
+	// 20 * (1 - q)^1.3, i.e. 20 at q=0 falling to exactly 0 at q=1.
+	static float uastc_ldr_4x4_lambda_from_quality(float q)
+	{
+		q = clamp(q, 0.0f, 1.0f);
+
+		if (q >= 1.0f)
+			return 0.0f;
+
+		const float lambda_max = 20.0f;
+		return lambda_max * pow(1.0f - q, 1.3f);
+	}
+
+	// Maps a normalized quality q (clamped to [0,1]) to a UASTC HDR 6x6 lambda:
+	// 50000 * (1 - q)^1.5, i.e. 50000 at q=0 falling to exactly 0 at q=1.
+	static float uastc_hdr_6x6_lambda_from_quality(float q)
+	{
+		// Ideally we would know if it's an upconverted LDR/SDR input, or HDR, then that controls the maximum useful lambda.
+		q = clamp(q, 0.0f, 1.0f);
+
+		if (q >= 1.0f)
+			return 0.0f;
+
+		const float lambda_max = 50000.0f;
+		return lambda_max * pow(1.0f - q, 1.5f);
+	}
+
+	// Selects the codec via set_format_mode(mode), then translates a unified effort setting
+	// into that codec's own effort control.
+	// effort: 0-10 (larger values are clamped to 10); any negative value means "not specified".
+	// set_defaults: when effort is not specified, true resets the codec's effort control to its
+	// default and false leaves it untouched.
+	// Returns false (after asserting) if mode is not one of the formats handled below.
+	bool basis_compressor_params::set_format_mode_and_effort(basist::basis_tex_format mode, int effort, bool set_defaults)
+	{
+		fmt_debug_printf("set_format_mode_and_effort: mode: {}, effort: {}, set_defaults: {}\n", basist::basis_get_tex_format_name(mode), effort, set_defaults);
+
+		set_format_mode(mode);
+
+		if (effort > 0)
+			effort = clamp(effort, 0, 10);
+
+		const float feffort = (effort >= 0) ? clamp((float)effort / 10.0f, 0.0f, 1.0f) : 0.0f;
+
+		if (mode == basist::basis_tex_format::cETC1S)
+		{
+			// All lerp() operands are float so the call does not rely on an explicit template argument.
+			if (effort >= 0)
+				m_etc1s_compression_level = (int)std::round(lerp(0.0f, (float)BASISU_MAX_ETC1S_COMPRESSION_LEVEL, feffort));
+			else if (set_defaults)
+				m_etc1s_compression_level = BASISU_DEFAULT_ETC1S_COMPRESSION_LEVEL;
+
+			fmt_debug_printf("Low-level ETC1S compression (effort) level (0-6): {}\n", m_etc1s_compression_level);
+		}
+		else if (mode == basist::basis_tex_format::cUASTC_LDR_4x4)
+		{
+			if (effort >= 0)
+				m_pack_uastc_ldr_4x4_flags = (int)std::round(lerp((float)cPackUASTCLevelFastest, (float)cPackUASTCLevelVerySlow, feffort));
+			else if (set_defaults)
+				m_pack_uastc_ldr_4x4_flags = cPackUASTCLevelDefault;
+
+			fmt_debug_printf("Low-level UASTC LDR 4x4 pack (effort) level (0-4): {}\n", m_pack_uastc_ldr_4x4_flags);
+		}
+		else if (mode == basist::basis_tex_format::cUASTC_HDR_4x4)
+		{
+			// Set UASTC HDR 4x4 effort level (there is no quality to set - it doesn't support RDO yet).
+			if (effort >= 0)
+				m_uastc_hdr_4x4_options.set_quality_level((int)std::round(lerp((float)uastc_hdr_4x4_codec_options::cMinLevel, (float)uastc_hdr_4x4_codec_options::cMaxLevel, feffort)));
+			else if (set_defaults)
+				m_uastc_hdr_4x4_options.set_quality_level(uastc_hdr_4x4_codec_options::cDefaultLevel);
+
+			fmt_debug_printf("Low-level UASTC HDR 4x4 quality (actually effort) level (0-4): {}\n", m_uastc_hdr_4x4_options.m_level);
+		}
+		else if ((mode == basist::basis_tex_format::cASTC_HDR_6x6) || (mode == basist::basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE))
+		{
+			// Set ASTC HDR 6x6/UASTC HDR 6x6 effort level
+			if (effort >= 0)
+				m_astc_hdr_6x6_options.set_user_level(effort);
+			else if (set_defaults)
+				m_astc_hdr_6x6_options.set_user_level(astc_6x6_hdr::ASTC_HDR_6X6_DEF_USER_COMP_LEVEL);
+
+			fmt_debug_printf("Low-level UASTC HDR 6x6 master comp (effort) level (0-4): {}, highest comp (effort) level (0-4): {}, num reuse XY deltas: {}, extra patterns flag: {}, brute force partition matching: {}\n",
+				m_astc_hdr_6x6_options.m_master_comp_level,
+				m_astc_hdr_6x6_options.m_highest_comp_level,
+				m_astc_hdr_6x6_options.m_num_reuse_xy_deltas,
+				m_astc_hdr_6x6_options.m_extra_patterns_flag,
+				m_astc_hdr_6x6_options.m_brute_force_partition_matching);
+		}
+		else if ((mode >= basist::basis_tex_format::cXUASTC_LDR_4x4) && (mode <= basist::basis_tex_format::cASTC_LDR_12x12))
+		{
+			// NOTE(review): assumes every enumerator from cXUASTC_LDR_4x4 through cASTC_LDR_12x12 is an XUASTC LDR or ASTC LDR format - confirm.
+			if (effort >= 0)
+				m_xuastc_ldr_effort_level = effort;
+			else if (set_defaults)
+				m_xuastc_ldr_effort_level = astc_ldr::EFFORT_LEVEL_DEF;
+
+			fmt_debug_printf("Low-level XUASTC LDR effort level (0-10): {}\n", m_xuastc_ldr_effort_level);
+		}
+		else
+		{
+			assert(0);
+			return false;
+		}
+
+		return true;
+	}
+
+	// Calls set_format_mode_and_effort(), then translates a unified quality setting into the
+	// codec's own rate/quality controls.
+	// quality: 0-100 (larger values are clamped to 100); any negative value means "not specified".
+	// For UASTC LDR 4x4 and XUASTC LDR, quality 100 is handled like "not specified".
+	// set_defaults: when quality is not specified, true resets the codec's quality controls to their
+	// defaults and false leaves them untouched.
+	// Returns false if set_format_mode_and_effort() fails or mode is not handled below.
+	bool basis_compressor_params::set_format_mode_and_quality_effort(basist::basis_tex_format mode, int quality, int effort, bool set_defaults)
+	{
+		fmt_debug_printf("set_format_mode_and_quality_effort: mode: {}, quality: {}, effort: {}, set_defaults: {}\n", basist::basis_get_tex_format_name(mode), quality, effort, set_defaults);
+
+		if (!set_format_mode_and_effort(mode, effort, set_defaults))
+			return false;
+
+		if (quality > 0)
+			quality = clamp(quality, 0, 100);
+
+		const float fquality = (quality >= 0) ? clamp((float)quality / 100.0f, 0.0f, 1.0f) : 0.0f;
+
+		if (mode == basist::basis_tex_format::cETC1S)
+		{
+			// ETC1S: Map quality and effort to ETC1S quality and effort levels
+			if (quality >= 0)
+				m_quality_level = (int)std::round(lerp(0.0f, 255.0f, fquality));
+			else if (set_defaults)
+				m_quality_level = -1;
+
+			fmt_debug_printf("Low-level ETC1S quality level (0-255): {}\n", m_quality_level);
+		}
+		else if (mode == basist::basis_tex_format::cUASTC_LDR_4x4)
+		{
+			// UASTC LDR 4x4: Map quality to RDO lambda scalar, effort to UASTC LDR 4x4 packing level
+			if ((quality >= 0) && (quality < 100))
+			{
+				// Enable RDO postprocessing
+				m_rdo_uastc_ldr_4x4 = true;
+
+				// Attempt to derive a reasonable lambda from quality
+				m_rdo_uastc_ldr_4x4_quality_scalar = uastc_ldr_4x4_lambda_from_quality(fquality);
+			}
+			else if (set_defaults)
+			{
+				m_rdo_uastc_ldr_4x4 = false;
+
+				m_rdo_uastc_ldr_4x4_quality_scalar = 1.0f; // the default is 1.0, but the RDO flag isn't enabled
+			}
+
+			fmt_debug_printf("Low-level UASTC LDR 4x4 RDO flag: {}, lambda setting (0=no extra distortion, higher=more distortion): {}\n", m_rdo_uastc_ldr_4x4, m_rdo_uastc_ldr_4x4_quality_scalar);
+		}
+		else if (mode == basist::basis_tex_format::cUASTC_HDR_4x4)
+		{
+			// UASTC HDR 4x4: Nothing to do for quality, it doesn't support RDO
+			// Warn only when a quality was actually specified (any negative value = not specified, as in the other branches).
+			if ((quality >= 0) && (quality < 100))
+			{
+				fmt_printf("WARNING: UASTC HDR 4x4 codec doesn't have a 'quality' parameter (it doesn't currently support RDO)\n");
+			}
+		}
+		else if ((mode == basist::basis_tex_format::cASTC_HDR_6x6) || (mode == basist::basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE))
+		{
+			// Set lambda (rate-distortion tradeoff)
+			if (quality >= 0)
+				m_astc_hdr_6x6_options.m_lambda = uastc_hdr_6x6_lambda_from_quality(fquality);
+			else if (set_defaults)
+				m_astc_hdr_6x6_options.m_lambda = 0.0f;
+
+			fmt_debug_printf("Low-level UASTC HDR 6x6 lambda setting (0=no extra distortion, higher=more distortion): {}\n", m_astc_hdr_6x6_options.m_lambda);
+		}
+		else if ((mode >= basist::basis_tex_format::cASTC_LDR_4x4) && (mode <= basist::basis_tex_format::cASTC_LDR_12x12))
+		{
+			// ASTC LDR 4x4-12x12: Nothing to do for quality, it doesn't support RDO
+			// Warn only when a quality was actually specified (any negative value = not specified, as in the other branches).
+			if ((quality >= 0) && (quality < 100))
+			{
+				fmt_printf("WARNING: ASTC LDR 4x4-12x12 codec doesn't have a 'quality' parameter (it doesn't currently support RDO)\n");
+			}
+		}
+		else if ((mode >= basist::basis_tex_format::cXUASTC_LDR_4x4) && (mode <= basist::basis_tex_format::cXUASTC_LDR_12x12))
+		{
+			// XUASTC LDR 4x4-12x12
+			if ((quality >= 0) && (quality < 100))
+			{
+				// Enable DCT + lossy supercompression
+				m_quality_level = quality;
+				m_xuastc_ldr_use_dct = true;
+				m_xuastc_ldr_use_lossy_supercompression = true;
+			}
+			else if (set_defaults)
+			{
+				m_quality_level = -1;
+				m_xuastc_ldr_use_dct = false;
+				m_xuastc_ldr_use_lossy_supercompression = false;
+			}
+
+			fmt_debug_printf("Low-level XUASTC quality level (0-100): {}, Use DCT: {}, Use lossy supercompression: {}\n", m_quality_level, m_xuastc_ldr_use_dct, m_xuastc_ldr_use_lossy_supercompression);
+		}
+		else
+		{
+			assert(0);
+			return false;
+		}
+
+		return true;
+	}
+
basis_compressor::basis_compressor() : m_pOpenCL_context(nullptr), m_fmt_mode(basist::basis_tex_format::cETC1S), + m_fmt_mode_block_width(4), + m_fmt_mode_block_height(4), + m_total_slice_orig_texels(0), m_basis_file_size(0), m_basis_bits_per_texel(0.0f), + m_ktx2_file_size(0), + m_ktx2_bits_per_texel(0.0f), m_total_blocks(0), m_hdr_image_scale(1.0f), m_ldr_to_hdr_upconversion_nit_multiplier(1.0f), @@ -63,7 +260,7 @@ namespace basisu m_opencl_failed(false) { debug_printf("basis_compressor::basis_compressor\n"); - + assert(g_library_initialized); } @@ -195,7 +392,7 @@ namespace basisu bool basis_compressor::init(const basis_compressor_params &params) {
debug_printf("basis_compressor::init\n"); - + if (!g_library_initialized) { error_printf("basis_compressor::init: basisu_encoder_init() MUST be called before using any encoder functionality!\n"); @@ -207,7 +404,7 @@ namespace basisu error_printf("basis_compressor::init: A non-null job_pool pointer must be specified\n"); return false; } - + m_params = params; if ((m_params.m_compute_stats) && (!m_params.m_validate_output_data)) @@ -217,17 +414,34 @@ namespace basisu m_ldr_to_hdr_upconversion_nit_multiplier = 1.0f; m_upconverted_any_ldr_images = false; + m_total_slice_orig_texels = 0; + m_basis_file_size = 0; + m_basis_bits_per_texel = 0.0f; + m_ktx2_file_size = 0; + m_ktx2_bits_per_texel = 0.0f; + check_for_hdr_inputs(); + if (m_params.m_hdr) + { + if ((m_params.m_debug) && (m_params.m_ktx2_and_basis_srgb_transfer_function) && (m_params.m_ktx2_and_basis_srgb_transfer_function.was_changed())) + { + debug_printf("Warning: m_ktx2_and_basis_srgb_transfer_function being forced to false in HDR mode (we always write linear KTX2/.basis files in HDR mode)\n"); + } + + // Always slam m_ktx2_and_basis_srgb_transfer_function on HDR inputs. We always write linear to KTX2 and .basis for HDR outputs. 
+ m_params.m_ktx2_and_basis_srgb_transfer_function = false; + } + if (m_params.m_debug) { - debug_printf("basis_compressor::init:\n"); + debug_printf("\nbasis_compressor::init:\n"); #define PRINT_BOOL_VALUE(v) fmt_debug_printf("{}: {} {}\n", BASISU_STRINGIZE2(v), static_cast(m_params.v), m_params.v.was_changed()); #define PRINT_INT_VALUE(v) fmt_debug_printf("{}: {} {}\n", BASISU_STRINGIZE2(v), static_cast(m_params.v), m_params.v.was_changed()); #define PRINT_UINT_VALUE(v) fmt_debug_printf("{}: {} {}\n", BASISU_STRINGIZE2(v), static_cast(m_params.v), m_params.v.was_changed()); #define PRINT_FLOAT_VALUE(v) fmt_debug_printf("{}: {} {}\n", BASISU_STRINGIZE2(v), static_cast(m_params.v), m_params.v.was_changed()); - + fmt_debug_printf("Source LDR images: {}, HDR images: {}, filenames: {}, alpha filenames: {}, LDR mipmap images: {}, HDR mipmap images: {}\n", (uint64_t)m_params.m_source_images.size(), (uint64_t)m_params.m_source_images_hdr.size(), (uint64_t)m_params.m_source_filenames.size(), (uint64_t)m_params.m_source_alpha_filenames.size(), @@ -263,9 +477,9 @@ namespace basisu fmt_debug_printf("m_hdr_mode: cASTC_HDR_6X6\n"); break; } - case hdr_modes::cASTC_HDR_6X6_INTERMEDIATE: + case hdr_modes::cUASTC_HDR_6X6_INTERMEDIATE: { - fmt_debug_printf("m_hdr_mode: cASTC_HDR_6X6_INTERMEDIATE\n"); + fmt_debug_printf("m_hdr_mode: cUASTC_HDR_6X6_INTERMEDIATE\n"); break; } default: @@ -274,12 +488,13 @@ namespace basisu } PRINT_BOOL_VALUE(m_uastc); + PRINT_INT_VALUE(m_xuastc_or_astc_ldr_basis_tex_format); PRINT_BOOL_VALUE(m_use_opencl); PRINT_BOOL_VALUE(m_y_flip); PRINT_BOOL_VALUE(m_debug); PRINT_BOOL_VALUE(m_validate_etc1s); PRINT_BOOL_VALUE(m_debug_images); - PRINT_INT_VALUE(m_compression_level); + PRINT_INT_VALUE(m_etc1s_compression_level); PRINT_BOOL_VALUE(m_perceptual); PRINT_BOOL_VALUE(m_no_endpoint_rdo); PRINT_BOOL_VALUE(m_no_selector_rdo); @@ -296,10 +511,10 @@ namespace basisu PRINT_BOOL_VALUE(m_renormalize); PRINT_BOOL_VALUE(m_multithreading); 
PRINT_BOOL_VALUE(m_disable_hierarchical_endpoint_codebooks); - + PRINT_FLOAT_VALUE(m_endpoint_rdo_thresh); PRINT_FLOAT_VALUE(m_selector_rdo_thresh); - + PRINT_BOOL_VALUE(m_mip_gen); PRINT_BOOL_VALUE(m_mip_renormalize); PRINT_BOOL_VALUE(m_mip_wrapping); @@ -312,14 +527,14 @@ namespace basisu debug_printf("m_max_endpoint_clusters: %u\n", m_params.m_etc1s_max_endpoint_clusters); debug_printf("m_max_selector_clusters: %u\n", m_params.m_etc1s_max_selector_clusters); - debug_printf("m_etc1s_quality_level: %i\n", m_params.m_etc1s_quality_level); + debug_printf("m_quality_level: %i\n", m_params.m_quality_level); debug_printf("UASTC HDR 4x4 quality level: %u\n", m_params.m_uastc_hdr_4x4_options.m_level); debug_printf("m_tex_type: %u\n", m_params.m_tex_type); debug_printf("m_userdata0: 0x%X, m_userdata1: 0x%X\n", m_params.m_userdata0, m_params.m_userdata1); debug_printf("m_us_per_frame: %i (%f fps)\n", m_params.m_us_per_frame, m_params.m_us_per_frame ? 1.0f / (m_params.m_us_per_frame / 1000000.0f) : 0); debug_printf("m_pack_uastc_ldr_4x4_flags: 0x%X\n", m_params.m_pack_uastc_ldr_4x4_flags); - + PRINT_BOOL_VALUE(m_rdo_uastc_ldr_4x4); PRINT_FLOAT_VALUE(m_rdo_uastc_ldr_4x4_quality_scalar); PRINT_INT_VALUE(m_rdo_uastc_ldr_4x4_dict_size); @@ -333,7 +548,7 @@ namespace basisu PRINT_INT_VALUE(m_resample_width); PRINT_INT_VALUE(m_resample_height); PRINT_FLOAT_VALUE(m_resample_factor); - + debug_printf("Has global codebooks: %u\n", m_params.m_pGlobal_codebooks ? 
1 : 0); if (m_params.m_pGlobal_codebooks) { @@ -344,7 +559,7 @@ namespace basisu debug_printf("KTX2 UASTC supercompression: %u\n", m_params.m_ktx2_uastc_supercompression); debug_printf("KTX2 Zstd supercompression level: %i\n", (int)m_params.m_ktx2_zstd_supercompression_level); - debug_printf("KTX2 sRGB transfer func: %u\n", (int)m_params.m_ktx2_srgb_transfer_func); + debug_printf("KTX2/basis sRGB transfer function: %u\n", (int)m_params.m_ktx2_and_basis_srgb_transfer_function); debug_printf("Total KTX2 key values: %u\n", m_params.m_ktx2_key_values.size()); for (uint32_t i = 0; i < m_params.m_ktx2_key_values.size(); i++) { @@ -353,21 +568,45 @@ namespace basisu } PRINT_BOOL_VALUE(m_validate_output_data); + PRINT_UINT_VALUE(m_transcode_flags); PRINT_BOOL_VALUE(m_ldr_hdr_upconversion_srgb_to_linear); PRINT_FLOAT_VALUE(m_ldr_hdr_upconversion_nit_multiplier); debug_printf("Allow UASTC HDR 4x4 uber mode: %u\n", m_params.m_uastc_hdr_4x4_options.m_allow_uber_mode); debug_printf("UASTC HDR 4x4 ultra quant: %u\n", m_params.m_uastc_hdr_4x4_options.m_ultra_quant); PRINT_BOOL_VALUE(m_hdr_favor_astc); - + + PRINT_INT_VALUE(m_xuastc_ldr_effort_level); + PRINT_BOOL_VALUE(m_xuastc_ldr_blurring); + PRINT_BOOL_VALUE(m_xuastc_ldr_use_dct); + PRINT_BOOL_VALUE(m_xuastc_ldr_use_lossy_supercompression); + PRINT_BOOL_VALUE(m_xuastc_ldr_force_disable_subsets); + PRINT_BOOL_VALUE(m_xuastc_ldr_force_disable_rgb_dual_plane); + PRINT_INT_VALUE(m_xuastc_ldr_syntax); + + debug_printf("XUASTC LDR channel weights: "); + for (uint32_t i = 0; i < 4; i++) + fmt_debug_printf("{} ", m_params.m_xuastc_ldr_channel_weights[i]); + debug_printf("\n"); + + PRINT_FLOAT_VALUE(m_ls_min_psnr); + PRINT_FLOAT_VALUE(m_ls_thresh_psnr); + PRINT_FLOAT_VALUE(m_ls_thresh_edge_psnr); + PRINT_FLOAT_VALUE(m_ls_min_alpha_psnr); + PRINT_FLOAT_VALUE(m_ls_thresh_alpha_psnr); + PRINT_FLOAT_VALUE(m_ls_thresh_edge_alpha_psnr); + #undef PRINT_BOOL_VALUE #undef PRINT_INT_VALUE #undef PRINT_UINT_VALUE #undef PRINT_FLOAT_VALUE + + 
fmt_printf("m_format_mode: {}\n", (uint32_t)m_params.get_format_mode()); + fmt_printf("\n"); } if (!sanity_check_input_params()) return false; - + if ((m_params.m_use_opencl) && opencl_is_available() && !m_pOpenCL_context && !m_opencl_failed) { m_pOpenCL_context = opencl_create_context(); @@ -378,14 +617,17 @@ namespace basisu return true; } - void basis_compressor::pick_format_mode() + bool basis_compressor::pick_format_mode() { - // Unfortunately due to the legacy of this code and backwards compat this is more complex than I would like. + // Unfortunately due to the legacy of this code and backwards API compatibility this is more complex than I would like. m_fmt_mode = basist::basis_tex_format::cETC1S; + m_fmt_mode_block_width = 4; + m_fmt_mode_block_height = 4; if (m_params.m_hdr) { assert(m_params.m_uastc); + assert(m_params.m_xuastc_or_astc_ldr_basis_tex_format == -1); switch (m_params.m_hdr_mode) { @@ -394,9 +636,13 @@ namespace basisu break; case hdr_modes::cASTC_HDR_6X6: m_fmt_mode = basist::basis_tex_format::cASTC_HDR_6x6; + m_fmt_mode_block_width = 6; + m_fmt_mode_block_height = 6; break; - case hdr_modes::cASTC_HDR_6X6_INTERMEDIATE: - m_fmt_mode = basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE; + case hdr_modes::cUASTC_HDR_6X6_INTERMEDIATE: + m_fmt_mode = basist::basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE; + m_fmt_mode_block_width = 6; + m_fmt_mode_block_height = 6; break; default: assert(0); @@ -405,7 +651,30 @@ namespace basisu } else if (m_params.m_uastc) { - m_fmt_mode = basist::basis_tex_format::cUASTC4x4; + if (m_params.m_xuastc_or_astc_ldr_basis_tex_format == -1) + { + // UASTC LDR 4x4 + m_fmt_mode = basist::basis_tex_format::cUASTC_LDR_4x4; + } + else + { + // XUASTC LDR 4x4-12x12 or ASTC LDR 4x4-12x12 + m_fmt_mode = static_cast(static_cast(m_params.m_xuastc_or_astc_ldr_basis_tex_format)); + + if (!basis_tex_format_is_xuastc_ldr(m_fmt_mode) && !basis_tex_format_is_astc_ldr(m_fmt_mode)) + { + assert(0); + 
error_printf("basis_compressor::pick_format_mode: m_xuastc_or_astc_ldr_basis_tex_format is invalid\n"); + return false; + } + + basist::get_basis_tex_format_block_size(m_fmt_mode, m_fmt_mode_block_width, m_fmt_mode_block_height); + } + } + else + { + // ETC1S + assert(m_params.m_xuastc_or_astc_ldr_basis_tex_format == -1); } if (m_params.m_debug) @@ -415,8 +684,8 @@ namespace basisu case basist::basis_tex_format::cETC1S: fmt_debug_printf("Format Mode: cETC1S\n"); break; - case basist::basis_tex_format::cUASTC4x4: - fmt_debug_printf("Format Mode: cUASTC4x4\n"); + case basist::basis_tex_format::cUASTC_LDR_4x4: + fmt_debug_printf("Format Mode: cUASTC_LDR_4x4\n"); break; case basist::basis_tex_format::cUASTC_HDR_4x4: fmt_debug_printf("Format Mode: cUASTC_HDR_4x4\n"); @@ -424,16 +693,56 @@ namespace basisu case basist::basis_tex_format::cASTC_HDR_6x6: fmt_debug_printf("Format Mode: cASTC_HDR_6x6\n"); break; - case basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE: - fmt_debug_printf("Format Mode: cASTC_HDR_6x6_INTERMEDIATE\n"); + case basist::basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE: + fmt_debug_printf("Format Mode: cUASTC_HDR_6x6_INTERMEDIATE\n"); + break; + + case basist::basis_tex_format::cXUASTC_LDR_4x4: + case basist::basis_tex_format::cXUASTC_LDR_5x4: + case basist::basis_tex_format::cXUASTC_LDR_5x5: + case basist::basis_tex_format::cXUASTC_LDR_6x5: + case basist::basis_tex_format::cXUASTC_LDR_6x6: + case basist::basis_tex_format::cXUASTC_LDR_8x5: + case basist::basis_tex_format::cXUASTC_LDR_8x6: + case basist::basis_tex_format::cXUASTC_LDR_10x5: + case basist::basis_tex_format::cXUASTC_LDR_10x6: + case basist::basis_tex_format::cXUASTC_LDR_8x8: + case basist::basis_tex_format::cXUASTC_LDR_10x8: + case basist::basis_tex_format::cXUASTC_LDR_10x10: + case basist::basis_tex_format::cXUASTC_LDR_12x10: + case basist::basis_tex_format::cXUASTC_LDR_12x12: + { + fmt_debug_printf("Format Mode: cXUASTC_LDR_{}x{}\n", m_fmt_mode_block_width, m_fmt_mode_block_height); + 
break; + } + case basist::basis_tex_format::cASTC_LDR_4x4: + case basist::basis_tex_format::cASTC_LDR_5x4: + case basist::basis_tex_format::cASTC_LDR_5x5: + case basist::basis_tex_format::cASTC_LDR_6x5: + case basist::basis_tex_format::cASTC_LDR_6x6: + case basist::basis_tex_format::cASTC_LDR_8x5: + case basist::basis_tex_format::cASTC_LDR_8x6: + case basist::basis_tex_format::cASTC_LDR_10x5: + case basist::basis_tex_format::cASTC_LDR_10x6: + case basist::basis_tex_format::cASTC_LDR_8x8: + case basist::basis_tex_format::cASTC_LDR_10x8: + case basist::basis_tex_format::cASTC_LDR_10x10: + case basist::basis_tex_format::cASTC_LDR_12x10: + case basist::basis_tex_format::cASTC_LDR_12x12: + { + fmt_debug_printf("Format Mode: cASTC_LDR_{}x{}\n", m_fmt_mode_block_width, m_fmt_mode_block_height); break; + } + default: assert(0); break; } } - } + return true; + } + basis_compressor::error_code basis_compressor::process() { debug_printf("basis_compressor::process\n"); @@ -441,8 +750,9 @@ namespace basisu if (!read_dds_source_images()) return cECFailedReadingSourceImages; - // Note: After here m_params.m_hdr, m_params.m_uastc and m_fmt_mode cannot be changed. - pick_format_mode(); + // Note: After here m_params.m_hdr, m_params.m_uastc and m_fmt_mode, m_fmt_mode_block_width/height cannot be changed. + if (!pick_format_mode()) + return cECFailedInvalidParameters; if (!read_source_images()) return cECFailedReadingSourceImages; @@ -459,6 +769,7 @@ namespace basisu } } + // Some modes/codecs require extracting source blocks up front. 
if (!extract_source_blocks()) return cECFailedFrontEnd; @@ -468,23 +779,28 @@ namespace basisu { // UASTC 4x4 HDR if (m_params.m_status_output) - printf("Mode: UASTC 4x4 HDR Level %u\n", m_params.m_uastc_hdr_4x4_options.m_level); + printf("Mode: UASTC 4x4 HDR Effort Level (0-4): %u\n", m_params.m_uastc_hdr_4x4_options.m_level); error_code ec = encode_slices_to_uastc_4x4_hdr(); if (ec != cECSuccess) return ec; } - else + else { - assert((m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6) || (m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6_INTERMEDIATE)); - - // ASTC 6x6 HDR + // ASTC 6x6 HDR/UASTC HDR 6x6i + assert((m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6) || (m_params.m_hdr_mode == hdr_modes::cUASTC_HDR_6X6_INTERMEDIATE)); + if (m_params.m_status_output) { - fmt_printf("Mode: ASTC 6x6 HDR {}, Base Level: {}, Highest Level: {}, Lambda: {}, REC 2020: {}\n", - (m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6_INTERMEDIATE) ? "Intermediate" : "", + fmt_printf("Mode: ASTC 6x6 HDR {}, Base Effort Level (0-4): {}, Highest Effort Level (0-4): {}, Lambda: {}, REC 2020: {}\n", + (m_params.m_hdr_mode == hdr_modes::cUASTC_HDR_6X6_INTERMEDIATE) ? "Intermediate" : "", m_params.m_astc_hdr_6x6_options.m_master_comp_level, m_params.m_astc_hdr_6x6_options.m_highest_comp_level, m_params.m_astc_hdr_6x6_options.m_lambda, m_params.m_astc_hdr_6x6_options.m_rec2020_bt2100_color_gamut); + + if (m_params.m_hdr_mode == hdr_modes::cUASTC_HDR_6X6_INTERMEDIATE) + { + fmt_printf("Writing v{} compatible UASTC HDR 6x6i bitstream\n", m_params.m_astc_hdr_6x6_options.m_write_basisu_1_6_compatible_files ? 
"1.60" : "2.00+"); + } } error_code ec = encode_slices_to_astc_6x6_hdr(); @@ -494,11 +810,54 @@ namespace basisu } else if (m_params.m_uastc) { - // UASTC 4x4 LDR - if (m_params.m_status_output) - printf("Mode: UASTC LDR 4x4 Level %u\n", m_params.m_pack_uastc_ldr_4x4_flags & cPackUASTCLevelMask); + error_code ec = cECFailedEncodeUASTC; + + if (basis_tex_format_is_xuastc_ldr(m_fmt_mode) || basis_tex_format_is_astc_ldr(m_fmt_mode)) + { + // XUASTC LDR 4x4-12x12 or ASTC LDR 4x4-12x12 + if (m_params.m_status_output) + { + uint32_t block_width = 0, block_height = 0; + basist::get_basis_tex_format_block_size(m_fmt_mode, block_width, block_height); + + if (basis_tex_format_is_xuastc_ldr(m_fmt_mode)) + { + fmt_printf("Mode: XUASTC LDR {}x{}, Effort Level (0-10): {}, Disable Subsets: {}, Disable RGB Dual Plane: {}\nWeight grid DCT: {}, DCT quality level (1-100): {}, Lossy supercompression: {}, sRGB8 ASTC decode profile: {}, Syntax: {}, Channel weights: {} {} {} {}\n", + block_width, block_height, (int)m_params.m_xuastc_ldr_effort_level, (bool)m_params.m_xuastc_ldr_force_disable_subsets, (bool)m_params.m_xuastc_ldr_force_disable_rgb_dual_plane, + (bool)m_params.m_xuastc_ldr_use_dct, + (bool)m_params.m_xuastc_ldr_use_dct ? 
m_params.m_quality_level : 0, + (bool)m_params.m_xuastc_ldr_use_lossy_supercompression, + (bool)m_params.m_ktx2_and_basis_srgb_transfer_function, + (int)m_params.m_xuastc_ldr_syntax, + m_params.m_xuastc_ldr_channel_weights[0], m_params.m_xuastc_ldr_channel_weights[1], m_params.m_xuastc_ldr_channel_weights[2], m_params.m_xuastc_ldr_channel_weights[3]); + } + else + { + fmt_printf("Mode: ASTC LDR {}x{}, Effort Level (0-10): {}, Disable Subsets: {}, Disable RGB Dual Plane: {}, sRGB8 ASTC decode profile: {}, Syntax: {}, Channel weights: {} {} {} {}\n", + block_width, block_height, + (int)m_params.m_xuastc_ldr_effort_level, (bool)m_params.m_xuastc_ldr_force_disable_subsets, (bool)m_params.m_xuastc_ldr_force_disable_rgb_dual_plane, + (bool)m_params.m_ktx2_and_basis_srgb_transfer_function, + (int)m_params.m_xuastc_ldr_syntax, + m_params.m_xuastc_ldr_channel_weights[0], m_params.m_xuastc_ldr_channel_weights[1], m_params.m_xuastc_ldr_channel_weights[2], m_params.m_xuastc_ldr_channel_weights[3]); + } + } + + ec = encode_slices_to_xuastc_or_astc_ldr(); + } + else + { + // UASTC LDR 4x4 + if (m_params.m_status_output) + { + if (m_params.m_rdo_uastc_ldr_4x4) + fmt_printf("Mode: UASTC LDR 4x4 Effort Level (0-4): {}, using RDO lambda: {}\n", m_params.m_pack_uastc_ldr_4x4_flags & cPackUASTCLevelMask, m_params.m_rdo_uastc_ldr_4x4_quality_scalar); + else + printf("Mode: UASTC LDR 4x4 Effort Level (0-4): %u\n", m_params.m_pack_uastc_ldr_4x4_flags & cPackUASTCLevelMask); + } + + ec = encode_slices_to_uastc_4x4_ldr(); + } - error_code ec = encode_slices_to_uastc_4x4_ldr(); if (ec != cECSuccess) return ec; } @@ -506,13 +865,13 @@ namespace basisu { // ETC1S if (m_params.m_status_output) - printf("Mode: ETC1S Quality %i, Level %i\n", m_params.m_etc1s_quality_level, (int)m_params.m_compression_level); - + printf("Mode: ETC1S Quality (0-255): %i, Comp Level (Effort, 0-6): %i\n", m_params.m_quality_level, (int)m_params.m_etc1s_compression_level); + if (!process_frontend()) return 
cECFailedFrontEnd; if (!extract_frontend_texture_data()) - return cECFailedFontendExtract; + return cECFailedFrontendExtract; if (!process_backend()) return cECFailedBackend; @@ -533,6 +892,7 @@ namespace basisu return cECSuccess; } + // This is both ASTC HDR 6x6 and UASTC HDR 6x6i. basis_compressor::error_code basis_compressor::encode_slices_to_astc_6x6_hdr() { debug_printf("basis_compressor::encode_slices_to_astc_6x6_hdr\n"); @@ -546,14 +906,14 @@ namespace basisu if (m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6) m_uastc_backend_output.m_tex_format = basist::basis_tex_format::cASTC_HDR_6x6; - else if (m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6_INTERMEDIATE) - m_uastc_backend_output.m_tex_format = basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE; + else if (m_params.m_hdr_mode == hdr_modes::cUASTC_HDR_6X6_INTERMEDIATE) + m_uastc_backend_output.m_tex_format = basist::basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE; else { assert(0); return cECFailedEncodeUASTC; } - + m_uastc_backend_output.m_etc1s = false; m_uastc_backend_output.m_srgb = false; m_uastc_backend_output.m_slice_desc = m_slice_descs; @@ -561,13 +921,13 @@ namespace basisu m_uastc_backend_output.m_slice_image_crcs.resize(m_slice_descs.size()); astc_6x6_hdr::astc_hdr_6x6_global_config global_cfg(m_params.m_astc_hdr_6x6_options); - + global_cfg.m_image_stats = m_params.m_compute_stats; global_cfg.m_debug_images = m_params.m_debug_images; global_cfg.m_output_images = m_params.m_debug_images; global_cfg.m_debug_output = m_params.m_debug; global_cfg.m_status_output = m_params.m_status_output || m_params.m_debug; - + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) { gpu_image& dst_tex = m_uastc_slice_textures[slice_index]; @@ -578,7 +938,7 @@ namespace basisu const imagef& source_image = m_slice_images_hdr[slice_index]; assert(source_image.get_width() && source_image.get_height()); - + uint8_vec intermediate_tex_data, astc_tex_data; global_cfg.m_debug_image_prefix = 
m_params.m_astc_hdr_6x6_options.m_debug_image_prefix; @@ -586,7 +946,7 @@ namespace basisu global_cfg.m_output_image_prefix = m_params.m_astc_hdr_6x6_options.m_output_image_prefix; global_cfg.m_output_image_prefix += fmt_string("slice_{}_", slice_index); - + if (m_params.m_debug) fmt_debug_printf("----------------------------------------------------------------------------\n"); @@ -613,7 +973,7 @@ namespace basisu } else { - assert(m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6_INTERMEDIATE); + assert(m_params.m_hdr_mode == hdr_modes::cUASTC_HDR_6X6_INTERMEDIATE); dst_buf.resize(intermediate_tex_data.size_in_bytes()); memcpy(&dst_buf[0], intermediate_tex_data.get_ptr(), intermediate_tex_data.size_in_bytes()); @@ -648,7 +1008,7 @@ namespace basisu m_params.m_uastc_hdr_4x4_options.m_r_err_scale = 1.0f; m_params.m_uastc_hdr_4x4_options.m_g_err_scale = 1.0f; } - + const float DEFAULT_BC6H_ERROR_WEIGHT = .65f;// .85f; const float LOWEST_BC6H_ERROR_WEIGHT = .1f; m_params.m_uastc_hdr_4x4_options.m_bc6h_err_weight = m_params.m_hdr_favor_astc ? 
LOWEST_BC6H_ERROR_WEIGHT : DEFAULT_BC6H_ERROR_WEIGHT; @@ -657,7 +1017,7 @@ namespace basisu any_failures.store(false); astc_hdr_4x4_block_stats enc_stats; - + struct uastc_blk_desc { uint32_t m_solid_flag; @@ -681,7 +1041,7 @@ namespace basisu return false; } - + bool operator== (const uastc_blk_desc& desc) const { if (this == &desc) @@ -709,6 +1069,10 @@ namespace basisu std::map unique_block_descs; std::mutex unique_block_desc_mutex; + std::mutex status_output_mutex; + uint32_t total_blocks_processed = 0; + float last_percentage_printed = 0; + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) { gpu_image& tex = m_uastc_slice_textures[slice_index]; @@ -719,18 +1083,16 @@ namespace basisu const uint32_t num_blocks_y = tex.get_blocks_y(); const uint32_t total_blocks = tex.get_total_blocks(); const imagef& source_image = m_slice_images_hdr[slice_index]; - - std::atomic total_blocks_processed; - total_blocks_processed.store(0); - + const uint32_t N = 256; for (uint32_t block_index_iter = 0; block_index_iter < total_blocks; block_index_iter += N) { const uint32_t first_index = block_index_iter; const uint32_t last_index = minimum(total_blocks, block_index_iter + N); - - m_params.m_pJob_pool->add_job([this, first_index, last_index, num_blocks_x, num_blocks_y, total_blocks, &source_image, - &tex, &total_blocks_processed, &any_failures, &enc_stats, &unique_block_descs, &unique_block_desc_mutex] + + m_params.m_pJob_pool->add_job([this, first_index, last_index, num_blocks_x, num_blocks_y, total_blocks, &source_image, + &tex, &any_failures, &enc_stats, &unique_block_descs, &unique_block_desc_mutex, + &status_output_mutex, &total_blocks_processed, &last_percentage_printed] { BASISU_NOTE_UNUSED(num_blocks_y); @@ -750,7 +1112,7 @@ namespace basisu source_image.extract_block_clamped(&block_pixels[0], block_x * 4, block_y * 4, 4, 4); basist::astc_blk& dest_block = *(basist::astc_blk*)tex.get_block_ptr(block_x, block_y); - + float rgb_pixels[16 * 
3]; basist::half_float rgb_pixels_half[16 * 3]; for (uint32_t i = 0; i < 16; i++) @@ -764,7 +1126,7 @@ namespace basisu rgb_pixels[i * 3 + 2] = block_pixels[i][2]; rgb_pixels_half[i * 3 + 2] = float_to_half_non_neg_no_nan_inf(block_pixels[i][2]); } - + bool status = astc_hdr_4x4_enc_block(&rgb_pixels[0], rgb_pixels_half, m_params.m_uastc_hdr_4x4_options, all_results); if (!status) { @@ -774,10 +1136,10 @@ namespace basisu double best_err = 1e+30f; int best_result_index = -1; - + const double bc6h_err_weight = m_params.m_uastc_hdr_4x4_options.m_bc6h_err_weight; const double astc_err_weight = (1.0f - bc6h_err_weight); - + for (uint32_t i = 0; i < all_results.size(); i++) { basist::half_float unpacked_bc6h_block[4 * 4 * 3]; @@ -795,9 +1157,9 @@ namespace basisu } const astc_hdr_4x4_pack_results& best_results = all_results[best_result_index]; - + astc_hdr_4x4_pack_results_to_block(dest_block, best_results); - + // Verify that this block is valid UASTC HDR and we can successfully transcode it to BC6H. // (Well, except in fastest mode.) 
if (m_params.m_uastc_hdr_4x4_options.m_level > 0) @@ -830,12 +1192,12 @@ namespace basisu blk_desc.m_weight_ise_range = best_results.m_best_blk.m_weight_ise_range; blk_desc.m_endpoint_ise_range = best_results.m_best_blk.m_endpoint_ise_range; } - + { std::lock_guard lck(unique_block_desc_mutex); - + auto res = unique_block_descs.insert(std::make_pair(blk_desc, uastc_blk_desc_stats())); - + (res.first)->second.m_count++; #ifdef UASTC_HDR_DEBUG_SAVE_CATEGORIZED_BLOCKS (res.first)->second.m_blks.push_back(dest_block); @@ -843,17 +1205,35 @@ namespace basisu } } - total_blocks_processed++; + } // block_index + + if (m_params.m_status_output) + { + float percent_done = 0; + bool print_flag = false; - uint32_t val = total_blocks_processed; - if (((val & 1023) == 1023) && m_params.m_status_output) { - debug_printf("basis_compressor::encode_slices_to_uastc_4x4_hdr: %3.1f%% done\n", static_cast(val) * 100.0f / total_blocks); + std::lock_guard lck(status_output_mutex); + + total_blocks_processed += (last_index - first_index) + 1; + + percent_done = ((float)total_blocks_processed * 100.0f) / (float)total_blocks; + + if ((percent_done >= 100.0f) || (percent_done >= (last_percentage_printed + 5.0f))) + { + last_percentage_printed = percent_done; + + print_flag = true; + } } + + // minor print race here, doesn't matter + if (print_flag) + debug_printf("basis_compressor::encode_slices_to_uastc_4x4_hdr: %3.1f%% done\n", percent_done); } }); - + } // block_index_iter m_params.m_pJob_pool->wait_for_all(); @@ -867,7 +1247,7 @@ namespace basisu m_uastc_backend_output.m_slice_image_crcs[slice_index] = basist::crc16(tex.get_ptr(), tex.get_size_in_bytes(), 0); } // slice_index - + debug_printf("basis_compressor::encode_slices_to_uastc_4x4_hdr: Total time: %3.3f secs\n", tm.get_elapsed_secs()); if (m_params.m_debug) @@ -914,17 +1294,183 @@ namespace basisu debug_printf(" }\n"); } #endif - + c++; } printf("\n"); - + enc_stats.print(); } return cECSuccess; } + // XUASTC 4x4-12x12 or ASTC 
4x4-12x12 + basis_compressor::error_code basis_compressor::encode_slices_to_xuastc_or_astc_ldr() + { + if (m_params.m_debug) + debug_printf("basis_compressor::encode_slices_to_xuastc_or_astc_ldr\n"); + + m_uastc_slice_textures.resize(m_slice_descs.size()); + + const texture_format tex_fmt = basist::basis_get_texture_format_from_xuastc_or_astc_ldr_basis_tex_format(m_fmt_mode); + const basist::transcoder_texture_format transcoder_tex_fmt = basist::basis_get_transcoder_texture_format_from_xuastc_or_astc_ldr_basis_tex_format(m_fmt_mode); + + uint32_t block_width = 0, block_height = 0; + block_width = basist::basis_get_block_width(transcoder_tex_fmt); + block_height = basist::basis_get_block_height(transcoder_tex_fmt); + +#if defined(_DEBUG) || defined(DEBUG) + // sanity checking + { + uint32_t alt_block_width = 0, alt_block_height = 0; + get_basis_tex_format_block_size(m_fmt_mode, alt_block_width, alt_block_height); + assert((block_width == alt_block_width) && (block_height == alt_block_height)); + } +#endif + + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) + m_uastc_slice_textures[slice_index].init(tex_fmt, m_slice_descs[slice_index].m_orig_width, m_slice_descs[slice_index].m_orig_height); + + m_uastc_backend_output.m_tex_format = m_fmt_mode; + + m_uastc_backend_output.m_etc1s = false; + m_uastc_backend_output.m_srgb = m_params.m_ktx2_and_basis_srgb_transfer_function; + m_uastc_backend_output.m_slice_desc = m_slice_descs; + m_uastc_backend_output.m_slice_image_data.resize(m_slice_descs.size()); + m_uastc_backend_output.m_slice_image_crcs.resize(m_slice_descs.size()); + + astc_ldr::astc_ldr_encode_config cfg; + cfg.m_astc_block_width = block_width; + cfg.m_astc_block_height = block_height; + cfg.m_block_blurring_p1 = m_params.m_xuastc_ldr_blurring; // experimental, not recommended, very slow + cfg.m_block_blurring_p2 = m_params.m_xuastc_ldr_blurring; // experimental, not recommended, very slow + cfg.m_effort_level = 
clamp(m_params.m_xuastc_ldr_effort_level, astc_ldr::EFFORT_LEVEL_MIN, astc_ldr::EFFORT_LEVEL_MAX); + cfg.m_force_disable_subsets = m_params.m_xuastc_ldr_force_disable_subsets; + cfg.m_force_disable_rgb_dual_plane = m_params.m_xuastc_ldr_force_disable_rgb_dual_plane; + cfg.m_astc_decode_mode_srgb = m_params.m_ktx2_and_basis_srgb_transfer_function; + + cfg.m_compressed_syntax = (basist::astc_ldr_t::xuastc_ldr_syntax)(int)m_params.m_xuastc_ldr_syntax; + if (cfg.m_compressed_syntax >= basist::astc_ldr_t::xuastc_ldr_syntax::cTotal) + { + error_printf("basis_compressor::encode_slices_to_xuastc_or_astc_ldr: Invalid XUASTC LDR syntax\n"); + return cECFailedInvalidParameters; + } + + if (basist::basis_tex_format_is_xuastc_ldr(m_fmt_mode)) + { + if (m_params.m_quality_level >= 0) + { + // Enable weight grid DCT + cfg.m_dct_quality = static_cast(clamp(m_params.m_quality_level, astc_ldr::DCT_QUALITY_MIN, astc_ldr::DCT_QUALITY_MAX)); + cfg.m_use_dct = m_params.m_xuastc_ldr_use_dct; + } + else + { + // No DCT quality level specified, but they wanted DCT - display warning + if (m_params.m_xuastc_ldr_use_dct) + { + printf("Warning: m_xuastc_ldr_use_dct enabled, but m_quality_level was -1 (not set). Not using DCT. 
Quality level must range from 1-100.\n"); + } + } + } + + cfg.m_lossy_supercompression = m_params.m_xuastc_ldr_use_lossy_supercompression; + + for (uint32_t i = 0; i < 4; i++) + cfg.m_comp_weights[i] = m_params.m_xuastc_ldr_channel_weights[i]; + + cfg.m_replacement_min_psnr = m_params.m_ls_min_psnr; + cfg.m_psnr_trial_diff_thresh = m_params.m_ls_thresh_psnr; + cfg.m_psnr_trial_diff_thresh_edge = m_params.m_ls_thresh_edge_psnr; + + cfg.m_replacement_min_psnr_alpha = m_params.m_ls_min_alpha_psnr; + cfg.m_psnr_trial_diff_thresh_alpha = m_params.m_ls_thresh_alpha_psnr; + cfg.m_psnr_trial_diff_thresh_edge_alpha = m_params.m_ls_thresh_edge_alpha_psnr; + + cfg.m_debug_output = m_params.m_debug; + cfg.m_debug_images = m_params.m_debug_images; + + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) + { + gpu_image& dst_tex = m_uastc_slice_textures[slice_index]; + + basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index]; + (void)slice_desc; + + const image& slice_source_image = m_slice_images[slice_index]; + const image* pSource_image = &slice_source_image; + + image temp_image; + if ((slice_source_image.get_width() != slice_desc.m_orig_width) || (slice_source_image.get_height() != slice_desc.m_orig_height)) + { + // Copy to actual/original dimensions so PSNR statistics are calculated correctly. (There's no need to pad the image to multiples of the block dimensions.) 
+ temp_image = slice_source_image; + temp_image.crop(slice_desc.m_orig_width, slice_desc.m_orig_height); + pSource_image = &temp_image; + } + + cfg.m_debug_file_prefix = fmt_string("slice_{}_", slice_index); + + if (m_params.m_debug) + fmt_debug_printf("----------------------------------------------------------------------------\n"); + + uint8_vec intermediate_tex_data; + vector2D coded_log_blocks; + + bool comp_status = astc_ldr::compress_image(*pSource_image, intermediate_tex_data, coded_log_blocks, cfg, *m_params.m_pJob_pool); + if (!comp_status) + return cECFailedEncodeUASTC; + + if (m_params.m_debug) + fmt_debug_printf("----------------------------------------------------------------------------\n"); + + const uint32_t num_blocks_x = dst_tex.get_blocks_x(); + const uint32_t num_blocks_y = dst_tex.get_blocks_y(); + + assert(coded_log_blocks.get_width() == num_blocks_x); + assert(coded_log_blocks.get_height() == num_blocks_y); + + for (uint32_t by = 0; by < num_blocks_y; by++) + { + for (uint32_t bx = 0; bx < num_blocks_x; bx++) + { + const astc_helpers::log_astc_block& log_blk = coded_log_blocks(bx, by); + + bool pack_status = astc_helpers::pack_astc_block(*static_cast(dst_tex.get_block_ptr(bx, by)), log_blk); + if (!pack_status) + { + error_printf("basis_compressor::encode_slices_to_xuastc_or_astc_ldr: pack_astc_block() failed!\n"); + return cECFailedEncodeUASTC; + } + + } // bx + } // by + + uint8_vec& dst_buf = m_uastc_backend_output.m_slice_image_data[slice_index]; + + if (basis_tex_format_is_astc_ldr(m_fmt_mode)) + { + // Plain ASTC LDR 4x4-12x12 + dst_buf.resize(dst_tex.get_size_in_bytes()); + memcpy(&dst_buf[0], dst_tex.get_ptr(), dst_tex.get_size_in_bytes()); + } + else + { + // Supercompressed XUASTC LDR 4x4-12x12 + assert(intermediate_tex_data.size_in_bytes()); + + dst_buf.resize(intermediate_tex_data.size_in_bytes()); + memcpy(&dst_buf[0], intermediate_tex_data.get_ptr(), intermediate_tex_data.size_in_bytes()); + } + + 
m_uastc_backend_output.m_slice_image_crcs[slice_index] = basist::crc16(dst_buf.get_ptr(), dst_buf.size_in_bytes(), 0); + + } // slice_index + + return cECSuccess; + } + basis_compressor::error_code basis_compressor::encode_slices_to_uastc_4x4_ldr() { debug_printf("basis_compressor::encode_slices_to_uastc_4x4_ldr\n"); @@ -933,12 +1479,12 @@ namespace basisu for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) m_uastc_slice_textures[slice_index].init(texture_format::cUASTC4x4, m_slice_descs[slice_index].m_orig_width, m_slice_descs[slice_index].m_orig_height); - m_uastc_backend_output.m_tex_format = basist::basis_tex_format::cUASTC4x4; + m_uastc_backend_output.m_tex_format = basist::basis_tex_format::cUASTC_LDR_4x4; m_uastc_backend_output.m_etc1s = false; m_uastc_backend_output.m_slice_desc = m_slice_descs; m_uastc_backend_output.m_slice_image_data.resize(m_slice_descs.size()); m_uastc_backend_output.m_slice_image_crcs.resize(m_slice_descs.size()); - + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) { gpu_image& tex = m_uastc_slice_textures[slice_index]; @@ -950,19 +1496,21 @@ namespace basisu const uint32_t total_blocks = tex.get_total_blocks(); const image& source_image = m_slice_images[slice_index]; - std::atomic total_blocks_processed; - total_blocks_processed.store(0); - + std::mutex status_output_mutex; + uint32_t total_blocks_processed = 0; + float last_percentage_printed = 0; + const uint32_t N = 256; for (uint32_t block_index_iter = 0; block_index_iter < total_blocks; block_index_iter += N) { const uint32_t first_index = block_index_iter; const uint32_t last_index = minimum(total_blocks, block_index_iter + N); - - m_params.m_pJob_pool->add_job([this, first_index, last_index, num_blocks_x, num_blocks_y, total_blocks, &source_image, &tex, &total_blocks_processed] + + m_params.m_pJob_pool->add_job([this, first_index, last_index, num_blocks_x, num_blocks_y, total_blocks, &source_image, &tex, + 
&status_output_mutex, &total_blocks_processed, &last_percentage_printed] { BASISU_NOTE_UNUSED(num_blocks_y); - + uint32_t uastc_flags = m_params.m_pack_uastc_ldr_4x4_flags; if ((m_params.m_rdo_uastc_ldr_4x4) && (m_params.m_rdo_uastc_ldr_4x4_favor_simpler_modes_in_rdo_mode)) uastc_flags |= cPackUASTCFavorSimplerModes; @@ -980,14 +1528,31 @@ namespace basisu encode_uastc(&block_pixels[0][0].r, dest_block, uastc_flags); - total_blocks_processed++; + } // block_index + + if (m_params.m_status_output) + { + float percent_done = 0; + bool print_flag = false; - uint32_t val = total_blocks_processed; - if (((val & 16383) == 16383) && m_params.m_status_output) { - debug_printf("basis_compressor::encode_slices_to_uastc_4x4_ldr: %3.1f%% done\n", static_cast(val) * 100.0f / total_blocks); + std::lock_guard lck(status_output_mutex); + + total_blocks_processed += (last_index - first_index) + 1; + + percent_done = ((float)total_blocks_processed * 100.0f) / (float)total_blocks; + + if ((percent_done >= 100.0f) || (percent_done >= (last_percentage_printed + 5.0f))) + { + last_percentage_printed = percent_done; + + print_flag = true; + } } + // minor print race here, doesn't matter + if (print_flag) + debug_printf("basis_compressor::encode_slices_to_uastc_4x4_ldr: %3.1f%% done\n", percent_done); } }); @@ -1005,7 +1570,7 @@ namespace basisu rdo_params.m_lz_dict_size = m_params.m_rdo_uastc_ldr_4x4_dict_size; rdo_params.m_smooth_block_max_error_scale = m_params.m_rdo_uastc_ldr_4x4_max_smooth_block_error_scale; rdo_params.m_max_smooth_block_std_dev = m_params.m_rdo_uastc_ldr_4x4_smooth_block_max_std_dev; - + bool status = uastc_rdo(tex.get_total_blocks(), (basist::uastc_block*)tex.get_ptr(), (const color_rgba *)m_source_blocks[slice_desc.m_first_block_index].m_pixels, rdo_params, m_params.m_pack_uastc_ldr_4x4_flags, m_params.m_rdo_uastc_ldr_4x4_multithreading ? m_params.m_pJob_pool : nullptr, (m_params.m_rdo_uastc_ldr_4x4_multithreading && m_params.m_pJob_pool) ? 
basisu::minimum(4, (uint32_t)m_params.m_pJob_pool->get_total_threads()) : 0); @@ -1017,11 +1582,11 @@ namespace basisu m_uastc_backend_output.m_slice_image_data[slice_index].resize(tex.get_size_in_bytes()); memcpy(&m_uastc_backend_output.m_slice_image_data[slice_index][0], tex.get_ptr(), tex.get_size_in_bytes()); - + m_uastc_backend_output.m_slice_image_crcs[slice_index] = basist::crc16(tex.get_ptr(), tex.get_size_in_bytes(), 0); - + } // slice_index - + return cECSuccess; } @@ -1057,8 +1622,8 @@ namespace basisu pSource_image = &mips[level - 1]; } - bool status = image_resample(*pSource_image, level_img, - //m_params.m_mip_filter.c_str(), + bool status = image_resample(*pSource_image, level_img, + //m_params.m_mip_filter.c_str(), "box", // TODO: negative lobes in the filter are causing negative colors, try Mitchell m_params.m_mip_scale, m_params.m_mip_wrapping, 0, has_alpha ? 4 : 3); if (!status) @@ -1113,12 +1678,12 @@ namespace basisu image &level_img = *enlarge_vector(mips, 1); level_img.resize(level_width, level_height); - - int result = stbir_resize_uint8_generic( + + int result = stbir_resize_uint8_generic( (const uint8_t *)img.get_ptr(), img.get_width(), img.get_height(), img.get_pitch() * sizeof(color_rgba), (uint8_t *)level_img.get_ptr(), level_img.get_width(), level_img.get_height(), level_img.get_pitch() * sizeof(color_rgba), has_alpha ? 4 : 3, has_alpha ? 3 : STBIR_ALPHA_CHANNEL_NONE, m_params.m_mip_premultiplied ? STBIR_FLAG_ALPHA_PREMULTIPLIED : 0, - m_params.m_mip_wrapping ? STBIR_EDGE_WRAP : STBIR_EDGE_CLAMP, filter, m_params.m_mip_srgb ? STBIR_COLORSPACE_SRGB : STBIR_COLORSPACE_LINEAR, + m_params.m_mip_wrapping ? STBIR_EDGE_WRAP : STBIR_EDGE_CLAMP, filter, m_params.m_mip_srgb ? 
STBIR_COLORSPACE_SRGB : STBIR_COLORSPACE_LINEAR, nullptr); if (result == 0) @@ -1126,7 +1691,7 @@ namespace basisu error_printf("basis_compressor::generate_mipmaps: stbir_resize_uint8_generic() failed!\n"); return false; } - + if (m_params.m_mip_renormalize) level_img.renormalize_normal_map(); } @@ -1170,6 +1735,7 @@ namespace basisu const uint32_t width = src_img.get_width(); const uint32_t height = src_img.get_height(); + // Find max used value float max_used_val = 0.0f; for (uint32_t y = 0; y < height; y++) { @@ -1182,6 +1748,8 @@ namespace basisu } double hdr_image_scale = 1.0f; + + // If the max value can't be encoded safely to ASTC HDR, we'll have to rescale the source image. if (max_used_val > basist::ASTC_HDR_MAX_VAL) { hdr_image_scale = max_used_val / basist::ASTC_HDR_MAX_VAL; @@ -1202,15 +1770,16 @@ namespace basisu printf("Warning: The input HDR image's maximum used float value was %f, which is too high to encode as ASTC HDR. The image's components have been linearly scaled so the maximum used value is %f, by multiplying by %f.\n", max_used_val, basist::ASTC_HDR_MAX_VAL, inv_hdr_image_scale); - printf("The decoded ASTC HDR texture will have to be scaled up by %f.\n", hdr_image_scale); + printf("The decoded/sampled ASTC HDR texture will have to be scaled up by %f. See the \"HDRScale\" KTX2 key value field.\n", hdr_image_scale); } - // TODO: Determine a constant scale factor, apply if > MAX_HALF_FLOAT + // Remember the scale factor so it can be written to the output file. + m_hdr_image_scale = (float)hdr_image_scale; + + // Final check of the input pixels for anything bad that could cause downstream encoding problems. 
if (!src_img.clean_astc_hdr_pixels(basist::ASTC_HDR_MAX_VAL)) printf("Warning: clean_astc_hdr_pixels() had to modify the input image to encode to ASTC HDR - see previous warning(s).\n"); - - m_hdr_image_scale = (float)hdr_image_scale; - + float lowest_nonzero_val = 1e+30f; float lowest_val = 1e+30f; float highest_val = -1e+30f; @@ -1250,7 +1819,7 @@ namespace basisu if (m_params.m_source_mipmap_images.size() || m_params.m_source_mipmap_images_hdr.size()) return true; - + // See if any input filenames are .DDS bool any_dds = false, all_dds = true; for (uint32_t i = 0; i < m_params.m_source_filenames.size(); i++) @@ -1319,7 +1888,7 @@ namespace basisu ldr_mips.erase_index(0U); m_params.m_source_mipmap_images.back().swap(ldr_mips); - + any_mipmaps = true; } } @@ -1339,7 +1908,7 @@ namespace basisu hdr_mips.erase_index(0U); m_params.m_source_mipmap_images_hdr.back().swap(hdr_mips); - + any_mipmaps = true; } @@ -1363,7 +1932,7 @@ namespace basisu error_printf("HDR mode enabled, but only LDR .DDS files were loaded. HDR mode requires half or float (HDR) .DDS inputs.\n"); return false; } - + return true; } @@ -1371,7 +1940,7 @@ namespace basisu { debug_printf("basis_compressor::read_source_images\n"); - const uint32_t total_source_files = m_params.m_read_source_images ? (uint32_t)m_params.m_source_filenames.size() : + const uint32_t total_source_files = m_params.m_read_source_images ? (uint32_t)m_params.m_source_filenames.size() : (m_params.m_hdr ? (uint32_t)m_params.m_source_images_hdr.size() : (uint32_t)m_params.m_source_images.size()); if (!total_source_files) @@ -1395,7 +1964,7 @@ namespace basisu basisu::vector source_images_hdr; basisu::vector source_filenames; - + // TODO: Note HDR images don't support alpha here, currently. // First load all source images, and determine if any have an alpha channel. 
@@ -1530,7 +2099,7 @@ namespace basisu for (uint32_t x = 0; x < file_image_hdr.get_width(); x++) { const vec4F& c = file_image_hdr(x, y); - + // For now, alpha is always 1.0f in UASTC HDR. file_image_hdr(x, y).set(c[m_params.m_swizzle[0]], c[m_params.m_swizzle[1]], c[m_params.m_swizzle[2]], 1.0f); // c[m_params.m_swizzle[3]]); } @@ -1648,7 +2217,7 @@ namespace basisu source_filenames.push_back(pSource_filename); } - // Check if the caller has generated their own mipmaps. + // Check if the caller has generated their own mipmaps. if (m_params.m_hdr) { if (m_params.m_source_mipmap_images_hdr.size()) @@ -1661,7 +2230,7 @@ namespace basisu } } } - else + else { if (m_params.m_source_mipmap_images.size()) { @@ -1702,23 +2271,23 @@ namespace basisu for (uint32_t source_file_index = 0; source_file_index < total_source_files; source_file_index++) { const std::string &source_filename = source_filenames[source_file_index]; - + basisu::vector slices; basisu::vector slices_hdr; - + slices.reserve(32); slices_hdr.reserve(32); - + // The first (largest) mipmap level. image *pFile_image = source_images.size() ? &source_images[source_file_index] : nullptr; imagef *pFile_image_hdr = source_images_hdr.size() ? &source_images_hdr[source_file_index] : nullptr; - + // Reserve a slot for mip0. if (m_params.m_hdr) slices_hdr.resize(1); else slices.resize(1); - + if ((!m_params.m_hdr) && (m_params.m_source_mipmap_images.size())) { // User-provided mipmaps for each layer or image in the texture array. @@ -1800,10 +2369,10 @@ namespace basisu uint_vec mip_indices(m_params.m_hdr ? slices_hdr.size() : slices.size()); for (uint32_t i = 0; i < (m_params.m_hdr ? slices_hdr.size() : slices.size()); i++) mip_indices[i] = i; - + if ((!m_params.m_hdr) && (m_any_source_image_has_alpha) && (!m_params.m_uastc)) { - // For ETC1S, if source has alpha, then even mips will have RGB, and odd mips will have alpha in RGB. 
+ // For ETC1S, if source has alpha, then even mips will have RGB, and odd mips will have alpha in RGB. basisu::vector alpha_slices; uint_vec new_mip_indices; @@ -1822,7 +2391,7 @@ namespace basisu lvl_a(x, y).set_noclamp_rgba(a, a, a, 255); } } - + lvl_rgb.set_alpha(255); alpha_slices.push_back(lvl_rgb); @@ -1844,7 +2413,7 @@ namespace basisu { assert(slices.size() == mip_indices.size()); } - + for (uint32_t slice_index = 0; slice_index < (m_params.m_hdr ? slices_hdr.size() : slices.size()); slice_index++) { image *pSlice_image = m_params.m_hdr ? nullptr : &slices[slice_index]; @@ -1903,10 +2472,10 @@ namespace basisu m_stats[dest_image_index].m_width = orig_width; m_stats[dest_image_index].m_height = orig_height; - debug_printf("****** Slice %u: mip %u, alpha_slice: %u, filename: \"%s\", original: %ux%u actual: %ux%u\n", - m_slice_descs.size() - 1, mip_indices[slice_index], is_alpha_slice, source_filename.c_str(), - orig_width, orig_height, - m_params.m_hdr ? pSlice_image_hdr->get_width() : pSlice_image->get_width(), + debug_printf("****** Slice %u: mip %u, alpha_slice: %u, filename: \"%s\", original: %ux%u actual: %ux%u\n", + m_slice_descs.size() - 1, mip_indices[slice_index], is_alpha_slice, source_filename.c_str(), + orig_width, orig_height, + m_params.m_hdr ? pSlice_image_hdr->get_width() : pSlice_image->get_width(), m_params.m_hdr ? pSlice_image_hdr->get_height() : pSlice_image->get_height()); basisu_backend_slice_desc& slice_desc = m_slice_descs[dest_image_index]; @@ -1941,10 +2510,21 @@ namespace basisu slice_desc.m_mip_index = mip_indices[slice_index]; slice_desc.m_alpha = is_alpha_slice; + slice_desc.m_iframe = false; + if (m_params.m_tex_type == basist::cBASISTexTypeVideoFrames) { - slice_desc.m_iframe = (source_file_index == 0); + if (m_params.m_uastc) + { + // If it's not ETC1S, all slices are currently i-frames. + slice_desc.m_iframe = true; + } + else + { + // ETC1S: only the first frame is currently an iframe. 
(TODO: We can easily fix this so ETC1S has periodic i-frames.) + slice_desc.m_iframe = (source_file_index == 0); + } } m_total_blocks += slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y; @@ -1969,7 +2549,7 @@ namespace basisu error_printf("Too many slices!\n"); return false; } - + // Basic sanity check on the slices for (uint32_t i = 1; i < m_slice_descs.size(); i++) { @@ -1979,7 +2559,7 @@ namespace basisu // Make sure images are in order int image_delta = (int)slice_desc.m_source_file_index - (int)prev_slice_desc.m_source_file_index; if (image_delta > 1) - return false; + return false; // Make sure mipmap levels are in order if (!image_delta) @@ -2002,8 +2582,8 @@ namespace basisu if (m_params.m_status_output) { printf("Slice: %u, alpha: %u, orig width/height: %ux%u, width/height: %ux%u, first_block: %u, image_index: %u, mip_level: %u, iframe: %u\n", - i, slice_desc.m_alpha, slice_desc.m_orig_width, slice_desc.m_orig_height, - slice_desc.m_width, slice_desc.m_height, + i, slice_desc.m_alpha, slice_desc.m_orig_width, slice_desc.m_orig_height, + slice_desc.m_width, slice_desc.m_height, slice_desc.m_first_block_index, slice_desc.m_source_file_index, slice_desc.m_mip_index, slice_desc.m_iframe); } @@ -2058,20 +2638,20 @@ namespace basisu } // Do some basic validation for 2D arrays, cubemaps, video, and volumes. - bool basis_compressor::validate_texture_type_constraints() + bool basis_compressor::validate_texture_type_constraints() { debug_printf("basis_compressor::validate_texture_type_constraints\n"); // In 2D mode anything goes (each image may have a different resolution and # of mipmap levels). if (m_params.m_tex_type == basist::cBASISTexType2D) return true; - + uint32_t total_basis_images = 0; for (uint32_t slice_index = 0; slice_index < (m_params.m_hdr ? 
m_slice_images_hdr.size() : m_slice_images.size()); slice_index++) { const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; - + total_basis_images = maximum(total_basis_images, slice_desc.m_source_file_index + 1); } @@ -2094,7 +2674,7 @@ namespace basisu const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; image_mipmap_levels[slice_desc.m_source_file_index] = maximum(image_mipmap_levels[slice_desc.m_source_file_index], slice_desc.m_mip_index + 1); - + if (slice_desc.m_mip_index != 0) continue; @@ -2127,9 +2707,13 @@ namespace basisu debug_printf("basis_compressor::extract_source_blocks\n"); // No need to extract blocks in 6x6 mode, but the 4x4 compressors want 4x4 blocks. - if ((m_fmt_mode == basist::basis_tex_format::cASTC_HDR_6x6) || (m_fmt_mode == basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE)) + if ((m_fmt_mode == basist::basis_tex_format::cASTC_HDR_6x6) || (m_fmt_mode == basist::basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE)) return true; + // No need to extract blocks in XUASTC/ASTC LDR mode either. + if (basis_tex_format_is_xuastc_ldr(m_fmt_mode) || basis_tex_format_is_astc_ldr(m_fmt_mode)) + return true; + if (m_params.m_hdr) m_source_blocks_hdr.resize(m_total_blocks); else @@ -2184,7 +2768,7 @@ namespace basisu bool basis_compressor::process_frontend() { debug_printf("basis_compressor::process_frontend\n"); - + #if 0 // TODO basis_etc1_pack_params pack_params; @@ -2235,21 +2819,21 @@ namespace basisu error_printf("Too many selector clusters! 
(%u but max is %u)\n", selector_clusters, basisu_frontend::cMaxSelectorClusters); return false; } - - if (m_params.m_etc1s_quality_level != -1) + + if (m_params.m_quality_level != -1) { - const float quality = saturate(m_params.m_etc1s_quality_level / 255.0f); - + const float quality = saturate(m_params.m_quality_level / 255.0f); + const float bits_per_endpoint_cluster = 14.0f; const float max_desired_endpoint_cluster_bits_per_texel = 1.0f; // .15f int max_endpoints = static_cast((max_desired_endpoint_cluster_bits_per_texel * total_texels) / bits_per_endpoint_cluster); - + const float mid = 128.0f / 255.0f; float color_endpoint_quality = quality; const float endpoint_split_point = 0.5f; - + // In v1.2 and in previous versions, the endpoint codebook size at quality 128 was 3072. This wasn't quite large enough. const int ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE = 4800; const int MAX_ENDPOINT_CODEBOOK_SIZE = 8192; @@ -2260,7 +2844,7 @@ namespace basisu max_endpoints = clamp(max_endpoints, 256, ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE); max_endpoints = minimum(max_endpoints, m_total_blocks); - + if (max_endpoints < 64) max_endpoints = 64; endpoint_clusters = clamp((uint32_t)(.5f + lerp(32, static_cast(max_endpoints), color_endpoint_quality)), 32, basisu_frontend::cMaxEndpointClusters); @@ -2271,12 +2855,12 @@ namespace basisu max_endpoints = clamp(max_endpoints, 256, MAX_ENDPOINT_CODEBOOK_SIZE); max_endpoints = minimum(max_endpoints, m_total_blocks); - + if (max_endpoints < ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE) max_endpoints = ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE; endpoint_clusters = clamp((uint32_t)(.5f + lerp(ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE, static_cast(max_endpoints), color_endpoint_quality)), 32, basisu_frontend::cMaxEndpointClusters); } - + float bits_per_selector_cluster = 14.0f; const float max_desired_selector_cluster_bits_per_texel = 1.0f; // .15f @@ -2294,18 +2878,18 @@ namespace basisu debug_printf("Max endpoints: %u, max 
selectors: %u\n", endpoint_clusters, selector_clusters); - if (m_params.m_etc1s_quality_level >= 223) + if (m_params.m_quality_level >= 223) { if (!m_params.m_selector_rdo_thresh.was_changed()) { if (!m_params.m_endpoint_rdo_thresh.was_changed()) m_params.m_endpoint_rdo_thresh *= .25f; - + if (!m_params.m_selector_rdo_thresh.was_changed()) m_params.m_selector_rdo_thresh *= .25f; } } - else if (m_params.m_etc1s_quality_level >= 192) + else if (m_params.m_quality_level >= 192) { if (!m_params.m_endpoint_rdo_thresh.was_changed()) m_params.m_endpoint_rdo_thresh *= .5f; @@ -2313,7 +2897,7 @@ namespace basisu if (!m_params.m_selector_rdo_thresh.was_changed()) m_params.m_selector_rdo_thresh *= .5f; } - else if (m_params.m_etc1s_quality_level >= 160) + else if (m_params.m_quality_level >= 160) { if (!m_params.m_endpoint_rdo_thresh.was_changed()) m_params.m_endpoint_rdo_thresh *= .75f; @@ -2321,18 +2905,18 @@ namespace basisu if (!m_params.m_selector_rdo_thresh.was_changed()) m_params.m_selector_rdo_thresh *= .75f; } - else if (m_params.m_etc1s_quality_level >= 129) + else if (m_params.m_quality_level >= 129) { float l = (quality - 129 / 255.0f) / ((160 - 129) / 255.0f); if (!m_params.m_endpoint_rdo_thresh.was_changed()) m_params.m_endpoint_rdo_thresh *= lerp(1.0f, .75f, l); - + if (!m_params.m_selector_rdo_thresh.was_changed()) m_params.m_selector_rdo_thresh *= lerp(1.0f, .75f, l); } } - + basisu_frontend::params p; p.m_num_source_blocks = m_total_blocks; p.m_pSource_blocks = &m_source_blocks[0]; @@ -2341,14 +2925,14 @@ namespace basisu p.m_perceptual = m_params.m_perceptual; p.m_debug_stats = m_params.m_debug; p.m_debug_images = m_params.m_debug_images; - p.m_compression_level = m_params.m_compression_level; + p.m_compression_level = m_params.m_etc1s_compression_level; p.m_tex_type = m_params.m_tex_type; p.m_multithreaded = m_params.m_multithreading; p.m_disable_hierarchical_endpoint_codebooks = m_params.m_disable_hierarchical_endpoint_codebooks; p.m_validate = 
m_params.m_validate_etc1s; p.m_pJob_pool = m_params.m_pJob_pool; p.m_pGlobal_codebooks = m_params.m_pGlobal_codebooks; - + // Don't keep trying to use OpenCL if it ever fails. p.m_pOpenCL_context = !m_opencl_failed ? m_pOpenCL_context : nullptr; @@ -2357,7 +2941,7 @@ namespace basisu error_printf("basisu_frontend::init() failed!\n"); return false; } - + m_frontend.compress(); if (m_frontend.get_opencl_failed()) @@ -2368,18 +2952,18 @@ namespace basisu for (uint32_t i = 0; i < m_slice_descs.size(); i++) { char filename[1024]; -#ifdef _WIN32 +#ifdef _WIN32 sprintf_s(filename, sizeof(filename), "rdo_frontend_output_output_blocks_%u.png", i); #else snprintf(filename, sizeof(filename), "rdo_frontend_output_output_blocks_%u.png", i); -#endif +#endif m_frontend.dump_debug_image(filename, m_slice_descs[i].m_first_block_index, m_slice_descs[i].m_num_blocks_x, m_slice_descs[i].m_num_blocks_y, true); #ifdef _WIN32 sprintf_s(filename, sizeof(filename), "rdo_frontend_output_api_%u.png", i); #else snprintf(filename, sizeof(filename), "rdo_frontend_output_api_%u.png", i); -#endif +#endif m_frontend.dump_debug_image(filename, m_slice_descs[i].m_first_block_index, m_slice_descs[i].m_num_blocks_x, m_slice_descs[i].m_num_blocks_y, false); } } @@ -2428,7 +3012,7 @@ namespace basisu for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) memcpy(m_best_etc1s_images[i].get_block_ptr(block_x, block_y, 0), &m_frontend.get_etc1s_block(slice_desc.m_first_block_index + block_x + block_y * num_blocks_x), sizeof(etc_block)); - m_best_etc1s_images[i].unpack(m_best_etc1s_images_unpacked[i]); + m_best_etc1s_images[i].unpack(m_best_etc1s_images_unpacked[i], false); } return true; @@ -2442,7 +3026,7 @@ namespace basisu backend_params.m_debug = m_params.m_debug; backend_params.m_debug_images = m_params.m_debug_images; backend_params.m_etc1s = true; - backend_params.m_compression_level = m_params.m_compression_level; + backend_params.m_compression_level = m_params.m_etc1s_compression_level; if 
(!m_params.m_no_endpoint_rdo) backend_params.m_endpoint_rdo_quality_thresh = m_params.m_endpoint_rdo_thresh; @@ -2484,7 +3068,7 @@ namespace basisu m_output_basis_file = comp_data; uint32_t total_orig_pixels = 0; - + for (uint32_t i = 0; i < m_slice_descs.size(); i++) { const basisu_backend_slice_desc& slice_desc = m_slice_descs[i]; @@ -2492,22 +3076,22 @@ namespace basisu total_orig_pixels += slice_desc.m_orig_width * slice_desc.m_orig_height; } - m_basis_file_size = (uint32_t)comp_data.size(); + m_total_slice_orig_texels = total_orig_pixels; + m_basis_file_size = comp_data.size(); m_basis_bits_per_texel = total_orig_pixels ? (comp_data.size() * 8.0f) / total_orig_pixels : 0; - debug_printf("Total .basis output file size: %u, %3.3f bits/texel\n", comp_data.size(), comp_data.size() * 8.0f / total_orig_pixels); + fmt_debug_printf("Total .basis output file size: {}, {3.3} bits/texel\n", m_basis_file_size, m_basis_bits_per_texel); // HDR 6x6 TODO - // HACK HACK const bool is_hdr_6x6 = m_params.m_hdr && (m_params.m_hdr_mode != hdr_modes::cUASTC_HDR_4X4); - + if (m_params.m_validate_output_data) { interval_timer tm; tm.start(); basist::basisu_transcoder_init(); - + debug_printf("basist::basisu_transcoder_init: Took %f ms\n", tm.get_elapsed_ms()); // Verify the compressed data by transcoding it to ASTC (or ETC1)/BC7 and validating the CRC's. 
@@ -2554,29 +3138,56 @@ namespace basisu double total_time_etc1s_or_astc = 0; - for (uint32_t slice_iter = 0; slice_iter < m_slice_descs.size(); slice_iter++) + // Select formats to transcode to + basisu::texture_format tex_format; + basist::block_format blk_format; + + if (m_params.m_hdr) + { + // HDR + tex_format = texture_format::cBC6HUnsigned; + blk_format = basist::block_format::cBC6H; + } + else if (m_fmt_mode == basist::basis_tex_format::cUASTC_LDR_4x4) + { + // UASTC LDR 4x4 + tex_format = texture_format::cUASTC4x4; + blk_format = basist::block_format::cUASTC_4x4; + } + else if (basis_tex_format_is_xuastc_ldr(m_fmt_mode) || basis_tex_format_is_astc_ldr(m_fmt_mode)) { - // Select either BC6H, UASTC LDR 4x4, or ETC1 - basisu::texture_format tex_format = m_params.m_hdr ? texture_format::cBC6HUnsigned : (m_params.m_uastc ? texture_format::cUASTC4x4 : texture_format::cETC1); - basist::block_format blk_format = m_params.m_hdr ? basist::block_format::cBC6H : (m_params.m_uastc ? basist::block_format::cUASTC_4x4 : basist::block_format::cETC1); + // XUASTC LDR 4x4-12x12 or ASTC LDR 4x4-12x12 + basist::transcoder_texture_format transcoder_fmt = basist::basis_get_transcoder_texture_format_from_xuastc_or_astc_ldr_basis_tex_format(m_fmt_mode); + tex_format = basist::basis_get_texture_format_from_xuastc_or_astc_ldr_basis_tex_format(m_fmt_mode); + blk_format = basist::xuastc_get_block_format(transcoder_fmt); + } + else + { + // ETC1S + tex_format = texture_format::cETC1; + blk_format = basist::block_format::cETC1; + } + + for (uint32_t slice_iter = 0; slice_iter < m_slice_descs.size(); slice_iter++) + { gpu_image decoded_texture; decoded_texture.init( tex_format, - m_slice_descs[slice_iter].m_width, m_slice_descs[slice_iter].m_height); - - tm.start(); - - const uint32_t block_size_x = basisu::get_block_width(tex_format); - const uint32_t block_size_y = basisu::get_block_height(tex_format); - const uint32_t num_dst_blocks_x = (m_slice_descs[slice_iter].m_orig_width + 
block_size_x - 1) / block_size_x; - const uint32_t num_dst_blocks_y = (m_slice_descs[slice_iter].m_orig_height + block_size_y - 1) / block_size_y; + m_slice_descs[slice_iter].m_orig_width, m_slice_descs[slice_iter].m_orig_height); + + const uint32_t dst_block_size_x = basisu::get_block_width(tex_format); + const uint32_t dst_block_size_y = basisu::get_block_height(tex_format); + const uint32_t num_dst_blocks_x = (m_slice_descs[slice_iter].m_orig_width + dst_block_size_x - 1) / dst_block_size_x; + const uint32_t num_dst_blocks_y = (m_slice_descs[slice_iter].m_orig_height + dst_block_size_y - 1) / dst_block_size_y; const uint32_t total_dst_blocks = num_dst_blocks_x * num_dst_blocks_y; + + const uint32_t bytes_per_block = decoded_texture.get_bytes_per_block(); - uint32_t bytes_per_block = m_params.m_uastc ? 16 : 8; + tm.start(); if (!decoder.transcode_slice(&comp_data[0], (uint32_t)comp_data.size(), slice_iter, - reinterpret_cast(decoded_texture.get_ptr()), total_dst_blocks, blk_format, bytes_per_block)) + reinterpret_cast(decoded_texture.get_ptr()), total_dst_blocks, blk_format, bytes_per_block, m_params.m_transcode_flags)) { error_printf("Transcoding failed on slice %u!\n", slice_iter); return false; @@ -2606,15 +3217,15 @@ namespace basisu if (is_hdr_6x6) { assert(basist::basis_is_format_supported(basist::transcoder_texture_format::cTFASTC_HDR_6x6_RGBA, basist::basis_tex_format::cASTC_HDR_6x6)); - assert(basist::basis_is_format_supported(basist::transcoder_texture_format::cTFASTC_HDR_6x6_RGBA, basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE)); + assert(basist::basis_is_format_supported(basist::transcoder_texture_format::cTFASTC_HDR_6x6_RGBA, basist::basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE)); for (uint32_t i = 0; i < m_slice_descs.size(); i++) { gpu_image decoded_texture; - decoded_texture.init(texture_format::cASTC_HDR_6x6, m_slice_descs[i].m_width, m_slice_descs[i].m_height); + decoded_texture.init(texture_format::cASTC_HDR_6x6, 
m_slice_descs[i].m_orig_width, m_slice_descs[i].m_orig_height); if (!decoder.transcode_slice(&comp_data[0], (uint32_t)comp_data.size(), i, - reinterpret_cast(decoded_texture.get_ptr()), m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y, basist::block_format::cASTC_HDR_6x6, 16)) + reinterpret_cast(decoded_texture.get_ptr()), m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y, basist::block_format::cASTC_HDR_6x6, 16, m_params.m_transcode_flags)) { error_printf("Transcoding failed to ASTC HDR on slice %u!\n", i); return false; @@ -2630,10 +3241,10 @@ namespace basisu for (uint32_t i = 0; i < m_slice_descs.size(); i++) { gpu_image decoded_texture; - decoded_texture.init(texture_format::cASTC_HDR_4x4, m_slice_descs[i].m_width, m_slice_descs[i].m_height); + decoded_texture.init(texture_format::cASTC_HDR_4x4, m_slice_descs[i].m_orig_width, m_slice_descs[i].m_orig_height); if (!decoder.transcode_slice(&comp_data[0], (uint32_t)comp_data.size(), i, - reinterpret_cast(decoded_texture.get_ptr()), m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y, basist::block_format::cASTC_HDR_4x4, 16)) + reinterpret_cast(decoded_texture.get_ptr()), m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y, basist::block_format::cASTC_HDR_4x4, 16, m_params.m_transcode_flags)) { error_printf("Transcoding failed to ASTC HDR on slice %u!\n", i); return false; @@ -2645,16 +3256,19 @@ namespace basisu } else { - if (basist::basis_is_format_supported(basist::transcoder_texture_format::cTFBC7_RGBA, basist::basis_tex_format::cUASTC4x4) && + if (basist::basis_is_format_supported(basist::transcoder_texture_format::cTFBC7_RGBA, basist::basis_tex_format::cUASTC_LDR_4x4) && basist::basis_is_format_supported(basist::transcoder_texture_format::cTFBC7_RGBA, basist::basis_tex_format::cETC1S)) { for (uint32_t i = 0; i < m_slice_descs.size(); i++) { gpu_image decoded_texture; - decoded_texture.init(texture_format::cBC7, m_slice_descs[i].m_width, 
m_slice_descs[i].m_height); + decoded_texture.init(texture_format::cBC7, m_slice_descs[i].m_orig_width, m_slice_descs[i].m_orig_height); + + const uint32_t num_bc7_blocks_x = decoded_texture.get_blocks_x(); + const uint32_t num_bc7_blocks_y = decoded_texture.get_blocks_y(); if (!decoder.transcode_slice(&comp_data[0], (uint32_t)comp_data.size(), i, - reinterpret_cast(decoded_texture.get_ptr()), m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y, basist::block_format::cBC7, 16)) + decoded_texture.get_ptr(), num_bc7_blocks_x * num_bc7_blocks_y, basist::block_format::cBC7, 16, m_params.m_transcode_flags)) { error_printf("Transcoding failed to BC7 on slice %u!\n", i); return false; @@ -2671,25 +3285,37 @@ namespace basisu { if (m_params.m_hdr) { - // BC6H bool status = m_decoded_output_textures[i].unpack_hdr(m_decoded_output_textures_bc6h_hdr_unpacked[i]); - assert(status); - BASISU_NOTE_UNUSED(status); - - // ASTC HDR + if (!status) + { + error_printf("Unpacking failed on slice %u!\n", i); + return false; + } + status = m_decoded_output_textures_astc_hdr[i].unpack_hdr(m_decoded_output_textures_astc_hdr_unpacked[i]); - assert(status); + if (!status) + { + error_printf("Unpacking failed on slice %u!\n", i); + return false; + } } else { - bool status = m_decoded_output_textures[i].unpack(m_decoded_output_textures_unpacked[i]); - assert(status); - BASISU_NOTE_UNUSED(status); + bool status = m_decoded_output_textures[i].unpack(m_decoded_output_textures_unpacked[i], m_params.m_ktx2_and_basis_srgb_transfer_function); + if (!status) + { + error_printf("Unpacking failed on slice %u!\n", i); + return false; + } if (m_decoded_output_textures_bc7[i].get_pixel_width()) { - status = m_decoded_output_textures_bc7[i].unpack(m_decoded_output_textures_unpacked_bc7[i]); - assert(status); + status = m_decoded_output_textures_bc7[i].unpack(m_decoded_output_textures_unpacked_bc7[i], m_params.m_ktx2_and_basis_srgb_transfer_function); + if (!status) + { + 
error_printf("Unpacking failed on slice %u!\n", i); + return false; + } } } } @@ -2703,6 +3329,7 @@ namespace basisu if (!is_hdr_6x6) { + // Sanity check decoded output texture sizes for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) { const basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index]; @@ -2715,7 +3342,7 @@ namespace basisu } } // if (m_params.m_validate_output_data) - + return true; } @@ -2756,7 +3383,7 @@ namespace basisu { const std::string filename(string_format("%s_compressive_tonemapped.png", pBasename)); image compressive_tonemapped_img; - + bool status = tonemap_image_compressive(compressive_tonemapped_img, hdr_img); if (!status) { @@ -2802,7 +3429,7 @@ namespace basisu if (m_params.m_status_output) { - printf("Wrote output .basis/.ktx2 file \"%s\"\n", output_filename.c_str()); + printf("Wrote compressed output file \"%s\"\n", output_filename.c_str()); } } @@ -2824,7 +3451,7 @@ namespace basisu uint32_t total_texels = 0; for (uint32_t i = 0; i < m_slice_descs.size(); i++) total_texels += (m_slice_descs[i].m_orig_width * m_slice_descs[i].m_orig_height); - + m_basis_bits_per_texel = ((float)comp_size * 8.0f) / total_texels; fmt_debug_printf("Output file size: {}, {3.2} bits/texel, LZ compressed file size: {}, {3.2} bits/texel\n", @@ -2833,7 +3460,7 @@ namespace basisu } m_stats.resize(m_slice_descs.size()); - + if (m_params.m_validate_output_data) { if (m_params.m_hdr) @@ -2906,7 +3533,7 @@ namespace basisu im.calc(m_slice_images_hdr[slice_index], m_decoded_output_textures_astc_hdr_unpacked[slice_index], 0, 3, true, true); s.m_basis_rgb_avg_log2_psnr = (float)im.m_psnr; - + if (m_params.m_print_stats) { printf("\nASTC Log2 RGB: "); @@ -2924,7 +3551,7 @@ namespace basisu printf("\n"); } } - + if (m_params.m_debug_images) { std::string out_basename; @@ -2940,9 +3567,9 @@ namespace basisu { gpu_image bc6h_tex(m_decoded_output_textures[slice_index]); bc6h_tex.override_dimensions(slice_desc.m_orig_width, 
slice_desc.m_orig_height); - + std::string filename(out_basename + "_bc6h.dds"); - write_compressed_texture_file(filename.c_str(), bc6h_tex, true); + write_compressed_texture_file(filename.c_str(), bc6h_tex, false); printf("Wrote .DDS file %s\n", filename.c_str()); } @@ -2950,11 +3577,11 @@ namespace basisu { gpu_image astc_tex(m_decoded_output_textures_astc_hdr[slice_index]); astc_tex.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height); - + std::string filename1(out_basename + "_astc.astc"); - + uint32_t block_width = 4, block_height = 4; - if ((m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6) || (m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6_INTERMEDIATE)) + if ((m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6) || (m_params.m_hdr_mode == hdr_modes::cUASTC_HDR_6X6_INTERMEDIATE)) { block_width = 6; block_height = 6; @@ -2964,7 +3591,7 @@ namespace basisu printf("Wrote .ASTC file %s\n", filename1.c_str()); std::string filename2(out_basename + "_astc.ktx"); - write_compressed_texture_file(filename2.c_str(), astc_tex, true); + write_compressed_texture_file(filename2.c_str(), astc_tex, false); printf("Wrote .KTX file %s\n", filename2.c_str()); } @@ -2972,7 +3599,7 @@ namespace basisu { imagef astc_img(m_decoded_output_textures_astc_hdr_unpacked[slice_index]); astc_img.resize(slice_desc.m_orig_width, slice_desc.m_orig_height); - + std::string filename(out_basename + "_unpacked_astc.exr"); write_exr(filename.c_str(), astc_img, 3, 0); printf("Wrote .EXR file %s\n", filename.c_str()); @@ -3007,50 +3634,50 @@ namespace basisu printf("Slice: %u\n", slice_index); image_stats& s = m_stats[slice_index]; - + image_metrics em; // ---- .basis stats em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 3); if (m_params.m_print_stats) - em.print(".basis RGB Avg: "); + em.print("RGB Avg: "); s.m_basis_rgb_avg_psnr = (float)em.m_psnr; em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 4); if 
(m_params.m_print_stats) - em.print(".basis RGBA Avg: "); + em.print("RGBA Avg: "); s.m_basis_rgba_avg_psnr = (float)em.m_psnr; em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 1); if (m_params.m_print_stats) - em.print(".basis R Avg: "); + em.print("R Avg: "); em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 1, 1); if (m_params.m_print_stats) - em.print(".basis G Avg: "); + em.print("G Avg: "); em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 2, 1); if (m_params.m_print_stats) - em.print(".basis B Avg: "); + em.print("B Avg: "); - if (m_params.m_uastc) + //if (m_params.m_uastc) { em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 3, 1); if (m_params.m_print_stats) - em.print(".basis A Avg: "); + em.print("A Avg: "); s.m_basis_a_avg_psnr = (float)em.m_psnr; } em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 0); if (m_params.m_print_stats) - em.print(".basis 709 Luma: "); + em.print("709 Luma: "); s.m_basis_luma_709_psnr = static_cast(em.m_psnr); s.m_basis_luma_709_ssim = static_cast(em.m_ssim); em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 0, true, true); if (m_params.m_print_stats) - em.print(".basis 601 Luma: "); + em.print("601 Luma: "); s.m_basis_luma_601_psnr = static_cast(em.m_psnr); if (m_slice_descs.size() == 1) @@ -3058,8 +3685,8 @@ namespace basisu const uint32_t output_size = comp_size ? 
(uint32_t)comp_size : (uint32_t)comp_data.size(); if (m_params.m_print_stats) { - debug_printf(".basis RGB PSNR per bit/texel*10000: %3.3f\n", 10000.0f * s.m_basis_rgb_avg_psnr / ((output_size * 8.0f) / (slice_desc.m_orig_width * slice_desc.m_orig_height))); - debug_printf(".basis Luma 709 PSNR per bit/texel*10000: %3.3f\n", 10000.0f * s.m_basis_luma_709_psnr / ((output_size * 8.0f) / (slice_desc.m_orig_width * slice_desc.m_orig_height))); + debug_printf("RGB PSNR per bit/texel*10000: %3.3f\n", 10000.0f * s.m_basis_rgb_avg_psnr / ((output_size * 8.0f) / (slice_desc.m_orig_width * slice_desc.m_orig_height))); + debug_printf("Luma 709 PSNR per bit/texel*10000: %3.3f\n", 10000.0f * s.m_basis_luma_709_psnr / ((output_size * 8.0f) / (slice_desc.m_orig_width * slice_desc.m_orig_height))); } } @@ -3067,45 +3694,45 @@ namespace basisu { // ---- BC7 stats em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 3); - //if (m_params.m_print_stats) - // em.print("BC7 RGB Avg: "); + if (m_params.m_print_stats) + em.print("BC7 RGB Avg: "); s.m_bc7_rgb_avg_psnr = (float)em.m_psnr; em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 4); - //if (m_params.m_print_stats) - // em.print("BC7 RGBA Avg: "); + if (m_params.m_print_stats) + em.print("BC7 RGBA Avg: "); s.m_bc7_rgba_avg_psnr = (float)em.m_psnr; em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 1); - //if (m_params.m_print_stats) - // em.print("BC7 R Avg: "); + if (m_params.m_print_stats) + em.print("BC7 R Avg: "); em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 1, 1); - //if (m_params.m_print_stats) - // em.print("BC7 G Avg: "); + if (m_params.m_print_stats) + em.print("BC7 G Avg: "); em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 2, 1); - //if (m_params.m_print_stats) - // em.print("BC7 B Avg: "); + if 
(m_params.m_print_stats) + em.print("BC7 B Avg: "); - if (m_params.m_uastc) + //if (m_params.m_uastc) { em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 3, 1); - //if (m_params.m_print_stats) - // em.print("BC7 A Avg: "); + if (m_params.m_print_stats) + em.print("BC7 A Avg: "); s.m_bc7_a_avg_psnr = (float)em.m_psnr; } em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 0); - //if (m_params.m_print_stats) - // em.print("BC7 709 Luma: "); + if (m_params.m_print_stats) + em.print("BC7 709 Luma: "); s.m_bc7_luma_709_psnr = static_cast(em.m_psnr); s.m_bc7_luma_709_ssim = static_cast(em.m_ssim); em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 0, true, true); - //if (m_params.m_print_stats) - // em.print("BC7 601 Luma: "); + if (m_params.m_print_stats) + em.print("BC7 601 Luma: "); s.m_bc7_luma_601_psnr = static_cast(em.m_psnr); } @@ -3146,10 +3773,10 @@ namespace basisu { gpu_image best_etc1s_gpu_image(m_best_etc1s_images[slice_index]); best_etc1s_gpu_image.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height); - write_compressed_texture_file((out_basename + "_best_etc1s.ktx").c_str(), best_etc1s_gpu_image, true); + write_compressed_texture_file((out_basename + "_best_etc1s.ktx").c_str(), best_etc1s_gpu_image, m_params.m_ktx2_and_basis_srgb_transfer_function); image best_etc1s_unpacked; - best_etc1s_gpu_image.unpack(best_etc1s_unpacked); + best_etc1s_gpu_image.unpack(best_etc1s_unpacked, m_params.m_ktx2_and_basis_srgb_transfer_function); save_png(out_basename + "_best_etc1s.png", best_etc1s_unpacked); } } @@ -3160,7 +3787,7 @@ namespace basisu { gpu_image decoded_etc1s_or_astc(m_decoded_output_textures[slice_index]); decoded_etc1s_or_astc.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height); - write_compressed_texture_file((out_basename + "_transcoded_etc1s_or_astc.ktx").c_str(), decoded_etc1s_or_astc, true); + 
write_compressed_texture_file((out_basename + "_transcoded_etc1s_or_astc.ktx").c_str(), decoded_etc1s_or_astc, m_params.m_ktx2_and_basis_srgb_transfer_function); image temp(m_decoded_output_textures_unpacked[slice_index]); temp.crop(slice_desc.m_orig_width, slice_desc.m_orig_height); @@ -3172,21 +3799,28 @@ namespace basisu { gpu_image decoded_bc7(m_decoded_output_textures_bc7[slice_index]); decoded_bc7.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height); - write_compressed_texture_file((out_basename + "_transcoded_bc7.ktx").c_str(), decoded_bc7, true); + write_compressed_texture_file((out_basename + "_transcoded_bc7.ktx").c_str(), decoded_bc7, m_params.m_ktx2_and_basis_srgb_transfer_function); image temp(m_decoded_output_textures_unpacked_bc7[slice_index]); temp.crop(slice_desc.m_orig_width, slice_desc.m_orig_height); save_png(out_basename + "_transcoded_bc7.png", temp); } } + + if ((m_params.m_debug) && (m_decoded_output_textures_bc7[slice_index].get_pixel_width())) + { + const gpu_image& decoded_bc7 = m_decoded_output_textures_bc7[slice_index]; + + create_bc7_debug_images(slice_desc.m_orig_width, slice_desc.m_orig_height, decoded_bc7.get_ptr(), m_params.m_debug_images ? out_basename.c_str() : nullptr); + } } } // if (m_params.m_hdr) } // if (m_params.m_validate_output_data) - + return true; } - + // Make sure all the mip 0's have the same dimensions and number of mipmap levels, or we can't encode the KTX2 file. 
bool basis_compressor::validate_ktx2_constraints() { @@ -3227,20 +3861,80 @@ namespace basisu return true; } + + // KTX2 DFD base definitions // colorModel=KTX2_KDF_DF_MODEL_ETC1S (0xA3) // LDR ETC1S texture data in a custom format, with global codebooks - static uint8_t g_ktx2_etc1s_nonalpha_dfd[44] = { 0x2C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x28,0x0,0xA3,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x3F,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF }; - static uint8_t g_ktx2_etc1s_alpha_dfd[60] = { 0x3C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x38,0x0,0xA3,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x3F,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF,0x40,0x0,0x3F,0xF,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF }; + static uint8_t g_ktx2_etc1s_nonalpha_dfd[44] = + { + 0x2C,0x0,0x0,0x0, // 0 totalSize + 0x0,0x0,0x0,0x0, // 1 descriptorType/vendorId + 0x2,0x0,0x28,0x0, // 2 descriptorBlockSize/versionNumber + 0xA3,0x1,0x2,0x0, // 3 flags, transferFunction, colorPrimaries, colorModel (KTX2_KDF_DF_MODEL_UASTC_HDR_6X6_INTERMEDIATE) + 0x3,0x3,0x0,0x0, // 4 texelBlockDimension0-texelBlockDimension3 + 0x8,0x0,0x0,0x0, // 5 bytesPlane0-bytesPlane3 + 0x0,0x0,0x0,0x0, // 6 bytesPlane4-bytesPlane7 + 0x0,0x0,0x3F,0x0, // 7 bitOffset/bitLength/channelType and Qualifer flags (KHR_DF_SAMPLE_DATATYPE_FLOAT etc.) 
+ 0x0,0x0,0x0,0x0, // 8 samplePosition0-samplePosition3 + 0x0,0x0,0x0,0x0, // 9 sampleLower (0) + 0xFF,0xFF,0xFF,0xFF // 10 sampleHigher (0xFF) + }; + static uint8_t g_ktx2_etc1s_alpha_dfd[60] = + { + 0x3C,0x0,0x0,0x0, + 0x0,0x0,0x0,0x0, + 0x2,0x0,0x38,0x0, + 0xA3,0x1,0x2,0x0, + 0x3,0x3,0x0,0x0, + 0x8,0x8,0x0,0x0, + 0x0,0x0,0x0,0x0, + 0x0,0x0,0x3F,0x0, + 0x0,0x0,0x0,0x0, + 0x0,0x0,0x0,0x0, + 0xFF,0xFF,0xFF,0xFF, + 0x40,0x0,0x3F,0xF, + 0x0,0x0,0x0,0x0, + 0x0,0x0,0x0,0x0, + 0xFF,0xFF,0xFF,0xFF + }; + // colorModel=KTX2_KDF_DF_MODEL_UASTC_LDR_4X4 (0xA6) // LDR UASTC 4x4 texture data in a custom block format - static uint8_t g_ktx2_uastc_ldr_4x4_nonalpha_dfd[44] = { 0x2C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x28,0x0,0xA6,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x10,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x7F,0x4,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF }; - static uint8_t g_ktx2_uastc_ldr_4x4_alpha_dfd[44] = { 0x2C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x28,0x0,0xA6,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x10,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x7F,0x3,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF }; + static uint8_t g_ktx2_uastc_ldr_4x4_nonalpha_dfd[44] = + { + 0x2C,0x0,0x0,0x0, + 0x0,0x0,0x0,0x0, + 0x2,0x0,0x28,0x0, + 0xA6,0x1,0x2,0x0, + 0x3,0x3,0x0,0x0, + 0x10,0x0,0x0,0x0, + 0x0,0x0,0x0,0x0, + 0x0,0x0,0x7F,0x4, + 0x0,0x0,0x0,0x0, + 0x0,0x0,0x0,0x0, + 0xFF,0xFF,0xFF,0xFF + }; + + static uint8_t g_ktx2_uastc_ldr_4x4_alpha_dfd[44] = + { + 0x2C,0x0,0x0,0x0, + 0x0,0x0,0x0,0x0, + 0x2,0x0,0x28,0x0, + 0xA6,0x1,0x2,0x0, + 0x3,0x3,0x0,0x0, + 0x10,0x0,0x0,0x0, + 0x0,0x0,0x0,0x0, + 0x0,0x0,0x7F,0x3, + 0x0,0x0,0x0,0x0, + 0x0,0x0,0x0,0x0, + 0xFF,0xFF,0xFF,0xFF + }; // colorModel=KTX2_KDF_DF_MODEL_UASTC_HDR_4X4 (0xA7) // Standard ASTC HDR 4x4 texture data but constrained for easy transcoding to BC6H, either highest quality or RDO optimized. 
- static uint8_t g_ktx2_uastc_hdr_4x4_nonalpha_dfd[44] = + static uint8_t g_ktx2_uastc_hdr_4x4_nonalpha_dfd[44] = { 0x2C,0x0,0x0,0x0, // 0 totalSize 0x0,0x0,0x0,0x0, // 1 descriptorType/vendorId @@ -3249,7 +3943,7 @@ namespace basisu 0x3,0x3,0x0,0x0, // 4 texelBlockDimension0-texelBlockDimension3 0x10,0x0,0x0,0x0, // 5 bytesPlane0-bytesPlane3 0x0,0x0,0x0,0x0, // 6 bytesPlane4-bytesPlane7 - 0x0,0x0,0x7F,0x80, // 7 bitLength/bitOffset/channelType and Qualifer flags (KHR_DF_SAMPLE_DATATYPE_FLOAT etc.) + 0x0,0x0,0x7F,0x80, // 7 bitOffset/bitLength/channelType and Qualifer flags (KHR_DF_SAMPLE_DATATYPE_FLOAT etc.) 0x0,0x0,0x0,0x0, // 8 samplePosition0-samplePosition3 0x0,0x0,0x0,0x0, // 9 sampleLower (0.0) 0x00, 0x00, 0x80, 0x3F // 10 sampleHigher (1.0) @@ -3266,35 +3960,87 @@ namespace basisu 0x5,0x5,0x0,0x0, // 4 texelBlockDimension0-texelBlockDimension3 0x10,0x0,0x0,0x0, // 5 bytesPlane0-bytesPlane3 0x0,0x0,0x0,0x0, // 6 bytesPlane4-bytesPlane7 - 0x0,0x0,0x7F,0x80, // 7 bitLength/bitOffset/channelType and Qualifer flags (KHR_DF_SAMPLE_DATATYPE_FLOAT etc.) + 0x0,0x0,0x7F,0x80 | 0x40, // 7 bitOffset/bitLength/channelType and Qualifer flags (KHR_DF_SAMPLE_DATATYPE_FLOAT etc.) 
0x0,0x0,0x0,0x0, // 8 samplePosition0-samplePosition3 - 0x0,0x0,0x0,0x0, // 9 sampleLower (0.0) + 0x0, 0x0, 0x80, 0xBF, // 9 sampleLower (-1.0), to match KTX-Software expected value 0x00, 0x00, 0x80, 0x3F // 10 sampleHigher (1.0) }; - // colorModel=KTX2_KDF_DF_MODEL_ASTC_HDR_6X6_INTERMEDIATE (0xA8) + // colorModel=KTX2_KDF_DF_MODEL_UASTC_HDR_6X6_INTERMEDIATE (0xA8) // Our custom intermediate format that when decoded directly outputs ASTC HDR 6x6 - static uint8_t g_ktx2_astc_hdr_6x6_intermediate_nonalpha_dfd[44] = + static uint8_t g_ktx2_uastc_hdr_6x6_intermediate_nonalpha_dfd[44] = { 0x2C,0x0,0x0,0x0, // 0 totalSize 0x0,0x0,0x0,0x0, // 1 descriptorType/vendorId 0x2,0x0,0x28,0x0, // 2 descriptorBlockSize/versionNumber - 0xA8,0x1,0x1,0x0, // 3 flags, transferFunction, colorPrimaries, colorModel (KTX2_KDF_DF_MODEL_ASTC_HDR_6X6_INTERMEDIATE) + 0xA8,0x1,0x1,0x0, // 3 flags, transferFunction, colorPrimaries, colorModel (KTX2_KDF_DF_MODEL_UASTC_HDR_6X6_INTERMEDIATE) 0x5,0x5,0x0,0x0, // 4 texelBlockDimension0-texelBlockDimension3 0x10,0x0,0x0,0x0, // 5 bytesPlane0-bytesPlane3 0x0,0x0,0x0,0x0, // 6 bytesPlane4-bytesPlane7 - 0x0,0x0,0x7F,0x80, // 7 bitLength/bitOffset/channelType and Qualifer flags (KHR_DF_SAMPLE_DATATYPE_FLOAT etc.) + 0x0,0x0,0x7F,0x80, // 7 bitOffset/bitLength/channelType and Qualifer flags (KHR_DF_SAMPLE_DATATYPE_FLOAT etc.) 0x0,0x0,0x0,0x0, // 8 samplePosition0-samplePosition3 0x0,0x0,0x0,0x0, // 9 sampleLower (0.0) 0x00, 0x00, 0x80, 0x3F // 10 sampleHigher (1.0) }; + // colorModel=KTX2_KDF_DF_MODEL_XUASTC_LDR_INTERMEDIATE (0xA9) + // Custom supercompressed intermediate format, decodes directly to standard ASTC LDR 4x4-12x12. 
+ static uint8_t g_ktx2_xuastc_ldr_intermediate_dfd[44] = + { + 0x2C,0x0,0x0,0x0, // 0 totalSize + 0x0,0x0,0x0,0x0, // 1 descriptorType/vendorId + 0x2,0x0,0x28,0x0, // 2 descriptorBlockSize/versionNumber + (uint8_t)basist::KTX2_KDF_DF_MODEL_XUASTC_LDR_INTERMEDIATE,0x1,0x1,0x0, // 3 flags, transferFunction, colorPrimaries, colorModel (KTX2_KDF_DF_MODEL_UASTC_HDR_6X6_INTERMEDIATE) + 0x3,0x3,0x0,0x0, // 4 texelBlockDimension0-texelBlockDimension3 + 0x10,0x0,0x0,0x0, // 5 bytesPlane0-bytesPlane3 + 0x0,0x0,0x0,0x0, // 6 bytesPlane4-bytesPlane7 + 0x0,0x0,0x7F,0x00, // 7 bitOffset/bitLength/channelType and Qualifer flags (KHR_DF_SAMPLE_DATATYPE_FLOAT etc.) + 0x0,0x0,0x0,0x0, // 8 samplePosition0-samplePosition3 + 0x0,0x0,0x0,0x0, // 9 sampleLower (0) + 0xFF,0xFF,0xFF,0xFF // 10 sampleHigher (0xFF) + }; + + // ASTC LDR 4x4 + static uint8_t g_ktx2_astc_ldr_dfd[44] = + { + 0x2C,0x0,0x0,0x0, // 0 totalSize + 0x0,0x0,0x0,0x0, // 1 descriptorType/vendorId + 0x2,0x0,0x28,0x0, // 2 descriptorBlockSize/versionNumber + (uint8_t)basist::KTX2_KDF_DF_MODEL_ASTC,0x1,0x1,0x0, // 3 flags, transferFunction, colorPrimaries, colorModel + 0x3,0x3,0x0,0x0, // 4 texelBlockDimension0-texelBlockDimension3 + 0x10,0x0,0x0,0x0, // 5 bytesPlane0-bytesPlane3 + 0x0,0x0,0x0,0x0, // 6 bytesPlane4-bytesPlane7 + 0x0,0x0,0x7F,0x00, // 7 bitOffset/bitLength/channelType and Qualifer flags (KHR_DF_SAMPLE_DATATYPE_FLOAT etc.), channelID=KHR_DF_CHANNEL_ASTC_DATA + 0x0,0x0,0x0,0x0, // 8 samplePosition0-samplePosition3 + 0x0,0x0,0x0,0x0, // 9 sampleLower (0.0) + 0xFF,0xFF,0xFF,0xFF // 10 sampleHigher (0xFF) + }; + bool basis_compressor::get_dfd(uint8_vec &dfd, const basist::ktx2_header &header) { - const uint8_t* pDFD; - uint32_t dfd_len; + BASISU_NOTE_UNUSED(header); - if (m_params.m_uastc) + const uint8_t* pDFD = nullptr; + uint32_t dfd_len = 0; + + const bool is_xuastc_ldr = basis_tex_format_is_xuastc_ldr(m_fmt_mode); + const bool is_astc_ldr = basis_tex_format_is_astc_ldr(m_fmt_mode); + + // TODO: This was 
writen before m_fmt_mode existed, refactor to use that exclusively instead. + + if (is_xuastc_ldr) + { + // XUASTC LDR 4x4-12x12 + pDFD = g_ktx2_xuastc_ldr_intermediate_dfd; + dfd_len = sizeof(g_ktx2_xuastc_ldr_intermediate_dfd); + } + else if (is_astc_ldr) + { + // ASTC LDR 4x4-12x12 + pDFD = g_ktx2_astc_ldr_dfd; + dfd_len = sizeof(g_ktx2_astc_ldr_dfd); + } + else if (m_params.m_uastc) { if (m_params.m_hdr) { @@ -3302,20 +4048,26 @@ namespace basisu { case hdr_modes::cUASTC_HDR_4X4: { + assert(m_fmt_mode == basist::basis_tex_format::cUASTC_HDR_4x4); + pDFD = g_ktx2_uastc_hdr_4x4_nonalpha_dfd; dfd_len = sizeof(g_ktx2_uastc_hdr_4x4_nonalpha_dfd); break; } case hdr_modes::cASTC_HDR_6X6: { + assert(m_fmt_mode == basist::basis_tex_format::cASTC_HDR_6x6); + pDFD = g_ktx2_astc_hdr_6x6_nonalpha_dfd; dfd_len = sizeof(g_ktx2_astc_hdr_6x6_nonalpha_dfd); break; } - case hdr_modes::cASTC_HDR_6X6_INTERMEDIATE: + case hdr_modes::cUASTC_HDR_6X6_INTERMEDIATE: { - pDFD = g_ktx2_astc_hdr_6x6_intermediate_nonalpha_dfd; - dfd_len = sizeof(g_ktx2_astc_hdr_6x6_intermediate_nonalpha_dfd); + assert(m_fmt_mode == basist::basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE); + + pDFD = g_ktx2_uastc_hdr_6x6_intermediate_nonalpha_dfd; + dfd_len = sizeof(g_ktx2_uastc_hdr_6x6_intermediate_nonalpha_dfd); break; } default: @@ -3328,11 +4080,15 @@ namespace basisu // Must be LDR UASTC 4x4 else if (m_any_source_image_has_alpha) { + assert(m_fmt_mode == basist::basis_tex_format::cUASTC_LDR_4x4); + pDFD = g_ktx2_uastc_ldr_4x4_alpha_dfd; dfd_len = sizeof(g_ktx2_uastc_ldr_4x4_alpha_dfd); } else { + assert(m_fmt_mode == basist::basis_tex_format::cUASTC_LDR_4x4); + pDFD = g_ktx2_uastc_ldr_4x4_nonalpha_dfd; dfd_len = sizeof(g_ktx2_uastc_ldr_4x4_nonalpha_dfd); } @@ -3341,6 +4097,7 @@ namespace basisu { // Must be ETC1S. 
assert(!m_params.m_hdr); + assert(m_fmt_mode == basist::basis_tex_format::cETC1S); if (m_any_source_image_has_alpha) { @@ -3353,32 +4110,40 @@ namespace basisu dfd_len = sizeof(g_ktx2_etc1s_nonalpha_dfd); } } - + assert(dfd_len >= 44); dfd.resize(dfd_len); memcpy(dfd.data(), pDFD, dfd_len); + // Now modify the DFD DWORD's directly uint32_t dfd_bits = basisu::read_le_dword(dfd.data() + 3 * sizeof(uint32_t)); - // Color primaries - if ((m_params.m_hdr) && (m_params.m_astc_hdr_6x6_options.m_rec2020_bt2100_color_gamut)) + // Color primaries - TODO: Move this option outside of the m_astc_hdr_6x6_options struct. + //if ((m_params.m_hdr) && (m_params.m_astc_hdr_6x6_options.m_rec2020_bt2100_color_gamut)) + if (m_params.m_astc_hdr_6x6_options.m_rec2020_bt2100_color_gamut) { dfd_bits &= ~(0xFF << 8); dfd_bits |= (basist::KTX2_DF_PRIMARIES_BT2020 << 8); } - - // Transfer function + + // Write the transfer function (linear vs. sRGB) - crucial so any decoders/transcoders know which ASTC decoding profile was used during encoding. dfd_bits &= ~(0xFF << 16); if (m_params.m_hdr) { - // TODO: In HDR mode, always write linear for now. + if (m_params.m_ktx2_and_basis_srgb_transfer_function) + { + debug_printf("WARNING: In HDR mode but m_ktx2_and_basis_srgb_transfer_function was set to true, which is being ignored while writing the KTX2 DFD transfer function field\n"); + } + + // TODO: In HDR mode, always write linear, as a sRGB transfer function doesn't make sense for HDR. 
dfd_bits |= (basist::KTX2_KHR_DF_TRANSFER_LINEAR << 16); } else { - if (m_params.m_ktx2_srgb_transfer_func) + // set the KTX2 DFD transfer function + if (m_params.m_ktx2_and_basis_srgb_transfer_function) dfd_bits |= (basist::KTX2_KHR_DF_TRANSFER_SRGB << 16); else dfd_bits |= (basist::KTX2_KHR_DF_TRANSFER_LINEAR << 16); @@ -3386,6 +4151,9 @@ namespace basisu basisu::write_le_dword(dfd.data() + 3 * sizeof(uint32_t), dfd_bits); + // If supercompressed, manipulate the plane bits to match the khronos ktx2 tool's output + // 2/13/2026: for ETC1S, UASTC HDR 6x6i, UASTC LDR 4x4, and possibly other formats this differs now. Looks like we need to write valid plane sizes, Zstd supercompression or not. +#if 0 if (header.m_supercompression_scheme != basist::KTX2_SS_NONE) { uint32_t plane_bits = basisu::read_le_dword(dfd.data() + 5 * sizeof(uint32_t)); @@ -3394,6 +4162,7 @@ namespace basisu basisu::write_le_dword(dfd.data() + 5 * sizeof(uint32_t), plane_bits); } +#endif // Fix up the DFD channel(s) uint32_t dfd_chan0 = basisu::read_le_dword(dfd.data() + 7 * sizeof(uint32_t)); @@ -3401,16 +4170,34 @@ namespace basisu if (m_params.m_uastc) { dfd_chan0 &= ~(0xF << 24); - - // TODO: Allow the caller to override this - if (m_any_source_image_has_alpha) + + // TODO: Allow the caller to override this. Derive from swizzle? + // Only do this for UASTC LDR 4x4 or XUASTC LDR 4x4-12x12 - and now also ASTC LDR 4x4-12x12, which isn't quite standard, but we need some way of determining if the ASTC data has alpha by examining the KTX2 DFD. 
+ if ((m_any_source_image_has_alpha) && + ((m_fmt_mode == basist::basis_tex_format::cUASTC_LDR_4x4) || basist::basis_tex_format_is_xuastc_ldr(m_fmt_mode) || basis_tex_format_is_astc_ldr(m_fmt_mode))) + { dfd_chan0 |= (basist::KTX2_DF_CHANNEL_UASTC_RGBA << 24); + } else + { + // basist::KTX2_DF_CHANNEL_UASTC_RGB==0 dfd_chan0 |= (basist::KTX2_DF_CHANNEL_UASTC_RGB << 24); + } } basisu::write_le_dword(dfd.data() + 7 * sizeof(uint32_t), dfd_chan0); + if ((is_xuastc_ldr) || (is_astc_ldr)) + { + // Write XUASTC/ASTC LDR block dimensions + uint32_t texelBlockDimensions = basisu::read_le_dword(dfd.data() + 4 * sizeof(uint32_t)); + + texelBlockDimensions &= ~0xFFFF; + texelBlockDimensions |= ((m_fmt_mode_block_width - 1) | ((m_fmt_mode_block_height - 1) << 8)); + + basisu::write_le_dword(dfd.data() + 4 * sizeof(uint32_t), texelBlockDimensions); + } + return true; } @@ -3418,6 +4205,9 @@ namespace basisu { //bool needs_global_data = false; bool can_use_zstd = false; + bool is_xuastc_ldr = false; + bool is_astc_ldr = false; + bool is_hdr_6x6i = false; switch (m_fmt_mode) { @@ -3426,7 +4216,7 @@ namespace basisu //needs_global_data = true; break; } - case basist::basis_tex_format::cUASTC4x4: + case basist::basis_tex_format::cUASTC_LDR_4x4: { can_use_zstd = true; break; @@ -3441,9 +4231,49 @@ namespace basisu can_use_zstd = true; break; } - case basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE: + case basist::basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE: { //needs_global_data = true; + is_hdr_6x6i = true; + break; + } + case basist::basis_tex_format::cXUASTC_LDR_4x4: + case basist::basis_tex_format::cXUASTC_LDR_5x4: + case basist::basis_tex_format::cXUASTC_LDR_5x5: + case basist::basis_tex_format::cXUASTC_LDR_6x5: + case basist::basis_tex_format::cXUASTC_LDR_6x6: + case basist::basis_tex_format::cXUASTC_LDR_8x5: + case basist::basis_tex_format::cXUASTC_LDR_8x6: + case basist::basis_tex_format::cXUASTC_LDR_10x5: + case basist::basis_tex_format::cXUASTC_LDR_10x6: + case 
basist::basis_tex_format::cXUASTC_LDR_8x8: + case basist::basis_tex_format::cXUASTC_LDR_10x8: + case basist::basis_tex_format::cXUASTC_LDR_10x10: + case basist::basis_tex_format::cXUASTC_LDR_12x10: + case basist::basis_tex_format::cXUASTC_LDR_12x12: + { + // has built-in compression, no need for Zstd + is_xuastc_ldr = true; + break; + } + case basist::basis_tex_format::cASTC_LDR_4x4: + case basist::basis_tex_format::cASTC_LDR_5x4: + case basist::basis_tex_format::cASTC_LDR_5x5: + case basist::basis_tex_format::cASTC_LDR_6x5: + case basist::basis_tex_format::cASTC_LDR_6x6: + case basist::basis_tex_format::cASTC_LDR_8x5: + case basist::basis_tex_format::cASTC_LDR_8x6: + case basist::basis_tex_format::cASTC_LDR_10x5: + case basist::basis_tex_format::cASTC_LDR_10x6: + case basist::basis_tex_format::cASTC_LDR_8x8: + case basist::basis_tex_format::cASTC_LDR_10x8: + case basist::basis_tex_format::cASTC_LDR_10x10: + case basist::basis_tex_format::cASTC_LDR_12x10: + case basist::basis_tex_format::cASTC_LDR_12x12: + { + // plain ASTC LDR 4x4-12x12 - can use Zstd + is_astc_ldr = true; + can_use_zstd = true; break; } default: @@ -3456,7 +4286,6 @@ namespace basisu { if ((m_params.m_ktx2_uastc_supercompression != basist::KTX2_SS_NONE) && (m_params.m_ktx2_uastc_supercompression != basist::KTX2_SS_ZSTANDARD)) { - //fmt_debug_printf("HERE 2\n"); return false; } } @@ -3507,17 +4336,52 @@ namespace basisu header.m_vk_format = basist::KTX2_FORMAT_ASTC_6x6_SFLOAT_BLOCK; else { - assert(m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6_INTERMEDIATE); + assert(m_params.m_hdr_mode == hdr_modes::cUASTC_HDR_6X6_INTERMEDIATE); header.m_vk_format = basist::KTX2_VK_FORMAT_UNDEFINED; } } else { - // Either ETC1S or UASTC LDR 4x4. - assert((m_fmt_mode == basist::basis_tex_format::cETC1S) || (m_fmt_mode == basist::basis_tex_format::cUASTC4x4)); + // Either ETC1S, UASTC LDR 4x4, or XUASTC/ASTC LDR 4x4-12x12. 
+ assert((m_fmt_mode == basist::basis_tex_format::cETC1S) || (m_fmt_mode == basist::basis_tex_format::cUASTC_LDR_4x4) || is_xuastc_ldr || is_astc_ldr); + + if (is_astc_ldr) + { + // Get the correct Vulkan format (UNORM or sRGB). + uint32_t fmt = 0; + + assert((basist::KTX2_FORMAT_ASTC_4x4_UNORM_BLOCK + 1) == basist::KTX2_FORMAT_ASTC_4x4_SRGB_BLOCK); - header.m_vk_format = basist::KTX2_VK_FORMAT_UNDEFINED; + switch (m_fmt_mode) + { + case basist::basis_tex_format::cASTC_LDR_4x4: fmt = basist::KTX2_FORMAT_ASTC_4x4_UNORM_BLOCK; break; + case basist::basis_tex_format::cASTC_LDR_5x4: fmt = basist::KTX2_FORMAT_ASTC_5x4_UNORM_BLOCK; break; + case basist::basis_tex_format::cASTC_LDR_5x5: fmt = basist::KTX2_FORMAT_ASTC_5x5_UNORM_BLOCK; break; + case basist::basis_tex_format::cASTC_LDR_6x5: fmt = basist::KTX2_FORMAT_ASTC_6x5_UNORM_BLOCK; break; + case basist::basis_tex_format::cASTC_LDR_6x6: fmt = basist::KTX2_FORMAT_ASTC_6x6_UNORM_BLOCK; break; + case basist::basis_tex_format::cASTC_LDR_8x5: fmt = basist::KTX2_FORMAT_ASTC_8x5_UNORM_BLOCK; break; + case basist::basis_tex_format::cASTC_LDR_8x6: fmt = basist::KTX2_FORMAT_ASTC_8x6_UNORM_BLOCK; break; + case basist::basis_tex_format::cASTC_LDR_10x5: fmt = basist::KTX2_FORMAT_ASTC_10x5_UNORM_BLOCK; break; + case basist::basis_tex_format::cASTC_LDR_10x6: fmt = basist::KTX2_FORMAT_ASTC_10x6_UNORM_BLOCK; break; + case basist::basis_tex_format::cASTC_LDR_8x8: fmt = basist::KTX2_FORMAT_ASTC_8x8_UNORM_BLOCK; break; + case basist::basis_tex_format::cASTC_LDR_10x8: fmt = basist::KTX2_FORMAT_ASTC_10x8_UNORM_BLOCK; break; + case basist::basis_tex_format::cASTC_LDR_10x10: fmt = basist::KTX2_FORMAT_ASTC_10x10_UNORM_BLOCK; break; + case basist::basis_tex_format::cASTC_LDR_12x10: fmt = basist::KTX2_FORMAT_ASTC_12x10_UNORM_BLOCK; break; + case basist::basis_tex_format::cASTC_LDR_12x12: fmt = basist::KTX2_FORMAT_ASTC_12x12_UNORM_BLOCK; break; + default: + assert(0); + return false; + } + assert(fmt); + + header.m_vk_format = fmt + 
(m_params.m_ktx2_and_basis_srgb_transfer_function ? 1 : 0); + } + else + { + // A supercompressed format, i.e. not a standard format. + header.m_vk_format = basist::KTX2_VK_FORMAT_UNDEFINED; + } } header.m_type_size = 1; @@ -3605,7 +4469,7 @@ namespace basisu uint8_vec ktx2_global_data; - // Create ETC1S global supercompressed data + // Create global supercompressed data if (m_fmt_mode == basist::basis_tex_format::cETC1S) { basist::ktx2_etc1s_global_data_header etc1s_global_data_header; @@ -3659,10 +4523,12 @@ namespace basisu header.m_supercompression_scheme = basist::KTX2_SS_BASISLZ; } - else if (m_fmt_mode == basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE) + else if ((is_hdr_6x6i) || (is_xuastc_ldr)) { - basisu::vector image_descs(total_levels * total_layers * total_faces); - memset((void *)image_descs.data(), 0, image_descs.size_in_bytes()); + // The global data for UASTC HDR 6x6 INTERMEDIATE and XUASTC LDR is an array of ktx2_slice_offset_len_desc_std's, which the transcoder needs to locate the variable length compressed slice data. 
+ // Note: The original v2.0 release used ktx2_slice_offset_len_desc_orig's + basisu::vector slice_offset_len_descs(total_levels * total_layers * total_faces); + memset((void *)slice_offset_len_descs.data(), 0, slice_offset_len_descs.size_in_bytes()); for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) { @@ -3680,28 +4546,84 @@ namespace basisu const uint32_t output_image_index = level_index * (total_layers * total_faces) + layer_index * total_faces + face_index; - image_descs[output_image_index].m_rgb_slice_byte_length = m_uastc_backend_output.m_slice_image_data[slice_index].size(); - image_descs[output_image_index].m_rgb_slice_byte_offset = slice_level_offsets[slice_index]; + slice_offset_len_descs[output_image_index].m_slice_byte_length = m_uastc_backend_output.m_slice_image_data[slice_index].size(); + slice_offset_len_descs[output_image_index].m_slice_byte_offset = slice_level_offsets[slice_index]; + + uint32_t profile = 0; + if (is_hdr_6x6i) + { + assert(m_uastc_backend_output.m_slice_image_data[slice_index].size() >= 2); + + if (m_uastc_backend_output.m_slice_image_data[slice_index].size() >= 2) + { + // First LE16 is the marker/profile version + profile = m_uastc_backend_output.m_slice_image_data[slice_index][0] | (m_uastc_backend_output.m_slice_image_data[slice_index][1] << 8); + } + } + else + { + assert(is_xuastc_ldr); + assert(m_uastc_backend_output.m_slice_image_data[slice_index].size() >= 1); + + if (m_uastc_backend_output.m_slice_image_data[slice_index].size() >= 1) + { + // First byte is always the profile index (Zstd, hybrid, arithmetic etc.) 
+ profile = m_uastc_backend_output.m_slice_image_data[slice_index][0] | (0x01 << 8); // TODO high byte is the XUASTC LDR codec variant index, currently hardcoded to 1 until we have an internal query/introspection API for this + } + } + + slice_offset_len_descs[output_image_index].m_profile = profile; } // slice_index - append_vector(ktx2_global_data, (const uint8_t*)image_descs.data(), image_descs.size_in_bytes()); + append_vector(ktx2_global_data, (const uint8_t*)slice_offset_len_descs.data(), slice_offset_len_descs.size_in_bytes()); + + // Note v2.0 would always write BASISLZ for the supercompression scheme. KTX-Software changes this, and we need to be compatible. + //header.m_supercompression_scheme = basist::KTX2_SS_BASISLZ; - header.m_supercompression_scheme = basist::KTX2_SS_BASISLZ; + header.m_supercompression_scheme = is_hdr_6x6i ? basist::KTX2_SS_UASTC_HDR_6x6I : basist::KTX2_SS_XUASTC_LDR; } - + // Key values basist::ktx2_transcoder::key_value_vec key_values(m_params.m_ktx2_key_values); - + basist::ktx2_add_key_value(key_values, "KTXwriter", fmt_string("Basis Universal {}", BASISU_LIB_VERSION_STRING)); if (m_params.m_hdr) { if (m_upconverted_any_ldr_images) + { basist::ktx2_add_key_value(key_values, "LDRUpconversionMultiplier", fmt_string("{}", m_ldr_to_hdr_upconversion_nit_multiplier)); - if (m_params.m_ldr_hdr_upconversion_srgb_to_linear) - basist::ktx2_add_key_value(key_values, "LDRUpconversionSRGBToLinear", "1"); + if (m_params.m_ldr_hdr_upconversion_srgb_to_linear) + basist::ktx2_add_key_value(key_values, "LDRUpconversionSRGBToLinear", "1"); + } + + // Always write the scale to simplify testing. 
+ //if (m_hdr_image_scale != 1.0f) + { + // add "KTXmapRange" key value + struct ktx_map_range + { + packed_uint<4> m_scale; + packed_uint<4> m_offset; + }; + + ktx_map_range val; + val.m_scale = *reinterpret_cast(&m_hdr_image_scale); + val.m_offset = 0; + + auto* pNew_key = key_values.enlarge(1); + + const char* pKey_name = "KTXmapRange"; + size_t key_name_len = strlen(pKey_name) + 1; + + pNew_key->m_key.resize(key_name_len); + memcpy(pNew_key->m_key.data(), pKey_name, key_name_len); + + pNew_key->m_value.resize(sizeof(val)); + memcpy(pNew_key->m_value.data(), &val, sizeof(val)); + } } key_values.sort(); @@ -3713,11 +4635,10 @@ namespace basisu uint8_vec key_value_data; - // DFD + // DFD (Data Format Descriptor) uint8_vec dfd; if (!get_dfd(dfd, header)) { - //fmt_debug_printf("HERE 7\n"); return false; } @@ -3729,20 +4650,17 @@ namespace basisu { if (key_values[i].m_key.size() < 2) { - //fmt_debug_printf("HERE 8\n"); return false; } if (key_values[i].m_key.back() != 0) { - //fmt_debug_printf("HERE 9\n"); return false; } const uint64_t total_len = (uint64_t)key_values[i].m_key.size() + (uint64_t)key_values[i].m_value.size(); if (total_len >= UINT32_MAX) { - //fmt_debug_printf("HERE 10\n"); return false; } @@ -3764,7 +4682,7 @@ namespace basisu #if BASISU_DISABLE_KTX2_ALIGNMENT_WORKAROUND break; #endif - + // Hack to ensure the KVD block ends on a 16 byte boundary, because we have no other official way of aligning the data. uint32_t kvd_end_file_offset = kvd_file_offset + (uint32_t)key_value_data.size(); uint32_t bytes_needed_to_pad = (16 - (kvd_end_file_offset & 15)) & 15; @@ -3777,22 +4695,21 @@ namespace basisu assert(!pass); if (pass) { - //fmt_debug_printf("HERE 11\n"); return false; } if (bytes_needed_to_pad < 6) bytes_needed_to_pad += 16; - // Just add the padding. It's likely not necessary anymore, but can't really hurt. + // Just add the padding. It's likely not necessary anymore, but can't really hurt other than a tiny increase in file size. 
//printf("WARNING: Due to a KTX2 validator bug related to mipPadding, we must insert a dummy key into the KTX2 file of %u bytes\n", bytes_needed_to_pad); - - // We're not good - need to add a dummy key large enough to force file alignment so the mip level array gets aligned. + + // We're not good - need to add a dummy key large enough to force file alignment so the mip level array gets aligned. // We can't just add some bytes before the mip level array because ktx2check will see that as extra data in the file that shouldn't be there in ktxValidator::validateDataSize(). key_values.enlarge(1); for (uint32_t i = 0; i < (bytes_needed_to_pad - 4 - 1 - 1); i++) key_values.back().m_key.push_back(127); - + key_values.back().m_key.push_back(0); key_values.back().m_value.push_back(0); @@ -3800,13 +4717,13 @@ namespace basisu key_values.sort(); key_value_data.resize(0); - + // Try again } basisu::vector level_index_array(total_levels); memset((void *)level_index_array.data(), 0, level_index_array.size_in_bytes()); - + m_output_ktx2_file.clear(); m_output_ktx2_file.reserve(m_output_basis_file.size()); @@ -3815,8 +4732,8 @@ namespace basisu // Level index array append_vector(m_output_ktx2_file, (const uint8_t*)level_index_array.data(), level_index_array.size_in_bytes()); - - // DFD + + // Write DFD const uint8_t* pDFD = dfd.data(); uint32_t dfd_len = (uint32_t)dfd.size(); @@ -3824,7 +4741,7 @@ namespace basisu header.m_dfd_byte_length = dfd_len; append_vector(m_output_ktx2_file, pDFD, dfd_len); - // Key value data + // Write Key value data if (key_value_data.size()) { assert(kvd_file_offset == m_output_ktx2_file.size()); @@ -3834,7 +4751,7 @@ namespace basisu append_vector(m_output_ktx2_file, key_value_data); } - // Global Supercompressed Data + // Write Global Supercompressed Data if (ktx2_global_data.size()) { uint32_t ofs = m_output_ktx2_file.size() & 7; @@ -3848,14 +4765,13 @@ namespace basisu append_vector(m_output_ktx2_file, ktx2_global_data); } - // mipPadding + // Write 
mipPadding if (header.m_supercompression_scheme == basist::KTX2_SS_NONE) { - // We currently can't do this or the validator will incorrectly give an error. uint32_t ofs = m_output_ktx2_file.size() & 15; uint32_t padding = (16 - ofs) & 15; - // Make sure we're always aligned here (due to a validator bug). + // Make sure we're always aligned here (due to an old validator bug, which has been fixed). if (padding) { printf("Warning: KTX2 mip level data is not 16-byte aligned. This may trigger a ktx2check validation bug. Writing %u bytes of mipPadding.\n", padding); @@ -3869,8 +4785,7 @@ namespace basisu for (int level = total_levels - 1; level >= 0; level--) { level_index_array[level].m_byte_length = compressed_level_data_bytes[level].size(); - - //if (m_params.m_uastc) + if (can_use_zstd) { level_index_array[level].m_uncompressed_byte_length = level_data_bytes[level].size(); @@ -3879,7 +4794,7 @@ namespace basisu level_index_array[level].m_byte_offset = m_output_ktx2_file.size(); append_vector(m_output_ktx2_file, compressed_level_data_bytes[level]); } - + // Write final header memcpy(m_output_ktx2_file.data(), &header, sizeof(header)); @@ -3894,7 +4809,10 @@ namespace basisu total_orig_pixels += slice_desc.m_orig_width * slice_desc.m_orig_height; } - debug_printf("Total .ktx2 output file size: %u, %3.3f bits/texel\n", m_output_ktx2_file.size(), ((float)m_output_ktx2_file.size() * 8.0f) / total_orig_pixels); + m_ktx2_file_size = m_output_ktx2_file.size(); + m_ktx2_bits_per_texel = total_orig_pixels ? 
(m_ktx2_file_size * 8.0f) / total_orig_pixels : 0; + + fmt_debug_printf("Total .ktx2 output file size: {}, {3.3} bits/texel\n", m_ktx2_file_size, m_ktx2_bits_per_texel); return true; } @@ -3921,7 +4839,7 @@ namespace basisu std::atomic result; result.store(true); - + std::atomic opencl_failed; opencl_failed.store(false); @@ -3936,19 +4854,19 @@ namespace basisu tm.start(); basis_compressor c; - + // Dummy job pool job_pool task_jpool(1); params.m_pJob_pool = &task_jpool; // TODO: Remove this flag entirely - params.m_multithreading = true; - + params.m_multithreading = true; + // Stop using OpenCL if a failure ever occurs. if (opencl_failed) params.m_use_opencl = false; bool status = c.init(params); - + if (c.get_opencl_failed()) opencl_failed.store(true); @@ -3977,7 +4895,7 @@ namespace basisu else { results.m_error_code = basis_compressor::cECFailedInitializing; - + result = false; } @@ -3994,32 +4912,58 @@ namespace basisu return result; } - static void* basis_compress( + void* basis_compress_internal( basist::basis_tex_format mode, - const basisu::vector *pSource_images, - const basisu::vector *pSource_images_hdr, - uint32_t flags_and_quality, float uastc_rdo_quality, + const basisu::vector* pSource_images, + const basisu::vector* pSource_images_hdr, + uint32_t flags_and_quality, float uastc_rdo_or_dct_quality, size_t* pSize, - image_stats* pStats) + image_stats* pStats, + int quality_level, int effort_level) { assert((pSource_images != nullptr) || (pSource_images_hdr != nullptr)); assert(!((pSource_images != nullptr) && (pSource_images_hdr != nullptr))); + if ((quality_level != -1) && (uastc_rdo_or_dct_quality != 0.0f)) + { + fmt_debug_printf("basis_compress_internal: quality_level is not -1, but uastc_rdo_or_dct_quality isn't 0!\n"); + + // Can't use both old and new-style quality control methods + uastc_rdo_or_dct_quality = 0.0f; + } + + if (!pSize) + { + error_printf("basis_compress: Need pSize parameter!\n"); + assert(0); + return nullptr; + } + + // Can't 
provide both LDR and HDR images + if ( ((pSource_images) && (pSource_images->size() != 0)) && + ((pSource_images_hdr) && (pSource_images_hdr->size() != 0)) + ) + { + error_printf("basis_compress: Can't provide both LDR and HDR source images!\n"); + assert(0); + return nullptr; + } + // Check input parameters if (pSource_images) { - if ((!pSource_images->size()) || (!pSize)) + if (!pSource_images->size()) { - error_printf("basis_compress: Invalid parameter\n"); + error_printf("basis_compress: No source LDR images\n"); assert(0); return nullptr; } } else { - if ((!pSource_images_hdr->size()) || (!pSize)) + if (!pSource_images_hdr->size()) { - error_printf("basis_compress: Invalid parameter\n"); + error_printf("basis_compress: No source HDR images\n"); assert(0); return nullptr; } @@ -4030,100 +4974,199 @@ namespace basisu // Initialize a job pool uint32_t num_threads = 1; if (flags_and_quality & cFlagThreaded) - num_threads = basisu::maximum(1, std::thread::hardware_concurrency()); + num_threads = basisu::maximum(1, get_num_hardware_threads()); job_pool jp(num_threads); // Initialize the compressor parameter struct basis_compressor_params comp_params; + + // Set the codec (basist::basis_tex_format) we'll be using. comp_params.set_format_mode(mode); comp_params.m_pJob_pool = &jp; comp_params.m_y_flip = (flags_and_quality & cFlagYFlip) != 0; + + // Set debug related parameters comp_params.m_debug = (flags_and_quality & cFlagDebug) != 0; comp_params.m_debug_images = (flags_and_quality & cFlagDebugImages) != 0; - // Copy the largest mipmap level - if (pSource_images) + // Set texture type: 2D, 2D array, cubemap array etc. 
+ comp_params.m_tex_type = (basist::basis_texture_type)((flags_and_quality >> cFlagTextureTypeShift) & cFlagTextureTypeMask); + + if (comp_params.m_tex_type != basist::basis_texture_type::cBASISTexType2D) { - comp_params.m_source_images.resize(1); - comp_params.m_source_images[0] = (*pSource_images)[0]; - - // Copy the smaller mipmap levels, if any - if (pSource_images->size() > 1) + // 2D array, cubemap array, or texture video. Assume any extra images the user has supplied are actually cubemap faces, or array layers, or texture video frames. + // We assume the dimensions are correct here and let the compressor validate them. + // TODO: This simplified API doesn't allow the user to also specify the mipmap levels here. + if (pSource_images) { - comp_params.m_source_mipmap_images.resize(1); - comp_params.m_source_mipmap_images[0].resize(pSource_images->size() - 1); - - for (uint32_t i = 1; i < pSource_images->size(); i++) - comp_params.m_source_mipmap_images[0][i - 1] = (*pSource_images)[i]; + for (uint32_t i = 0; i < pSource_images->size(); i++) + comp_params.m_source_images.push_back((*pSource_images)[i]); + } + else + { + for (uint32_t i = 0; i < pSource_images_hdr->size(); i++) + comp_params.m_source_images_hdr.push_back((*pSource_images_hdr)[i]); } } else { - comp_params.m_source_images_hdr.resize(1); - comp_params.m_source_images_hdr[0] = (*pSource_images_hdr)[0]; + // Plain 2D mode. Assume any extra images the user has supplied are precomputed mipmap levels of the correct dimensions. + // Copy the largest mipmap level and mipmaps. We assume the dimensions are correct here and let the compressor validate them. 
+ if (pSource_images) + { + comp_params.m_source_images.resize(1); + comp_params.m_source_images[0] = (*pSource_images)[0]; + + // Copy the smaller mipmap levels, if any + if (pSource_images->size() > 1) + { + comp_params.m_source_mipmap_images.resize(1); + comp_params.m_source_mipmap_images[0].resize(pSource_images->size() - 1); - // Copy the smaller mipmap levels, if any - if (pSource_images_hdr->size() > 1) + for (uint32_t i = 1; i < pSource_images->size(); i++) + comp_params.m_source_mipmap_images[0][i - 1] = (*pSource_images)[i]; + } + } + else { - comp_params.m_source_mipmap_images_hdr.resize(1); - comp_params.m_source_mipmap_images_hdr[0].resize(pSource_images_hdr->size() - 1); + comp_params.m_source_images_hdr.resize(1); + comp_params.m_source_images_hdr[0] = (*pSource_images_hdr)[0]; - for (uint32_t i = 1; i < pSource_images->size(); i++) - comp_params.m_source_mipmap_images_hdr[0][i - 1] = (*pSource_images_hdr)[i]; + // Copy the smaller mipmap levels, if any + if (pSource_images_hdr->size() > 1) + { + comp_params.m_source_mipmap_images_hdr.resize(1); + comp_params.m_source_mipmap_images_hdr[0].resize(pSource_images_hdr->size() - 1); + + for (uint32_t i = 1; i < pSource_images_hdr->size(); i++) + comp_params.m_source_mipmap_images_hdr[0][i - 1] = (*pSource_images_hdr)[i]; + } } } - + comp_params.m_multithreading = (flags_and_quality & cFlagThreaded) != 0; comp_params.m_use_opencl = (flags_and_quality & cFlagUseOpenCL) != 0; comp_params.m_write_output_basis_or_ktx2_files = false; - comp_params.m_perceptual = (flags_and_quality & cFlagSRGB) != 0; - comp_params.m_mip_srgb = comp_params.m_perceptual; + // sRGB handling - set parameters consistently + // sRGB here controls the error metrics, KTX2/.basis transfer function fields, and mipmap filtering + const bool srgb_flag = (flags_and_quality & cFlagSRGB) != 0; + + // Use sRGB colorspace metrics, channel weights + comp_params.m_perceptual = srgb_flag; + + // This will be written to the KTX2 DFD, .basis file header,
also controls the ASTC profile decoding mode for ASTC LDR 4x4 - 12x12 and XUASTC LDR 4x4 - 12x12. + comp_params.m_ktx2_and_basis_srgb_transfer_function = srgb_flag; + + // Correct for sRGB transfer function during mipmapping + comp_params.m_mip_srgb = srgb_flag; + comp_params.m_mip_gen = (flags_and_quality & (cFlagGenMipsWrap | cFlagGenMipsClamp)) != 0; comp_params.m_mip_wrapping = (flags_and_quality & cFlagGenMipsWrap) != 0; - if (mode == basist::basis_tex_format::cUASTC4x4) + if (mode == basist::basis_tex_format::cUASTC_LDR_4x4) { + // Set pack level from flags comp_params.m_pack_uastc_ldr_4x4_flags = flags_and_quality & cPackUASTCLevelMask; - comp_params.m_rdo_uastc_ldr_4x4 = (flags_and_quality & cFlagUASTCRDO) != 0; - comp_params.m_rdo_uastc_ldr_4x4_quality_scalar = uastc_rdo_quality; + + // Now optionally enable UASTC LDR 4x4 RDO. + // We used to look at the (flags_and_quality & cFlagUASTCRDO) != 0; flag to determine if we'll be using RDO here. + // The flag isn't necessary, we'll now just examine uastc_rdo_or_dct_quality and decide to enable it. + if (uastc_rdo_or_dct_quality > 0.0f) + { + comp_params.m_rdo_uastc_ldr_4x4 = true; + comp_params.m_rdo_uastc_ldr_4x4_quality_scalar = uastc_rdo_or_dct_quality; + } } else if (mode == basist::basis_tex_format::cETC1S) { - comp_params.m_etc1s_quality_level = basisu::maximum(1, flags_and_quality & 255); + // Set ETC1S quality level (codebook sizes) from flags. + comp_params.m_quality_level = basisu::maximum(1, flags_and_quality & 255); } + else if (basist::basis_tex_format_is_xuastc_ldr(mode) || basist::basis_tex_format_is_astc_ldr(mode)) + { + // Set ASTC LDR/UASTC LDR 4x4-12x12 effort level + comp_params.m_xuastc_ldr_effort_level = flags_and_quality & 255; + + // Optionally enable weight grid DCT for XUASTC. + // Valid XUASTC LDR weight grid DCT quality levels are 1-100. 
+ if (basist::basis_tex_format_is_xuastc_ldr(mode) && (uastc_rdo_or_dct_quality != 0.0f)) + { + if ((uastc_rdo_or_dct_quality >= (float)BASISU_XUASTC_QUALITY_MIN) && (uastc_rdo_or_dct_quality <= (float)BASISU_XUASTC_QUALITY_MAX)) + { + if (uastc_rdo_or_dct_quality < (float)BASISU_XUASTC_QUALITY_MAX) + { + // Enable weight grid DCT usage, set quality level. + comp_params.m_xuastc_ldr_use_dct = true; + comp_params.m_quality_level = (int)uastc_rdo_or_dct_quality; - comp_params.m_create_ktx2_file = (flags_and_quality & cFlagKTX2) != 0; + // Also enable bounded lossy distortion mode in the normally lossless supercompressor for extra savings. + comp_params.m_xuastc_ldr_use_lossy_supercompression = true; + } + } + else + { + // Invalid quality level + assert(0); + return nullptr; + } + } + if (basist::basis_tex_format_is_xuastc_ldr(mode)) + { + // Set XUASTC LDR syntax + comp_params.m_xuastc_ldr_syntax = (flags_and_quality >> cFlagXUASTCLDRSyntaxShift) & cFlagXUASTCLDRSyntaxMask; + if (comp_params.m_xuastc_ldr_syntax >= (int)basist::astc_ldr_t::xuastc_ldr_syntax::cTotal) + { + error_printf("basis_compress: basis_compressor::init() failed - invalid XUASTC LDR syntax\n"); + return nullptr; + } + } + } + + comp_params.m_create_ktx2_file = (flags_and_quality & cFlagKTX2) != 0; + if (comp_params.m_create_ktx2_file) { // Set KTX2 specific parameters. 
if ((flags_and_quality & cFlagKTX2UASTCSuperCompression) && (comp_params.m_uastc)) comp_params.m_ktx2_uastc_supercompression = basist::KTX2_SS_ZSTANDARD; - - comp_params.m_ktx2_srgb_transfer_func = comp_params.m_perceptual; } - + comp_params.m_compute_stats = (pStats != nullptr); comp_params.m_print_stats = (flags_and_quality & cFlagPrintStats) != 0; comp_params.m_status_output = (flags_and_quality & cFlagPrintStatus) != 0; if (mode == basist::basis_tex_format::cUASTC_HDR_4x4) { + // Set UASTC HDR 4x4 effort level comp_params.m_uastc_hdr_4x4_options.set_quality_level(flags_and_quality & cPackUASTCLevelMask); } - else if ((mode == basist::basis_tex_format::cASTC_HDR_6x6) || (mode == basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE)) + else if ((mode == basist::basis_tex_format::cASTC_HDR_6x6) || (mode == basist::basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE)) { + // Set ASTC HDR 6x6/UASTC HDR 6x6 effort level comp_params.m_astc_hdr_6x6_options.set_user_level(flags_and_quality & cPackUASTCLevelMask); - comp_params.m_astc_hdr_6x6_options.m_lambda = uastc_rdo_quality; - comp_params.m_astc_hdr_6x6_options.m_rec2020_bt2100_color_gamut = (flags_and_quality & cFlagREC2020) != 0; + + // Set lambda (rate-distortion tradeoff) + comp_params.m_astc_hdr_6x6_options.m_lambda = uastc_rdo_or_dct_quality; } + // TODO: REC2020 isn't specific to HDR 6x6 anymore, it's always used for KTX2 files. + // This will be written to the KTX2 DFD. + comp_params.m_astc_hdr_6x6_options.m_rec2020_bt2100_color_gamut = (flags_and_quality & cFlagREC2020) != 0; + comp_params.m_validate_output_data = (flags_and_quality & cFlagValidateOutput) != 0; + + // Now set the unified quality/effort level, if they've specified it. + // This will override some of the lower-level options set above, or leave them alone if -1. 
+ if ((quality_level != -1) || (effort_level != -1)) + { + comp_params.set_format_mode_and_quality_effort(mode, quality_level, effort_level, false); + } // Create the compressor, initialize it, and process the input basis_compressor comp; @@ -4156,6 +5199,7 @@ namespace basisu error_printf("basis_compress: Out of memory\n"); return nullptr; } + memcpy(pFile_data, pFile_data_vec->get_ptr(), pFile_data_vec->size()); *pSize = pFile_data_vec->size(); @@ -4171,27 +5215,47 @@ namespace basisu void* basis_compress( basist::basis_tex_format mode, const basisu::vector& source_images, - uint32_t flags_and_quality, float uastc_rdo_quality, + uint32_t flags_and_quality, float uastc_rdo_or_dct_quality, + size_t* pSize, + image_stats* pStats) + { + return basis_compress_internal(mode, &source_images, nullptr, flags_and_quality, uastc_rdo_or_dct_quality, pSize, pStats, -1, -1); + } + + void* basis_compress2( + basist::basis_tex_format mode, + const basisu::vector& source_images, + uint32_t flags_and_quality, int quality_level, int effort_level, size_t* pSize, image_stats* pStats) { - return basis_compress(mode, &source_images, nullptr, flags_and_quality, uastc_rdo_quality, pSize, pStats); + return basis_compress_internal(mode, &source_images, nullptr, flags_and_quality, 0.0f, pSize, pStats, quality_level, effort_level); } void* basis_compress( basist::basis_tex_format mode, const basisu::vector& source_images_hdr, - uint32_t flags_and_quality, float lambda, + uint32_t flags_and_quality, float uastc_rdo_or_dct_quality, + size_t* pSize, + image_stats* pStats) + { + return basis_compress_internal(mode, nullptr, &source_images_hdr, flags_and_quality, uastc_rdo_or_dct_quality, pSize, pStats, -1, -1); + } + + void* basis_compress2( + basist::basis_tex_format mode, + const basisu::vector& source_images_hdr, + uint32_t flags_and_quality, int quality_level, int effort_level, size_t* pSize, image_stats* pStats) { - return basis_compress(mode, nullptr, &source_images_hdr, flags_and_quality, 
lambda, pSize, pStats); + return basis_compress_internal(mode, nullptr, &source_images_hdr, flags_and_quality, 0.0f, pSize, pStats, quality_level, effort_level); } void* basis_compress( basist::basis_tex_format mode, const uint8_t* pImageRGBA, uint32_t width, uint32_t height, uint32_t pitch_in_pixels, - uint32_t flags_and_quality, float uastc_rdo_quality, + uint32_t flags_and_quality, float uastc_rdo_or_dct_quality, size_t* pSize, image_stats* pStats) { @@ -4219,9 +5283,43 @@ namespace basisu for (uint32_t y = 0; y < height; y++) memcpy(source_image[0].get_ptr() + y * width, (const color_rgba*)pImageRGBA + y * pitch_in_pixels, width * sizeof(color_rgba)); - return basis_compress(mode, source_image, flags_and_quality, uastc_rdo_quality, pSize, pStats); + return basis_compress(mode, source_image, flags_and_quality, uastc_rdo_or_dct_quality, pSize, pStats); } + void* basis_compress2( + basist::basis_tex_format mode, + const uint8_t* pImageRGBA, uint32_t width, uint32_t height, uint32_t pitch_in_pixels, + uint32_t flags_and_quality, int quality_level, int effort_level, + size_t* pSize, + image_stats* pStats) + { + if (!pitch_in_pixels) + pitch_in_pixels = width; + + if ((!pImageRGBA) || (!width) || (!height) || (pitch_in_pixels < width) || (!pSize)) + { + error_printf("basis_compress: Invalid parameter\n"); + assert(0); + return nullptr; + } + + *pSize = 0; + + if ((width > BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION) || (height > BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION)) + { + error_printf("basis_compress: Image too large\n"); + return nullptr; + } + + // Copy the source image + basisu::vector source_image(1); + source_image[0].crop(width, height, width, g_black_color, false); + for (uint32_t y = 0; y < height; y++) + memcpy(source_image[0].get_ptr() + y * width, (const color_rgba*)pImageRGBA + y * pitch_in_pixels, width * sizeof(color_rgba)); + + return basis_compress2(mode, source_image, flags_and_quality, quality_level, effort_level, pSize, pStats); + } + void 
basis_free_data(void* p) { free(p); @@ -4241,7 +5339,7 @@ namespace basisu const uint32_t W = 1024, H = 1024; basisu::vector images; image& img = images.enlarge(1)->resize(W, H); - + const uint32_t NUM_RAND_LETTERS = 6000;// 40000; rand r; @@ -4283,7 +5381,7 @@ namespace basisu error_printf("basis_benchmark_etc1s_opencl: basis_compress() failed (CPU)!\n"); return false; } - + best_cpu_time = minimum(best_cpu_time, cpu_time); basis_free_data(pComp_data); @@ -4326,8 +5424,11 @@ namespace basisu } printf("Best GPU time: %3.3f\n", best_gpu_time); - + return best_gpu_time < best_cpu_time; } } // namespace basisu + + + diff --git a/external/basis_universal/encoder/basisu_comp.h b/external/basis_universal/encoder/basisu_comp.h index e761eacf7f..f03bf59388 100644 --- a/external/basis_universal/encoder/basisu_comp.h +++ b/external/basis_universal/encoder/basisu_comp.h @@ -1,5 +1,5 @@ // basisu_comp.h -// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2026 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,9 +20,10 @@ #include "basisu_uastc_enc.h" #include "basisu_uastc_hdr_4x4_enc.h" #include "basisu_astc_hdr_6x6_enc.h" +#include "basisu_astc_ldr_encode.h" -#define BASISU_LIB_VERSION 160 -#define BASISU_LIB_VERSION_STRING "1.60" +#define BASISU_LIB_VERSION 210 +#define BASISU_LIB_VERSION_STRING "2.10" #ifndef BASISD_SUPPORT_KTX2 #error BASISD_SUPPORT_KTX2 is undefined @@ -43,20 +44,27 @@ namespace basisu const uint32_t BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION = 16384; // Allow block's color distance to increase by 1.5 while searching for an alternative nearby endpoint. - const float BASISU_DEFAULT_ENDPOINT_RDO_THRESH = 1.5f; - + const float BASISU_DEFAULT_ENDPOINT_RDO_THRESH = 1.5f; + // Allow block's color distance to increase by 1.25 while searching the selector history buffer for a close enough match. 
- const float BASISU_DEFAULT_SELECTOR_RDO_THRESH = 1.25f; + const float BASISU_DEFAULT_SELECTOR_RDO_THRESH = 1.25f; const int BASISU_DEFAULT_QUALITY = 128; const float BASISU_DEFAULT_HYBRID_SEL_CB_QUALITY_THRESH = 2.0f; const uint32_t BASISU_MAX_IMAGE_DIMENSION = 16384; - const uint32_t BASISU_QUALITY_MIN = 1; + + // The original ETC1S specific (non-unified) quality level + const uint32_t BASISU_QUALITY_MIN = 1; // note 0 is also technically valid in the code/API for ETC1S; the difference in quality is tiny (both result in very small codebooks) const uint32_t BASISU_QUALITY_MAX = 255; + const uint32_t BASISU_MAX_ENDPOINT_CLUSTERS = basisu_frontend::cMaxEndpointClusters; const uint32_t BASISU_MAX_SELECTOR_CLUSTERS = basisu_frontend::cMaxSelectorClusters; + // [1,100] are also the valid unified quality levels + const uint32_t BASISU_XUASTC_QUALITY_MIN = 1; + const uint32_t BASISU_XUASTC_QUALITY_MAX = 100; + const uint32_t BASISU_MAX_SLICES = 0xFFFFFF; const int BASISU_RDO_UASTC_DICT_SIZE_DEFAULT = 4096; // 32768; @@ -75,7 +83,7 @@ namespace basisu m_filename.clear(); m_width = 0; m_height = 0; - + m_basis_rgb_avg_psnr = 0.0f; m_basis_rgb_avg_log2_psnr = 0.0f; @@ -94,7 +102,7 @@ namespace basisu m_bc7_luma_709_psnr = 0.0f; m_bc7_luma_601_psnr = 0.0f; m_bc7_luma_709_ssim = 0.0f; - + m_best_etc1s_rgb_avg_psnr = 0.0f; m_best_etc1s_luma_709_psnr = 0.0f; m_best_etc1s_luma_601_psnr = 0.0f; @@ -128,8 +136,8 @@ namespace basisu float m_bc7_luma_709_psnr; float m_bc7_luma_601_psnr; float m_bc7_luma_709_ssim; - - // LDR: Highest achievable quality ETC1S statistics + + // LDR: Highest achievable quality ETC1S statistics, for development/comparison float m_best_etc1s_rgb_avg_psnr; float m_best_etc1s_luma_709_psnr; float m_best_etc1s_luma_601_psnr; @@ -141,11 +149,11 @@ namespace basisu enum class hdr_modes { // standard but constrained ASTC HDR 4x4 tex data that can be rapidly transcoded to BC6H - cUASTC_HDR_4X4, + cUASTC_HDR_4X4, // standard RDO optimized or non-RDO (highest 
quality) ASTC HDR 6x6 tex data that can be rapidly re-encoded to BC6H cASTC_HDR_6X6, // a custom intermediate format based off ASTC HDR that can be rapidly decoded straight to ASTC HDR or re-encoded to BC6H - cASTC_HDR_6X6_INTERMEDIATE, + cUASTC_HDR_6X6_INTERMEDIATE, cTotal }; @@ -230,17 +238,21 @@ namespace basisu bool m_changed; }; + // Low-level direct compressor parameters. + // Also see basis_compress() below for a simplified C-style interface. struct basis_compressor_params { basis_compressor_params() : - m_compression_level((int)BASISU_DEFAULT_COMPRESSION_LEVEL, 0, (int)BASISU_MAX_COMPRESSION_LEVEL), + m_xuastc_or_astc_ldr_basis_tex_format(-1, -1, INT_MAX), + // Note the ETC1S default compression/effort level is 2, not the command line default of 1. + m_etc1s_compression_level((int)BASISU_DEFAULT_ETC1S_COMPRESSION_LEVEL, 0, (int)BASISU_MAX_ETC1S_COMPRESSION_LEVEL), m_selector_rdo_thresh(BASISU_DEFAULT_SELECTOR_RDO_THRESH, 0.0f, 1e+10f), m_endpoint_rdo_thresh(BASISU_DEFAULT_ENDPOINT_RDO_THRESH, 0.0f, 1e+10f), m_mip_scale(1.0f, .000125f, 4.0f), m_mip_smallest_dimension(1, 1, 16384), - m_etc1s_max_endpoint_clusters(512), - m_etc1s_max_selector_clusters(512), - m_etc1s_quality_level(-1), + m_etc1s_max_endpoint_clusters(0), + m_etc1s_max_selector_clusters(0), + m_quality_level(-1), m_pack_uastc_ldr_4x4_flags(cPackUASTCLevelDefault), m_rdo_uastc_ldr_4x4_quality_scalar(1.0f, 0.001f, 50.0f), m_rdo_uastc_ldr_4x4_dict_size(BASISU_RDO_UASTC_DICT_SIZE_DEFAULT, BASISU_RDO_UASTC_DICT_SIZE_MIN, BASISU_RDO_UASTC_DICT_SIZE_MAX), @@ -253,8 +265,14 @@ namespace basisu m_resample_factor(0.0f, .00125f, 100.0f), m_ktx2_uastc_supercompression(basist::KTX2_SS_NONE), m_ktx2_zstd_supercompression_level(6, INT_MIN, INT_MAX), + m_transcode_flags(0, 0, UINT32_MAX), m_ldr_hdr_upconversion_nit_multiplier(0.0f, 0.0f, basist::MAX_HALF_FLOAT), m_ldr_hdr_upconversion_black_bias(0.0f, 0.0f, 1.0f), + m_xuastc_ldr_effort_level(astc_ldr::EFFORT_LEVEL_DEF, astc_ldr::EFFORT_LEVEL_MIN, 
astc_ldr::EFFORT_LEVEL_MAX), + m_xuastc_ldr_syntax((int)basist::astc_ldr_t::xuastc_ldr_syntax::cFullZStd, (int)basist::astc_ldr_t::xuastc_ldr_syntax::cFullArith, (int)basist::astc_ldr_t::xuastc_ldr_syntax::cFullZStd), + m_ls_min_psnr(35.0f, 0.0f, 100.0f), m_ls_min_alpha_psnr(38.0f, 0.0f, 100.0f), + m_ls_thresh_psnr(1.5f, 0.0f, 100.0f), m_ls_thresh_alpha_psnr(0.75f, 0.0f, 100.0f), + m_ls_thresh_edge_psnr(1.0f, 0.0f, 100.00f), m_ls_thresh_edge_alpha_psnr(0.5f, 0.0f, 100.00f), m_pJob_pool(nullptr) { clear(); @@ -262,9 +280,12 @@ namespace basisu void clear() { + m_format_mode = basist::basis_tex_format::cETC1S; + m_uastc.clear(); m_hdr.clear(); m_hdr_mode = hdr_modes::cUASTC_HDR_4X4; + m_xuastc_or_astc_ldr_basis_tex_format = -1; m_use_opencl.clear(); m_status_output.clear(); @@ -286,7 +307,7 @@ namespace basisu m_selector_rdo_thresh.clear(); m_read_source_images.clear(); m_write_output_basis_or_ktx2_files.clear(); - m_compression_level.clear(); + m_etc1s_compression_level.clear(); m_compute_stats.clear(); m_print_stats.clear(); m_check_for_alpha.clear(); @@ -301,7 +322,7 @@ namespace basisu m_no_endpoint_rdo.clear(); m_endpoint_rdo_thresh.clear(); - + m_mip_gen.clear(); m_mip_scale.clear(); m_mip_filter = "kaiser"; @@ -315,7 +336,7 @@ namespace basisu m_etc1s_max_endpoint_clusters = 0; m_etc1s_max_selector_clusters = 0; - m_etc1s_quality_level = -1; + m_quality_level = -1; m_tex_type = basist::cBASISTexType2D; m_userdata0 = 0; @@ -342,37 +363,62 @@ namespace basisu m_ktx2_uastc_supercompression = basist::KTX2_SS_NONE; m_ktx2_key_values.clear(); m_ktx2_zstd_supercompression_level.clear(); - m_ktx2_srgb_transfer_func.clear(); + m_ktx2_and_basis_srgb_transfer_function.clear(); m_validate_output_data.clear(); + m_transcode_flags.clear(); m_ldr_hdr_upconversion_srgb_to_linear.clear(); m_hdr_favor_astc.clear(); - + m_uastc_hdr_4x4_options.init(); m_astc_hdr_6x6_options.clear(); m_ldr_hdr_upconversion_nit_multiplier.clear(); m_ldr_hdr_upconversion_black_bias.clear(); + 
m_xuastc_ldr_effort_level.clear(); + m_xuastc_ldr_use_dct.clear(); + m_xuastc_ldr_use_lossy_supercompression.clear(); + m_xuastc_ldr_force_disable_subsets.clear(); + m_xuastc_ldr_force_disable_rgb_dual_plane.clear(); + m_xuastc_ldr_syntax.clear(); + + m_ls_min_psnr.clear(); + m_ls_min_alpha_psnr.clear(); + m_ls_thresh_psnr.clear(); + m_ls_thresh_alpha_psnr.clear(); + m_ls_thresh_edge_psnr.clear(); + m_ls_thresh_edge_alpha_psnr.clear(); + for (uint32_t i = 0; i < 4; i++) + m_xuastc_ldr_channel_weights[i] = 1; + m_xuastc_ldr_blurring.clear(); + m_pJob_pool = nullptr; } - + // Configures the compressor's mode by setting the proper parameters (which were preserved for backwards compatibility with old code). - void set_format_mode(basist::basis_tex_format m) + // This is by far the preferred way of controlling which codec mode the compressor will select. + void set_format_mode(basist::basis_tex_format mode) { - switch (m) + m_format_mode = mode; + + switch (mode) { case basist::basis_tex_format::cETC1S: { + // ETC1S + m_xuastc_or_astc_ldr_basis_tex_format = -1; m_hdr = false; m_uastc = false; m_hdr_mode = hdr_modes::cUASTC_HDR_4X4; // doesn't matter break; } - case basist::basis_tex_format::cUASTC4x4: + case basist::basis_tex_format::cUASTC_LDR_4x4: { + // UASTC LDR 4x4 + m_xuastc_or_astc_ldr_basis_tex_format = -1; m_hdr = false; m_uastc = true; m_hdr_mode = hdr_modes::cUASTC_HDR_4X4; // doesn't matter @@ -380,6 +426,8 @@ namespace basisu } case basist::basis_tex_format::cUASTC_HDR_4x4: { + // UASTC HDR 4x4 + m_xuastc_or_astc_ldr_basis_tex_format = -1; m_hdr = true; m_uastc = true; m_hdr_mode = hdr_modes::cUASTC_HDR_4X4; @@ -387,16 +435,56 @@ namespace basisu } case basist::basis_tex_format::cASTC_HDR_6x6: { + // ASTC HDR 6x6 + m_xuastc_or_astc_ldr_basis_tex_format = -1; m_hdr = true; m_uastc = true; m_hdr_mode = hdr_modes::cASTC_HDR_6X6; break; } - case basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE: + case basist::basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE: { 
+ // UASTC HDR 6x6 + m_xuastc_or_astc_ldr_basis_tex_format = -1; m_hdr = true; m_uastc = true; - m_hdr_mode = hdr_modes::cASTC_HDR_6X6_INTERMEDIATE; + m_hdr_mode = hdr_modes::cUASTC_HDR_6X6_INTERMEDIATE; + break; + } + case basist::basis_tex_format::cXUASTC_LDR_4x4: + case basist::basis_tex_format::cXUASTC_LDR_5x4: + case basist::basis_tex_format::cXUASTC_LDR_5x5: + case basist::basis_tex_format::cXUASTC_LDR_6x5: + case basist::basis_tex_format::cXUASTC_LDR_6x6: + case basist::basis_tex_format::cXUASTC_LDR_8x5: + case basist::basis_tex_format::cXUASTC_LDR_8x6: + case basist::basis_tex_format::cXUASTC_LDR_10x5: + case basist::basis_tex_format::cXUASTC_LDR_10x6: + case basist::basis_tex_format::cXUASTC_LDR_8x8: + case basist::basis_tex_format::cXUASTC_LDR_10x8: + case basist::basis_tex_format::cXUASTC_LDR_10x10: + case basist::basis_tex_format::cXUASTC_LDR_12x10: + case basist::basis_tex_format::cXUASTC_LDR_12x12: + case basist::basis_tex_format::cASTC_LDR_4x4: + case basist::basis_tex_format::cASTC_LDR_5x4: + case basist::basis_tex_format::cASTC_LDR_5x5: + case basist::basis_tex_format::cASTC_LDR_6x5: + case basist::basis_tex_format::cASTC_LDR_6x6: + case basist::basis_tex_format::cASTC_LDR_8x5: + case basist::basis_tex_format::cASTC_LDR_8x6: + case basist::basis_tex_format::cASTC_LDR_10x5: + case basist::basis_tex_format::cASTC_LDR_10x6: + case basist::basis_tex_format::cASTC_LDR_8x8: + case basist::basis_tex_format::cASTC_LDR_10x8: + case basist::basis_tex_format::cASTC_LDR_10x10: + case basist::basis_tex_format::cASTC_LDR_12x10: + case basist::basis_tex_format::cASTC_LDR_12x12: + { + // ASTC LDR 4x4-12x12 or XUASTC LDR 4x4-12x12 + m_xuastc_or_astc_ldr_basis_tex_format = (int)mode; + m_hdr = false; + m_uastc = true; + m_hdr_mode = hdr_modes::cUASTC_HDR_4X4; // doesn't matter break; } default: @@ -405,39 +493,84 @@ namespace basisu } } - // By default we generate LDR ETC1S data. 
- // if m_uastc is true but m_hdr is not true, we generate UASTC 4x4 LDR data (8bpp with or without RDO). - // if m_uastc is true and m_hdr is true, we generate 4x4 or 6x6 HDR data (either standard ASTC, constrained ASTC, RDO ASTC, or intermediate), controlled by m_hdr_mode. + // Like set_format_mode() but also sets the effort and quality parameters appropriately for the selected mode. + // "Effort" (perf. vs. highest achievable quality) and "quality" (quality vs. bitrate) parameters are now mode dependent. + // Effort ranges from [0,10] and quality ranges from [1,100], unless they are -1 in which case you get the codec's default settings. + bool set_format_mode_and_effort(basist::basis_tex_format mode, int effort = -1, bool set_defaults = true); + bool set_format_mode_and_quality_effort(basist::basis_tex_format mode, int quality = -1, int effort = -1, bool set_defaults = true); + + // Sets all the sRGB-related options (m_perceptual, m_mip_srgb, m_ktx2_and_basis_srgb_transfer_function) to the specified value. + void set_srgb_options(bool srgb_flag) + { + m_perceptual = srgb_flag; + m_mip_srgb = srgb_flag; + m_ktx2_and_basis_srgb_transfer_function = srgb_flag; + } + + // Simpler helpers - I wish this was easier, but backwards API compat is also valuable. + bool is_etc1s() const + { + return !m_uastc; + } + + bool is_uastc_ldr_4x4() const + { + return m_uastc && !m_hdr && (m_xuastc_or_astc_ldr_basis_tex_format == -1); + } + bool is_uastc_hdr_4x4() const + { + return m_uastc && m_hdr && (m_hdr_mode == hdr_modes::cUASTC_HDR_4X4); + } + + // By default we generate LDR ETC1S data. + // Ideally call set_format_mode() above instead of directly manipulating the below fields. These individual parameters are for backwards API compatibility. + // - If m_uastc is false you get ETC1S (the default). + // - If m_uastc is true, and m_hdr is not true, and m_xuastc_or_astc_ldr_basis_tex_format==-1, we generate UASTC 4x4 LDR data (8bpp with or without RDO). 
+ // - If m_uastc is true, and m_hdr is not true, and m_xuastc_or_astc_ldr_basis_tex_format!=-1, we generate XUASTC 4x4-12x12 or ASTC 4x4-12x12 LDR data, controlled by m_xuastc_or_astc_ldr_basis_tex_format. + // - If m_uastc is true and m_hdr is true, we generate 4x4 or 6x6 HDR data, controlled by m_hdr_mode. + // True to generate UASTC .basis/.KTX2 file data, otherwise ETC1S. - // Should be true for any non-ETC1S format (UASTC 4x4 LDR, UASTC 4x4 HDR, RDO ASTC 6x6 HDR, and ASTC 6x6 HDR intermediate). + // Should be true for any non-ETC1S format (UASTC 4x4 LDR, UASTC 4x4 HDR, RDO ASTC 6x6 HDR, UASTC 6x6 HDR, or ASTC/XUASTC LDR 4x4-12x12). + // Note: Ideally call set_format_mode() or set_format_mode_and_quality_effort() above instead. + // Many of these individual parameters are for backwards API compatibility. bool_param m_uastc; // Set m_hdr to true to switch to UASTC HDR mode. m_hdr_mode then controls which format is output. // m_hdr_mode then controls which format is output (4x4, 6x6, or 6x6 intermediate). + // Note: Ideally call set_format_mode() instead. This is for backwards API compatibility. bool_param m_hdr; - + // If m_hdr is true, this specifies which mode we operate in (currently UASTC 4x4 HDR or ASTC 6x6 HDR). Defaults to UASTC 4x4 HDR for backwards compatibility. + // Note: Ideally call set_format_mode() instead. This is for backwards API compatibility. hdr_modes m_hdr_mode; + // If not -1: Generate XUASTC or ASTC LDR 4x4-12x12 files in the specified basis_tex_format (which also sets the ASTC block size). If -1 (the default), don't generate XUASTC/ASTC LDR files. + // m_uastc must also be set to true if this is not -1. + // Note: Ideally call set_format_mode() instead. + param m_xuastc_or_astc_ldr_basis_tex_format; // enum basis_tex_format + + // True to enable OpenCL if it's available. The compressor will fall back to CPU encoding if something goes wrong. 
bool_param m_use_opencl; - // If m_read_source_images is true, m_source_filenames (and optionally m_source_alpha_filenames) contains the filenames of PNG etc. images to read. + // If m_read_source_images is true, m_source_filenames (and optionally m_source_alpha_filenames) contains the filenames of PNG etc. images to read. // Otherwise, the compressor processes the images in m_source_images or m_source_images_hdr. basisu::vector m_source_filenames; basisu::vector m_source_alpha_filenames; - + + // An array of 2D LDR/SDR source images. basisu::vector m_source_images; - + + // An array of 2D HDR source images. basisu::vector m_source_images_hdr; - + // Stores mipmaps starting from level 1. Level 0 is still stored in m_source_images, as usual. // If m_source_mipmaps isn't empty, automatic mipmap generation isn't done. m_source_mipmaps.size() MUST equal m_source_images.size() or the compressor returns an error. // The compressor applies the user-provided swizzling (in m_swizzle) to these images. basisu::vector< basisu::vector > m_source_mipmap_images; basisu::vector< basisu::vector > m_source_mipmap_images_hdr; - + // Filename of the output basis/ktx2 file std::string m_out_filename; @@ -448,20 +581,24 @@ namespace basisu // If true, the compressor will print basis status to stdout during compression. bool_param m_status_output; - + // Output debug information during compression bool_param m_debug; + + // Low-level ETC1S data validation during encoding (slower/development). bool_param m_validate_etc1s; - + // m_debug_images is pretty slow bool_param m_debug_images; - // ETC1S compression level, from 0 to BASISU_MAX_COMPRESSION_LEVEL (higher is slower). + // ETC1S compression effort level, from 0 to BASISU_MAX_ETC1S_COMPRESSION_LEVEL (higher is slower). // This parameter controls numerous internal encoding speed vs. compression efficiency/performance tradeoffs. // Note this is NOT the same as the ETC1S quality level, and most users shouldn't change this. 
- param m_compression_level; - - // Use perceptual sRGB colorspace metrics instead of linear + param m_etc1s_compression_level; + + // Use perceptual sRGB colorspace metrics instead of linear. + // Note: You probably also want to set m_ktx2_srgb_transfer_func to match. + // Note: This member variable was previously called "m_perceptual". bool_param m_perceptual; // Disable selector RDO, for faster compression but larger files @@ -476,47 +613,55 @@ namespace basisu // Write the output basis/ktx2 file to disk using m_out_filename bool_param m_write_output_basis_or_ktx2_files; - - // Compute and display image metrics + + // Compute and display image metrics bool_param m_compute_stats; // Print stats to stdout, if m_compute_stats is true. bool_param m_print_stats; - + // Check to see if any input image has an alpha channel, if so then the output basis/ktx2 file will have alpha channels bool_param m_check_for_alpha; - + // Always put alpha slices in the output basis/ktx2 file, even when the input doesn't have alpha - bool_param m_force_alpha; - bool_param m_multithreading; + bool_param m_force_alpha; + // True to enable multithreading in various compressors. + // Note currently, some compressors (like ASTC/XUASTC LDR) will utilize threading anyway if the job pool is more than one thread. + bool_param m_multithreading; + // Split the R channel to RGB and the G channel to alpha, then write a basis/ktx2 file with alpha channels uint8_t m_swizzle[4]; + // Renormalize normal map normals after loading image bool_param m_renormalize; // If true the front end will not use 2 level endpoint codebook searching, for slightly higher quality but much slower execution. - // Note some m_compression_level's disable this automatically. + // Note some m_etc1s_compression_level's disable this automatically. 
bool_param m_disable_hierarchical_endpoint_codebooks; - + // mipmap generation parameters bool_param m_mip_gen; param m_mip_scale; std::string m_mip_filter; bool_param m_mip_srgb; bool_param m_mip_premultiplied; // not currently supported - bool_param m_mip_renormalize; + bool_param m_mip_renormalize; bool_param m_mip_wrapping; bool_param m_mip_fast; param m_mip_smallest_dimension; - - // ETC1S codebook size (quality) control. - // If m_etc1s_quality_level != -1, it controls the quality level. It ranges from [1,255] or [BASISU_QUALITY_MIN, BASISU_QUALITY_MAX]. + + // ETC1S codebook size (quality) control. + // If m_quality_level (previously named m_etc1s_quality_level) != -1, it controls the quality level. It ranges from [1,255] or [BASISU_QUALITY_MIN, BASISU_QUALITY_MAX]. // Otherwise m_max_endpoint_clusters/m_max_selector_clusters controls the codebook sizes directly. uint32_t m_etc1s_max_endpoint_clusters; uint32_t m_etc1s_max_selector_clusters; - int m_etc1s_quality_level; + // Quality level (bitrate vs. distortion tradeoff) control for ETC1S or XUASTC LDR 4x4-12x12. + // ETC1S: Must set to [1,255] or [BASISU_QUALITY_MIN, BASISU_QUALITY_MAX] to control quality vs. bitrate. If -1 (the default!), quality is controlled by m_etc1s_max_endpoint_clusters and m_etc1s_max_selector_clusters directly. + // XUASTC LDR: Must not be -1 for DCT. + int m_quality_level; + // m_tex_type, m_userdata0, m_userdata1, m_framerate - These fields go directly into the .basis file header. basist::basis_texture_type m_tex_type; uint32_t m_userdata0; @@ -527,7 +672,7 @@ namespace basisu // cPackUASTCLevelDefault, etc. 
uint32_t m_pack_uastc_ldr_4x4_flags; bool_param m_rdo_uastc_ldr_4x4; - param m_rdo_uastc_ldr_4x4_quality_scalar; + param m_rdo_uastc_ldr_4x4_quality_scalar; // RDO lambda for UASTC 4x4 LDR param m_rdo_uastc_ldr_4x4_dict_size; param m_rdo_uastc_ldr_4x4_max_smooth_block_error_scale; param m_rdo_uastc_ldr_4x4_smooth_block_max_std_dev; @@ -536,45 +681,82 @@ namespace basisu bool_param m_rdo_uastc_ldr_4x4_favor_simpler_modes_in_rdo_mode; bool_param m_rdo_uastc_ldr_4x4_multithreading; + // Resample input texture after loading param m_resample_width; param m_resample_height; param m_resample_factor; + // ETC1S global codebook control const basist::basisu_lowlevel_etc1s_transcoder *m_pGlobal_codebooks; // KTX2 specific parameters. - // Internally, the compressor always creates a .basis file then it converts that lossless to KTX2. + // Internally, the compressor always creates a .basis file then it converts that losslessly to KTX2. bool_param m_create_ktx2_file; basist::ktx2_supercompression m_ktx2_uastc_supercompression; basist::ktx2_transcoder::key_value_vec m_ktx2_key_values; param m_ktx2_zstd_supercompression_level; - bool_param m_ktx2_srgb_transfer_func; - + + // Note: The default for this parameter (which used to be "m_ktx2_srgb_transfer_func") used to be false, now setting this to true and renaming to m_ktx2_and_basis_srgb_transfer_function. + // Also see m_perceptual and m_mip_srgb, which should in most uses be the same. + // This also controls the XUASTC LDR ASTC decode profile (linear vs. sRGB) in the simulated decoder block. + // For XUASTC LDR, it's also still used when generating .basis files vs. .KTX2. 
+ bool_param m_ktx2_and_basis_srgb_transfer_function; // false = linear transfer function, true = sRGB transfer function + + // HDR codec specific options uastc_hdr_4x4_codec_options m_uastc_hdr_4x4_options; - astc_6x6_hdr::astc_hdr_6x6_global_config m_astc_hdr_6x6_options; + astc_6x6_hdr::astc_hdr_6x6_global_config m_astc_hdr_6x6_options; // also UASTC HDR 6x6i + // True to try transcoding the generated output after compression to a few formats. bool_param m_validate_output_data; + + // The flags to use while transcoding if m_validate_output_data + param m_transcode_flags; // LDR->HDR upconversion parameters. - // - // If true, LDR images (such as PNG) will be converted to normalized [0,1] linear light (via a sRGB->Linear conversion), or absolute luminance (nits or candelas per meter squared), and then processed as HDR. + // + // If true, LDR images (such as PNG) will be converted to normalized [0,1] linear light (via a sRGB->Linear conversion), or absolute luminance (nits or candelas per meter squared), and then processed as HDR. // Otherwise, LDR images are assumed to already be in linear light (i.e. they don't use the sRGB transfer function). bool_param m_ldr_hdr_upconversion_srgb_to_linear; - + // m_ldr_hdr_upconversion_nit_multiplier is only used when loading SDR/LDR images and compressing to an HDR output format. - // By default m_ldr_hdr_upconversion_nit_multiplier is 0. It's an override for the default. - // When loading LDR images, a default multiplier of 1.0 will be used in UASTC 4x4 HDR mode. Partially for backwards compatibility with previous library releases, and also because it doesn't really matter with this encoder what the multiplier is. - // With the 6x6 HDR encoder it does matter because it expects inputs in absolute nits, so the LDR upconversion luminance multiplier default will be 100 nits. (Most SDR monitors were/are 80-100 nits or so.) + // By default m_ldr_hdr_upconversion_nit_multiplier is 0. 
It's an override for the default, which is now 100.0 nits (LDR_TO_HDR_NITS). + // UASTC HDR 4x4: The default multiplier of 1.0 was previously used in this codec's original release. Note this encoder isn't dependent on absolute nits, unlike the ASTC 6x6 HDR encoder. + // RDO ASTC HDR 6x6/UASTC HDR 6x6i: These encoders expect inputs in absolute nits, so the LDR upconversion luminance multiplier default will be 100 nits. (Most SDR monitors were/are 80-100 nits or so.) param m_ldr_hdr_upconversion_nit_multiplier; // The optional sRGB space bias to use during LDR->HDR upconversion. Should be between [0,.49] or so. Only applied on black (0.0) color components. // Defaults to no bias (0.0f). param m_ldr_hdr_upconversion_black_bias; - // If true, ASTC HDR quality is favored more than BC6H quality. Otherwise it's a rough balance. + // If true, ASTC HDR quality is favored more than BC6H quality by the dual target encoder. Otherwise it's a rough balance. + // UASTC HDR 4x4 bool_param m_hdr_favor_astc; + // XUASTC LDR 4x4-12x12 specific options + param m_xuastc_ldr_effort_level; + bool_param m_xuastc_ldr_use_dct; // set the DCT quality above using m_quality_level, [1,100] + bool_param m_xuastc_ldr_use_lossy_supercompression; // allows the compressor to introduce a bounded amount of distortion if doing so would make smaller files (actually ASTC or XUASTC) + bool_param m_xuastc_ldr_force_disable_subsets; // disable 2-3 subset usage in all effort levels, faster encoding, faster transcoding to BC7, but lower quality) + bool_param m_xuastc_ldr_force_disable_rgb_dual_plane; // disable RGB dual plane usage (still can use dual plane on alpha blocks), for faster transcoding to BC7 but lower quality + param m_xuastc_ldr_syntax; // favor faster decompression over ratio, default is basist::astc_ldr_t::xuastc_ldr_syntax::cFullZstd (fastest transcoding but lower ratio) + uint32_t m_xuastc_ldr_channel_weights[4]; + bool_param m_xuastc_ldr_blurring; // experimental, not recommended, very slow 
+ + // XUASTC Lossy supercompression PSNR threshold parameters + param m_ls_min_psnr, m_ls_min_alpha_psnr; + param m_ls_thresh_psnr, m_ls_thresh_alpha_psnr; + param m_ls_thresh_edge_psnr, m_ls_thresh_edge_alpha_psnr; + + // Job pool, MUST not be nullptr; job_pool *m_pJob_pool; + + // Returns the current format mode as set by set_format_mode() above. + // Because of backwards API compatibility we don't use this directly yet, it's just here to aid the transition to the new API. + basist::basis_tex_format get_format_mode() const { return m_format_mode; } + + private: + // This is set by set_format_mode() above. For backwards API compat we don't use it directly, it's just here to aid the transition to the new API. + basist::basis_tex_format m_format_mode; }; // Important: basisu_encoder_init() MUST be called first before using this class. @@ -588,7 +770,7 @@ namespace basisu // Note it *should* be possible to call init() multiple times with different inputs, but this scenario isn't well tested. Ideally, create 1 object, compress, then delete it. bool init(const basis_compressor_params ¶ms); - + enum error_code { cECSuccess = 0, @@ -597,50 +779,68 @@ namespace basisu cECFailedValidating, cECFailedEncodeUASTC, cECFailedFrontEnd, - cECFailedFontendExtract, + cECFailedFrontendExtract, cECFailedBackend, cECFailedCreateBasisFile, cECFailedWritingOutput, cECFailedUASTCRDOPostProcess, - cECFailedCreateKTX2File + cECFailedCreateKTX2File, + cECFailedInvalidParameters }; error_code process(); // The output .basis file will always be valid of process() succeeded. const uint8_vec &get_output_basis_file() const { return m_output_basis_file; } - + // The output .ktx2 file will only be valid if m_create_ktx2_file was true and process() succeeded. const uint8_vec& get_output_ktx2_file() const { return m_output_ktx2_file; } const basisu::vector &get_stats() const { return m_stats; } - uint32_t get_basis_file_size() const { return m_basis_file_size; } + // Sum of all slice orig pixels. 
Intended for statistics display. + uint64_t get_total_slice_orig_texels() const { return m_total_slice_orig_texels; } + + uint64_t get_basis_file_size() const { return m_basis_file_size; } double get_basis_bits_per_texel() const { return m_basis_bits_per_texel; } + uint64_t get_ktx2_file_size() const { return m_ktx2_file_size; } + double get_ktx2_bits_per_texel() const { return m_ktx2_bits_per_texel; } + bool get_any_source_image_has_alpha() const { return m_any_source_image_has_alpha; } bool get_opencl_failed() const { return m_opencl_failed; } - + private: basis_compressor_params m_params; - + opencl_context_ptr m_pOpenCL_context; - basist::basis_tex_format m_fmt_mode; - + // the output mode/codec + basist::basis_tex_format m_fmt_mode; + + // the output mode/codec's block width/height + uint32_t m_fmt_mode_block_width; + uint32_t m_fmt_mode_block_height; + + // Note these images are expanded if necessary (duplicating cols/rows) to account for block dimensions. basisu::vector m_slice_images; basisu::vector m_slice_images_hdr; basisu::vector m_stats; - uint32_t m_basis_file_size; + uint64_t m_total_slice_orig_texels; + + uint64_t m_basis_file_size; double m_basis_bits_per_texel; + uint64_t m_ktx2_file_size; + double m_ktx2_bits_per_texel; + basisu_backend_slice_desc_vec m_slice_descs; uint32_t m_total_blocks; - + basisu_frontend m_frontend; // These are 4x4 blocks. @@ -658,7 +858,7 @@ namespace basisu basisu::vector m_decoded_output_textures; // BC6H in HDR mode basisu::vector m_decoded_output_textures_unpacked; - + basisu::vector m_decoded_output_textures_bc7; basisu::vector m_decoded_output_textures_unpacked_bc7; @@ -669,16 +869,16 @@ namespace basisu uint8_vec m_output_basis_file; uint8_vec m_output_ktx2_file; - + basisu::vector m_uastc_slice_textures; basisu_backend_output m_uastc_backend_output; // The amount the HDR input has to be scaled up in case it had to be rescaled to fit into half floats. 
- float m_hdr_image_scale; - + float m_hdr_image_scale; + // The upconversion multiplier used to load LDR images in HDR mode. float m_ldr_to_hdr_upconversion_nit_multiplier; - + // True if any loaded source images were LDR and upconverted to HDR. bool m_upconverted_any_ldr_images; @@ -701,48 +901,20 @@ namespace basisu error_code encode_slices_to_astc_6x6_hdr(); error_code encode_slices_to_uastc_4x4_hdr(); error_code encode_slices_to_uastc_4x4_ldr(); + error_code encode_slices_to_xuastc_or_astc_ldr(); bool generate_mipmaps(const imagef& img, basisu::vector& mips, bool has_alpha); bool generate_mipmaps(const image &img, basisu::vector &mips, bool has_alpha); bool validate_texture_type_constraints(); bool validate_ktx2_constraints(); bool get_dfd(uint8_vec& dfd, const basist::ktx2_header& hdr); bool create_ktx2_file(); - void pick_format_mode(); - - uint32_t get_block_width() const - { - if (m_params.m_hdr) - { - switch (m_params.m_hdr_mode) - { - case hdr_modes::cASTC_HDR_6X6: - case hdr_modes::cASTC_HDR_6X6_INTERMEDIATE: - return 6; - default: - break; - } - } - return 4; - } + bool pick_format_mode(); - uint32_t get_block_height() const - { - if (m_params.m_hdr) - { - switch (m_params.m_hdr_mode) - { - case hdr_modes::cASTC_HDR_6X6: - case hdr_modes::cASTC_HDR_6X6_INTERMEDIATE: - return 6; - default: - break; - } - } - return 4; - } + uint32_t get_block_width() const { return m_fmt_mode_block_width; } + uint32_t get_block_height() const { return m_fmt_mode_block_height; } }; - - // Alternative simple C-style wrapper API around the basis_compressor class. + + // Alternative simple C-style wrapper API around the basis_compressor class. // This doesn't expose every encoder feature, but it's enough to get going. // Important: basisu_encoder_init() MUST be called first before calling these functions. // @@ -751,16 +923,22 @@ namespace basisu // OR // pImageRGBA: pointer to a 32-bpp RGBx or RGBA raster image, R first in memory, A last. Top scanline first in memory. 
// width/height/pitch_in_pixels: dimensions of pImageRGBA - // - // flags_and_quality: Combination of the above flags logically OR'd with the ETC1S or UASTC level, i.e. "cFlagSRGB | cFlagGenMipsClamp | cFlagThreaded | 128" or "cFlagSRGB | cFlagGenMipsClamp | cFlagUASTC | cFlagThreaded | cPackUASTCLevelDefault". + // + // flags_and_quality: Combination of the above flags logically OR'd with the ETC1S or UASTC quality or effort level. + // Note: basis_compress2() variants below accept the new-style "quality_level" (0-100) and "effort_level" (0-10) parameters instead of packing them into flags_and_quality. // In ETC1S mode, the lower 8-bits are the ETC1S quality level which ranges from [1,255] (higher=better quality/larger files) - // In UASTC mode, the lower 8-bits are the UASTC LDR/HDR pack level (see cPackUASTCLevelFastest, etc.). Fastest/lowest quality is 0, so be sure to set it correctly. Valid values are [0,4] for both LDR/HDR. - // In UASTC mode, be sure to set this, otherwise it defaults to 0 (fastest/lowest quality). - // - // uastc_rdo_quality: Float UASTC RDO quality level (0=no change, higher values lower quality but increase compressibility, initially try .5-1.5) - // + // In UASTC LDR 4x4 mode, the lower 8-bits are the UASTC LDR/HDR pack or effort level (see cPackUASTCLevelFastest to cPackUASTCLevelVerySlow). Fastest/lowest quality is 0, so be sure to set it correctly. Valid values are [0,4] for both LDR/HDR. + // In UASTC HDR 4x4 mode, the lower 8-bits are the codec's effort level. Valid range is [uastc_hdr_4x4_codec_options::cMinLevel, uastc_hdr_4x4_codec_options::cMaxLevel]. Higher=better quality, but slower. + // In RDO ASTC HDR 6x6/UASTC HDR 6x6 mode, the lower 8-bits are the codec's effort level. Valid range is [0,astc_6x6_hdr::ASTC_HDR_6X6_MAX_USER_COMP_LEVEL]. Higher levels=better quality, but slower. 
+ // In XUASTC/ASTC LDR 4x4-12x12 mode, the lower 8-bits are the compressor's effort level from [0,10] (astc_ldr_t::EFFORT_LEVEL_MIN, astc_ldr_t::EFFORT_LEVEL_MAX). + // + // float uastc_rdo_or_dct_quality: + // UASTC LDR 4x4 RDO quality level: RDO lambda setting - 0=no change/highest quality. Higher values lower quality but increase compressibility, initially try .5-1.5. + // RDO ASTC 6x6 HDR/UASTC 6x6 HDR: RDO lambda setting. 0=no change/highest quality. Higher values lower quality but increase compressibility, initially try 250-2000 (HDR) or 1000-10000 (LDR/SDR inputs upconverted to HDR). + // In XUASTC/ASTC LDR 4x4-12x12 mode, this is the [1,100] weight grid DCT quality level. + // // pSize: Returns the output data's compressed size in bytes - // + // // Return value is the compressed .basis or .ktx2 file data, or nullptr on failure. Must call basis_free() to free it. enum { @@ -769,36 +947,63 @@ namespace basisu cFlagDebug = 1 << 10, // enable debug output cFlagKTX2 = 1 << 11, // generate a KTX2 file - cFlagKTX2UASTCSuperCompression = 1 << 12, // use KTX2 Zstd supercompression on UASTC files + cFlagKTX2UASTCSuperCompression = 1 << 12, // use KTX2 Zstd supercompression on non-supercompressed formats that support it. 
- cFlagSRGB = 1 << 13, // input texture is sRGB, use perceptual colorspace metrics, also use sRGB filtering during mipmap gen, and also sets KTX2 output transfer func to sRGB + cFlagSRGB = 1 << 13, // input texture is sRGB, use perceptual colorspace metrics, also use sRGB filtering during mipmap gen, and also sets KTX2/.basis output transfer func to sRGB cFlagGenMipsClamp = 1 << 14, // generate mipmaps with clamp addressing cFlagGenMipsWrap = 1 << 15, // generate mipmaps with wrap addressing - + cFlagYFlip = 1 << 16, // flip source image on Y axis before compression - - cFlagUASTCRDO = 1 << 17, // use RDO postprocessing when generating UASTC files (must set uastc_rdo_quality to the quality scalar) - + + // Note 11/18/2025: cFlagUASTCRDO flag is now ignored. Now if uastc_rdo_or_dct_quality>0 in UASTC LDR 4x4 mode, you automatically get RDO. + //cFlagUASTCRDO = 1 << 17, // use RDO postprocessing when generating UASTC LDR 4x4 files (must set uastc_rdo_or_dct_quality to the quality scalar) + cFlagPrintStats = 1 << 18, // print image stats to stdout cFlagPrintStatus = 1 << 19, // print status to stdout + + cFlagDebugImages = 1 << 20, // enable debug image generation (for development, slower) - cFlagDebugImages = 1 << 20, // enable status output - - cFlagREC2020 = 1 << 21, // ASTC 6x6 modes: treat input as REC 2020 vs. the default 709 - + cFlagREC2020 = 1 << 21, // treat input as REC 2020 vs. 
the default 709 (for codecs that support this, currently UASTC HDR and ASTC 6x6), bit is always placed into KTX2 DFD + cFlagValidateOutput = 1 << 22, // transcode the output after encoding for testing + + // XUASTC LDR profile: full arith, hybrid or full zstd (see basist::astc_ldr_t::xuastc_ldr_syntax) + cFlagXUASTCLDRSyntaxFullArith = 0 << 23, + cFlagXUASTCLDRSyntaxHybrid = 1 << 23, + cFlagXUASTCLDRSyntaxFullZStd = 2 << 23, + + cFlagXUASTCLDRSyntaxShift = 23, + cFlagXUASTCLDRSyntaxMask = 3, + + // Texture Type: 2D, 2D Array, Cubemap Array, or Texture Video (see enum basis_texture_type). Defaults to plain 2D. + cFlagTextureType2D = 0 << 25, + cFlagTextureType2DArray = 1 << 25, + cFlagTextureTypeCubemapArray = 2 << 25, + cFlagTextureTypeVideoFrames = 3 << 25, + + cFlagTextureTypeShift = 25, + cFlagTextureTypeMask = 3, }; - // This function accepts an array of source images. + void* basis_compress_internal( + basist::basis_tex_format mode, + const basisu::vector* pSource_images, + const basisu::vector* pSource_images_hdr, + uint32_t flags_and_quality, float uastc_rdo_or_dct_quality, + size_t* pSize, + image_stats* pStats, + int quality_level = -1, int effort_level = -1); + + // This function accepts an array of source images. // If more than one image is provided, it's assumed the images form a mipmap pyramid and automatic mipmap generation is disabled. - // Returns a pointer to the compressed .basis or .ktx2 file data. *pSize is the size of the compressed data. + // Returns a pointer to the compressed .basis or .ktx2 file data. *pSize is the size of the compressed data. // Important: The returned block MUST be manually freed using basis_free_data(). // basisu_encoder_init() MUST be called first! - // LDR version. To compress the LDR source image as HDR: Use the cFlagHDR flag. + // LDR version. 
void* basis_compress( basist::basis_tex_format mode, const basisu::vector &source_images, - uint32_t flags_and_quality, float uastc_rdo_quality, + uint32_t flags_and_quality, float uastc_rdo_or_dct_quality, size_t* pSize, image_stats* pStats = nullptr); @@ -807,7 +1012,7 @@ namespace basisu void* basis_compress( basist::basis_tex_format mode, const basisu::vector& source_images_hdr, - uint32_t flags_and_quality, float lambda, + uint32_t flags_and_quality, float uastc_rdo_or_dct_quality, size_t* pSize, image_stats* pStats = nullptr); @@ -816,7 +1021,30 @@ namespace basisu void* basis_compress( basist::basis_tex_format mode, const uint8_t* pImageRGBA, uint32_t width, uint32_t height, uint32_t pitch_in_pixels, - uint32_t flags_and_quality, float uastc_rdo_quality, + uint32_t flags_and_quality, float uastc_rdo_or_dct_quality, + size_t* pSize, + image_stats* pStats = nullptr); + + // basis_compress2() variants accept the new unified quality_level and effort_level parameters instead of the old flags/float uastc_rdo_or_dct_quality parameter. + // quality_level must be [0,100], effort_level [0,10]. 
+ void* basis_compress2( + basist::basis_tex_format mode, + const basisu::vector& source_images, + uint32_t flags_and_quality, int quality_level, int effort_level, + size_t* pSize, + image_stats* pStats = nullptr); + + void* basis_compress2( + basist::basis_tex_format mode, + const basisu::vector& source_images_hdr, + uint32_t flags_and_quality, int quality_level, int effort_level, + size_t* pSize, + image_stats* pStats = nullptr); + + void* basis_compress2( + basist::basis_tex_format mode, + const uint8_t* pImageRGBA, uint32_t width, uint32_t height, uint32_t pitch_in_pixels, + uint32_t flags_and_quality, int quality_level, int effort_level, size_t* pSize, image_stats* pStats = nullptr); @@ -841,7 +1069,7 @@ namespace basisu double m_basis_bits_per_texel; bool m_any_source_image_has_alpha; - parallel_results() + parallel_results() { clear(); } @@ -857,7 +1085,7 @@ namespace basisu m_any_source_image_has_alpha = false; } }; - + // Compresses an array of input textures across total_threads threads using the basis_compressor class. // Compressing multiple textures at a time is substantially more efficient than just compressing one at a time. // total_threads must be >= 1. @@ -865,5 +1093,6 @@ namespace basisu uint32_t total_threads, const basisu::vector ¶ms_vec, basisu::vector< parallel_results > &results_vec); - + } // namespace basisu + diff --git a/external/basis_universal/encoder/basisu_enc.cpp b/external/basis_universal/encoder/basisu_enc.cpp index cccf66c171..63fcab9273 100644 --- a/external/basis_universal/encoder/basisu_enc.cpp +++ b/external/basis_universal/encoder/basisu_enc.cpp @@ -1,5 +1,5 @@ // basisu_enc.cpp -// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2026 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -23,6 +23,8 @@ #include "basisu_opencl.h" #include "basisu_uastc_hdr_4x4_enc.h" #include "basisu_astc_hdr_6x6_enc.h" +#include "basisu_astc_ldr_common.h" +#include "basisu_astc_ldr_encode.h" #include @@ -58,7 +60,7 @@ namespace basisu #endif fast_linear_to_srgb g_fast_linear_to_srgb; - + uint8_t g_hamming_dist[256] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, @@ -81,7 +83,7 @@ namespace basisu // This is a Public Domain 8x8 font from here: // https://github.com/dhepper/font8x8/blob/master/font8x8_basic.h - const uint8_t g_debug_font8x8_basic[127 - 32 + 1][8] = + const uint8_t g_debug_font8x8_basic[127 - 32 + 1][8] = { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0020 ( ) { 0x18, 0x3C, 0x3C, 0x18, 0x18, 0x00, 0x18, 0x00}, // U+0021 (!) @@ -181,9 +183,17 @@ namespace basisu { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} // U+007F }; + float g_srgb_to_linear_table[256]; + + void init_srgb_to_linear_table() + { + for (int i = 0; i < 256; ++i) + g_srgb_to_linear_table[i] = srgb_to_linear((float)i * (1.0f / 255.0f)); + } + bool g_library_initialized; std::mutex g_encoder_init_mutex; - + // Encoder library initialization (just call once at startup) bool basisu_encoder_init(bool use_opencl, bool opencl_force_serialization) { @@ -193,7 +203,7 @@ namespace basisu return true; detect_sse41(); - + basist::basisu_transcoder_init(); pack_etc1_solid_color_init(); //uastc_init(); @@ -210,7 +220,11 @@ namespace basisu astc_hdr_enc_init(); basist::bc6h_enc_init(); astc_6x6_hdr::global_init(); + astc_ldr::global_init(); + astc_ldr::encoder_init(); + init_srgb_to_linear_table(); + g_library_initialized = true; return true; } @@ -221,7 +235,7 @@ namespace basisu g_library_initialized = false; } - + void error_vprintf(const char* pFmt, va_list args) { const uint32_t BUF_SIZE = 256; @@ -238,10 +252,12 @@ namespace basisu return; } + fflush(stdout); + if (total_chars >= (int)BUF_SIZE) { basisu::vector var_buf(total_chars + 1); - + va_copy(args_copy, args); int 
total_chars_retry = vsnprintf(var_buf.data(), var_buf.size(), pFmt, args_copy); va_end(args_copy); @@ -277,6 +293,7 @@ namespace basisu void platform_sleep(uint32_t ms) { // TODO + BASISU_NOTE_UNUSED(ms); } #endif @@ -316,7 +333,7 @@ namespace basisu #else #error TODO #endif - + interval_timer::interval_timer() : m_start_time(0), m_stop_time(0), m_started(false), m_stopped(false) { if (!g_timer_freq) @@ -350,7 +367,7 @@ namespace basisu timer_ticks delta = stop_time - m_start_time; return delta * g_timer_freq; } - + void interval_timer::init() { if (!g_timer_freq) @@ -377,7 +394,7 @@ namespace basisu return ticks * g_timer_freq; } - // Note this is linear<->sRGB, NOT REC709 which uses slightly different equations/transfer functions. + // Note this is linear<->sRGB, NOT REC709 which uses slightly different equations/transfer functions. // However the gamuts/white points of REC709 and sRGB are the same. float linear_to_srgb(float l) { @@ -387,7 +404,7 @@ namespace basisu else return saturate(1.055f * powf(l, 1.0f / 2.4f) - .055f); } - + float srgb_to_linear(float s) { assert(s >= 0.0f && s <= 1.0f); @@ -396,21 +413,21 @@ namespace basisu else return saturate(powf((s + .055f) * (1.0f / 1.055f), 2.4f)); } - + const uint32_t MAX_32BIT_ALLOC_SIZE = 250000000; - + bool load_tga(const char* pFilename, image& img) { int w = 0, h = 0, n_chans = 0; uint8_t* pImage_data = read_tga(pFilename, w, h, n_chans); - + if ((!pImage_data) || (!w) || (!h) || ((n_chans != 3) && (n_chans != 4))) { error_printf("Failed loading .TGA image \"%s\"!\n", pFilename); if (pImage_data) free(pImage_data); - + return false; } @@ -426,7 +443,7 @@ namespace basisu return false; } } - + img.resize(w, h); const uint8_t *pSrc = pImage_data; @@ -469,7 +486,7 @@ namespace basisu { interval_timer tm; tm.start(); - + if (!buf_size) return false; @@ -488,7 +505,7 @@ namespace basisu return true; } - + bool load_png(const char* pFilename, image& img) { uint8_vec buffer; @@ -507,9 +524,9 @@ namespace basisu 
uint8_t *pImage_data = jpgd::decompress_jpeg_image_from_file(pFilename, &width, &height, &actual_comps, 4, jpgd::jpeg_decoder::cFlagLinearChromaFiltering); if (!pImage_data) return false; - + img.init(pImage_data, width, height, 4); - + free(pImage_data); return true; @@ -556,7 +573,7 @@ namespace basisu return false; } - static void convert_ldr_to_hdr_image(imagef &img, const image &ldr_img, bool ldr_srgb_to_linear, float linear_nit_multiplier = 1.0f, float ldr_black_bias = 0.0f) + void convert_ldr_to_hdr_image(imagef &img, const image &ldr_img, bool ldr_srgb_to_linear, float linear_nit_multiplier, float ldr_black_bias) { img.resize(ldr_img.get_width(), ldr_img.get_height()); @@ -642,7 +659,7 @@ namespace basisu dst[2] = basist::half_to_float(pSrc_pixel[2]); dst[3] = basist::half_to_float(pSrc_pixel[3]); } - + pSrc_image_h += (width * 4); } @@ -722,7 +739,7 @@ namespace basisu return ((strcasecmp(pExt, "hdr") == 0) || (strcasecmp(pExt, "exr") == 0)); } - + // TODO: move parameters to struct, add a HDR clean flag to eliminate NaN's/Inf's bool load_image_hdr(const char* pFilename, imagef& img, bool ldr_srgb_to_linear, float linear_nit_multiplier, float ldr_black_bias) { @@ -740,7 +757,7 @@ namespace basisu return false; return true; } - + if (strcasecmp(pExt, "exr") == 0) { int n_chans = 0; @@ -760,12 +777,12 @@ namespace basisu return true; } - + bool save_png(const char* pFilename, const image &img, uint32_t image_save_flags, uint32_t grayscale_comp) { if (!img.get_total_pixels()) return false; - + void* pPNG_data = nullptr; size_t PNG_data_size = 0; @@ -783,7 +800,7 @@ namespace basisu else { bool has_alpha = false; - + if ((image_save_flags & cImageSaveIgnoreAlpha) == 0) has_alpha = img.has_alpha(); @@ -800,7 +817,7 @@ namespace basisu pDst[0] = pSrc->r; pDst[1] = pSrc->g; pDst[2] = pSrc->b; - + pSrc++; pDst += 3; } @@ -824,10 +841,35 @@ namespace basisu } free(pPNG_data); - + return status; } + bool save_qoi(const char* pFilename, const image& img, uint32_t 
qoi_colorspace) + { + assert(img.get_width() && img.get_height()); + + qoi_desc desc; + clear_obj(desc); + + desc.width = img.get_width(); + desc.height = img.get_height(); + desc.channels = 4; + desc.colorspace = (uint8_t)qoi_colorspace; + + int out_len = 0; + void* pData = qoi_encode(img.get_ptr(), &desc, &out_len); + if ((!pData) || (!out_len)) + return false; + + const bool status = write_data_to_file(pFilename, pData, out_len); + + QOI_FREE(pData); + pData = nullptr; + + return status; + } + bool read_file_to_vec(const char* pFilename, uint8_vec& data) { FILE* pFile = nullptr; @@ -838,7 +880,7 @@ namespace basisu #endif if (!pFile) return false; - + fseek(pFile, 0, SEEK_END); #ifdef _WIN32 int64_t filesize = _ftelli64(pFile); @@ -909,7 +951,7 @@ namespace basisu return false; } fseek(pFile, 0, SEEK_SET); - + if (fread(pData, 1, (size_t)len, pFile) != (size_t)len) { fclose(pFile); @@ -942,19 +984,20 @@ namespace basisu return fclose(pFile) != EOF; } - + bool image_resample(const image &src, image &dst, bool srgb, - const char *pFilter, float filter_scale, + const char *pFilter, float filter_scale, bool wrapping, - uint32_t first_comp, uint32_t num_comps) + uint32_t first_comp, uint32_t num_comps, + float filter_scale_y) { assert((first_comp + num_comps) <= 4); const int cMaxComps = 4; - + const uint32_t src_w = src.get_width(), src_h = src.get_height(); const uint32_t dst_w = dst.get_width(), dst_h = dst.get_height(); - + if (maximum(src_w, src_h) > BASISU_RESAMPLER_MAX_DIMENSION) { printf("Image is too large!\n"); @@ -963,17 +1006,19 @@ namespace basisu if (!src_w || !src_h || !dst_w || !dst_h) return false; - + if ((num_comps < 1) || (num_comps > cMaxComps)) return false; - + if ((minimum(dst_w, dst_h) < 1) || (maximum(dst_w, dst_h) > BASISU_RESAMPLER_MAX_DIMENSION)) { printf("Image is too large!\n"); return false; } - if ((src_w == dst_w) && (src_h == dst_h)) + if ( (src_w == dst_w) && (src_h == dst_h) && + (filter_scale == 1.0f) && + ((filter_scale_y < 
0.0f) || (filter_scale_y == 1.0f)) ) { dst = src; return true; @@ -997,17 +1042,19 @@ namespace basisu std::vector samples[cMaxComps]; Resampler *resamplers[cMaxComps]; - + resamplers[0] = new Resampler(src_w, src_h, dst_w, dst_h, wrapping ? Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 0.0f, 1.0f, - pFilter, nullptr, nullptr, filter_scale, filter_scale, 0, 0); + pFilter, nullptr, nullptr, + filter_scale, (filter_scale_y >= 0.0f) ? filter_scale_y : filter_scale, 0, 0); samples[0].resize(src_w); for (uint32_t i = 1; i < num_comps; ++i) { resamplers[i] = new Resampler(src_w, src_h, dst_w, dst_h, wrapping ? Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 0.0f, 1.0f, - pFilter, resamplers[0]->get_clist_x(), resamplers[0]->get_clist_y(), filter_scale, filter_scale, 0, 0); + pFilter, resamplers[0]->get_clist_x(), resamplers[0]->get_clist_y(), + filter_scale, (filter_scale_y >= 0.0f) ? filter_scale_y : filter_scale, 0, 0); samples[i].resize(src_w); } @@ -1057,7 +1104,7 @@ namespace basisu break; const bool linear_flag = !srgb || (comp_index == 3); - + color_rgba *pDst = &dst(0, dst_y); for (uint32_t x = 0; x < dst_w; x++) @@ -1090,7 +1137,7 @@ namespace basisu return true; } - bool image_resample(const imagef& src, imagef& dst, + bool image_resample(const imagef& src, imagef& dst, const char* pFilter, float filter_scale, bool wrapping, uint32_t first_comp, uint32_t num_comps) @@ -1183,7 +1230,7 @@ namespace basisu const float* pOutput_samples = resamplers[c]->get_line(); if (!pOutput_samples) break; - + vec4F* pDst = &dst(0, dst_y); for (uint32_t x = 0; x < dst_w; x++) @@ -1216,9 +1263,9 @@ namespace basisu A[0].m_key = 1; return; } - + A[0].m_key += A[1].m_key; - + int s = 2, r = 0, next; for (next = 1; next < (num_syms - 1); ++next) { @@ -1310,7 +1357,7 @@ namespace basisu for (i = 0; i < num_syms; i++) { uint32_t freq = pSyms0[i].m_key; - + // We scale all input frequencies to 16-bits. 
assert(freq <= UINT16_MAX); @@ -1501,7 +1548,7 @@ namespace basisu uint32_t total_used = tab.get_total_used_codes(); put_bits(total_used, cHuffmanMaxSymsLog2); - + if (!total_used) return 0; @@ -1565,7 +1612,7 @@ namespace basisu const uint32_t l = syms[i] & 63, e = syms[i] >> 6; put_code(l, ct); - + if (l == cHuffmanSmallZeroRunCode) put_bits(e, cHuffmanSmallZeroRunExtraBits); else if (l == cHuffmanBigZeroRunCode) @@ -1592,7 +1639,7 @@ namespace basisu huffman_encoding_table etab; etab.init(h, 16); - + { bitwise_coder c; c.init(1024); @@ -1727,9 +1774,9 @@ namespace basisu // We now have chosen an entry to place in the picked list, now determine which side it goes on. const uint32_t entry_to_move = m_entries_to_do[best_entry]; - + float side = pick_side(num_syms, entry_to_move, pDist_func, pCtx, dist_func_weight); - + // Put entry_to_move either on the "left" or "right" side of the picked entries if (side <= 0) m_entries_picked.push_back(entry_to_move); @@ -1832,7 +1879,7 @@ namespace basisu } return which_side; } - + void image_metrics::calc(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error, bool log) { assert((first_chan < 4U) && (first_chan + total_chans <= 4U)); @@ -1843,16 +1890,19 @@ namespace basisu double max_e = -1e+30f; double sum = 0.0f, sum_sqr = 0.0f; + m_width = width; + m_height = height; + m_has_neg = false; m_any_abnormal = false; m_hf_mag_overflow = false; - + for (uint32_t y = 0; y < height; y++) { for (uint32_t x = 0; x < width; x++) { const vec4F& ca = a(x, y), &cb = b(x, y); - + if (total_chans) { for (uint32_t c = 0; c < total_chans; c++) @@ -1867,7 +1917,7 @@ namespace basisu if (std::isinf(fa) || std::isinf(fb) || std::isnan(fa) || std::isnan(fb)) m_any_abnormal = true; - + const double delta = fabs(fa - fb); max_e = basisu::maximum(max_e, delta); @@ -1902,10 +1952,10 @@ namespace basisu } double ca_l = get_luminance(ca), cb_l = get_luminance(cb); - + double delta = fabs(ca_l - cb_l); max_e = 
basisu::maximum(max_e, delta); - + if (log) { double log2_delta = log2(basisu::maximum(0.0f, ca_l) + 1.0f) - log2(basisu::maximum(0.0f, cb_l) + 1.0f); @@ -1931,7 +1981,7 @@ namespace basisu m_mean = (float)(sum / total_values); m_mean_squared = (float)(sum_sqr / total_values); m_rms = (float)sqrt(sum_sqr / total_values); - + const double max_val = 1.0f; m_psnr = m_rms ? (float)clamp(log10(max_val / m_rms) * 20.0f, 0.0f, 1000.0f) : 1000.0f; } @@ -1944,12 +1994,15 @@ namespace basisu const uint32_t width = basisu::minimum(a.get_width(), b.get_width()); const uint32_t height = basisu::minimum(a.get_height(), b.get_height()); + m_width = width; + m_height = height; + m_has_neg = false; m_hf_mag_overflow = false; m_any_abnormal = false; uint_vec hist(65536); - + for (uint32_t y = 0; y < height; y++) { for (uint32_t x = 0; x < width; x++) @@ -1960,7 +2013,7 @@ namespace basisu { if ((ca[i] < 0.0f) || (cb[i] < 0.0f)) m_has_neg = true; - + if ((fabs(ca[i]) > basist::MAX_HALF_FLOAT) || (fabs(cb[i]) > basist::MAX_HALF_FLOAT)) m_hf_mag_overflow = true; @@ -2010,10 +2063,13 @@ namespace basisu const uint32_t width = basisu::minimum(a.get_width(), b.get_width()); const uint32_t height = basisu::minimum(a.get_height(), b.get_height()); + m_width = width; + m_height = height; + m_has_neg = false; m_hf_mag_overflow = false; m_any_abnormal = false; - + double sum = 0.0f, sum2 = 0.0f; m_max = 0; @@ -2050,7 +2106,7 @@ namespace basisu } // x } // y - + double total_values = (double)width * (double)height; if (avg_comp_error) total_values *= (double)clamp(total_chans, 1, 4); @@ -2069,12 +2125,17 @@ namespace basisu const uint32_t width = basisu::minimum(a.get_width(), b.get_width()); const uint32_t height = basisu::minimum(a.get_height(), b.get_height()); + m_width = width; + m_height = height; + double hist[256]; clear_obj(hist); m_has_neg = false; m_any_abnormal = false; m_hf_mag_overflow = false; + m_sum_a = 0; + m_sum_b = 0; for (uint32_t y = 0; y < height; y++) { @@ -2085,7 
+2146,11 @@ namespace basisu if (total_chans) { for (uint32_t c = 0; c < total_chans; c++) + { hist[iabs(ca[first_chan + c] - cb[first_chan + c])]++; + m_sum_a += ca[first_chan + c]; + m_sum_b += cb[first_chan + c]; + } } else { @@ -2093,6 +2158,12 @@ namespace basisu hist[iabs(ca.get_601_luma() - cb.get_601_luma())]++; else hist[iabs(ca.get_709_luma() - cb.get_709_luma())]++; + + for (uint32_t c = 0; c < 3; c++) + { + m_sum_a += ca[c]; + m_sum_b += cb[c]; + } } } } @@ -2168,63 +2239,7 @@ namespace basisu } } - uint32_t hash_hsieh(const uint8_t *pBuf, size_t len) - { - if (!pBuf || !len) - return 0; - - uint32_t h = static_cast(len); - - const uint32_t bytes_left = len & 3; - len >>= 2; - - while (len--) - { - const uint16_t *pWords = reinterpret_cast(pBuf); - - h += pWords[0]; - - const uint32_t t = (pWords[1] << 11) ^ h; - h = (h << 16) ^ t; - - pBuf += sizeof(uint32_t); - - h += h >> 11; - } - - switch (bytes_left) - { - case 1: - h += *reinterpret_cast(pBuf); - h ^= h << 10; - h += h >> 1; - break; - case 2: - h += *reinterpret_cast(pBuf); - h ^= h << 11; - h += h >> 17; - break; - case 3: - h += *reinterpret_cast(pBuf); - h ^= h << 16; - h ^= (static_cast(pBuf[sizeof(uint16_t)])) << 18; - h += h >> 11; - break; - default: - break; - } - - h ^= h << 3; - h += h >> 5; - h ^= h << 4; - h += h >> 17; - h ^= h << 25; - h += h >> 6; - - return h; - } - - job_pool::job_pool(uint32_t num_threads) : + job_pool::job_pool(uint32_t num_threads) : m_num_active_jobs(0) { m_kill_flag.store(false); @@ -2246,13 +2261,13 @@ namespace basisu job_pool::~job_pool() { debug_printf("job_pool::~job_pool\n"); - + // Notify all workers that they need to die right now. { std::lock_guard lk(m_mutex); m_kill_flag.store(true); } - + m_has_work.notify_all(); #ifdef __EMSCRIPTEN__ @@ -2262,7 +2277,7 @@ namespace basisu break; std::this_thread::sleep_for(std::chrono::milliseconds(50)); } - + // At this point all worker threads should be exiting or exited. 
// We could call detach(), but this seems to just call join() anyway. #endif @@ -2271,7 +2286,7 @@ namespace basisu for (uint32_t i = 0; i < m_threads.size(); i++) m_threads[i].join(); } - + void job_pool::add_job(const std::function& job) { std::unique_lock lock(m_mutex); @@ -2291,7 +2306,7 @@ namespace basisu std::unique_lock lock(m_mutex); m_queue.emplace_back(std::move(job)); - + const size_t queue_size = m_queue.size(); lock.unlock(); @@ -2340,7 +2355,7 @@ namespace basisu //debug_printf("job_pool::job_thread: starting %u\n", index); m_num_active_workers.fetch_add(1); - + while (!m_kill_flag) { std::unique_lock lock(m_mutex); @@ -2376,9 +2391,9 @@ namespace basisu --m_num_active_jobs; - // Now check if there are no more jobs remaining. + // Now check if there are no more jobs remaining. const bool all_done = m_queue.empty() && !m_num_active_jobs; - + lock.unlock(); if (all_done) @@ -2439,7 +2454,7 @@ namespace basisu // Simple validation if ((hdr.m_cmap != 0) && (hdr.m_cmap != 1)) return nullptr; - + if (hdr.m_cmap) { if ((hdr.m_cmap_bpp == 0) || (hdr.m_cmap_bpp > 32)) @@ -2598,13 +2613,13 @@ namespace basisu bytes_remaining += bytes_to_skip; } } - + width = hdr.m_width; height = hdr.m_height; const uint32_t source_pitch = width * tga_bytes_per_pixel; const uint32_t dest_pitch = width * n_chans; - + uint8_t *pImage = (uint8_t *)malloc(dest_pitch * height); if (!pImage) return nullptr; @@ -2626,7 +2641,7 @@ namespace basisu int pixels_remaining = width; uint8_t *pDst = &input_line_buf[0]; - do + do { if (!run_remaining) { @@ -2811,7 +2826,7 @@ namespace basisu if (!filedata.size() || (filedata.size() > UINT32_MAX)) return nullptr; - + return read_tga(&filedata[0], (uint32_t)filedata.size(), width, height, n_chans); } @@ -2958,13 +2973,13 @@ namespace basisu if (cur_line.size() < 3) return false; - + if (!is_x && !is_y) return false; comp[d] = is_x ? 0 : 1; dir[d] = (is_neg_x || is_neg_y) ? -1 : 1; - + uint32_t& dim = d ? 
minor_dim : major_dim; cur_line.erase(0, 3); @@ -3002,7 +3017,7 @@ namespace basisu if ((dim < 1) || (dim > MAX_SUPPORTED_DIM)) return false; } - + // temp image: width=minor, height=major img.resize(minor_dim, major_dim); @@ -3030,7 +3045,7 @@ namespace basisu } else { - // c[0]/red is 2.Check GB and E for validity. + // c[0]/red is 2.Check GB and E for validity. color_rgba c; memcpy(&c, &filedata[cur_ofs], 4); @@ -3152,7 +3167,7 @@ namespace basisu // width=minor axis dimension // height=major axis dimension // in file, pixels are emitted in minor order, them major (so major=scanlines in the file) - + imagef final_img; if (comp[0] == 0) // if major axis is X final_img.resize(major_dim, minor_dim); @@ -3169,10 +3184,10 @@ namespace basisu uint32_t dst_x = 0, dst_y = 0; // is the minor dim output x? - if (comp[1] == 0) + if (comp[1] == 0) { // minor axis is x, major is y - + // is minor axis (which is output x) flipped? if (dir[1] < 0) dst_x = minor_dim - 1 - minor_iter; @@ -3231,7 +3246,7 @@ namespace basisu return buf; } - + static uint8_vec& append_string(uint8_vec& buf, const std::string& str) { if (!str.size()) @@ -3248,7 +3263,7 @@ namespace basisu if (max_v < 1e-32f) rgbe.clear(); - else + else { int e; const float scale = frexp(max_v, &e) * 256.0f / max_v; @@ -3261,14 +3276,14 @@ namespace basisu const bool RGBE_FORCE_RAW = false; const bool RGBE_FORCE_OLD_CRUNCH = false; // note must readers (particularly stb_image.h's) don't properly support this, when they should - + bool write_rgbe(uint8_vec &file_data, imagef& img, rgbe_header_info& hdr_info) { if (!img.get_width() || !img.get_height()) return false; const uint32_t width = img.get_width(), height = img.get_height(); - + file_data.resize(0); file_data.reserve(1024 + img.get_width() * img.get_height() * 4); @@ -3301,7 +3316,7 @@ namespace basisu { int prev_r = -1, prev_g = -1, prev_b = -1, prev_e = -1; uint32_t cur_run_len = 0; - + for (uint32_t x = 0; x < width; x++) { color_rgba rgbe; @@ -3314,7 
+3329,7 @@ namespace basisu // this ensures rshift stays 0, it's lame but this path is only for testing readers color_rgba f(1, 1, 1, cur_run_len - 1); append_vector(file_data, (const uint8_t*)&f, sizeof(f)); - append_vector(file_data, (const uint8_t*)&rgbe, sizeof(rgbe)); + append_vector(file_data, (const uint8_t*)&rgbe, sizeof(rgbe)); cur_run_len = 0; } } @@ -3324,12 +3339,12 @@ namespace basisu { color_rgba f(1, 1, 1, cur_run_len); append_vector(file_data, (const uint8_t*)&f, sizeof(f)); - + cur_run_len = 0; } - + append_vector(file_data, (const uint8_t*)&rgbe, sizeof(rgbe)); - + prev_r = rgbe[0]; prev_g = rgbe[1]; prev_b = rgbe[2]; @@ -3354,7 +3369,7 @@ namespace basisu { color_rgba rgbe(2, 2, width >> 8, width & 0xFF); append_vector(file_data, (const uint8_t*)&rgbe, sizeof(rgbe)); - + for (uint32_t x = 0; x < width; x++) { float2rgbe(rgbe, img(x, y)); @@ -3366,7 +3381,7 @@ namespace basisu for (uint32_t c = 0; c < 4; c++) { int raw_ofs = -1; - + uint32_t x = 0; while (x < width) { @@ -3381,7 +3396,7 @@ namespace basisu break; run_len++; } - + const uint32_t cost_to_keep_raw = ((raw_ofs != -1) ? 
0 : 1) + run_len; // 0 or 1 bytes to start a raw run, then the repeated bytes issued as raw const uint32_t cost_to_take_run = 2 + 1; // 2 bytes to issue the RLE, then 1 bytes to start whatever follows it (raw or RLE) @@ -3405,7 +3420,7 @@ namespace basisu raw_ofs = -1; file_data.push_back(cur_byte); - + x++; } } // x @@ -3424,7 +3439,7 @@ namespace basisu return false; return write_vec_to_file(pFilename, file_data); } - + bool read_exr(const char* pFilename, imagef& img, int& n_chans) { n_chans = 0; @@ -3432,7 +3447,7 @@ namespace basisu int width = 0, height = 0; float* out_rgba = nullptr; const char* err = nullptr; - + int status = LoadEXRWithLayer(&out_rgba, &width, &height, pFilename, nullptr, &err, &n_chans); if (status != 0) { @@ -3451,7 +3466,7 @@ namespace basisu } img.resize(width, height); - + if (n_chans == 1) { const float* pSrc = out_rgba; @@ -3505,16 +3520,16 @@ namespace basisu { assert((n_chans == 1) || (n_chans == 3) || (n_chans == 4)); - const bool linear_hint = (flags & WRITE_EXR_LINEAR_HINT) != 0, + const bool linear_hint = (flags & WRITE_EXR_LINEAR_HINT) != 0, store_float = (flags & WRITE_EXR_STORE_FLOATS) != 0, no_compression = (flags & WRITE_EXR_NO_COMPRESSION) != 0; - + const uint32_t width = img.get_width(), height = img.get_height(); assert(width && height); - + if (!width || !height) return false; - + float_vec layers[4]; float* image_ptrs[4]; for (uint32_t c = 0; c < n_chans; c++) @@ -3543,7 +3558,7 @@ namespace basisu assert(0); return false; } - + for (uint32_t y = 0; y < height; y++) { for (uint32_t x = 0; x < width; x++) @@ -3567,7 +3582,7 @@ namespace basisu image.height = height; header.num_channels = n_chans; - + header.channels = (EXRChannelInfo*)calloc(header.num_channels, sizeof(EXRChannelInfo)); // Must be (A)BGR order, since most of EXR viewers expect this channel order. 
@@ -3578,37 +3593,37 @@ namespace basisu c = "BGR"[i]; else if (n_chans == 4) c = "ABGR"[i]; - + header.channels[i].name[0] = c; header.channels[i].name[1] = '\0'; header.channels[i].p_linear = linear_hint; } - + header.pixel_types = (int*)calloc(header.num_channels, sizeof(int)); header.requested_pixel_types = (int*)calloc(header.num_channels, sizeof(int)); - + if (!no_compression) header.compression_type = TINYEXR_COMPRESSIONTYPE_ZIP; - for (int i = 0; i < header.num_channels; i++) + for (int i = 0; i < header.num_channels; i++) { // pixel type of input image - header.pixel_types[i] = TINYEXR_PIXELTYPE_FLOAT; + header.pixel_types[i] = TINYEXR_PIXELTYPE_FLOAT; // pixel type of output image to be stored in .EXR - header.requested_pixel_types[i] = store_float ? TINYEXR_PIXELTYPE_FLOAT : TINYEXR_PIXELTYPE_HALF; + header.requested_pixel_types[i] = store_float ? TINYEXR_PIXELTYPE_FLOAT : TINYEXR_PIXELTYPE_HALF; } const char* pErr_msg = nullptr; int ret = SaveEXRImageToFile(&image, &header, pFilename, &pErr_msg); - if (ret != TINYEXR_SUCCESS) + if (ret != TINYEXR_SUCCESS) { error_printf("Save EXR err: %s\n", pErr_msg); FreeEXRErrorMessage(pErr_msg); } - + free(header.channels); free(header.pixel_types); free(header.requested_pixel_types); @@ -3622,7 +3637,7 @@ namespace basisu va_list args; va_start(args, pFmt); -#ifdef _WIN32 +#ifdef _WIN32 vsprintf_s(buf, sizeof(buf), pFmt, args); #else vsnprintf(buf, sizeof(buf), pFmt, args); @@ -3647,7 +3662,7 @@ namespace basisu for (uint32_t x = 0; x < 8; x++) { const uint32_t q = row_bits & (1 << x); - + const color_rgba* pColor = q ? &fg : pBG; if (!pColor) continue; @@ -3667,8 +3682,8 @@ namespace basisu } } } - - // Very basic global Reinhard tone mapping, output converted to sRGB with no dithering, alpha is carried through unchanged. + + // Very basic global Reinhard tone mapping, output converted to sRGB with no dithering, alpha is carried through unchanged. // Only used for debugging/development. 
void tonemap_image_reinhard(image &ldr_img, const imagef &hdr_img, float exposure, bool add_noise, bool per_component, bool luma_scaling) { @@ -3678,7 +3693,7 @@ namespace basisu rand r; r.seed(128); - + for (uint32_t y = 0; y < height; y++) { for (uint32_t x = 0; x < width; x++) @@ -3713,7 +3728,7 @@ namespace basisu { //Lmapped = L / (1.0f + L); //Lmapped /= L; - + Lmapped = 1.0f / (1.0f + L); } @@ -3933,7 +3948,7 @@ namespace basisu dst_img.set_all(color_rgba(0, 0, 0, 255)); basisu::vector half_img(width * 3 * height); - + uint32_t low_h = UINT32_MAX, high_h = 0; for (uint32_t y = 0; y < height; y++) @@ -3957,7 +3972,7 @@ namespace basisu low_h = minimum(low_h, h); high_h = maximum(high_h, h); - + half_img[(x + y * width) * 3 + i] = (basist::half_float)h; } // i @@ -3974,7 +3989,7 @@ namespace basisu for (uint32_t i = 0; i < 3; i++) { basist::half_float h = half_img[(x + y * width) * 3 + i]; - + float f = (float)(h - low_h) / (float)(high_h - low_h); int iv = basisu::clamp((int)std::round(f * 255.0f), 0, 255); @@ -3988,6 +4003,328 @@ namespace basisu return true; } + bool arith_test() + { + basist::arith_fastbits_f32::init(); + + fmt_printf("random bit test\n"); + + const uint32_t N = 1000; + + // random bit test + for (uint32_t i = 0; i < N; i++) + { + basist::arith::arith_enc enc; + enc.init(4096); + + { + basisu::rand r; + r.seed(i + 1); + uint32_t num_vals = r.irand(1, 20000); + + for (uint32_t j = 0; j < num_vals; j++) + enc.put_bit(r.bit()); + + enc.flush(); + } + + { + basisu::rand r; + r.seed(i + 1); + uint32_t num_vals = r.irand(1, 20000); + + basist::arith::arith_dec dec; + dec.init(enc.get_data_buf().get_ptr(), enc.get_data_buf().size()); + + for (uint32_t j = 0; j < num_vals; j++) + { + uint32_t t = r.bit(); + + uint32_t a = dec.get_bit(); + if (t != a) + { + fmt_printf("error!"); + return false; + } + } + } + } + + fmt_printf("Random bit test OK\n"); + + fmt_printf("random bits test\n"); + + // random bits test + for (uint32_t i = 0; i < N; i++) + { 
+ basist::arith::arith_enc enc; + enc.init(4096); + + { + basisu::rand r; + r.seed(i + 1); + uint32_t num_vals = r.irand(1, 20000); + uint32_t num_bits = r.irand(1, 20); + + for (uint32_t j = 0; j < num_vals; j++) + enc.put_bits(r.urand32() & ((1 << num_bits) - 1), num_bits); + + enc.flush(); + } + + { + basisu::rand r; + r.seed(i + 1); + uint32_t num_vals = r.irand(1, 20000); + uint32_t num_bits = r.irand(1, 20); + + basist::arith::arith_dec dec; + dec.init(enc.get_data_buf().get_ptr(), enc.get_data_buf().size()); + + for (uint32_t j = 0; j < num_vals; j++) + { + uint32_t t = r.urand32() & ((1 << num_bits) - 1); + + uint32_t a = dec.get_bits(num_bits); + if (t != a) + { + fmt_printf("error!"); + return false; + } + } + } + } + + fmt_printf("Random bits test OK\n"); + + fmt_printf("random adaptive bit model test\n"); + + // adaptive bit model random test + for (uint32_t i = 0; i < N; i++) + { + basist::arith::arith_enc enc; + enc.init(4096); + + { + basisu::rand r; + r.seed(i + 1); + uint32_t num_vals = r.irand(1, 20000); + + basist::arith::arith_bit_model bm; + bm.init(); + + for (uint32_t j = 0; j < num_vals; j++) + enc.encode(r.bit(), bm); + + enc.flush(); + } + + { + basisu::rand r; + r.seed(i + 1); + uint32_t num_vals = r.irand(1, 20000); + + basist::arith::arith_dec dec; + dec.init(enc.get_data_buf().get_ptr(), enc.get_data_buf().size()); + + basist::arith::arith_bit_model bm; + bm.init(); + + for (uint32_t j = 0; j < num_vals; j++) + { + uint32_t t = r.bit(); + + uint32_t a = dec.decode_bit(bm); + if (t != a) + { + fmt_printf("error!"); + return false; + } + } + } + } + fmt_printf("Random adaptive bits test OK\n"); + + fmt_printf("random adaptive bit model 0 or 1 run test\n"); + + // adaptive bit model 0 or 1 test + for (uint32_t i = 0; i < N; i++) + { + basist::arith::arith_enc enc; + enc.init(4096); + + { + basisu::rand r; + r.seed(i + 1); + uint32_t num_vals = r.irand(1, 20000); + + basist::arith::arith_bit_model bm; + bm.init(); + + for (uint32_t j = 0; 
j < num_vals; j++) + enc.encode(i & 1, bm); + + enc.flush(); + } + + { + basisu::rand r; + r.seed(i + 1); + uint32_t num_vals = r.irand(1, 20000); + + basist::arith::arith_dec dec; + dec.init(enc.get_data_buf().get_ptr(), enc.get_data_buf().size()); + + basist::arith::arith_bit_model bm; + bm.init(); + + for (uint32_t j = 0; j < num_vals; j++) + { + uint32_t t = i & 1; + + uint32_t a = dec.decode_bit(bm); + if (t != a) + { + fmt_printf("error!"); + return false; + } + } + } + } + + fmt_printf("Adaptive bit model 0 or 1 run test OK\n"); + + fmt_printf("random adaptive bit model 0 or 1 run 2 test\n"); + + // adaptive bit model 0 or 1 run test + for (uint32_t i = 0; i < N; i++) + { + basist::arith::arith_enc enc; + enc.init(4096); + + { + basisu::rand r; + r.seed(i + 1); + uint32_t num_vals = r.irand(1, 2000); + + basist::arith::arith_bit_model bm; + bm.init(); + + for (uint32_t j = 0; j < num_vals; j++) + { + const uint32_t run_len = r.irand(1, 128); + const uint32_t t = r.bit(); + for (uint32_t k = 0; k < run_len; k++) + enc.encode(t, bm); + } + + if (r.frand(0.0f, 1.0f) < .1f) + { + for (uint32_t q = 0; q < 1000; q++) + enc.encode(0, bm); + } + + enc.flush(); + } + + { + basisu::rand r; + r.seed(i + 1); + uint32_t num_vals = r.irand(1, 2000); + + basist::arith::arith_dec dec; + dec.init(enc.get_data_buf().get_ptr(), enc.get_data_buf().size()); + + basist::arith::arith_bit_model bm; + bm.init(); + + for (uint32_t j = 0; j < num_vals; j++) + { + const uint32_t run_len = r.irand(1, 128); + const uint32_t t = r.bit(); + + for (uint32_t k = 0; k < run_len; k++) + { + uint32_t a = dec.decode_bit(bm); + if (a != t) + { + fmt_printf("adaptive bit model random run test failed!\n"); + return false; + } + } + } + + if (r.frand(0.0f, 1.0f) < .1f) + { + for (uint32_t q = 0; q < 1000; q++) + { + uint32_t d = dec.decode_bit(bm); + if (d != 0) + { + fmt_printf("adaptive bit model random run test failed!\n"); + return false; + } + } + } + } + } + + fmt_printf("Random data model 
test\n"); + + // random data model test + for (uint32_t i = 0; i < N; i++) + { + basist::arith::arith_enc enc; + enc.init(4096); + + { + basisu::rand r; + r.seed(i + 1); + const uint32_t num_vals = r.irand(1, 60000); + + uint32_t num_syms = r.irand(2, basist::arith::ArithMaxSyms); + + basist::arith::arith_data_model dm; + dm.init(num_syms); + + for (uint32_t j = 0; j < num_vals; j++) + enc.encode(r.irand(0, num_syms - 1), dm); + + enc.flush(); + } + + { + basisu::rand r; + r.seed(i + 1); + uint32_t num_vals = r.irand(1, 60000); + + const uint32_t num_syms = r.irand(2, basist::arith::ArithMaxSyms); + + basist::arith::arith_dec dec; + dec.init(enc.get_data_buf().get_ptr(), enc.get_data_buf().size()); + + basist::arith::arith_data_model dm; + dm.init(num_syms); + + for (uint32_t j = 0; j < num_vals; j++) + { + uint32_t expected = r.irand(0, num_syms - 1); + uint32_t actual = dec.decode_sym(dm); + if (actual != expected) + { + fmt_printf("adaptive data model random test failed!\n"); + return false; + } + } + } + } + + fmt_printf("Adaptive data model random test OK\n"); + + fmt_printf("Overall OK\n"); + return true; + } + static void rasterize_line(image& dst, int xs, int ys, int xe, int ye, int pred, int inc_dec, int e, int e_inc, int e_no_inc, const color_rgba& color) { int start, end, var; @@ -4023,6 +4360,7 @@ namespace basisu } } } + void draw_line(image& dst, int xs, int ys, int xe, int ye, const color_rgba& color) { if (xs > xe) @@ -4084,7 +4422,7 @@ namespace basisu int y = 0; int err = 1 - x; - while (x >= y) + while (x >= y) { dst.set_clipped(cx + x, cy + y, color); dst.set_clipped(cx + y, cy + x, color); @@ -4097,11 +4435,11 @@ namespace basisu ++y; - if (err < 0) + if (err < 0) { err += 2 * y + 1; } - else + else { --x; err += 2 * (y - x) + 1; @@ -4116,4 +4454,272 @@ namespace basisu img(x, y).a = (uint8_t)a; } + // red=3 subsets, blue=2 subsets, green=mode 6, white=mode 7, purple = 2 plane + const color_rgba g_bc7_mode_vis_colors[8] = + { + color_rgba(190, 
0, 0, 255), // 0 + color_rgba(0, 0, 255, 255), // 1 + color_rgba(255, 0, 0, 255), // 2 + color_rgba(0, 0, 130, 255), // 3 + color_rgba(255, 0, 255, 255), // 4 + color_rgba(190, 0, 190, 255), // 5 + color_rgba(50, 167, 30, 255), // 6 + color_rgba(255, 255, 255, 255) // 7 + }; + + void create_bc7_debug_images( + uint32_t width, uint32_t height, + const void *pBlocks, + const char *pFilename_prefix) + { + assert(width && height && pBlocks ); + + const uint32_t num_bc7_blocks_x = (width + 3) >> 2; + const uint32_t num_bc7_blocks_y = (height + 3) >> 2; + const uint32_t total_bc7_blocks = num_bc7_blocks_x * num_bc7_blocks_y; + + image bc7_mode_vis(width, height); + + uint32_t bc7_mode_hist[9] = {}; + + uint32_t mode4_index_hist[2] = {}; + uint32_t mode4_rot_hist[4] = {}; + uint32_t mode5_rot_hist[4] = {}; + + uint32_t num_2subsets = 0, num_3subsets = 0, num_dp = 0; + + uint32_t total_solid_bc7_blocks = 0; + uint32_t num_unpack_failures = 0; + + for (uint32_t by = 0; by < num_bc7_blocks_y; by++) + { + const uint32_t base_y = by * 4; + + for (uint32_t bx = 0; bx < num_bc7_blocks_x; bx++) + { + const uint32_t base_x = bx * 4; + + const basist::bc7_block& blk = ((const basist::bc7_block *)pBlocks)[bx + by * num_bc7_blocks_x]; + + color_rgba unpacked_pixels[16]; + bool status = basist::bc7u::unpack_bc7(&blk, (basist::color_rgba*)unpacked_pixels); + if (!status) + num_unpack_failures++; + + int mode_index = basist::bc7u::determine_bc7_mode(&blk); + + bool is_solid = false; + + // assumes our transcoder's analytical BC7 encoder wrote the solid block + if (mode_index == 5) + { + const uint8_t* pBlock_bytes = (const uint8_t *)&blk; + + if (pBlock_bytes[0] == 0b00100000) + { + static const uint8_t s_tail_bytes[8] = { 0xac, 0xaa, 0xaa, 0xaa, 0, 0, 0, 0 }; + if ((pBlock_bytes[8] & ~3) == (s_tail_bytes[0] & ~3)) + { + if (memcmp(pBlock_bytes + 9, s_tail_bytes + 1, 7) == 0) + { + is_solid = true; + } + } + } + } + + total_solid_bc7_blocks += is_solid; + + if ((mode_index == 0) || 
(mode_index == 2)) + num_3subsets++; + else if ((mode_index == 1) || (mode_index == 3)) + num_2subsets++; + + bc7_mode_hist[mode_index + 1]++; + + if (mode_index == 4) + { + num_dp++; + mode4_index_hist[range_check(basist::bc7u::determine_bc7_mode_4_index_mode(&blk), 0, 1)]++; + mode4_rot_hist[range_check(basist::bc7u::determine_bc7_mode_4_or_5_rotation(&blk), 0, 3)]++; + } + else if (mode_index == 5) + { + num_dp++; + mode5_rot_hist[range_check(basist::bc7u::determine_bc7_mode_4_or_5_rotation(&blk), 0, 3)]++; + } + + color_rgba c((mode_index < 0) ? g_black_color : g_bc7_mode_vis_colors[mode_index]); + + if (is_solid) + c.set(64, 0, 64, 255); + + bc7_mode_vis.fill_box(base_x, base_y, 4, 4, c); + + } // bx + + } // by + + fmt_debug_printf("--------- BC7 statistics:\n"); + fmt_debug_printf("\nTotal BC7 unpack failures: {}\n", num_unpack_failures); + fmt_debug_printf("Total solid blocks: {} {3.2}%\n", total_solid_bc7_blocks, (float)total_solid_bc7_blocks * (float)100.0f / (float)total_bc7_blocks); + + fmt_debug_printf("\nTotal 2-subsets: {} {3.2}%\n", num_2subsets, (float)num_2subsets * 100.0f / (float)total_bc7_blocks); + fmt_debug_printf("Total 3-subsets: {} {3.2}%\n", num_3subsets, (float)num_3subsets * 100.0f / (float)total_bc7_blocks); + fmt_debug_printf("Total Dual Plane: {} {3.2}%\n", num_dp, (float)num_dp * 100.0f / (float)total_bc7_blocks); + + fmt_debug_printf("\nBC7 mode histogram:\n"); + for (int i = -1; i <= 7; i++) + { + fmt_debug_printf(" {}: {} {3.3}%\n", i, bc7_mode_hist[1 + i], (float)bc7_mode_hist[1 + i] * 100.0f / (float)total_bc7_blocks); + } + + fmt_debug_printf("\nMode 4 index bit histogram: {} {3.2}%, {} {3.2}%\n", + mode4_index_hist[0], (float)mode4_index_hist[0] * 100.0f / (float)total_bc7_blocks, + mode4_index_hist[1], (float)mode4_index_hist[1] * 100.0f / (float)total_bc7_blocks); + + fmt_debug_printf("\nMode 4 rotation histogram:\n"); + for (uint32_t i = 0; i < 4; i++) + { + fmt_debug_printf(" {}: {} {3.2}%\n", i, mode4_rot_hist[i], 
(float)mode4_rot_hist[i] * 100.0f / (float)total_bc7_blocks); + } + + fmt_debug_printf("\nMode 5 rotation histogram:\n"); + for (uint32_t i = 0; i < 4; i++) + { + fmt_debug_printf(" {}: {} {3.2}%\n", i, mode5_rot_hist[i], (float)mode5_rot_hist[i] * 100.0f / (float)total_bc7_blocks); + } + + if (pFilename_prefix) + { + std::string mode_vis_filename(std::string(pFilename_prefix) + "bc7_mode_vis.png"); + save_png(mode_vis_filename, bc7_mode_vis); + + fmt_debug_printf("Wrote BC7 mode visualization to PNG file {}\n", mode_vis_filename); + } + + fmt_debug_printf("--------- End BC7 statistics\n"); + fmt_debug_printf("\n"); + } + + static inline float edge(const vec2F& a, const vec2F& b, const vec2F& pos) + { + return (pos[0] - a[0]) * (b[1] - a[1]) - (pos[1] - a[1]) * (b[0] - a[0]); + } + + void draw_tri2(image& dst, const image* pTex, const tri2& tri, bool alpha_blend) + { + assert(dst.get_total_pixels()); + + float area = edge(tri.p0, tri.p1, tri.p2); + if (std::fabs(area) < 1e-6f) + return; + + const float oo_area = 1.0f / area; + + int minx = (int)std::floor(basisu::minimum(tri.p0[0], tri.p1[0], tri.p2[0] )); + int miny = (int)std::floor(basisu::minimum(tri.p0[1], tri.p1[1], tri.p2[1] )); + + int maxx = (int)std::ceil(basisu::maximum(tri.p0[0], tri.p1[0], tri.p2[0])); + int maxy = (int)std::ceil(basisu::maximum(tri.p0[1], tri.p1[1], tri.p2[1])); + + auto clamp8 = [&](float fv) { int v = (int)(fv + .5f); if (v < 0) v = 0; else if (v > 255) v = 255; return (uint8_t)v; }; + + if ((maxx < 0) || (maxy < 0)) + return; + if ((minx >= (int)dst.get_width()) || (miny >= (int)dst.get_height())) + return; + + if (minx < 0) + minx = 0; + if (maxx >= (int)dst.get_width()) + maxx = dst.get_width() - 1; + if (miny < 0) + miny = 0; + if (maxy >= (int)dst.get_height()) + maxy = dst.get_height() - 1; + + vec4F tex(1.0f); + + for (int y = miny; y <= maxy; ++y) + { + assert((y >= 0) && (y < (int)dst.get_height())); + + for (int x = minx; x <= maxx; ++x) + { + assert((x >= 0) && (x < 
(int)dst.get_width())); + + vec2F p{ (float)x + 0.5f, (float)y + 0.5f }; + + float w0 = edge(tri.p1, tri.p2, p) * oo_area; + float w1 = edge(tri.p2, tri.p0, p) * oo_area; + float w2 = edge(tri.p0, tri.p1, p) * oo_area; + + if ((w0 < 0) || (w1 < 0) || (w2 < 0)) + continue; + + float u = tri.t0[0] * w0 + tri.t1[0] * w1 + tri.t2[0] * w2; + float v = tri.t0[1] * w0 + tri.t1[1] * w1 + tri.t2[1] * w2; + + if (pTex) + tex = pTex->get_filtered_vec4F(u * float(pTex->get_width()), v * float(pTex->get_height())) * (1.0f / 255.0f); + + float r = (float)tri.c0.r * w0 + (float)tri.c1.r * w1 + (float)tri.c2.r * w2; + float g = (float)tri.c0.g * w0 + (float)tri.c1.g * w1 + (float)tri.c2.g * w2; + float b = (float)tri.c0.b * w0 + (float)tri.c1.b * w1 + (float)tri.c2.b * w2; + float a = (float)tri.c0.a * w0 + (float)tri.c1.a * w1 + (float)tri.c2.a * w2; + + r *= tex[0]; + g *= tex[1]; + b *= tex[2]; + a *= tex[3]; + + if (alpha_blend) + { + color_rgba dst_color(dst(x, y)); + + const float fa = (float)a * (1.0f / 255.0f); + + r = lerp((float)dst_color[0], r, fa); + g = lerp((float)dst_color[1], g, fa); + b = lerp((float)dst_color[2], b, fa); + a = lerp((float)dst_color[3], a, fa); + + dst(x, y) = color_rgba(clamp8(r), clamp8(g), clamp8(b), clamp8(a)); + } + else + { + dst(x, y) = color_rgba(clamp8(r), clamp8(g), clamp8(b), clamp8(a)); + } + + } // x + } // y + } + + // macro sent by CMakeLists.txt file when (TARGET_WASM AND WASM_THREADING) +#if BASISU_WASI_THREADS + // Default to 8 - seems reasonable. 
+ static int g_num_wasi_threads = 8; +#endif + +#ifdef __wasi__ + void set_num_wasi_threads(uint32_t num_threads) + { + g_num_wasi_threads = num_threads; + } +#else + void set_num_wasi_threads(uint32_t) { } +#endif + + int get_num_hardware_threads() + { +#ifdef __wasi__ + int num_threads = g_num_wasi_threads; +#else + int num_threads = std::thread::hardware_concurrency(); +#endif + + return num_threads; + } + } // namespace basisu diff --git a/external/basis_universal/encoder/basisu_enc.h b/external/basis_universal/encoder/basisu_enc.h index 0c644e8a28..ebf54737d0 100644 --- a/external/basis_universal/encoder/basisu_enc.h +++ b/external/basis_universal/encoder/basisu_enc.h @@ -1,5 +1,5 @@ // basisu_enc.h -// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2026 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -40,6 +40,7 @@ namespace basisu { extern uint8_t g_hamming_dist[256]; extern const uint8_t g_debug_font8x8_basic[127 - 32 + 1][8]; + extern float g_srgb_to_linear_table[256]; // sRGB EOTF->linear light [0,1], 1=~100 nits // true if basisu_encoder_init() has been called and returned. extern bool g_library_initialized; @@ -61,7 +62,7 @@ namespace basisu void error_vprintf(const char* pFmt, va_list args); void error_printf(const char *pFmt, ...); - + template inline void fmt_error_printf(const char* pFmt, Args&&... 
args) { @@ -72,7 +73,7 @@ namespace basisu } void platform_sleep(uint32_t ms); - + // Helpers inline uint8_t clamp255(int32_t i) @@ -92,21 +93,27 @@ namespace basisu return val << shift; } - inline int32_t clampi(int32_t value, int32_t low, int32_t high) - { - if (value < low) - value = low; - else if (value > high) - value = high; - return value; + inline int32_t clampi(int32_t value, int32_t low, int32_t high) + { + if (value < low) + value = low; + else if (value > high) + value = high; + return value; } inline uint8_t mul_8(uint32_t v, uint32_t a) { - v = v * a + 128; + v = v * a + 128; return (uint8_t)((v + (v >> 8)) >> 8); } + inline int fast_roundf_pos_int(float x) + { + assert(x >= 0.0f); + return (int)(x + 0.5f); + } + inline int fast_roundf_int(float x) { return (x >= 0.0f) ? (int)(x + 0.5f) : (int)(x - 0.5f); @@ -163,7 +170,7 @@ namespace basisu return bits; } - + // Open interval inline int bounds_check(int v, int l, int h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; } inline uint32_t bounds_check(uint32_t v, uint32_t l, uint32_t h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; } @@ -211,10 +218,10 @@ namespace basisu return -1; return (int)res; } - + // Hashing - - inline uint32_t bitmix32c(uint32_t v) + + inline uint32_t bitmix32c(uint32_t v) { v = (v + 0x7ed55d16) + (v << 12); v = (v ^ 0xc761c23c) ^ (v >> 19); @@ -225,7 +232,7 @@ namespace basisu return v; } - inline uint32_t bitmix32(uint32_t v) + inline uint32_t bitmix32(uint32_t v) { v -= (v << 6); v ^= (v >> 17); @@ -246,29 +253,7 @@ namespace basisu seed = seed ^ (seed >> 15); return seed; } - - uint32_t hash_hsieh(const uint8_t* pBuf, size_t len); - - template - struct bit_hasher - { - inline std::size_t operator()(const Key& k) const - { - return hash_hsieh(reinterpret_cast(&k), sizeof(k)); - } - }; - - struct string_hasher - { - inline std::size_t operator()(const std::string& k) const - { - size_t l = k.size(); - if (!l) - return 0; - return 
hash_hsieh(reinterpret_cast(k.c_str()), l); - } - }; - + class running_stat { public: @@ -351,7 +336,7 @@ namespace basisu }; // Linear algebra - + template class vec { @@ -456,7 +441,7 @@ namespace basisu inline const T *get_ptr() const { return reinterpret_cast(&m_v[0]); } inline T *get_ptr() { return reinterpret_cast(&m_v[0]); } - + inline vec operator- () const { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = -m_v[i]; return res; } inline vec operator+ () const { return *this; } inline vec &operator+= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] += other.m_v[i]; return *this; } @@ -465,17 +450,19 @@ namespace basisu inline vec &operator*=(const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] *= other.m_v[i]; return *this; } inline vec &operator/= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] /= s; return *this; } inline vec &operator*= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] *= s; return *this; } - + friend inline vec operator+(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] + rhs.m_v[i]; return res; } friend inline vec operator-(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] - rhs.m_v[i]; return res; } friend inline vec operator*(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] * val; return res; } friend inline vec operator*(T val, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = val * rhs.m_v[i]; return res; } friend inline vec operator/(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / val; return res; } friend inline vec operator/(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / rhs.m_v[i]; return res; } - + static inline T dot_product(const vec &lhs, const vec &rhs) { T res = lhs.m_v[0] * rhs.m_v[0]; for (uint32_t i = 1; i < N; i++) res += lhs.m_v[i] * 
rhs.m_v[i]; return res; } + static inline T dot_product3(const vec& lhs, const vec& rhs) { T res = lhs.m_v[0] * rhs.m_v[0]; for (uint32_t i = 1; i < minimum(3u, N); i++) res += lhs.m_v[i] * rhs.m_v[i]; return res; } inline T dot(const vec &rhs) const { return dot_product(*this, rhs); } + inline T dot3(const vec& rhs) const { return dot_product3(*this, rhs); } inline T norm() const { return dot_product(*this, *this); } inline T length() const { return sqrt(norm()); } @@ -546,7 +533,7 @@ namespace basisu template struct bitwise_copyable< vec > { enum { cFlag = true }; }; template struct bitwise_movable< vec > { enum { cFlag = true }; }; - + template class matrix { @@ -810,7 +797,7 @@ namespace basisu } #undef BASISU_GET_KEY - + // Very simple job pool with no dependencies. class job_pool { @@ -820,24 +807,24 @@ namespace basisu // num_threads is the TOTAL number of job pool threads, including the calling thread! So 2=1 new thread, 3=2 new threads, etc. job_pool(uint32_t num_threads); ~job_pool(); - + void add_job(const std::function& job); void add_job(std::function&& job); void wait_for_all(); size_t get_total_threads() const { return 1 + m_threads.size(); } - + private: std::vector m_threads; std::vector > m_queue; - + std::mutex m_mutex; std::condition_variable m_has_work; std::condition_variable m_no_more_jobs; - + uint32_t m_num_active_jobs; - + std::atomic m_kill_flag; std::atomic m_num_active_workers; @@ -882,7 +869,7 @@ namespace basisu return *this; } }; - + class color_rgba { public: @@ -1006,7 +993,7 @@ namespace basisu inline const uint8_t &operator[] (uint32_t index) const { assert(index < 4); return m_comps[index]; } inline uint8_t &operator[] (uint32_t index) { assert(index < 4); return m_comps[index]; } - + inline void clear() { m_comps[0] = 0; @@ -1042,7 +1029,7 @@ namespace basisu } inline int get_601_luma() const { return (19595U * m_comps[0] + 38470U * m_comps[1] + 7471U * m_comps[2] + 32768U) >> 16U; } - inline int get_709_luma() const { return 
(13938U * m_comps[0] + 46869U * m_comps[1] + 4729U * m_comps[2] + 32768U) >> 16U; } + inline int get_709_luma() const { return (13938U * m_comps[0] + 46869U * m_comps[1] + 4729U * m_comps[2] + 32768U) >> 16U; } inline int get_luma(bool luma_601) const { return luma_601 ? get_601_luma() : get_709_luma(); } inline uint32_t get_bgra_uint32() const { return b | (g << 8) | (r << 16) | (a << 24); } @@ -1114,7 +1101,7 @@ namespace basisu return color_distance(e1, e2, alpha); } } - + inline uint32_t color_distance(bool perceptual, const color_rgba &e1, const color_rgba &e2, bool alpha) { if (perceptual) @@ -1127,7 +1114,7 @@ namespace basisu int delta_l = dr * 14 + dg * 45 + db * 5; int delta_cr = dr * 64 - delta_l; int delta_cb = db * 64 - delta_l; - + // not >> 6, so the output is scaled by 7 bits, not 6 (to match the original function which scaled by 7, but had rare overflow issues) uint32_t id = ((uint32_t)(delta_l * delta_l) >> 5U) + ((((uint32_t)(delta_cr * delta_cr) >> 5U) * 26U) >> 7U) + @@ -1151,7 +1138,7 @@ namespace basisu if (alpha) { int da = (e1.a - e2.a) << 7; - + // This shouldn't overflow if da is 255 or -255: 29.99 bits after squaring. uint32_t ea = ((uint32_t)(da * da) >> 7U); id += ea; @@ -1160,7 +1147,7 @@ namespace basisu // Make sure it can't overflow assert((((int64_t)da * (int64_t)da) >> 7) == ea); #endif - + } return id; @@ -1222,7 +1209,7 @@ namespace basisu return true; } - + inline std::string string_tolower(const std::string& s) { std::string result(s); @@ -1267,7 +1254,7 @@ namespace basisu char fname_buf[_MAX_FNAME] = { 0 }; char ext_buf[_MAX_EXT] = { 0 }; - errno_t error = _splitpath_s(p, + errno_t error = _splitpath_s(p, pDrive ? drive_buf : NULL, pDrive ? _MAX_DRIVE : 0, pDir ? dir_buf : NULL, pDir ? _MAX_DIR : 0, pFilename ? fname_buf : NULL, pFilename ? 
_MAX_FNAME : 0, @@ -1299,7 +1286,7 @@ namespace basisu if ((pDir->size()) && (pDir->back() != '/')) *pDir += "/"; } - + if (pFilename) { *pFilename = pBaseName; @@ -1326,7 +1313,7 @@ namespace basisu return (c == '/'); #endif } - + inline bool is_drive_separator(char c) { #ifdef _WIN32 @@ -1354,7 +1341,7 @@ namespace basisu string_combine_path(dst, p, q); string_combine_path(dst, dst.c_str(), r); } - + inline void string_combine_path_and_extension(std::string &dst, const char *p, const char *q, const char *r, const char *pExt) { string_combine_path(dst, p, q, r); @@ -1556,7 +1543,7 @@ namespace basisu codebook.resize(0); codebook.reserve(max_clusters); - + uint32_t node_index = 0; while (true) @@ -1567,7 +1554,7 @@ namespace basisu { codebook.resize(codebook.size() + 1); codebook.back() = cur.m_training_vecs; - + if (node_stack.empty()) break; @@ -1575,7 +1562,7 @@ namespace basisu node_stack.pop_back(); continue; } - + node_stack.push_back(cur.m_right_index); node_index = cur.m_left_index; } @@ -1616,7 +1603,7 @@ namespace basisu assert(node.is_leaf()); var_heap.delete_top(); - + if (node.m_training_vecs.size() > 1) { if (split_node(node_index, var_heap, l_children, r_children)) @@ -1705,7 +1692,7 @@ namespace basisu m_nodes[node_index].m_left_index = l_child_index; m_nodes[node_index].m_right_index = r_child_index; - + m_nodes[node_index].m_codebook_index = m_next_codebook_index; m_next_codebook_index++; @@ -1719,7 +1706,7 @@ namespace basisu if ((l_child.m_var <= 0.0f) && (l_child.m_training_vecs.size() > 1)) { TrainingVectorType v(m_training_vecs[l_child.m_training_vecs[0]].first); - + for (uint32_t i = 1; i < l_child.m_training_vecs.size(); i++) { if (!(v == m_training_vecs[l_child.m_training_vecs[i]].first)) @@ -1746,10 +1733,10 @@ namespace basisu if ((l_child.m_var > 0.0f) && (l_child.m_training_vecs.size() > 1)) var_heap.add_heap(l_child_index, l_child.m_var); - + if ((r_child.m_var > 0.0f) && (r_child.m_training_vecs.size() > 1)) 
var_heap.add_heap(r_child_index, r_child.m_var); - + return true; } @@ -1845,7 +1832,7 @@ namespace basisu for (uint32_t i = 0; i < node.m_training_vecs.size(); i++) { const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first; - + l = TrainingVectorType::component_min(l, v); h = TrainingVectorType::component_max(h, v); } @@ -1926,8 +1913,8 @@ namespace basisu const uint32_t cMaxIters = 6; for (uint32_t iter = 0; iter < cMaxIters; iter++) { - l_children.resize(0); - r_children.resize(0); + l_children.resize(0); + r_children.resize(0); TrainingVectorType new_l_child(cZero), new_r_child(cZero); @@ -1975,11 +1962,12 @@ namespace basisu r_weight = 0; TrainingVectorType firstVec; + firstVec.clear(); for (uint32_t i = 0; i < node.m_training_vecs.size(); i++) { const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first; const uint64_t weight = m_training_vecs[node.m_training_vecs[i]].second; - + if ((!i) || (v == firstVec)) { firstVec = v; @@ -2081,7 +2069,7 @@ namespace basisu } Quantizer quantizers[cMaxThreads]; - + bool success_flags[cMaxThreads]; clear_obj(success_flags); @@ -2180,12 +2168,12 @@ namespace basisu // rg 6/24/2025 - Cross platform determinism #if 0 - typedef std::unordered_map < typename Quantizer::training_vec_type, weighted_block_group, + typedef std::unordered_map < typename Quantizer::training_vec_type, weighted_block_group, training_vec_bit_hasher> group_hash; #else typedef std::map< typename Quantizer::training_vec_type, weighted_block_group > group_hash; #endif - + //interval_timer tm; //tm.start(); @@ -2197,7 +2185,7 @@ namespace basisu #endif weighted_block_group g; - + if (even_odd_input_pairs_equal) { g.m_indices.resize(2); @@ -2282,7 +2270,7 @@ namespace basisu typename group_hash::const_iterator group_iter = unique_vec_iters[group_index]; const uint_vec& training_vec_indices = group_iter->second.m_indices; - + append_vector(codebook.back(), training_vec_indices); } } @@ -2359,7 +2347,7 @@ namespace basisu 
const double inv_total = 1.0f / total; const double neg_inv_log2 = -1.0f / log(2.0f); - + double e = 0.0f; for (uint32_t i = 0; i < m_hist.size(); i++) if (m_hist[i]) @@ -2368,7 +2356,7 @@ namespace basisu return e; } }; - + struct sym_freq { uint32_t m_key; @@ -2378,7 +2366,7 @@ namespace basisu sym_freq *canonical_huffman_radix_sort_syms(uint32_t num_syms, sym_freq *pSyms0, sym_freq *pSyms1); void canonical_huffman_calculate_minimum_redundancy(sym_freq *A, int num_syms); void canonical_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size); - + class huffman_encoding_table { public: @@ -2399,7 +2387,7 @@ namespace basisu bool init(uint32_t num_syms, const uint16_t *pFreq, uint32_t max_code_size); bool init(uint32_t num_syms, const uint32_t *pSym_freq, uint32_t max_code_size); - + inline const uint16_vec &get_codes() const { return m_codes; } inline const uint8_vec &get_code_sizes() const { return m_code_sizes; } @@ -2430,7 +2418,7 @@ namespace basisu m_bytes(other.m_bytes), m_bit_buffer(other.m_bit_buffer), m_bit_buffer_size(other.m_bit_buffer_size), - m_total_bits(other.m_total_bits) + m_total_bits(other.m_total_bits) { } @@ -2512,7 +2500,7 @@ namespace basisu m_bit_buffer = 0; m_bit_buffer_size = 0; - + return 8; } @@ -2561,7 +2549,7 @@ namespace basisu if (v < u) return put_bits(v, k); - + uint32_t x = v + u; assert((x >> 1) >= u); @@ -2573,20 +2561,20 @@ namespace basisu inline uint32_t put_rice(uint32_t v, uint32_t m) { assert(m); - + const uint64_t start_bits = m_total_bits; uint32_t q = v >> m, r = v & ((1 << m) - 1); // rice coding sanity check assert(q <= 64); - + for (; q > 16; q -= 16) put_bits(0xFFFF, 16); put_bits((1 << q) - 1, q); put_bits(r << 1, m + 1); - + return (uint32_t)(m_total_bits - start_bits); } @@ -2596,13 +2584,13 @@ namespace basisu const uint32_t chunk_size = 1 << chunk_bits; const uint32_t chunk_mask = chunk_size - 1; - + uint32_t total_bits = 0; for ( ; ; ) { uint32_t next_v = v >> chunk_bits; - + 
total_bits += put_bits((v & chunk_mask) | (next_v ? chunk_size : 0), chunk_bits + 1); if (!next_v) break; @@ -2619,11 +2607,11 @@ namespace basisu { for (uint32_t i = 0; i < other.m_bytes.size(); i++) put_bits(other.m_bytes[i], 8); - + if (other.m_bit_buffer_size) put_bits(other.m_bit_buffer, other.m_bit_buffer_size); } - + private: uint8_vec m_bytes; uint32_t m_bit_buffer, m_bit_buffer_size; @@ -2653,7 +2641,7 @@ namespace basisu inline void init(uint32_t bits_per_sym, uint32_t total_syms_per_group) { assert((bits_per_sym * total_syms_per_group) <= 16 && total_syms_per_group >= 1 && bits_per_sym >= 1); - + m_bits_per_sym = bits_per_sym; m_total_syms_per_group = total_syms_per_group; m_cur_sym_bits = 0; @@ -2707,7 +2695,7 @@ namespace basisu return true; } - + inline uint32_t emit_next_sym(bitwise_coder &c) { uint32_t bits = 0; @@ -2737,7 +2725,7 @@ namespace basisu bool huffman_test(int rand_seed); // VQ index reordering - + class palette_index_reorderer { public: @@ -2758,7 +2746,7 @@ namespace basisu typedef float(*pEntry_dist_func)(uint32_t i, uint32_t j, void *pCtx); void init(uint32_t num_indices, const uint32_t *pIndices, uint32_t num_syms, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight); - + // Table remaps old to new symbol indices inline const uint_vec &get_remap_table() const { return m_remap_table; } @@ -2779,12 +2767,12 @@ namespace basisu class image { public: - image() : + image() : m_width(0), m_height(0), m_pitch(0) { } - image(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) : + image(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) : m_width(0), m_height(0), m_pitch(0) { resize(w, h, p); @@ -2850,7 +2838,7 @@ namespace basisu image &clear() { - m_width = 0; + m_width = 0; m_height = 0; m_pitch = 0; clear_vector(m_pixels); @@ -2878,7 +2866,7 @@ namespace basisu void init(const uint8_t *pImage, uint32_t width, uint32_t height, uint32_t comps) { assert(comps >= 1 && comps <= 4); - + resize(width, height); for (uint32_t y = 0; y < 
height; y++) @@ -2970,7 +2958,7 @@ namespace basisu p = w; clear(); - + if ((!p) || (!w) || (!h)) return *this; @@ -3049,8 +3037,8 @@ namespace basisu y = wrap_v ? posmod(y, m_height) : clamp(y, 0, m_height - 1); return m_pixels[x + y * m_pitch]; } - - inline image &set_clipped(int x, int y, const color_rgba &c) + + inline image &set_clipped(int x, int y, const color_rgba &c) { if ((static_cast(x) < m_width) && (static_cast(y) < m_height)) (*this)(x, y) = c; @@ -3214,7 +3202,7 @@ namespace basisu } void debug_text(uint32_t x_ofs, uint32_t y_ofs, uint32_t x_scale, uint32_t y_scale, const color_rgba &fg, const color_rgba *pBG, bool alpha_only, const char* p, ...); - + // bilinear filtering vec4F get_filtered_vec4F(float x, float y) const { @@ -3257,7 +3245,7 @@ namespace basisu return result; } - + private: uint32_t m_width, m_height, m_pitch; // all in pixels color_rgba_vec m_pixels; @@ -3269,7 +3257,7 @@ namespace basisu inline bool is_solid_block(uint32_t n, const color_rgba* pPixels) { assert(n); - + if (n <= 1) return true; @@ -3300,12 +3288,12 @@ namespace basisu class imagef { public: - imagef() : + imagef() : m_width(0), m_height(0), m_pitch(0) { } - imagef(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) : + imagef(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) : m_width(0), m_height(0), m_pitch(0) { resize(w, h, p); @@ -3365,7 +3353,7 @@ namespace basisu imagef &clear() { - m_width = 0; + m_width = 0; m_height = 0; m_pitch = 0; clear_vector(m_pixels); @@ -3421,7 +3409,7 @@ namespace basisu set_clipped(x + ix, y + iy, c); return *this; } - + imagef &crop(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F &background = vec4F(0,0,0,1)) { if (p == UINT32_MAX) @@ -3440,7 +3428,7 @@ namespace basisu cur_state.swap(m_pixels); m_pixels.resize(p * h); - + for (uint32_t y = 0; y < h; y++) { for (uint32_t x = 0; x < w; x++) @@ -3503,8 +3491,8 @@ namespace basisu y = wrap_v ? 
posmod(y, m_height) : clamp(y, 0, m_height - 1); return m_pixels[x + y * m_pitch]; } - - inline imagef &set_clipped(int x, int y, const vec4F &c) + + inline imagef &set_clipped(int x, int y, const vec4F &c) { if ((static_cast(x) < m_width) && (static_cast(y) < m_height)) (*this)(x, y) = c; @@ -3589,7 +3577,7 @@ namespace basisu { float &p = c[s]; union { float f; uint32_t u; } x; x.f = p; - + if ((std::isnan(p)) || (std::isinf(p)) || (x.u == 0x80000000)) { if (std::isnan(p)) @@ -3634,14 +3622,14 @@ namespace basisu fprintf(stderr, "One or more input pixels was negative -- setting these pixel components to 0 because ASTC HDR doesn't support signed values.\n"); neg_msg = true; } - + status = false; } if (p > highest_mag) { p = highest_mag; - + if (!clamp_msg) { fprintf(stderr, "One or more input pixels had to be clamped to %f.\n", highest_mag); @@ -3705,7 +3693,7 @@ namespace basisu return result; } - + private: uint32_t m_width, m_height, m_pitch; // all in pixels vec4F_vec m_pixels; @@ -3767,15 +3755,17 @@ namespace basisu }; extern fast_linear_to_srgb g_fast_linear_to_srgb; - + // Image metrics - + class image_metrics { public: // TODO: Add ssim + uint32_t m_width, m_height; double m_max, m_mean, m_mean_squared, m_rms, m_psnr, m_ssim; bool m_has_neg, m_hf_mag_overflow, m_any_abnormal; + uint64_t m_sum_a, m_sum_b; image_metrics() { @@ -3784,6 +3774,8 @@ namespace basisu void clear() { + m_width = 0; + m_height = 0; m_max = 0; m_mean = 0; m_mean_squared = 0; @@ -3793,10 +3785,23 @@ namespace basisu m_has_neg = false; m_hf_mag_overflow = false; m_any_abnormal = false; + m_sum_a = 0; + m_sum_b = 0; } - void print(const char *pPrefix = nullptr) { printf("%sMax: %3.3f Mean: %3.3f RMS: %3.3f PSNR: %2.3f dB\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr); } - void print_hp(const char* pPrefix = nullptr) { printf("%sMax: %3.6f Mean: %3.6f RMS: %3.6f PSNR: %2.6f dB, Any Neg: %u, Half float overflow: %u, Any NaN/Inf: %u\n", pPrefix ? 
pPrefix : "", m_max, m_mean, m_rms, m_psnr, m_has_neg, m_hf_mag_overflow, m_any_abnormal); } + void print(const char *pPrefix = nullptr) + { + //fmt_printf("{}Max: {3.3} Mean: {3.3} RMS: {3.3} PSNR: {2.3} dB, Sums: {} {}, Dim: {}x{}\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr, m_sum_a, m_sum_b, m_width, m_height); + fmt_printf("{}Max: {3.3} Mean: {3.3} RMS: {3.3} PSNR: {2.3} dB\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr); + } + + void print_hp(const char* pPrefix = nullptr) + { + //fmt_printf("{}Max: {3.6} Mean: {3.6} RMS: {3.6} PSNR: {2.6} dB, Any Neg: {}, Half float overflow: {}, Any NaN/Inf: {}, Sums: {} {}, Dim: {}x{}\n", + // pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr, m_has_neg, m_hf_mag_overflow, m_any_abnormal, m_sum_a, m_sum_b, m_width, m_height); + fmt_printf("{}Max: {3.6} Mean: {3.6} RMS: {3.6} PSNR: {2.6} dB, Any Neg: {}, Half float overflow: {}, Any NaN/Inf: {}\n", + pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr, m_has_neg, m_hf_mag_overflow, m_any_abnormal); + } void calc(const imagef& a, const imagef& b, uint32_t first_chan = 0, uint32_t total_chans = 0, bool avg_comp_error = true, bool log = false); void calc_half(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error); @@ -3820,18 +3825,20 @@ namespace basisu bool load_jpg(const char *pFilename, image& img); bool load_jpg(const uint8_t* pBuf, size_t buf_size, image& img); inline bool load_jpg(const std::string &filename, image &img) { return load_jpg(filename.c_str(), img); } - + // Currently loads .PNG, .TGA, or .JPG bool load_image(const char* pFilename, image& img); inline bool load_image(const std::string &filename, image &img) { return load_image(filename.c_str(), img); } bool is_image_filename_hdr(const char* pFilename); + void convert_ldr_to_hdr_image(imagef& img, const image& ldr_img, bool ldr_srgb_to_linear, float linear_nit_multiplier = 1.0f, float ldr_black_bias = 0.0f); + // Supports .HDR and most (but 
not all) .EXR's (see TinyEXR). bool load_image_hdr(const char* pFilename, imagef& img, bool ldr_srgb_to_linear = true, float linear_nit_multiplier = 1.0f, float ldr_black_bias = 0.0f); - + inline bool load_image_hdr(const std::string& filename, imagef& img, bool ldr_srgb_to_linear = true, float linear_nit_multiplier = 1.0f, float ldr_black_bias = 0.0f) - { + { return load_image_hdr(filename.c_str(), img, ldr_srgb_to_linear, linear_nit_multiplier, ldr_black_bias); } @@ -3849,7 +3856,7 @@ namespace basisu uint8_t *read_tga(const uint8_t *pBuf, uint32_t buf_size, int &width, int &height, int &n_chans); uint8_t *read_tga(const char *pFilename, int &width, int &height, int &n_chans); - + struct rgbe_header_info { std::string m_program; @@ -3861,13 +3868,13 @@ namespace basisu double m_exposure; // watts/steradian/m^2. bool m_has_exposure; - void clear() - { - m_program.clear(); - m_gamma = 1.0f; - m_has_gamma = false; - m_exposure = 1.0f; - m_has_exposure = false; + void clear() + { + m_program.clear(); + m_gamma = 1.0f; + m_has_gamma = false; + m_exposure = 1.0f; + m_has_exposure = false; } }; @@ -3879,7 +3886,7 @@ namespace basisu bool read_exr(const char* pFilename, imagef& img, int& n_chans); bool read_exr(const void* pMem, size_t mem_size, imagef& img); - + enum { WRITE_EXR_LINEAR_HINT = 1, // hint for lossy comp. methods: exr_perceptual_treatment_t, logarithmic or linear, defaults to logarithmic @@ -3889,7 +3896,7 @@ namespace basisu // Supports 1 (Y), 3 (RGB), or 4 (RGBA) channel images. 
bool write_exr(const char* pFilename, const imagef& img, uint32_t n_chans, uint32_t flags); - + enum { cImageSaveGrayscale = 1, @@ -3899,25 +3906,28 @@ namespace basisu bool save_png(const char* pFilename, const image& img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0); inline bool save_png(const std::string &filename, const image &img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0) { return save_png(filename.c_str(), img, image_save_flags, grayscale_comp); } + bool save_qoi(const char* pFilename, const image& img, uint32_t qoi_colorspace = 0); + inline bool save_qoi(const std::string& filename, const image& img, uint32_t qoi_colorspace = 0) { return save_qoi(filename.c_str(), img, qoi_colorspace); } + bool read_file_to_vec(const char* pFilename, uint8_vec& data); - bool read_file_to_data(const char* pFilename, void *pData, size_t len); + bool read_file_to_data(const char* pFilename, void *pData, size_t len); bool write_data_to_file(const char* pFilename, const void* pData, size_t len); - + inline bool write_vec_to_file(const char* pFilename, const uint8_vec& v) { return v.size() ? 
write_data_to_file(pFilename, &v[0], v.size()) : write_data_to_file(pFilename, "", 0); } - + bool image_resample(const image &src, image &dst, bool srgb = false, - const char *pFilter = "lanczos4", float filter_scale = 1.0f, + const char *pFilter = "lanczos4", float filter_scale = 1.0f, bool wrapping = false, - uint32_t first_comp = 0, uint32_t num_comps = 4); + uint32_t first_comp = 0, uint32_t num_comps = 4, float filter_scale_y = -1.0f); - bool image_resample(const imagef& src, imagef& dst, + bool image_resample(const imagef& src, imagef& dst, const char* pFilter = "lanczos4", float filter_scale = 1.0f, bool wrapping = false, uint32_t first_comp = 0, uint32_t num_comps = 4); - + // Timing - + typedef uint64_t timer_ticks; class interval_timer @@ -3930,7 +3940,7 @@ namespace basisu double get_elapsed_secs() const; inline double get_elapsed_ms() const { return 1000.0f* get_elapsed_secs(); } - + static void init(); static inline timer_ticks get_ticks_per_sec() { return g_freq; } static timer_ticks get_ticks(); @@ -4006,7 +4016,7 @@ namespace basisu void tonemap_image_reinhard(image& ldr_img, const imagef& hdr_img, float exposure, bool add_noise = false, bool per_component = true, bool luma_scaling = false); bool tonemap_image_compressive(image& dst_img, const imagef& hdr_test_img); bool tonemap_image_compressive2(image& dst_img, const imagef& hdr_test_img); - + // Intersection enum eClear { cClear = 0 }; enum eInitExpand { cInitExpand = 0 }; @@ -4297,9 +4307,9 @@ namespace basisu BASISU_FORCE_INLINE float fast_half_to_float_pos_not_inf_or_nan(basist::half_float h) { assert(!basist::half_is_signed(h) && !basist::is_half_inf_or_nan(h)); - + // add 112 to the exponent (112+half float's exp bias of 15=float32's bias of 127) - static const fu32 K = { 0x77800000 }; + static const fu32 K = { 0x77800000 }; fu32 o; o.u = h << 13; @@ -4315,7 +4325,7 @@ namespace basisu // Sutract 112 from the exponent, to change the bias from 127 to 15. 
static const fu32 g_f_to_h{ 0x7800000 }; - + fu32 fu; fu.f = minimum((float)basist::MAX_HALF_FLOAT, fabsf(f)) * g_f_to_h.f; @@ -4327,17 +4337,17 @@ namespace basisu { assert(!isnan(f) && !isinf(f)); assert((f >= 0.0f) && (f <= basist::MAX_HALF_FLOAT)); - + // Sutract 112 from the exponent, to change the bias from 127 to 15. static const fu32 g_f_to_h{ 0x7800000 }; fu32 fu; fu.f = f * g_f_to_h.f; - + return (basist::half_float)((fu.u >> (23 - 10)) & 0x7FFF); } - + inline basist::half_float fast_float_to_half_no_clamp_neg_nan_or_inf(float f) { assert(!isnan(f) && !isinf(f)); @@ -4363,6 +4373,28 @@ namespace basisu return (basist::half_float)h; } + bool arith_test(); + + void set_image_alpha(image& img, uint32_t a); + + void create_bc7_debug_images( + uint32_t width, uint32_t height, + const void* pBlocks, + const char* pFilename_prefix); + + struct tri2 + { + vec2F p0, p1, p2; + vec2F t0, t1, t2; + color_rgba c0, c1, c2; + }; + + // simple non-perspective correct triangle rasterizer with texture mapping, useful for generating randomized test data + void draw_tri2(image& dst, const image* pTex, const tri2& tri, bool alpha_blend); + + void set_num_wasi_threads(uint32_t num_threads); + int get_num_hardware_threads(); + } // namespace basisu #include "basisu_math.h" diff --git a/external/basis_universal/encoder/basisu_etc.cpp b/external/basis_universal/encoder/basisu_etc.cpp index abc78e360d..5bae228b3a 100644 --- a/external/basis_universal/encoder/basisu_etc.cpp +++ b/external/basis_universal/encoder/basisu_etc.cpp @@ -1,5 +1,5 @@ // basis_etc.cpp -// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2026 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -39,7 +39,7 @@ namespace basisu { -16,-48,-64,-80,8,40,56,72 }, { -16,-40,-64,-80,8,32,56,72 }, { -16,-32,-64,-80,8,24,56,72 }, { -16,-40,-56,-80,8,32,48,72 }, { -24,-32,-56,-80,16,24,48,72 }, { -8,-16,-24,-80,0,8,16,72 }, { -32,-48,-64,-72,24,40,56,64 }, { -24,-40,-56,-72,16,32,48,64 } }; - + // Given an ETC1 diff/inten_table/selector, and an 8-bit desired color, this table encodes the best packed_color in the low byte, and the abs error in the high byte. static uint16_t g_etc1_inverse_lookup[2 * 8 * 4][256]; // [ diff/inten_table/selector][desired_color ] @@ -113,7 +113,7 @@ namespace basisu static uint32_t etc1_decode_value(uint32_t diff, uint32_t inten, uint32_t selector, uint32_t packed_c) { - const uint32_t limit = diff ? 32 : 16; + const uint32_t limit = diff ? 32 : 16; BASISU_NOTE_UNUSED(limit); assert((diff < 2) && (inten < 8) && (selector < 4) && (packed_c < limit)); int c; @@ -261,7 +261,7 @@ namespace basisu return best_error; } - + const uint32_t BASISU_ETC1_CLUSTER_FIT_ORDER_TABLE_SIZE = 165; static const struct { uint8_t m_v[4]; } g_cluster_fit_order_tab[BASISU_ETC1_CLUSTER_FIT_ORDER_TABLE_SIZE] = @@ -300,7 +300,7 @@ namespace basisu { { 2, 1, 2, 3 } },{ { 4, 1, 0, 3 } },{ { 3, 1, 1, 3 } },{ { 1, 1, 2, 4 } },{ { 2, 1, 0, 5 } }, { { 1, 0, 1, 6 } },{ { 0, 2, 1, 5 } },{ { 0, 2, 0, 6 } },{ { 1, 1, 1, 5 } },{ { 1, 1, 0, 6 } } }; - + const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues] = { { -8, -2, 2, 8 }, { -17, -5, 5, 17 }, { -29, -9, 9, 29 }, { -42, -13, 13, 42 }, @@ -600,7 +600,7 @@ namespace basisu const int y3 = pInten_modifer_table[3]; pDst[3].set(ir + y3, ig + y3, ib + y3, 255); } - + bool unpack_etc1(const etc_block& block, color_rgba *pDst, bool preserve_alpha) { const bool diff_flag = block.get_diff_bit(); @@ -723,7 +723,7 @@ namespace basisu { return (n << 4) | n; } - + uint64_t etc_block::evaluate_etc1_error(const color_rgba* pBlock_pixels, bool perceptual, int subblock_index) const { color_rgba unpacked_block[16]; 
@@ -772,7 +772,7 @@ namespace basisu } } } - + bool etc1_optimizer::compute() { assert(m_pResult->m_pSelectors); @@ -811,26 +811,26 @@ namespace basisu #if defined(DEBUG) || defined(_DEBUG) { - // Ultimate sanity check on the returned error. + // Ultimate sanity check on the returned error. // If this check fails, it likely means the SSE code diverged from C++ somehow, or there was an overflow somewhere. color_rgba block_colors[4]; m_best_solution.m_coords.get_block_colors(block_colors); const color_rgba* pSrc_pixels = m_pParams->m_pSrc_pixels; uint64_t actual_error = 0; - + bool perceptual; if (m_pParams->m_quality >= cETCQualityMedium) perceptual = m_pParams->m_perceptual; else perceptual = (m_pParams->m_quality == cETCQualityFast) ? false : m_pParams->m_perceptual; - + for (uint32_t i = 0; i < n; i++) actual_error += color_distance(perceptual, pSrc_pixels[i], block_colors[pSelectors[i]], false); - + assert(actual_error == m_best_solution.m_error); } -#endif +#endif m_pResult->m_error = m_best_solution.m_error; @@ -1015,10 +1015,10 @@ namespace basisu m_luma.resize(n); m_sorted_luma_indices.resize(n); m_sorted_luma.resize(n); - + int min_r = 255, min_g = 255, min_b = 255; int max_r = 0, max_g = 0, max_b = 0; - + for (uint32_t i = 0; i < n; i++) { const color_rgba& c = m_pParams->m_pSrc_pixels[i]; @@ -1056,7 +1056,7 @@ namespace basisu m_pSorted_luma = &m_sorted_luma[0]; m_pSorted_luma_indices = &m_sorted_luma_indices[0]; - + for (uint32_t i = 0; i < n; i++) m_pSorted_luma[i] = m_luma[m_pSorted_luma_indices[i]]; } @@ -1072,7 +1072,7 @@ namespace basisu bool etc1_optimizer::check_for_redundant_solution(const etc1_solution_coordinates& coords) { // Hash first 3 bytes of color (RGB) - uint32_t kh = hash_hsieh((uint8_t*)&coords.m_unscaled_color.r, 3); + uint32_t kh = basist::hash_hsieh((uint8_t*)&coords.m_unscaled_color.r, 3); uint32_t h0 = kh & cSolutionsTriedHashMask; uint32_t h1 = (kh >> cSolutionsTriedHashBits) & cSolutionsTriedHashMask; @@ -1087,7 +1087,7 @@ 
namespace basisu return true; } - + static uint8_t g_eval_dist_tables[8][256] = { // 99% threshold @@ -1178,7 +1178,7 @@ namespace basisu uint64_t total_error = 0; const color_rgba* pSrc_pixels = m_pParams->m_pSrc_pixels; - + if (!g_cpu_supports_sse41) { for (uint32_t c = 0; c < n; c++) @@ -1235,6 +1235,7 @@ namespace basisu perceptual_distance_rgb_4_N_sse41((int64_t*)&total_error, pSelectors_to_use, block_colors, pSrc_pixels, n, trial_solution.m_error); else linear_distance_rgb_4_N_sse41((int64_t*)&total_error, pSelectors_to_use, block_colors, pSrc_pixels, n, trial_solution.m_error); + for (uint32_t i = 0; i < n; i++) m_temp_selectors[i] = pSelectors_to_use[i]; } @@ -1258,7 +1259,7 @@ namespace basisu } trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color; trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; - + #if BASISU_DEBUG_ETC_ENCODER_DEEPER printf("Eval done: %u error: %I64u best error so far: %I64u\n", (trial_solution.m_error < pBest_solution->m_error), trial_solution.m_error, pBest_solution->m_error); #endif @@ -1272,7 +1273,7 @@ namespace basisu success = true; } } - + return success; } @@ -1303,14 +1304,14 @@ namespace basisu } const color_rgba base_color(coords.get_scaled_color()); - + const uint32_t n = m_pParams->m_num_src_pixels; assert(trial_solution.m_selectors.size() == n); trial_solution.m_error = UINT64_MAX; - + const bool perceptual = (m_pParams->m_quality == cETCQualityFast) ? 
false : m_pParams->m_perceptual; - + for (int inten_table = cETC1IntenModifierValues - 1; inten_table >= 0; --inten_table) { const int* pInten_table = g_etc1_inten_tables[inten_table]; @@ -1330,10 +1331,10 @@ namespace basisu // 0 1 2 3 // 01 12 23 const uint32_t block_inten_midpoints[3] = { block_inten[0] + block_inten[1], block_inten[1] + block_inten[2], block_inten[2] + block_inten[3] }; - + uint64_t total_error = 0; const color_rgba* pSrc_pixels = m_pParams->m_pSrc_pixels; - + if (perceptual) { if ((m_pSorted_luma[n - 1] * 2) < block_inten_midpoints[0]) diff --git a/external/basis_universal/encoder/basisu_frontend.cpp b/external/basis_universal/encoder/basisu_frontend.cpp index 102f1bdc37..d721b37d15 100644 --- a/external/basis_universal/encoder/basisu_frontend.cpp +++ b/external/basis_universal/encoder/basisu_frontend.cpp @@ -1,5 +1,5 @@ // basisu_frontend.cpp -// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2026 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. // -// TODO: +// TODO: // This code originally supported full ETC1 and ETC1S, so there's some legacy stuff to be cleaned up in here. // Add endpoint tiling support (where we force adjacent blocks to use the same endpoints during quantization), for a ~10% or more increase in bitrate at same SSIM. The backend already supports this. 
// @@ -40,20 +40,20 @@ namespace basisu const uint32_t BASISU_ENDPOINT_PARENT_CODEBOOK_SIZE = 16; const uint32_t BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_01 = 32; const uint32_t BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_DEFAULT = 16; - + // TODO - How to handle internal verifies in the basisu lib static inline void handle_verify_failure(int line) { error_printf("basisu_frontend: verify check failed at line %i!\n", line); abort(); } - + bool basisu_frontend::init(const params &p) { debug_printf("basisu_frontend::init: Multithreaded: %u, Job pool total threads: %u, NumEndpointClusters: %u, NumSelectorClusters: %u, Perceptual: %u, CompressionLevel: %u\n", p.m_multithreaded, p.m_pJob_pool ? p.m_pJob_pool->get_total_threads() : 0, p.m_max_endpoint_clusters, p.m_max_selector_clusters, p.m_perceptual, p.m_compression_level); - + if ((p.m_max_endpoint_clusters < 1) || (p.m_max_endpoint_clusters > cMaxEndpointClusters)) return false; if ((p.m_max_selector_clusters < 1) || (p.m_max_selector_clusters > cMaxSelectorClusters)) @@ -61,9 +61,9 @@ namespace basisu m_source_blocks.resize(0); append_vector(m_source_blocks, p.m_pSource_blocks, p.m_num_source_blocks); - + m_params = p; - + if (m_params.m_pOpenCL_context) { BASISU_ASSUME(sizeof(cl_pixel_block) == sizeof(pixel_block)); @@ -80,7 +80,7 @@ namespace basisu m_encoded_blocks.resize(m_params.m_num_source_blocks); memset(&m_encoded_blocks[0], 0, m_encoded_blocks.size() * sizeof(m_encoded_blocks[0])); - + m_num_endpoint_codebook_iterations = 1; m_num_selector_codebook_iterations = 1; @@ -150,7 +150,7 @@ namespace basisu if (m_params.m_disable_hierarchical_endpoint_codebooks) m_use_hierarchical_endpoint_codebooks = false; - debug_printf("Endpoint refinement: %u, Hierarchical endpoint codebooks: %u, Hierarchical selector codebooks: %u, Endpoint codebook iters: %u, Selector codebook iters: %u\n", + debug_printf("Endpoint refinement: %u, Hierarchical endpoint codebooks: %u, Hierarchical selector codebooks: %u, Endpoint 
codebook iters: %u, Selector codebook iters: %u\n", m_endpoint_refinement, m_use_hierarchical_endpoint_codebooks, m_use_hierarchical_selector_codebooks, m_num_endpoint_codebook_iterations, m_num_selector_codebook_iterations); return true; @@ -238,7 +238,7 @@ namespace basisu { BASISU_FRONTEND_VERIFY(validate_endpoint_cluster_hierarchy(false)); } - + eliminate_redundant_or_empty_endpoint_clusters(); if (m_params.m_validate) @@ -252,7 +252,7 @@ namespace basisu if (early_out) break; } - + if (m_params.m_validate) { BASISU_FRONTEND_VERIFY(check_etc1s_constraints()); @@ -268,13 +268,13 @@ namespace basisu if (m_use_hierarchical_selector_codebooks) compute_selector_clusters_within_each_parent_cluster(); - + if (m_params.m_compression_level == 0) { create_optimized_selector_codebook(0); find_optimal_selector_clusters_for_each_block(); - + introduce_special_selector_clusters(); } else @@ -295,7 +295,7 @@ namespace basisu } } } - + optimize_selector_codebook(); if (m_params.m_debug_stats) @@ -321,7 +321,7 @@ namespace basisu const basist::basisu_lowlevel_etc1s_transcoder::endpoint_vec& endpoints = pTranscoder->get_endpoints(); const basist::basisu_lowlevel_etc1s_transcoder::selector_vec& selectors = pTranscoder->get_selectors(); - + m_endpoint_cluster_etc_params.resize(endpoints.size()); for (uint32_t i = 0; i < endpoints.size(); i++) { @@ -420,7 +420,7 @@ namespace basisu const uint32_t last_index = minimum(m_total_blocks, first_index + N); m_params.m_pJob_pool->add_job([this, first_index, last_index, pass] { - + for (uint32_t block_index = first_index; block_index < last_index; block_index++) { const etc_block& blk = pass ? 
m_encoded_blocks[block_index] : m_etc1_blocks_etc1s[block_index]; @@ -433,7 +433,7 @@ namespace basisu uint64_t best_err = UINT64_MAX; uint32_t best_index = 0; etc_block best_block(trial_blk); - + for (uint32_t i = 0; i < m_endpoint_cluster_etc_params.size(); i++) { if (m_endpoint_cluster_etc_params[i].m_inten_table[0] > blk.get_inten_table(0)) @@ -496,6 +496,7 @@ namespace basisu const uint32_t block_endpoint_index = m_block_endpoint_clusters_indices[block_index][0]; etc_block trial_blk; + clear_obj(trial_blk); trial_blk.set_block_color5_etc1s(m_endpoint_cluster_etc_params[block_endpoint_index].m_color_unscaled[0]); trial_blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[block_endpoint_index].m_inten_table[0]); trial_blk.set_flip_bit(true); @@ -546,7 +547,7 @@ namespace basisu m_selector_cluster_block_indices.resize(selectors.size()); for (uint32_t block_index = 0; block_index < m_etc1_blocks_etc1s.size(); block_index++) m_selector_cluster_block_indices[m_block_selector_cluster_index[block_index]].push_back(block_index); - + return true; } @@ -580,9 +581,9 @@ namespace basisu const uint32_t new_selector_cluster_index = m_optimized_cluster_selectors.size_u32(); m_optimized_cluster_selectors.push_back(blk); - + vector_ensure_element_is_valid(m_selector_cluster_block_indices, new_selector_cluster_index); - + for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) { if (m_orig_encoded_blocks[block_index].get_raw_selector_bits() != blk.get_raw_selector_bits()) @@ -590,7 +591,7 @@ namespace basisu // See if using flat selectors actually decreases the block's error. 
const uint32_t old_selector_cluster_index = m_block_selector_cluster_index[block_index]; - + etc_block cur_blk; const uint32_t endpoint_cluster_index = get_subblock_endpoint_cluster_index(block_index, 0); cur_blk.set_block_color5_etc1s(get_endpoint_cluster_unscaled_color(endpoint_cluster_index, false)); @@ -606,10 +607,10 @@ namespace basisu if (new_err >= cur_err) continue; - + // Change the block to use the new cluster m_block_selector_cluster_index[block_index] = new_selector_cluster_index; - + m_selector_cluster_block_indices[new_selector_cluster_index].push_back(block_index); block_relocated_flags[block_index] = true; @@ -685,7 +686,7 @@ namespace basisu old_to_new[i] = (find_res.first)->second; continue; } - + old_to_new[i] = total_new_entries++; new_to_old.push_back(i); } @@ -714,7 +715,7 @@ namespace basisu { new_selector_cluster_indices[m_block_selector_cluster_index[i]].push_back(i); } - + m_optimized_cluster_selectors.swap(new_optimized_cluster_selectors); m_selector_cluster_block_indices.swap(new_selector_cluster_indices); @@ -725,7 +726,7 @@ namespace basisu for (uint32_t j = 0; j < m_selector_clusters_within_each_parent_cluster[i].size(); j++) m_selector_clusters_within_each_parent_cluster[i][j] = old_to_new[m_selector_clusters_within_each_parent_cluster[i][j]]; } - + debug_printf("optimize_selector_codebook: Before: %u After: %u\n", orig_total_selector_clusters, total_new_entries); } @@ -735,11 +736,11 @@ namespace basisu interval_timer tm; tm.start(); - + m_etc1_blocks_etc1s.resize(m_total_blocks); bool use_cpu = true; - + if (m_params.m_pOpenCL_context) { uint32_t total_perms = 64; @@ -747,9 +748,9 @@ namespace basisu total_perms = 4; else if (m_params.m_compression_level == 1) total_perms = 16; - else if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL) + else if (m_params.m_compression_level == BASISU_MAX_ETC1S_COMPRESSION_LEVEL) total_perms = OPENCL_ENCODE_ETC1S_MAX_PERMS; - + bool status = 
opencl_encode_etc1s_blocks(m_params.m_pOpenCL_context, m_etc1_blocks_etc1s.data(), m_params.m_perceptual, total_perms); if (status) use_cpu = false; @@ -760,7 +761,7 @@ namespace basisu m_opencl_failed = true; } } - + if (use_cpu) { const uint32_t N = 4096; @@ -783,7 +784,7 @@ namespace basisu optimizer_params.m_quality = cETCQualityFast; else if (m_params.m_compression_level == 1) optimizer_params.m_quality = cETCQualityMedium; - else if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL) + else if (m_params.m_compression_level == BASISU_MAX_ETC1S_COMPRESSION_LEVEL) optimizer_params.m_quality = cETCQualityUber; optimizer_params.m_num_src_pixels = 16; @@ -817,16 +818,16 @@ namespace basisu m_params.m_pJob_pool->wait_for_all(); } // use_cpu - + debug_printf("init_etc1_images: Elapsed time: %3.3f secs\n", tm.get_elapsed_secs()); } void basisu_frontend::init_endpoint_training_vectors() { debug_printf("init_endpoint_training_vectors\n"); - + vec6F_quantizer::array_of_weighted_training_vecs &training_vecs = m_endpoint_clusterizer.get_training_vecs(); - + training_vecs.resize(m_total_blocks * 2); const uint32_t N = 16384; @@ -838,12 +839,12 @@ namespace basisu m_params.m_pJob_pool->add_job( [this, first_index, last_index, &training_vecs] { for (uint32_t block_index = first_index; block_index < last_index; block_index++) - { + { const etc_block &blk = m_etc1_blocks_etc1s[block_index]; color_rgba block_colors[2]; blk.get_block_low_high_colors(block_colors, 0); - + vec6F v; v[0] = block_colors[0].r * (1.0f / 255.0f); v[1] = block_colors[0].g * (1.0f / 255.0f); @@ -851,7 +852,7 @@ namespace basisu v[3] = block_colors[1].r * (1.0f / 255.0f); v[4] = block_colors[1].g * (1.0f / 255.0f); v[5] = block_colors[1].b * (1.0f / 255.0f); - + training_vecs[block_index * 2 + 0] = std::make_pair(v, 1); training_vecs[block_index * 2 + 1] = std::make_pair(v, 1); @@ -870,7 +871,7 @@ namespace basisu const uint32_t parent_codebook_size = (m_params.m_max_endpoint_clusters >= 256) ? 
BASISU_ENDPOINT_PARENT_CODEBOOK_SIZE : 0; uint32_t max_threads = 0; - max_threads = m_params.m_multithreaded ? minimum(std::thread::hardware_concurrency(), cMaxCodebookCreationThreads) : 0; + max_threads = m_params.m_multithreaded ? minimum(get_num_hardware_threads(), cMaxCodebookCreationThreads) : 0; if (m_params.m_pJob_pool) max_threads = minimum((int)m_params.m_pJob_pool->get_total_threads(), max_threads); @@ -919,12 +920,12 @@ namespace basisu for (uint32_t cluster_index = 0; cluster_index < m_endpoint_clusters.size(); cluster_index++) { const uint_vec &cluster = m_endpoint_clusters[cluster_index]; - + uint32_t parent_cluster_index = 0; for (uint32_t j = 0; j < cluster.size(); j++) { const uint32_t block_index = cluster[j] >> 1; - + BASISU_FRONTEND_VERIFY(block_index < m_block_parent_endpoint_cluster.size()); if (!j) @@ -938,7 +939,7 @@ namespace basisu } } } - + if (m_params.m_debug_stats) debug_printf("Total endpoint clusters: %u, parent clusters: %u\n", m_endpoint_clusters.size_u32(), m_endpoint_parent_clusters.size_u32()); } @@ -996,7 +997,7 @@ namespace basisu BASISU_FRONTEND_VERIFY(cluster_indices.size()); vector_sort(cluster_indices); - + auto last = std::unique(cluster_indices.begin(), cluster_indices.end()); cluster_indices.erase(last, cluster_indices.end()); } @@ -1009,8 +1010,8 @@ namespace basisu const uint32_t N = 512; for (uint32_t cluster_index_iter = 0; cluster_index_iter < m_endpoint_clusters.size(); cluster_index_iter += N) { - const uint32_t first_index = cluster_index_iter; - const uint32_t last_index = minimum(m_endpoint_clusters.size_u32(), cluster_index_iter + N); + const uint32_t first_index = cluster_index_iter; + const uint32_t last_index = minimum(m_endpoint_clusters.size_u32(), cluster_index_iter + N); m_params.m_pJob_pool->add_job( [this, first_index, last_index] { @@ -1039,7 +1040,7 @@ namespace basisu const endpoint_cluster_etc_params &etc_params = m_endpoint_cluster_etc_params[cluster_index]; assert(etc_params.m_valid); - + 
color_rgba block_colors[4]; etc_block::get_block_colors5(block_colors, etc_params.m_color_unscaled[0], etc_params.m_inten_table[0], true); @@ -1071,7 +1072,7 @@ namespace basisu quant_err.m_cluster_subblock_index = cluster_indices_iter; quant_err.m_block_index = block_index; quant_err.m_subblock_index = subblock_index; - + { std::lock_guard lock(m_lock); @@ -1088,7 +1089,7 @@ namespace basisu vector_sort(m_subblock_endpoint_quant_err_vec); } - + void basisu_frontend::introduce_new_endpoint_clusters() { debug_printf("introduce_new_endpoint_clusters\n"); @@ -1159,9 +1160,9 @@ namespace basisu BASISU_FRONTEND_VERIFY(cluster_sizes[subblock_to_move.m_cluster_index] >= 2); cluster_sizes[subblock_to_move.m_cluster_index] -= 2; - + ignore_cluster.insert(subblock_to_move.m_cluster_index); - + total_new_clusters++; num_new_endpoint_clusters--; @@ -1197,23 +1198,23 @@ namespace basisu inline std::size_t operator()(const color_rgba& k) const { uint32_t v = *(const uint32_t*)&k; - + //return bitmix32(v); - + //v ^= (v << 10); //v ^= (v >> 12); - + return v; } }; - + // Given each endpoint cluster, gather all the block pixels which are in that cluster and compute optimized ETC1S endpoints for them. // TODO: Don't optimize endpoint clusters which haven't changed. // If step>=1, we check to ensure the new endpoint values actually decrease quantization error. 
void basisu_frontend::generate_endpoint_codebook(uint32_t step) { debug_printf("generate_endpoint_codebook\n"); - + interval_timer tm; tm.start(); @@ -1226,7 +1227,7 @@ namespace basisu const uint32_t total_clusters = (uint32_t)m_endpoint_clusters.size(); basisu::vector pixel_clusters(total_clusters); - + std::vector input_pixels; input_pixels.reserve(m_total_blocks * 16); @@ -1259,7 +1260,7 @@ namespace basisu pixel_weights.resize(pixel_weights.size() + total_pixels); uint64_t dst_ofs = first_pixel_index; - + uint64_t total_r = 0, total_g = 0, total_b = 0; for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++) { @@ -1311,7 +1312,7 @@ namespace basisu const uint64_t first_pixel_index = input_pixels.size(); uint32_t prev_color = 0, cur_weight = 0; - + for (uint32_t i = 0; i < colors.size(); i++) { uint32_t cur_color = pSorted[i]; @@ -1359,7 +1360,7 @@ namespace basisu uint32_t *pPrev_weight = nullptr; color_rgba prev_color; - + { color_rgba cur_color = pBlock_pixels[0]; auto res = color_hasher.insert(cur_color, 0); @@ -1371,7 +1372,7 @@ namespace basisu prev_color = cur_color; pPrev_weight = &(res.first)->second; } - + for (uint32_t i = 1; i < 16; i++) { color_rgba cur_color = pBlock_pixels[i]; @@ -1404,9 +1405,9 @@ namespace basisu input_pixels.resize(first_pixel_index + total_unique_pixels); pixel_weights.resize(first_pixel_index + total_unique_pixels); - + uint32_t j = 0; - + for (auto it = color_hasher.begin(); it != color_hasher.end(); ++it, ++j) { input_pixels[first_pixel_index + j] = it->first; @@ -1438,7 +1439,7 @@ namespace basisu uint32_t total_perms = 64; if (m_params.m_compression_level <= 1) total_perms = 16; - else if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL) + else if (m_params.m_compression_level == BASISU_MAX_ETC1S_COMPRESSION_LEVEL) total_perms = OPENCL_ENCODE_ETC1S_MAX_PERMS; basisu::vector output_blocks(total_clusters); @@ -1456,7 +1457,7 @@ namespace basisu for 
(uint32_t old_cluster_index = 0; old_cluster_index < m_endpoint_clusters.size(); old_cluster_index++) { const uint32_t new_cluster_index = sorted_cluster_indices_old_to_new[old_cluster_index]; - + const etc_block& blk = output_blocks[new_cluster_index]; endpoint_cluster_etc_params& prev_etc_params = m_endpoint_cluster_etc_params[old_cluster_index]; @@ -1464,7 +1465,7 @@ namespace basisu prev_etc_params.m_valid = true; etc_block::unpack_color5(prev_etc_params.m_color_unscaled[0], blk.get_base5_color(), false); prev_etc_params.m_inten_table[0] = blk.get_inten_table(0); - prev_etc_params.m_color_error[0] = 0; // dummy value - we don't actually use this + prev_etc_params.m_color_error[0] = 0; // dummy value - we don't actually use this } use_cpu = false; @@ -1518,7 +1519,7 @@ namespace basisu { etc1_optimizer optimizer; - etc1_solution_coordinates solutions[2]; + //etc1_solution_coordinates solutions[2]; etc1_optimizer::params cluster_optimizer_params; cluster_optimizer_params.m_num_src_pixels = total_pixels; @@ -1529,7 +1530,7 @@ namespace basisu if (m_params.m_compression_level <= 1) cluster_optimizer_params.m_quality = cETCQualityMedium; - else if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL) + else if (m_params.m_compression_level == BASISU_MAX_ETC1S_COMPRESSION_LEVEL) cluster_optimizer_params.m_quality = cETCQualityUber; etc1_optimizer::results cluster_optimizer_results; @@ -1647,7 +1648,7 @@ namespace basisu uint32_t basisu_frontend::refine_endpoint_clusterization() { debug_printf("refine_endpoint_clusterization\n"); - + if (m_use_hierarchical_endpoint_codebooks) compute_endpoint_clusters_within_each_parent_cluster(); @@ -1668,9 +1669,9 @@ namespace basisu } // cluster_indices_iter } - + //---------------------------------------------------------- - + // Create a new endpoint clusterization interval_timer tm; @@ -1687,7 +1688,7 @@ namespace basisu const uint32_t total_parent_clusters = 
(uint32_t)m_endpoint_clusters_within_each_parent_cluster.size(); basisu::vector cl_block_info_structs(m_total_blocks); - + // the size of each parent cluster, in total clusters uint_vec parent_cluster_sizes(total_parent_clusters); for (uint32_t i = 0; i < total_parent_clusters; i++) @@ -1701,7 +1702,7 @@ namespace basisu cur_ofs += parent_cluster_sizes[i]; } - + // Note: total_actual_endpoint_clusters is not necessarly equal to m_endpoint_clusters.size(), because clusters may live in multiple parent clusters after the first refinement step. BASISU_FRONTEND_VERIFY(cur_ofs >= m_endpoint_clusters.size()); const uint32_t total_actual_endpoint_clusters = cur_ofs; @@ -1727,11 +1728,11 @@ namespace basisu cl_endpoint_cluster_structs[dst_ofs + j].m_cluster_index = (uint16_t)endpoint_cluster_index; } } - + for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) { const uint32_t block_parent_endpoint_cluster_index = m_block_parent_endpoint_cluster[block_index]; - + cl_block_info_structs[block_index].m_num_clusters = (uint16_t)(parent_cluster_sizes[block_parent_endpoint_cluster_index]); cl_block_info_structs[block_index].m_first_cluster_ofs = (uint16_t)(first_parent_cluster_ofs[block_parent_endpoint_cluster_index]); @@ -1746,7 +1747,7 @@ namespace basisu uint_vec sorted_block_indices(m_total_blocks); indirect_sort(m_total_blocks, sorted_block_indices.data(), block_cluster_indices.data()); - + bool status = opencl_refine_endpoint_clusterization( m_params.m_pOpenCL_context, cl_block_info_structs.data(), @@ -1902,7 +1903,7 @@ namespace basisu break; } } // j - + best_cluster_indices[block_index] = best_cluster_index; } // block_index @@ -1912,9 +1913,9 @@ namespace basisu } // block_index_iter m_params.m_pJob_pool->wait_for_all(); - + } // use_cpu - + debug_printf("refine_endpoint_clusterization time: %3.3f secs\n", tm.get_elapsed_secs()); basisu::vector > optimized_endpoint_clusters(m_endpoint_clusters.size()); @@ -1957,7 +1958,7 @@ namespace basisu 
basisu::vector > new_endpoint_clusters(m_endpoint_clusters.size()); basisu::vector new_subblock_etc_params(m_endpoint_clusters.size()); - + for (uint32_t i = 0; i < m_endpoint_clusters.size(); i++) { uint32_t j = sorted_endpoint_cluster_indices[i]; @@ -1972,7 +1973,7 @@ namespace basisu new_endpoint_clusters.resize(0); new_subblock_etc_params.resize(0); - + for (int i = 0; i < (int)m_endpoint_clusters.size(); ) { if (!m_endpoint_clusters[i].size()) @@ -1990,7 +1991,7 @@ namespace basisu new_endpoint_clusters.push_back(m_endpoint_clusters[i]); new_subblock_etc_params.push_back(m_endpoint_cluster_etc_params[i]); - + for (int k = i + 1; k < j; k++) { append_vector(new_endpoint_clusters.back(), m_endpoint_clusters[k]); @@ -1998,7 +1999,7 @@ namespace basisu i = j; } - + if (m_endpoint_clusters.size() != new_endpoint_clusters.size()) { if (m_params.m_debug_stats) @@ -2013,7 +2014,7 @@ namespace basisu void basisu_frontend::create_initial_packed_texture() { debug_printf("create_initial_packed_texture\n"); - + interval_timer tm; tm.start(); @@ -2026,7 +2027,7 @@ namespace basisu for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) { uint32_t cluster0 = m_block_endpoint_clusters_indices[block_index][0]; - + const color_rgba& color_unscaled = m_endpoint_cluster_etc_params[cluster0].m_color_unscaled[0]; uint32_t inten = m_endpoint_cluster_etc_params[cluster0].m_inten_table[0]; @@ -2088,7 +2089,7 @@ namespace basisu m_params.m_pJob_pool->wait_for_all(); } // use_cpu - + m_orig_encoded_blocks = m_encoded_blocks; debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs()); @@ -2105,7 +2106,7 @@ namespace basisu for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++) { const uint32_t block_index = cluster_indices[cluster_indices_iter]; - + block_selector_cluster_indices[block_index] = cluster_index; } // cluster_indices_iter @@ -2130,7 +2131,7 @@ namespace basisu 
BASISU_FRONTEND_VERIFY(cluster_indices.size()); vector_sort(cluster_indices); - + auto last = std::unique(cluster_indices.begin(), cluster_indices.end()); cluster_indices.erase(last, cluster_indices.end()); } @@ -2139,11 +2140,11 @@ namespace basisu void basisu_frontend::generate_selector_clusters() { debug_printf("generate_selector_clusters\n"); - + typedef tree_vector_quant vec16F_clusterizer; - + vec16F_clusterizer::array_of_weighted_training_vecs training_vecs(m_total_blocks); - + const uint32_t N = 4096; for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N) { @@ -2171,10 +2172,10 @@ namespace basisu const uint32_t cColorDistToWeight = 300; const uint32_t cMaxWeight = 4096; uint32_t weight = clamp(dist / cColorDistToWeight, 1, cMaxWeight); - + training_vecs[block_index].first = v; training_vecs[block_index].second = weight; - + } // block_index } ); @@ -2192,7 +2193,7 @@ namespace basisu debug_printf("Using selector parent codebook size %u\n", parent_codebook_size); uint32_t max_threads = 0; - max_threads = m_params.m_multithreaded ? minimum(std::thread::hardware_concurrency(), cMaxCodebookCreationThreads) : 0; + max_threads = m_params.m_multithreaded ? 
minimum(get_num_hardware_threads(), cMaxCodebookCreationThreads) : 0; if (m_params.m_pJob_pool) max_threads = minimum((int)m_params.m_pJob_pool->get_total_threads(), max_threads); @@ -2235,7 +2236,7 @@ namespace basisu for (uint32_t cluster_index = 0; cluster_index < m_selector_cluster_block_indices.size(); cluster_index++) { const uint_vec &cluster = m_selector_cluster_block_indices[cluster_index]; - + uint32_t parent_cluster_index = 0; for (uint32_t j = 0; j < cluster.size(); j++) { @@ -2267,7 +2268,7 @@ namespace basisu debug_printf("Total selector clusters (from m_selector_cluster_block_indices.size()): %u\n", (uint32_t)m_selector_cluster_block_indices.size()); m_optimized_cluster_selectors.resize(total_selector_clusters); - + // For each selector codebook entry, and for each of the 4x4 selectors, determine which selector minimizes the error across all the blocks that use that quantized selector. const uint32_t N = 256; for (uint32_t cluster_index_iter = 0; cluster_index_iter < total_selector_clusters; cluster_index_iter += N) @@ -2351,7 +2352,7 @@ namespace basisu m_params.m_pJob_pool->wait_for_all(); debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs()); - + if (m_params.m_debug_images) { uint32_t max_selector_cluster_size = 0; @@ -2377,7 +2378,7 @@ namespace basisu uint32_t block_index = cluster_block_indices[i]; const etc_block &blk = m_orig_encoded_blocks[block_index]; - + for (uint32_t y = 0; y < 4; y++) for (uint32_t x = 0; x < 4; x++) selector_cluster_vis.set_clipped(x_spacer_len + x + 5 * i, selector_cluster_index * 5 + y, color_rgba((blk.get_selector(x, y) * 255) / 3)); @@ -2399,7 +2400,7 @@ namespace basisu interval_timer tm; tm.start(); - + if (m_params.m_validate) { // Sanity checks @@ -2414,7 +2415,7 @@ namespace basisu } m_block_selector_cluster_index.resize(m_total_blocks); - + if (m_params.m_compression_level == 0) { // Just leave the blocks in their original selector clusters. 
@@ -2435,7 +2436,7 @@ namespace basisu return; } - + bool use_cpu = true; if ((m_params.m_pOpenCL_context) && m_use_hierarchical_selector_codebooks) @@ -2444,17 +2445,17 @@ namespace basisu basisu::vector selector_structs; selector_structs.reserve(m_optimized_cluster_selectors.size()); - + uint_vec parent_selector_cluster_offsets(num_parent_clusters); uint_vec selector_cluster_indices; selector_cluster_indices.reserve(m_optimized_cluster_selectors.size()); - + uint32_t cur_ofs = 0; for (uint32_t parent_index = 0; parent_index < num_parent_clusters; parent_index++) { parent_selector_cluster_offsets[parent_index] = cur_ofs; - + for (uint32_t j = 0; j < m_selector_clusters_within_each_parent_cluster[parent_index].size(); j++) { const uint32_t selector_cluster_index = m_selector_clusters_within_each_parent_cluster[parent_index][j]; @@ -2464,7 +2465,7 @@ namespace basisu sel_bits |= (m_optimized_cluster_selectors[selector_cluster_index].get_selector(p & 3, p >> 2) << (p * 2)); selector_structs.enlarge(1)->m_packed_selectors = sel_bits; - + selector_cluster_indices.push_back(selector_cluster_index); } @@ -2472,7 +2473,7 @@ namespace basisu } const uint32_t total_input_selectors = cur_ofs; - + basisu::vector block_structs(m_total_blocks); for (uint32_t i = 0; i < m_total_blocks; i++) { @@ -2496,7 +2497,7 @@ namespace basisu selector_cluster_indices.data(), output_selector_cluster_indices.data(), m_params.m_perceptual); - + if (!status) { error_printf("basisu_frontend::find_optimal_selector_clusters_for_each_block: opencl_find_optimal_selector_clusters_for_each_block() failed! 
Using CPU.\n"); @@ -2510,7 +2511,7 @@ namespace basisu m_selector_cluster_block_indices[i].resize(0); m_selector_cluster_block_indices[i].reserve(128); } - + for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) { etc_block& blk = m_encoded_blocks[block_index]; @@ -2542,7 +2543,7 @@ namespace basisu } } } - + const uint32_t N = 2048; for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N) { @@ -2550,13 +2551,13 @@ namespace basisu const uint32_t last_index = minimum(m_total_blocks, first_index + N); m_params.m_pJob_pool->add_job( [this, first_index, last_index, &unpacked_optimized_cluster_selectors] { - + int prev_best_cluster_index = 0; for (uint32_t block_index = first_index; block_index < last_index; block_index++) { const pixel_block& block = get_source_pixel_block(block_index); - + etc_block& blk = m_encoded_blocks[block_index]; if ((block_index > first_index) && (block == get_source_pixel_block(block_index - 1))) @@ -2564,18 +2565,18 @@ namespace basisu blk.set_raw_selector_bits(m_optimized_cluster_selectors[prev_best_cluster_index].get_raw_selector_bits()); m_block_selector_cluster_index[block_index] = prev_best_cluster_index; - + continue; } - + const color_rgba* pBlock_pixels = block.get_ptr(); - + color_rgba trial_block_colors[4]; blk.get_block_colors_etc1s(trial_block_colors); // precompute errors for the i-th block pixel and selector sel: [sel][i] uint32_t trial_errors[4][16]; - + if (m_params.m_perceptual) { for (uint32_t sel = 0; sel < 4; ++sel) @@ -2652,7 +2653,7 @@ namespace basisu for (uint32_t cluster_iter = 0; cluster_iter < total_clusters; cluster_iter++) { const uint32_t cluster_index = m_use_hierarchical_selector_codebooks ? 
(*pCluster_indices)[cluster_iter] : cluster_iter; - + const uint8_t* pSels = &unpacked_optimized_cluster_selectors[cluster_index * 16]; uint64_t trial_err = (uint64_t)trial_errors[pSels[0]][0] + trial_errors[pSels[1]][1] + trial_errors[pSels[2]][2] + trial_errors[pSels[3]][3]; @@ -2685,7 +2686,7 @@ namespace basisu m_block_selector_cluster_index[block_index] = best_cluster_index; prev_best_cluster_index = best_cluster_index; - + } // block_index } ); @@ -2693,7 +2694,7 @@ namespace basisu } // block_index_iter m_params.m_pJob_pool->wait_for_all(); - + for (uint32_t i = 0; i < m_selector_cluster_block_indices.size(); i++) { m_selector_cluster_block_indices[i].resize(0); @@ -2707,7 +2708,7 @@ namespace basisu vector_ensure_element_is_valid(m_selector_cluster_block_indices, best_cluster_index); m_selector_cluster_block_indices[best_cluster_index].push_back(block_index); } - + } // if (use_cpu) debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs()); @@ -2717,7 +2718,7 @@ namespace basisu uint32_t basisu_frontend::refine_block_endpoints_given_selectors() { debug_printf("refine_block_endpoints_given_selectors\n"); - + for (int block_index = 0; block_index < static_cast(m_total_blocks); block_index++) { //uint32_t selector_cluster = m_block_selector_cluster_index(block_x, block_y); @@ -2790,7 +2791,7 @@ namespace basisu total_subblocks_examined += total_pixels / 8; etc1_optimizer optimizer; - etc1_solution_coordinates solutions[2]; + //etc1_solution_coordinates solutions[2]; etc1_optimizer::params cluster_optimizer_params; cluster_optimizer_params.m_num_src_pixels = total_pixels; @@ -2898,7 +2899,7 @@ namespace basisu if (m_params.m_debug_stats) debug_printf("Total subblock endpoints refined: %u (%3.1f%%)\n", total_subblocks_refined, total_subblocks_refined * 100.0f / total_subblocks_examined); - + return total_subblocks_refined; } @@ -2990,7 +2991,7 @@ namespace basisu } // The backend has remapped the block endpoints while optimizing the output symbols for 
better rate distortion performance, so let's go and reoptimize the endpoint codebook. - // This is currently the only place where the backend actually goes and changes the quantization and calls the frontend to fix things up. + // This is currently the only place where the backend actually goes and changes the quantization and calls the frontend to fix things up. // This is basically a bottom up clusterization stage, where some leaves can be combined. void basisu_frontend::reoptimize_remapped_endpoints(const uint_vec &new_block_endpoints, int_vec &old_to_new_endpoint_cluster_indices, bool optimize_final_codebook, uint_vec *pBlock_selector_indices) { @@ -3002,12 +3003,12 @@ namespace basisu basisu::vector cluster_valid(new_endpoint_cluster_block_indices.size()); basisu::vector cluster_improved(new_endpoint_cluster_block_indices.size()); - + const uint32_t N = 256; for (uint32_t cluster_index_iter = 0; cluster_index_iter < new_endpoint_cluster_block_indices.size(); cluster_index_iter += N) { - const uint32_t first_index = cluster_index_iter; - const uint32_t last_index = minimum((uint32_t)new_endpoint_cluster_block_indices.size(), cluster_index_iter + N); + const uint32_t first_index = cluster_index_iter; + const uint32_t last_index = minimum((uint32_t)new_endpoint_cluster_block_indices.size(), cluster_index_iter + N); m_params.m_pJob_pool->add_job( [this, first_index, last_index, &cluster_improved, &cluster_valid, &new_endpoint_cluster_block_indices, &pBlock_selector_indices ] { @@ -3027,13 +3028,13 @@ namespace basisu blk.set_block_color5_etc1s(get_endpoint_cluster_unscaled_color(cluster_index, false)); blk.set_inten_tables_etc1s(get_endpoint_cluster_inten_table(cluster_index, false)); blk.set_flip_bit(true); - + uint64_t cur_err = 0; for (uint32_t cluster_block_indices_iter = 0; cluster_block_indices_iter < cluster_block_indices.size(); cluster_block_indices_iter++) { const uint32_t block_index = cluster_block_indices[cluster_block_indices_iter]; - + const 
color_rgba *pBlock_pixels = get_source_pixel_block(block_index).get_ptr(); memcpy(&cluster_pixels[cluster_block_indices_iter * 16], pBlock_pixels, 16 * sizeof(color_rgba)); @@ -3045,17 +3046,17 @@ namespace basisu blk.set_raw_selector_bits(blk_selectors.get_raw_selector_bits()); cur_err += blk.evaluate_etc1_error(pBlock_pixels, m_params.m_perceptual); - + for (uint32_t y = 0; y < 4; y++) for (uint32_t x = 0; x < 4; x++) force_selectors[cluster_block_indices_iter * 16 + x + y * 4] = static_cast(blk_selectors.get_selector(x, y)); } endpoint_cluster_etc_params new_endpoint_cluster_etc_params; - + { etc1_optimizer optimizer; - etc1_solution_coordinates solutions[2]; + //etc1_solution_coordinates solutions[2]; etc1_optimizer::params cluster_optimizer_params; cluster_optimizer_params.m_num_src_pixels = total_pixels; @@ -3065,7 +3066,7 @@ namespace basisu cluster_optimizer_params.m_perceptual = m_params.m_perceptual; cluster_optimizer_params.m_pForce_selectors = &force_selectors[0]; - if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL) + if (m_params.m_compression_level == BASISU_MAX_ETC1S_COMPRESSION_LEVEL) cluster_optimizer_params.m_quality = cETCQualityUber; else cluster_optimizer_params.m_quality = cETCQualitySlow; @@ -3091,7 +3092,7 @@ namespace basisu if (new_endpoint_cluster_etc_params.m_color_error[0] < cur_err) { m_endpoint_cluster_etc_params[cluster_index] = new_endpoint_cluster_etc_params; - + cluster_improved[cluster_index] = true; } @@ -3104,13 +3105,13 @@ namespace basisu } // cluster_index_iter m_params.m_pJob_pool->wait_for_all(); - + uint32_t total_unused_clusters = 0; uint32_t total_improved_clusters = 0; - + old_to_new_endpoint_cluster_indices.resize(m_endpoint_clusters.size()); vector_set_all(old_to_new_endpoint_cluster_indices, -1); - + int total_new_endpoint_clusters = 0; for (uint32_t old_cluster_index = 0; old_cluster_index < m_endpoint_clusters.size(); old_cluster_index++) @@ -3145,7 +3146,7 @@ namespace basisu for (uint32_t 
block_index = 0; block_index < new_block_endpoints.size(); block_index++) { const uint32_t old_endpoint_cluster_index = new_block_endpoints[block_index]; - + const int new_endpoint_cluster_index = old_to_new_endpoint_cluster_indices[old_endpoint_cluster_index]; BASISU_FRONTEND_VERIFY(new_endpoint_cluster_index >= 0); @@ -3158,13 +3159,13 @@ namespace basisu new_endpoint_cluster_etc_params[new_endpoint_cluster_index].m_subblocks.push_back(block_index * 2 + 0); new_endpoint_cluster_etc_params[new_endpoint_cluster_index].m_subblocks.push_back(block_index * 2 + 1); - + m_block_endpoint_clusters_indices[block_index][0] = new_endpoint_cluster_index; m_block_endpoint_clusters_indices[block_index][1] = new_endpoint_cluster_index; } debug_printf("basisu_frontend::reoptimize_remapped_endpoints: stage 2\n"); - + m_endpoint_clusters = new_endpoint_clusters; m_endpoint_cluster_etc_params = new_endpoint_cluster_etc_params; @@ -3200,7 +3201,7 @@ namespace basisu debug_printf("Final (post-RDO) endpoint clusters: %u\n", m_endpoint_clusters.size()); } - + //debug_printf("validate_output: %u\n", validate_output()); } @@ -3228,7 +3229,7 @@ namespace basisu // If the endpoint cluster lives in more than one parent node, that's wrong. if (subblock_parent_indices[subblock_index] != -1) return false; - + subblock_parent_indices[subblock_index] = parent_index; } } @@ -3252,7 +3253,7 @@ namespace basisu if (subblock_cluster_indices[subblock_index] != -1) return false; - + subblock_cluster_indices[subblock_index] = cluster_index; // There are transformations on the endpoint clusters that can break the strict tree requirement @@ -3266,7 +3267,7 @@ namespace basisu } } } - + // Make sure all endpoint clusters are present in the parent cluster. 
for (uint32_t i = 0; i < subblock_cluster_indices.size(); i++) { @@ -3291,7 +3292,7 @@ namespace basisu #define CHECK(x) BASISU_FRONTEND_VERIFY(x); CHECK(get_output_block(block_index).get_flip_bit() == true); - + const bool diff_flag = get_diff_flag(block_index); CHECK(diff_flag == true); @@ -3305,11 +3306,11 @@ namespace basisu // basisu only supports ETC1S, so these must be equal. CHECK(endpoint_cluster0_index == endpoint_cluster1_index); - + CHECK(blk.set_block_color5_check(get_endpoint_cluster_unscaled_color(endpoint_cluster0_index, false), get_endpoint_cluster_unscaled_color(endpoint_cluster1_index, false))); CHECK(get_endpoint_cluster_color_is_used(endpoint_cluster0_index, false)); - + blk.set_inten_table(0, get_endpoint_cluster_inten_table(endpoint_cluster0_index, false)); blk.set_inten_table(1, get_endpoint_cluster_inten_table(endpoint_cluster1_index, false)); @@ -3329,7 +3330,7 @@ namespace basisu CHECK(rdo_output_block.get_base5_color() == blk.get_base5_color()); CHECK(rdo_output_block.get_delta3_color() == blk.get_delta3_color()); CHECK(rdo_output_block.get_raw_selector_bits() == blk.get_raw_selector_bits()); - + #undef CHECK } @@ -3376,9 +3377,10 @@ namespace basisu } image img; - g.unpack(img); + g.unpack(img, false); save_png(pFilename, img); } } // namespace basisu + diff --git a/external/basis_universal/encoder/basisu_frontend.h b/external/basis_universal/encoder/basisu_frontend.h index 18ff5b6675..a5aadb34ac 100644 --- a/external/basis_universal/encoder/basisu_frontend.h +++ b/external/basis_universal/encoder/basisu_frontend.h @@ -1,5 +1,5 @@ // basisu_frontend.h -// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2026 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -37,8 +37,10 @@ namespace basisu uint32_t &operator[] (uint32_t i) { assert(i < 2); return m_comps[i]; } }; - const uint32_t BASISU_DEFAULT_COMPRESSION_LEVEL = 2; - const uint32_t BASISU_MAX_COMPRESSION_LEVEL = 6; + // rg [11/25/25] - The command line tool defaults to ETC1S level 1, but the API 2. Changing this breaks backwards compatibility for anyone using the API and our test suite. + const uint32_t BASISU_DEFAULT_ETC1S_COMPRESSION_LEVEL = 2; + + const uint32_t BASISU_MAX_ETC1S_COMPRESSION_LEVEL = 6; class basisu_frontend { @@ -61,7 +63,7 @@ namespace basisu enum { cMaxEndpointClusters = 16128, - + cMaxSelectorClusters = 16128, }; @@ -72,7 +74,7 @@ namespace basisu m_pSource_blocks(NULL), m_max_endpoint_clusters(256), m_max_selector_clusters(256), - m_compression_level(BASISU_DEFAULT_COMPRESSION_LEVEL), + m_compression_level(BASISU_DEFAULT_ETC1S_COMPRESSION_LEVEL), m_perceptual(true), m_debug_stats(false), m_debug_images(false), @@ -101,12 +103,12 @@ namespace basisu bool m_validate; bool m_multithreaded; bool m_disable_hierarchical_endpoint_codebooks; - + basist::basis_texture_type m_tex_type; const basist::basisu_lowlevel_etc1s_transcoder *m_pGlobal_codebooks; - + opencl_context_ptr m_pOpenCL_context; - + job_pool *m_pJob_pool; }; @@ -143,12 +145,12 @@ namespace basisu uint32_t get_total_selector_clusters() const { return static_cast(m_selector_cluster_block_indices.size()); } uint32_t get_block_selector_cluster_index(uint32_t block_index) const { return m_block_selector_cluster_index[block_index]; } const etc_block &get_selector_cluster_selector_bits(uint32_t cluster_index) const { return m_optimized_cluster_selectors[cluster_index]; } - + // Returns block indices using each selector cluster const uint_vec &get_selector_cluster_block_indices(uint32_t selector_cluster_index) const { return m_selector_cluster_block_indices[selector_cluster_index]; } void dump_debug_image(const char *pFilename, uint32_t first_block, uint32_t num_blocks_x, uint32_t 
num_blocks_y, bool output_blocks); - + void reoptimize_remapped_endpoints(const uint_vec &new_block_endpoints, int_vec &old_to_new_endpoint_cluster_indices, bool optimize_final_codebook, uint_vec *pBlock_selector_indices = nullptr); bool get_opencl_failed() const { return m_opencl_failed; } @@ -170,15 +172,15 @@ namespace basisu // The quantized ETC1S texture. etc_block_vec m_encoded_blocks; - + // Quantized blocks after endpoint quant, but before selector quant - etc_block_vec m_orig_encoded_blocks; - + etc_block_vec m_orig_encoded_blocks; + // Full quality ETC1S texture etc_block_vec m_etc1_blocks_etc1s; - + typedef vec<6, float> vec6F; - + // Endpoint clusterizer typedef tree_vector_quant vec6F_quantizer; vec6F_quantizer m_endpoint_clusterizer; @@ -187,16 +189,16 @@ namespace basisu basisu::vector m_endpoint_clusters; // Array of subblock indices for each parent endpoint cluster - // Note: Initially, each endpoint cluster will only live in a single parent cluster, in a shallow tree. + // Note: Initially, each endpoint cluster will only live in a single parent cluster, in a shallow tree. // As the endpoint clusters are manipulated this constraint gets broken. basisu::vector m_endpoint_parent_clusters; - + // Each block's parent endpoint cluster index - uint8_vec m_block_parent_endpoint_cluster; + uint8_vec m_block_parent_endpoint_cluster; // Array of endpoint cluster indices for each parent endpoint cluster basisu::vector m_endpoint_clusters_within_each_parent_cluster; - + struct endpoint_cluster_etc_params { endpoint_cluster_etc_params() @@ -266,13 +268,13 @@ namespace basisu }; typedef basisu::vector cluster_subblock_etc_params_vec; - - // Each endpoint cluster's ETC1S parameters + + // Each endpoint cluster's ETC1S parameters cluster_subblock_etc_params_vec m_endpoint_cluster_etc_params; // The endpoint cluster index used by each ETC1 subblock. 
basisu::vector m_block_endpoint_clusters_indices; - + // The block(s) within each selector cluster // Note: If you add anything here that uses selector cluster indicies, be sure to update optimize_selector_codebook()! basisu::vector m_selector_cluster_block_indices; @@ -282,13 +284,13 @@ namespace basisu // The block(s) within each parent selector cluster. basisu::vector m_selector_parent_cluster_block_indices; - + // Each block's parent selector cluster uint8_vec m_block_parent_selector_cluster; // Array of selector cluster indices for each parent selector cluster basisu::vector m_selector_clusters_within_each_parent_cluster; - + // Each block's selector cluster index basisu::vector m_block_selector_cluster_index; diff --git a/external/basis_universal/encoder/basisu_gpu_texture.cpp b/external/basis_universal/encoder/basisu_gpu_texture.cpp index 983d6a868d..59a2a174d9 100644 --- a/external/basis_universal/encoder/basisu_gpu_texture.cpp +++ b/external/basis_universal/encoder/basisu_gpu_texture.cpp @@ -1,5 +1,5 @@ // basisu_gpu_texture.cpp -// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2026 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -29,13 +29,13 @@ namespace basisu //------------------------------------------------------------------------------------------------ // ETC2 EAC - void unpack_etc2_eac(const void *pBlock_bits, color_rgba *pPixels) + void unpack_etc2_eac(const void* pBlock_bits, color_rgba* pPixels) { static_assert(sizeof(eac_a8_block) == 8, "sizeof(eac_a8_block) == 8"); - const eac_a8_block *pBlock = static_cast(pBlock_bits); + const eac_a8_block* pBlock = static_cast(pBlock_bits); - const int8_t *pTable = g_etc2_eac_tables[pBlock->m_table]; + const int8_t* pTable = g_etc2_eac_tables[pBlock->m_table]; const uint64_t selector_bits = pBlock->get_selector_bits(); @@ -73,10 +73,10 @@ namespace basisu uint8_t m_high_color[cTotalEndpointBytes]; uint8_t m_selectors[cTotalSelectorBytes]; - inline uint32_t get_high_color() const { return m_high_color[0] | (m_high_color[1] << 8U); } + inline uint32_t get_high_color() const { return m_high_color[0] | (m_high_color[1] << 8U); } inline uint32_t get_low_color() const { return m_low_color[0] | (m_low_color[1] << 8U); } - static void unpack_color(uint32_t c, uint32_t &r, uint32_t &g, uint32_t &b) + static void unpack_color(uint32_t c, uint32_t& r, uint32_t& g, uint32_t& b) { r = (c >> 11) & 31; g = (c >> 5) & 63; @@ -91,11 +91,11 @@ namespace basisu }; // Returns true if the block uses 3 color punchthrough alpha mode. 
- bool unpack_bc1(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha) + bool unpack_bc1(const void* pBlock_bits, color_rgba* pPixels, bool set_alpha) { static_assert(sizeof(bc1_block) == 8, "sizeof(bc1_block) == 8"); - const bc1_block *pBlock = static_cast(pBlock_bits); + const bc1_block* pBlock = static_cast(pBlock_bits); const uint32_t l = pBlock->get_low_color(); const uint32_t h = pBlock->get_high_color(); @@ -147,11 +147,11 @@ namespace basisu return used_punchthrough; } - bool unpack_bc1_nv(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha) + bool unpack_bc1_nv(const void* pBlock_bits, color_rgba* pPixels, bool set_alpha) { static_assert(sizeof(bc1_block) == 8, "sizeof(bc1_block) == 8"); - const bc1_block *pBlock = static_cast(pBlock_bits); + const bc1_block* pBlock = static_cast(pBlock_bits); const uint32_t l = pBlock->get_low_color(); const uint32_t h = pBlock->get_high_color(); @@ -182,19 +182,19 @@ namespace basisu if (l > h) { c[2].r = (uint8_t)(((2 * r0 + r1) * 22) / 8); - c[2].g = (uint8_t)(((256 * c[0].g + gdiff/4 + 128 + gdiff * 80) / 256)); + c[2].g = (uint8_t)(((256 * c[0].g + gdiff / 4 + 128 + gdiff * 80) / 256)); c[2].b = (uint8_t)(((2 * b0 + b1) * 22) / 8); c[2].a = 0xFF; c[3].r = (uint8_t)(((2 * r1 + r0) * 22) / 8); - c[3].g = (uint8_t)((256 * c[1].g - gdiff/4 + 128 - gdiff * 80) / 256); + c[3].g = (uint8_t)((256 * c[1].g - gdiff / 4 + 128 - gdiff * 80) / 256); c[3].b = (uint8_t)(((2 * b1 + b0) * 22) / 8); c[3].a = 0xFF; } else { c[2].r = (uint8_t)(((r0 + r1) * 33) / 8); - c[2].g = (uint8_t)((256 * c[0].g + gdiff/4 + 128 + gdiff * 128) / 256); + c[2].g = (uint8_t)((256 * c[0].g + gdiff / 4 + 128 + gdiff * 128) / 256); c[2].b = (uint8_t)(((b0 + b1) * 33) / 8); c[2].a = 0xFF; @@ -229,9 +229,9 @@ namespace basisu static inline int interp_5_6_amd(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 * 43 + c1 * 21 + 32) >> 6; } static inline int interp_half_5_6_amd(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 
+ c1 + 1) >> 1; } - bool unpack_bc1_amd(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha) + bool unpack_bc1_amd(const void* pBlock_bits, color_rgba* pPixels, bool set_alpha) { - const bc1_block *pBlock = static_cast(pBlock_bits); + const bc1_block* pBlock = static_cast(pBlock_bits); const uint32_t l = pBlock->get_low_color(); const uint32_t h = pBlock->get_high_color(); @@ -310,7 +310,7 @@ namespace basisu return (selector_bits >> (((y * 4) + x) * cBC4SelectorBits)) & (cMaxSelectorValues - 1); } - static inline uint32_t get_block_values6(uint8_t *pDst, uint32_t l, uint32_t h) + static inline uint32_t get_block_values6(uint8_t* pDst, uint32_t l, uint32_t h) { pDst[0] = static_cast(l); pDst[1] = static_cast(h); @@ -323,7 +323,7 @@ namespace basisu return 6; } - static inline uint32_t get_block_values8(uint8_t *pDst, uint32_t l, uint32_t h) + static inline uint32_t get_block_values8(uint8_t* pDst, uint32_t l, uint32_t h) { pDst[0] = static_cast(l); pDst[1] = static_cast(h); @@ -336,7 +336,7 @@ namespace basisu return 8; } - static inline uint32_t get_block_values(uint8_t *pDst, uint32_t l, uint32_t h) + static inline uint32_t get_block_values(uint8_t* pDst, uint32_t l, uint32_t h) { if (l > h) return get_block_values8(pDst, l, h); @@ -345,11 +345,11 @@ namespace basisu } }; - void unpack_bc4(const void *pBlock_bits, uint8_t *pPixels, uint32_t stride) + void unpack_bc4(const void* pBlock_bits, uint8_t* pPixels, uint32_t stride) { static_assert(sizeof(bc4_block) == 8, "sizeof(bc4_block) == 8"); - const bc4_block *pBlock = static_cast(pBlock_bits); + const bc4_block* pBlock = static_cast(pBlock_bits); uint8_t sel_values[8]; bc4_block::get_block_values(sel_values, pBlock->get_low_alpha(), pBlock->get_high_alpha()); @@ -366,11 +366,11 @@ namespace basisu } // Returns false if the block uses 3-color punchthrough alpha mode, which isn't supported on some GPU's for BC3. 
- bool unpack_bc3(const void *pBlock_bits, color_rgba *pPixels) + bool unpack_bc3(const void* pBlock_bits, color_rgba* pPixels) { bool success = true; - if (unpack_bc1((const uint8_t *)pBlock_bits + sizeof(bc4_block), pPixels, true)) + if (unpack_bc1((const uint8_t*)pBlock_bits + sizeof(bc4_block), pPixels, true)) success = false; unpack_bc4(pBlock_bits, &pPixels[0].a, sizeof(color_rgba)); @@ -379,10 +379,10 @@ namespace basisu } // writes RG - void unpack_bc5(const void *pBlock_bits, color_rgba *pPixels) + void unpack_bc5(const void* pBlock_bits, color_rgba* pPixels) { unpack_bc4(pBlock_bits, &pPixels[0].r, sizeof(color_rgba)); - unpack_bc4((const uint8_t *)pBlock_bits + sizeof(bc4_block), &pPixels[0].g, sizeof(color_rgba)); + unpack_bc4((const uint8_t*)pBlock_bits + sizeof(bc4_block), &pPixels[0].g, sizeof(color_rgba)); } //------------------------------------------------------------------------------------------------ @@ -439,323 +439,6 @@ namespace basisu } } - //------------------------------------------------------------------------------------------------ - // BC7 mode 0-7 decompression. - // Instead of one monster routine to unpack all the BC7 modes, we're lumping the 3 subset, 2 subset, 1 subset, and dual plane modes together into simple shared routines. 
- - static inline uint32_t bc7_dequant(uint32_t val, uint32_t pbit, uint32_t val_bits) { assert(val < (1U << val_bits)); assert(pbit < 2); assert(val_bits >= 4 && val_bits <= 8); const uint32_t total_bits = val_bits + 1; val = (val << 1) | pbit; val <<= (8 - total_bits); val |= (val >> total_bits); assert(val <= 255); return val; } - static inline uint32_t bc7_dequant(uint32_t val, uint32_t val_bits) { assert(val < (1U << val_bits)); assert(val_bits >= 4 && val_bits <= 8); val <<= (8 - val_bits); val |= (val >> val_bits); assert(val <= 255); return val; } - - static inline uint32_t bc7_interp2(uint32_t l, uint32_t h, uint32_t w) { assert(w < 4); return (l * (64 - basist::g_bc7_weights2[w]) + h * basist::g_bc7_weights2[w] + 32) >> 6; } - static inline uint32_t bc7_interp3(uint32_t l, uint32_t h, uint32_t w) { assert(w < 8); return (l * (64 - basist::g_bc7_weights3[w]) + h * basist::g_bc7_weights3[w] + 32) >> 6; } - static inline uint32_t bc7_interp4(uint32_t l, uint32_t h, uint32_t w) { assert(w < 16); return (l * (64 - basist::g_bc7_weights4[w]) + h * basist::g_bc7_weights4[w] + 32) >> 6; } - static inline uint32_t bc7_interp(uint32_t l, uint32_t h, uint32_t w, uint32_t bits) - { - assert(l <= 255 && h <= 255); - switch (bits) - { - case 2: return bc7_interp2(l, h, w); - case 3: return bc7_interp3(l, h, w); - case 4: return bc7_interp4(l, h, w); - default: - break; - } - return 0; - } - - bool unpack_bc7_mode0_2(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) - { - //const uint32_t SUBSETS = 3; - const uint32_t ENDPOINTS = 6; - const uint32_t COMPS = 3; - const uint32_t WEIGHT_BITS = (mode == 0) ? 3 : 2; - const uint32_t ENDPOINT_BITS = (mode == 0) ? 4 : 5; - const uint32_t PBITS = (mode == 0) ? 
6 : 0; - const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; - - uint32_t bit_offset = 0; - const uint8_t* pBuf = static_cast(pBlock_bits); - - if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false; - - const uint32_t part = read_bits32(pBuf, bit_offset, (mode == 0) ? 4 : 6); - - color_rgba endpoints[ENDPOINTS]; - for (uint32_t c = 0; c < COMPS; c++) - for (uint32_t e = 0; e < ENDPOINTS; e++) - endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS); - - uint32_t pbits[6]; - for (uint32_t p = 0; p < PBITS; p++) - pbits[p] = read_bits32(pBuf, bit_offset, 1); - - uint32_t weights[16]; - for (uint32_t i = 0; i < 16; i++) - weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == basist::g_bc7_table_anchor_index_third_subset_1[part]) || (i == basist::g_bc7_table_anchor_index_third_subset_2[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS); - - assert(bit_offset == 128); - - for (uint32_t e = 0; e < ENDPOINTS; e++) - for (uint32_t c = 0; c < 4; c++) - endpoints[e][c] = (uint8_t)((c == 3) ? 255 : (PBITS ? bc7_dequant(endpoints[e][c], pbits[e], ENDPOINT_BITS) : bc7_dequant(endpoints[e][c], ENDPOINT_BITS))); - - color_rgba block_colors[3][8]; - for (uint32_t s = 0; s < 3; s++) - for (uint32_t i = 0; i < WEIGHT_VALS; i++) - { - for (uint32_t c = 0; c < 3; c++) - block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS); - block_colors[s][i][3] = 255; - } - - for (uint32_t i = 0; i < 16; i++) - pPixels[i] = block_colors[basist::g_bc7_partition3[part * 16 + i]][weights[i]]; - - return true; - } - - bool unpack_bc7_mode1_3_7(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) - { - //const uint32_t SUBSETS = 2; - const uint32_t ENDPOINTS = 4; - const uint32_t COMPS = (mode == 7) ? 4 : 3; - const uint32_t WEIGHT_BITS = (mode == 1) ? 3 : 2; - const uint32_t ENDPOINT_BITS = (mode == 7) ? 5 : ((mode == 1) ? 6 : 7); - const uint32_t PBITS = (mode == 1) ? 
2 : 4; - const uint32_t SHARED_PBITS = (mode == 1) ? true : false; - const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; - - uint32_t bit_offset = 0; - const uint8_t* pBuf = static_cast(pBlock_bits); - - if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false; - - const uint32_t part = read_bits32(pBuf, bit_offset, 6); - - color_rgba endpoints[ENDPOINTS]; - for (uint32_t c = 0; c < COMPS; c++) - for (uint32_t e = 0; e < ENDPOINTS; e++) - endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS); - - uint32_t pbits[4]; - for (uint32_t p = 0; p < PBITS; p++) - pbits[p] = read_bits32(pBuf, bit_offset, 1); - - uint32_t weights[16]; - for (uint32_t i = 0; i < 16; i++) - weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == basist::g_bc7_table_anchor_index_second_subset[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS); - - assert(bit_offset == 128); - - for (uint32_t e = 0; e < ENDPOINTS; e++) - for (uint32_t c = 0; c < 4; c++) - endpoints[e][c] = (uint8_t)((c == ((mode == 7U) ? 4U : 3U)) ? 255 : bc7_dequant(endpoints[e][c], pbits[SHARED_PBITS ? (e >> 1) : e], ENDPOINT_BITS)); - - color_rgba block_colors[2][8]; - for (uint32_t s = 0; s < 2; s++) - for (uint32_t i = 0; i < WEIGHT_VALS; i++) - { - for (uint32_t c = 0; c < COMPS; c++) - block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS); - block_colors[s][i][3] = (COMPS == 3) ? 255 : block_colors[s][i][3]; - } - - for (uint32_t i = 0; i < 16; i++) - pPixels[i] = block_colors[basist::g_bc7_partition2[part * 16 + i]][weights[i]]; - - return true; - } - - bool unpack_bc7_mode4_5(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) - { - const uint32_t ENDPOINTS = 2; - const uint32_t COMPS = 4; - const uint32_t WEIGHT_BITS = 2; - const uint32_t A_WEIGHT_BITS = (mode == 4) ? 3 : 2; - const uint32_t ENDPOINT_BITS = (mode == 4) ? 5 : 7; - const uint32_t A_ENDPOINT_BITS = (mode == 4) ? 
6 : 8; - //const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; - //const uint32_t A_WEIGHT_VALS = 1 << A_WEIGHT_BITS; - - uint32_t bit_offset = 0; - const uint8_t* pBuf = static_cast(pBlock_bits); - - if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false; - - const uint32_t comp_rot = read_bits32(pBuf, bit_offset, 2); - const uint32_t index_mode = (mode == 4) ? read_bits32(pBuf, bit_offset, 1) : 0; - - color_rgba endpoints[ENDPOINTS]; - for (uint32_t c = 0; c < COMPS; c++) - for (uint32_t e = 0; e < ENDPOINTS; e++) - endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, (c == 3) ? A_ENDPOINT_BITS : ENDPOINT_BITS); - - const uint32_t weight_bits[2] = { index_mode ? A_WEIGHT_BITS : WEIGHT_BITS, index_mode ? WEIGHT_BITS : A_WEIGHT_BITS }; - - uint32_t weights[16], a_weights[16]; - - for (uint32_t i = 0; i < 16; i++) - (index_mode ? a_weights : weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[index_mode] - ((!i) ? 1 : 0)); - - for (uint32_t i = 0; i < 16; i++) - (index_mode ? weights : a_weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[1 - index_mode] - ((!i) ? 1 : 0)); - - assert(bit_offset == 128); - - for (uint32_t e = 0; e < ENDPOINTS; e++) - for (uint32_t c = 0; c < 4; c++) - endpoints[e][c] = (uint8_t)bc7_dequant(endpoints[e][c], (c == 3) ? 
A_ENDPOINT_BITS : ENDPOINT_BITS); - - color_rgba block_colors[8]; - for (uint32_t i = 0; i < (1U << weight_bits[0]); i++) - for (uint32_t c = 0; c < 3; c++) - block_colors[i][c] = (uint8_t)bc7_interp(endpoints[0][c], endpoints[1][c], i, weight_bits[0]); - - for (uint32_t i = 0; i < (1U << weight_bits[1]); i++) - block_colors[i][3] = (uint8_t)bc7_interp(endpoints[0][3], endpoints[1][3], i, weight_bits[1]); - - for (uint32_t i = 0; i < 16; i++) - { - pPixels[i] = block_colors[weights[i]]; - pPixels[i].a = block_colors[a_weights[i]].a; - if (comp_rot >= 1) - std::swap(pPixels[i].a, pPixels[i].m_comps[comp_rot - 1]); - } - - return true; - } - - struct bc7_mode_6 - { - struct - { - uint64_t m_mode : 7; - uint64_t m_r0 : 7; - uint64_t m_r1 : 7; - uint64_t m_g0 : 7; - uint64_t m_g1 : 7; - uint64_t m_b0 : 7; - uint64_t m_b1 : 7; - uint64_t m_a0 : 7; - uint64_t m_a1 : 7; - uint64_t m_p0 : 1; - } m_lo; - - union - { - struct - { - uint64_t m_p1 : 1; - uint64_t m_s00 : 3; - uint64_t m_s10 : 4; - uint64_t m_s20 : 4; - uint64_t m_s30 : 4; - - uint64_t m_s01 : 4; - uint64_t m_s11 : 4; - uint64_t m_s21 : 4; - uint64_t m_s31 : 4; - - uint64_t m_s02 : 4; - uint64_t m_s12 : 4; - uint64_t m_s22 : 4; - uint64_t m_s32 : 4; - - uint64_t m_s03 : 4; - uint64_t m_s13 : 4; - uint64_t m_s23 : 4; - uint64_t m_s33 : 4; - - } m_hi; - - uint64_t m_hi_bits; - }; - }; - - bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels) - { - static_assert(sizeof(bc7_mode_6) == 16, "sizeof(bc7_mode_6) == 16"); - - const bc7_mode_6 &block = *static_cast(pBlock_bits); - - if (block.m_lo.m_mode != (1 << 6)) - return false; - - const uint32_t r0 = (uint32_t)((block.m_lo.m_r0 << 1) | block.m_lo.m_p0); - const uint32_t g0 = (uint32_t)((block.m_lo.m_g0 << 1) | block.m_lo.m_p0); - const uint32_t b0 = (uint32_t)((block.m_lo.m_b0 << 1) | block.m_lo.m_p0); - const uint32_t a0 = (uint32_t)((block.m_lo.m_a0 << 1) | block.m_lo.m_p0); - const uint32_t r1 = (uint32_t)((block.m_lo.m_r1 << 1) | block.m_hi.m_p1); 
- const uint32_t g1 = (uint32_t)((block.m_lo.m_g1 << 1) | block.m_hi.m_p1); - const uint32_t b1 = (uint32_t)((block.m_lo.m_b1 << 1) | block.m_hi.m_p1); - const uint32_t a1 = (uint32_t)((block.m_lo.m_a1 << 1) | block.m_hi.m_p1); - - color_rgba vals[16]; - for (uint32_t i = 0; i < 16; i++) - { - const uint32_t w = basist::g_bc7_weights4[i]; - const uint32_t iw = 64 - w; - vals[i].set_noclamp_rgba( - (r0 * iw + r1 * w + 32) >> 6, - (g0 * iw + g1 * w + 32) >> 6, - (b0 * iw + b1 * w + 32) >> 6, - (a0 * iw + a1 * w + 32) >> 6); - } - - pPixels[0] = vals[block.m_hi.m_s00]; - pPixels[1] = vals[block.m_hi.m_s10]; - pPixels[2] = vals[block.m_hi.m_s20]; - pPixels[3] = vals[block.m_hi.m_s30]; - - pPixels[4] = vals[block.m_hi.m_s01]; - pPixels[5] = vals[block.m_hi.m_s11]; - pPixels[6] = vals[block.m_hi.m_s21]; - pPixels[7] = vals[block.m_hi.m_s31]; - - pPixels[8] = vals[block.m_hi.m_s02]; - pPixels[9] = vals[block.m_hi.m_s12]; - pPixels[10] = vals[block.m_hi.m_s22]; - pPixels[11] = vals[block.m_hi.m_s32]; - - pPixels[12] = vals[block.m_hi.m_s03]; - pPixels[13] = vals[block.m_hi.m_s13]; - pPixels[14] = vals[block.m_hi.m_s23]; - pPixels[15] = vals[block.m_hi.m_s33]; - - return true; - } - - bool unpack_bc7(const void *pBlock, color_rgba *pPixels) - { - const uint32_t first_byte = static_cast(pBlock)[0]; - - for (uint32_t mode = 0; mode <= 7; mode++) - { - if (first_byte & (1U << mode)) - { - switch (mode) - { - case 0: - case 2: - return unpack_bc7_mode0_2(mode, pBlock, pPixels); - case 1: - case 3: - case 7: - return unpack_bc7_mode1_3_7(mode, pBlock, pPixels); - case 4: - case 5: - return unpack_bc7_mode4_5(mode, pBlock, pPixels); - case 6: - return unpack_bc7_mode6(pBlock, pPixels); - default: - break; - } - } - } - - return false; - } - static inline int bc6h_sign_extend(int val, int bits) { assert((bits >= 1) && (bits < 32)); @@ -1105,7 +788,7 @@ namespace basisu return false; if (pBlock->m_hi.m_alpha == 1) return false; - + color_rgba colors[4]; colors[0].r = 
pBlock->m_hi.m_r0; @@ -1155,7 +838,7 @@ namespace basisu for (uint32_t i = 0; i < 16; i++) { const uint32_t sel = (pBlock->m_sels[4 + (i >> 2)] >> ((i & 3) * 2)) & 3; - + const uint32_t x = i & 3; const uint32_t y = i >> 2; pPixels[4 + x + y * 8] = block1_colors[sel]; @@ -1216,7 +899,7 @@ namespace basisu { return color_rgba((col[0] << 3) | (col[0] >> 2), (col[1] << 3) | (col[1] >> 2), (col[2] << 3) | (col[2] >> 2), 255); } - + static color_rgba convert_rgba_5554_to_8888(const color_rgba& col) { return color_rgba((col[0] << 3) | (col[0] >> 2), (col[1] << 3) | (col[1] >> 2), (col[2] << 3) | (col[2] >> 2), (col[3] << 4) | col[3]); @@ -1239,10 +922,10 @@ namespace basisu { // colora=554 color_rgba color_a(pBlock->m_opaque_color_data.m_red_a, pBlock->m_opaque_color_data.m_green_a, (pBlock->m_opaque_color_data.m_blue_a << 1) | (pBlock->m_opaque_color_data.m_blue_a >> 3), 255); - + // colora=555 color_rgba color_b(pBlock->m_opaque_color_data.m_red_b, pBlock->m_opaque_color_data.m_green_b, pBlock->m_opaque_color_data.m_blue_b, 255); - + colors[0] = convert_rgb_555_to_888(color_a); colors[3] = convert_rgb_555_to_888(color_b); @@ -1251,11 +934,11 @@ namespace basisu } else { - // colora=4433 + // colora=4433 color_rgba color_a( - (pBlock->m_trans_color_data.m_red_a << 1) | (pBlock->m_trans_color_data.m_red_a >> 3), + (pBlock->m_trans_color_data.m_red_a << 1) | (pBlock->m_trans_color_data.m_red_a >> 3), (pBlock->m_trans_color_data.m_green_a << 1) | (pBlock->m_trans_color_data.m_green_a >> 3), - (pBlock->m_trans_color_data.m_blue_a << 2) | (pBlock->m_trans_color_data.m_blue_a >> 1), + (pBlock->m_trans_color_data.m_blue_a << 2) | (pBlock->m_trans_color_data.m_blue_a >> 1), pBlock->m_trans_color_data.m_alpha_a << 1); //colorb=4443 @@ -1331,9 +1014,9 @@ namespace basisu for (uint32_t x = 0; x < 4; x++) { const uint32_t shift = 45 - ((y + x * 4) * 3); - + const uint32_t sel = (uint32_t)((sels >> shift) & 7); - + int val = base + g_etc2_eac_tables[table][sel] * mul; val = 
clamp(val, 0, 2047); @@ -1362,9 +1045,10 @@ namespace basisu { basist::unpack_uastc(*static_cast(p), (basist::color32 *)pPixels, false); } - + // Unpacks to RGBA, R, RG, or A. LDR GPU texture formats only. - bool unpack_block(texture_format fmt, const void* pBlock, color_rgba* pPixels) + // astc_srgb: if true, ASTC LDR formats are decoded in sRGB decode mode, otherwise L8. + bool unpack_block(texture_format fmt, const void* pBlock, color_rgba* pPixels, bool astc_srgb) { switch (fmt) { @@ -1400,7 +1084,7 @@ namespace basisu } case texture_format::cBC7: { - return unpack_bc7(pBlock, pPixels); + return basist::bc7u::unpack_bc7(pBlock, reinterpret_cast(pPixels)); } // Full ETC2 color blocks (planar/T/H modes) is currently unsupported in basisu, but we do support ETC2 with alpha (using ETC1 for color) case texture_format::cETC2_RGB: @@ -1433,14 +1117,32 @@ namespace basisu return false; } case texture_format::cASTC_LDR_4x4: - { - const bool astc_srgb = false; - bool status = basisu_astc::astc::decompress_ldr(reinterpret_cast(pPixels), static_cast(pBlock), astc_srgb, 4, 4); + case texture_format::cASTC_LDR_5x4: + case texture_format::cASTC_LDR_5x5: + case texture_format::cASTC_LDR_6x5: + case texture_format::cASTC_LDR_6x6: + case texture_format::cASTC_LDR_8x5: + case texture_format::cASTC_LDR_8x6: + case texture_format::cASTC_LDR_10x5: + case texture_format::cASTC_LDR_10x6: + case texture_format::cASTC_LDR_8x8: + case texture_format::cASTC_LDR_10x8: + case texture_format::cASTC_LDR_10x10: + case texture_format::cASTC_LDR_12x10: + case texture_format::cASTC_LDR_12x12: + { + const uint32_t block_width = get_block_width(fmt), block_height = get_block_height(fmt); + + assert(get_astc_ldr_texture_format(block_width, block_height) == fmt); + assert(astc_helpers::is_valid_block_size(block_width, block_height)); + + // TODO: Allow caller to use the Android decoder, too. 
+ bool status = basisu_astc::astc::decompress_ldr(reinterpret_cast(pPixels), static_cast(pBlock), astc_srgb, block_width, block_height); assert(status); if (!status) return false; - + break; } case texture_format::cATC_RGB: @@ -1532,7 +1234,7 @@ namespace basisu #else // Use our decoder basist::half_float half_block[16][4]; - + astc_helpers::log_astc_block log_blk; if (!astc_helpers::unpack_block(pBlock, log_blk, 4, 4)) return false; @@ -1577,8 +1279,8 @@ namespace basisu assert(0); return false; } - - bool gpu_image::unpack(image& img) const + + bool gpu_image::unpack(image& img, bool astc_srgb) const { img.resize(get_pixel_width(), get_pixel_height()); img.set_all(g_black_color); @@ -1589,11 +1291,11 @@ namespace basisu if ((m_fmt == texture_format::cPVRTC1_4_RGB) || (m_fmt == texture_format::cPVRTC1_4_RGBA)) { pvrtc4_image pi(m_width, m_height); - + if (get_total_blocks() != pi.get_total_blocks()) return false; - - memcpy(&pi.get_blocks()[0], get_ptr(), get_size_in_bytes()); + + memcpy((void *)&pi.get_blocks()[0], (const void *)get_ptr(), get_size_in_bytes()); pi.deswizzle(); @@ -1615,7 +1317,7 @@ namespace basisu { const void* pBlock = get_block_ptr(bx, by); - if (!unpack_block(m_fmt, pBlock, pixels)) + if (!unpack_block(m_fmt, pBlock, pixels, astc_srgb)) success = false; img.set_block_clipped(pixels, bx * m_block_width, by * m_block_height, m_block_width, m_block_height); @@ -1662,14 +1364,14 @@ namespace basisu return success; } - + // KTX1 texture file writing static const uint8_t g_ktx_file_id[12] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A }; // KTX/GL enums enum { - KTX_ENDIAN = 0x04030201, + KTX_ENDIAN = 0x04030201, KTX_OPPOSITE_ENDIAN = 0x01020304, KTX_ETC1_RGB8_OES = 0x8D64, KTX_RED = 0x1903, @@ -1689,7 +1391,7 @@ namespace basisu KTX_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT = 0x8E8F, KTX_COMPRESSED_RGB_PVRTC_4BPPV1_IMG = 0x8C00, KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG = 0x8C02, - + KTX_COMPRESSED_RGBA_ASTC_4x4_KHR = 0x93B0, 
KTX_COMPRESSED_RGBA_ASTC_5x4_KHR = 0x93B1, KTX_COMPRESSED_RGBA_ASTC_5x5_KHR = 0x93B2, @@ -1731,7 +1433,7 @@ namespace basisu KTX_COMPRESSED_R11_EAC = 0x9270, KTX_COMPRESSED_RG11_EAC = 0x9272 }; - + struct ktx_header { uint8_t m_identifier[12]; @@ -1753,7 +1455,7 @@ namespace basisu }; // Input is a texture array of mipmapped gpu_image's: gpu_images[array_index][level_index] - bool create_ktx_texture_file(uint8_vec &ktx_data, const basisu::vector& gpu_images, bool cubemap_flag) + bool create_ktx_texture_file(uint8_vec &ktx_data, const basisu::vector& gpu_images, bool cubemap_flag, bool astc_srgb_flag) { if (!gpu_images.size()) { @@ -1773,7 +1475,7 @@ namespace basisu return false; } } - + for (uint32_t array_index = 0; array_index < gpu_images.size(); array_index++) { const gpu_image_vec &levels = gpu_images[array_index]; @@ -1905,18 +1607,101 @@ namespace basisu { internal_fmt = KTX_COMPRESSED_RGBA_ASTC_6x6_KHR; // TODO: should we write RGB? We don't support generating HDR 6x6 with alpha. - base_internal_fmt = KTX_RGBA; + base_internal_fmt = KTX_RGBA; break; } // We use different enums for HDR vs. LDR ASTC, but internally they are both just ASTC. - case texture_format::cASTC_LDR_4x4: case texture_format::cASTC_HDR_4x4: - case texture_format::cUASTC_HDR_4x4: // UASTC_HDR is just HDR-only ASTC + case texture_format::cUASTC_HDR_4x4: // UASTC_HDR 4x4 is just HDR-only ASTC { internal_fmt = KTX_COMPRESSED_RGBA_ASTC_4x4_KHR; base_internal_fmt = KTX_RGBA; break; } + case texture_format::cASTC_LDR_4x4: + { + internal_fmt = !astc_srgb_flag ? KTX_COMPRESSED_RGBA_ASTC_4x4_KHR : KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR; + base_internal_fmt = KTX_RGBA; + break; + } + case texture_format::cASTC_LDR_5x4: + { + internal_fmt = !astc_srgb_flag ? KTX_COMPRESSED_RGBA_ASTC_5x4_KHR : KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR; + base_internal_fmt = KTX_RGBA; + break; + } + case texture_format::cASTC_LDR_5x5: + { + internal_fmt = !astc_srgb_flag ? 
KTX_COMPRESSED_RGBA_ASTC_5x5_KHR : KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR; + base_internal_fmt = KTX_RGBA; + break; + } + case texture_format::cASTC_LDR_6x5: + { + internal_fmt = !astc_srgb_flag ? KTX_COMPRESSED_RGBA_ASTC_6x5_KHR : KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR; + base_internal_fmt = KTX_RGBA; + break; + } + case texture_format::cASTC_LDR_6x6: + { + internal_fmt = !astc_srgb_flag ? KTX_COMPRESSED_RGBA_ASTC_6x6_KHR : KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR; + base_internal_fmt = KTX_RGBA; + break; + } + case texture_format::cASTC_LDR_8x5: + { + internal_fmt = !astc_srgb_flag ? KTX_COMPRESSED_RGBA_ASTC_8x5_KHR : KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR; + base_internal_fmt = KTX_RGBA; + break; + } + case texture_format::cASTC_LDR_8x6: + { + internal_fmt = !astc_srgb_flag ? KTX_COMPRESSED_RGBA_ASTC_8x6_KHR : KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR; + base_internal_fmt = KTX_RGBA; + break; + } + case texture_format::cASTC_LDR_10x5: + { + internal_fmt = !astc_srgb_flag ? KTX_COMPRESSED_RGBA_ASTC_10x5_KHR : KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR; + base_internal_fmt = KTX_RGBA; + break; + } + case texture_format::cASTC_LDR_10x6: + { + internal_fmt = !astc_srgb_flag ? KTX_COMPRESSED_RGBA_ASTC_10x6_KHR : KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR; + base_internal_fmt = KTX_RGBA; + break; + } + case texture_format::cASTC_LDR_8x8: + { + internal_fmt = !astc_srgb_flag ? KTX_COMPRESSED_RGBA_ASTC_8x8_KHR : KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR; + base_internal_fmt = KTX_RGBA; + break; + } + case texture_format::cASTC_LDR_10x8: + { + internal_fmt = !astc_srgb_flag ? KTX_COMPRESSED_RGBA_ASTC_10x8_KHR : KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR; + base_internal_fmt = KTX_RGBA; + break; + } + case texture_format::cASTC_LDR_10x10: + { + internal_fmt = !astc_srgb_flag ? 
KTX_COMPRESSED_RGBA_ASTC_10x10_KHR : KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR; + base_internal_fmt = KTX_RGBA; + break; + } + case texture_format::cASTC_LDR_12x10: + { + internal_fmt = !astc_srgb_flag ? KTX_COMPRESSED_RGBA_ASTC_12x10_KHR : KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR; + base_internal_fmt = KTX_RGBA; + break; + } + case texture_format::cASTC_LDR_12x12: + { + internal_fmt = !astc_srgb_flag ? KTX_COMPRESSED_RGBA_ASTC_12x12_KHR : KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR; + base_internal_fmt = KTX_RGBA; + break; + } case texture_format::cATC_RGB: { internal_fmt = KTX_ATC_RGB_AMD; @@ -1987,6 +1772,12 @@ namespace basisu append_vector(ktx_data, (uint8_t*)&header, sizeof(header)); + fmt_debug_printf("create_ktx_texture_file: {}x{}, astc_srgb_flag: {}, basis::texture_format: {}, internalFormat: {}, baseInternalFormat: {}, arrayElements: {}, faces: {}, mipLevels: {}\n", + width, height, astc_srgb_flag, (uint32_t)fmt, + (uint32_t)header.m_glInternalFormat, (uint32_t)header.m_glBaseInternalFormat, + (uint32_t)header.m_numberOfArrayElements, (uint32_t)header.m_numberOfFaces, + (uint32_t)header.m_numberOfMipmapLevels); + for (uint32_t level_index = 0; level_index < total_levels; level_index++) { uint32_t img_size = gpu_images[0][level_index].get_size_in_bytes(); @@ -2169,7 +1960,7 @@ namespace basisu } // array_index } #endif - + // Write DDS file using tinydds TinyDDS_WriteCallbacks cbs; cbs.error = [](void* user, char const* msg) { BASISU_NOTE_UNUSED(user); fprintf(stderr, "tinydds: %s\n", msg); }; @@ -2179,7 +1970,7 @@ namespace basisu uint32_t mipmap_sizes[32]; const void* mipmap_ptrs[32]; - + clear_obj(mipmap_sizes); clear_obj(mipmap_ptrs); @@ -2197,7 +1988,7 @@ namespace basisu { case texture_format::cBC1_NV: case texture_format::cBC1_AMD: - case texture_format::cBC1: + case texture_format::cBC1: tinydds_fmt = use_srgb_format ? 
TDDS_BC1_RGBA_SRGB_BLOCK : TDDS_BC1_RGBA_UNORM_BLOCK; break; case texture_format::cBC3: @@ -2225,10 +2016,10 @@ namespace basisu } } - // DirectXTex's DDSView doesn't handle odd sizes textures correctly. RenderDoc loads them fine, however. - // Trying to work around this here results in invalid mipmaps. - //width = (width + 3) & ~3; - //height = (height + 3) & ~3; + // Note DirectXTex's DDSView doesn't handle odd sizes textures correctly. RenderDoc loads them fine, however. + + fmt_debug_printf("write_dds_file: {}x{}, basis::texture_format: {}, tinydds_fmt: {}, slices: {}, mipLevels: {}, cubemap_flag: {}, use_srgb_format: {}\n", + width, height, (uint32_t)fmt, tinydds_fmt, slices, total_levels, cubemap_flag, use_srgb_format); bool status = TinyDDS_WriteImage(&cbs, &dds_data, @@ -2248,7 +2039,7 @@ namespace basisu fprintf(stderr, "write_dds_file: Failed creating DDS file\n"); return false; } - + return true; } @@ -2267,7 +2058,7 @@ namespace basisu return true; } - + bool read_uncompressed_dds_file(const char* pFilename, basisu::vector &ldr_mips, basisu::vector& hdr_mips) { const uint32_t MAX_IMAGE_DIM = 16384; @@ -2278,7 +2069,7 @@ namespace basisu cbs.allocFn = [](void* user, size_t size) -> void* { BASISU_NOTE_UNUSED(user); return malloc(size); }; cbs.freeFn = [](void* user, void* memory) { BASISU_NOTE_UNUSED(user); free(memory); }; cbs.readFn = [](void* user, void* buffer, size_t byteCount) -> size_t { return (size_t)fread(buffer, 1, byteCount, (FILE*)user); }; - + #ifdef _MSC_VER cbs.seekFn = [](void* user, int64_t ofs) -> bool { return _fseeki64((FILE*)user, ofs, SEEK_SET) == 0; }; cbs.tellFn = [](void* user) -> int64_t { return _ftelli64((FILE*)user); }; @@ -2318,7 +2109,7 @@ namespace basisu error_printf("Failed parsing DDS header in file \"%s\"\n", pFilename); goto failure; } - + if ((!TinyDDS_Is2D(ctx)) || (TinyDDS_ArraySlices(ctx) > 1) || (TinyDDS_IsCubemap(ctx))) { error_printf("Unsupported DDS texture type in file \"%s\"\n", pFilename); @@ -2327,7 
+2118,7 @@ namespace basisu width = TinyDDS_Width(ctx); height = TinyDDS_Height(ctx); - + if (!width || !height) { error_printf("DDS texture dimensions invalid in file \"%s\"\n", pFilename); @@ -2339,7 +2130,7 @@ namespace basisu error_printf("DDS texture dimensions too large in file \"%s\"\n", pFilename); goto failure; } - + tfmt = TinyDDS_GetFormat(ctx); switch (tfmt) { @@ -2387,7 +2178,7 @@ namespace basisu } memcpy(ldr_mips[level].get_ptr(), pImage, image_size); - + if ((tfmt == TDDS_B8G8R8A8_SRGB) || (tfmt == TDDS_B8G8R8A8_UNORM)) { // Swap R and B components. @@ -2416,7 +2207,7 @@ namespace basisu else if (fmt == cRGBA_HALF) { hdr_mips[level].resize(level_width, level_height); - + if ((hdr_mips[level].get_total_pixels() * sizeof(basist::half_float) * 4 != image_size)) { assert(0); @@ -2426,7 +2217,7 @@ namespace basisu // Unpack half to float. const basist::half_float* pSrc_comps = static_cast(pImage); vec4F* pDst_texels = hdr_mips[level].get_ptr(); - + for (uint32_t i = 0; i < total_level_texels; i++) { (*pDst_texels)[0] = basist::half_to_float(pSrc_comps[0]); @@ -2462,7 +2253,7 @@ namespace basisu uint8_vec filedata; if (extension == "ktx") { - if (!create_ktx_texture_file(filedata, g, cubemap_flag)) + if (!create_ktx_texture_file(filedata, g, cubemap_flag, use_srgb_format)) return false; } else if (extension == "pvr") @@ -2500,7 +2291,7 @@ namespace basisu } //const uint32_t OUT_FILE_MAGIC = 'TEXC'; - struct out_file_header + struct out_file_header { packed_uint<4> m_magic; packed_uint<4> m_pad; @@ -2532,16 +2323,92 @@ namespace basisu fwrite(&hdr, sizeof(hdr), 1, pFile); fwrite(gi.get_ptr(), gi.get_size_in_bytes(), 1, pFile); - + return fclose(pFile) != EOF; } - // The .astc texture format is readable using ARM's astcenc, AMD Compressonator, and other engines/tools. 
It oddly doesn't support mipmaps, limiting +#pragma pack(push, 1) + struct astc_file_header + { + uint8_t m_sig[4]; + uint8_t m_block_dim[3]; + uint8_t m_width[3]; + uint8_t m_height[3]; + uint8_t m_depth[3]; + }; +#pragma pack(pop) + + bool read_astc_file(const uint8_t *pImage_data, size_t image_data_size, vector2D& blocks, uint32_t &block_width, uint32_t &block_height, uint32_t &width, uint32_t &height) + { + block_width = 0; + block_height = 0; + width = 0; + height = 0; + blocks.resize(0, 0); + + if (image_data_size < (sizeof(astc_file_header) + sizeof(astc_helpers::astc_block))) + return false; + + const astc_file_header* pHeader = reinterpret_cast(pImage_data); + + if ((pHeader->m_sig[0] != 0x13) || (pHeader->m_sig[1] != 0xAB) || (pHeader->m_sig[2] != 0xA1) || (pHeader->m_sig[3] != 0x5C)) + return false; + + const uint32_t block_depth = pHeader->m_block_dim[2]; + if (block_depth != 1) + return false; + + if ((pHeader->m_depth[0] != 1) || (pHeader->m_depth[1] != 0) || (pHeader->m_depth[2] != 0)) + return false; + + block_width = pHeader->m_block_dim[0]; + block_height = pHeader->m_block_dim[1]; + + if (!astc_helpers::is_valid_block_size(block_width, block_height)) + return false; + + width = pHeader->m_width[0] | ((uint32_t)pHeader->m_width[1] << 8u) | ((uint32_t)pHeader->m_width[2] << 16u); + height = pHeader->m_height[0] | ((uint32_t)pHeader->m_height[1] << 8u) | ((uint32_t)pHeader->m_height[2] << 16u); + + const uint32_t MAX_DIM = 32768; + if ((!width) || (width > MAX_DIM) || (!height) || (height > MAX_DIM)) + return false; + + const uint32_t num_blocks_x = (width + block_width - 1) / block_width; + const uint32_t num_blocks_y = (height + block_height - 1) / block_height; + const uint32_t total_blocks = num_blocks_x * num_blocks_y; + + size_t total_expected_size = sizeof(astc_file_header) + (size_t)total_blocks * sizeof(astc_helpers::astc_block); + if (image_data_size < total_expected_size) + return false; + + if (!blocks.try_resize(num_blocks_x, 
num_blocks_y)) + return false; + + memcpy(blocks.get_ptr(), pImage_data + sizeof(astc_file_header), (size_t)total_blocks * sizeof(astc_helpers::astc_block)); + + return true; + } + + bool read_astc_file(const char* pFilename, vector2D& blocks, uint32_t& block_width, uint32_t& block_height, uint32_t& width, uint32_t& height) + { + uint8_vec file_data; + if (!read_file_to_vec(pFilename, file_data)) + return false; + + if (!file_data.size()) + return false; + + return read_astc_file(file_data.get_ptr(), file_data.size(), blocks, block_width, block_height, width, height); + } + + // The .astc texture format is readable using ARM's astcenc, AMD Compressonator, and other engines/tools. It oddly doesn't support mipmaps, limiting // its usefulness/relevance. // https://github.com/ARM-software/astc-encoder/blob/main/Docs/FileFormat.md bool write_astc_file(const char* pFilename, const void* pBlocks, uint32_t block_width, uint32_t block_height, uint32_t dim_x, uint32_t dim_y) { - assert(pBlocks && (block_width >= 4) && (block_height >= 4) && (dim_x > 0) && (dim_y > 0)); + assert(pBlocks && (dim_x > 0) && (dim_y > 0)); + assert(astc_helpers::is_valid_block_size(block_width, block_height)); uint8_vec file_data; file_data.push_back(0x13); @@ -2578,5 +2445,6 @@ namespace basisu return write_vec_to_file(pFilename, file_data); } - + } // basisu + diff --git a/external/basis_universal/encoder/basisu_gpu_texture.h b/external/basis_universal/encoder/basisu_gpu_texture.h index 06f2cc09bf..bcfc9cb494 100644 --- a/external/basis_universal/encoder/basisu_gpu_texture.h +++ b/external/basis_universal/encoder/basisu_gpu_texture.h @@ -1,5 +1,5 @@ // basisu_gpu_texture.h -// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2026 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,6 +14,7 @@ // limitations under the License. 
#pragma once #include "../transcoder/basisu.h" +#include "../transcoder/basisu_astc_helpers.h" #include "basisu_etc.h" namespace basisu @@ -49,11 +50,12 @@ namespace basisu inline texture_format get_format() const { return m_fmt; } inline bool is_hdr() const { return is_hdr_texture_format(m_fmt); } - + inline bool is_ldr() const { return !is_hdr_texture_format(m_fmt); } + // Width/height in pixels inline uint32_t get_pixel_width() const { return m_width; } inline uint32_t get_pixel_height() const { return m_height; } - + // Width/height in blocks, row pitch is assumed to be m_blocks_x. inline uint32_t get_blocks_x() const { return m_blocks_x; } inline uint32_t get_blocks_y() const { return m_blocks_y; } @@ -68,7 +70,7 @@ namespace basisu inline uint32_t get_row_pitch_in_bytes() const { return get_bytes_per_block() * get_blocks_x(); } inline const uint64_vec &get_blocks() const { return m_blocks; } - + inline const uint64_t *get_ptr() const { return &m_blocks[0]; } inline uint64_t *get_ptr() { return &m_blocks[0]; } @@ -101,12 +103,14 @@ namespace basisu m_blocks.resize(m_blocks_x * m_blocks_y * m_qwords_per_block); } - // Unpacks LDR textures only. - bool unpack(image& img) const; + // Unpacks LDR textures only. Asserts and returns false otherwise. + // astc_srgb: true to use the ASTC sRGB decode profile, false for linear. + // For XUASTC LDR, this should match what was used during encoding. For ETC1S/UASTC LDR 4x4, this should be false. + bool unpack(image& img, bool astc_srgb) const; - // Unpacks HDR textures only. + // Unpacks HDR textures only. Asserts and returns false otherwise. 
bool unpack_hdr(imagef& img) const; - + inline void override_dimensions(uint32_t w, uint32_t h) { m_width = w; @@ -121,9 +125,10 @@ namespace basisu typedef basisu::vector gpu_image_vec; - // KTX1 file writing - bool create_ktx_texture_file(uint8_vec &ktx_data, const basisu::vector& gpu_images, bool cubemap_flag); - + // KTX1 file writing - compatible with ARM's astcenc tool, and some other tools. + // Note astc_linear_flag used to be always effectively true in older code. It's ignored for ASTC HDR formats. + bool create_ktx_texture_file(uint8_vec &ktx_data, const basisu::vector& gpu_images, bool cubemap_flag, bool astc_srgb_flag); + bool does_dds_support_format(texture_format fmt); bool write_dds_file(uint8_vec& dds_data, const basisu::vector& gpu_images, bool cubemap_flag, bool use_srgb_format); bool write_dds_file(const char* pFilename, const basisu::vector& gpu_images, bool cubemap_flag, bool use_srgb_format); @@ -135,7 +140,7 @@ namespace basisu bool write_compressed_texture_file(const char *pFilename, const basisu::vector& g, bool cubemap_flag, bool use_srgb_format); bool write_compressed_texture_file(const char* pFilename, const gpu_image_vec& g, bool use_srgb_format); bool write_compressed_texture_file(const char *pFilename, const gpu_image &g, bool use_srgb_format); - + bool write_3dfx_out_file(const char* pFilename, const gpu_image& gi); // GPU texture block unpacking @@ -145,8 +150,15 @@ namespace basisu void unpack_bc4(const void *pBlock_bits, uint8_t *pPixels, uint32_t stride); bool unpack_bc3(const void *pBlock_bits, color_rgba *pPixels); void unpack_bc5(const void *pBlock_bits, color_rgba *pPixels); + +#if 0 bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels); + int determine_bc7_mode(const void* pBlock); + int determine_bc7_mode_4_index_mode(const void* pBlock); + int determine_bc7_mode_4_or_5_rotation(const void* pBlock); bool unpack_bc7(const void* pBlock_bits, color_rgba* pPixels); // full format +#endif + bool unpack_bc6h(const 
void* pSrc_block, void* pDst_block, bool is_signed, uint32_t dest_pitch_in_halfs = 4 * 3); // full format, outputs HALF values, RGB texels only (not RGBA) void unpack_atc(const void* pBlock_bits, color_rgba* pPixels); // We only support CC_MIXED non-alpha blocks here because that's the only mode the transcoder uses at the moment. @@ -155,15 +167,18 @@ namespace basisu bool unpack_pvrtc2(const void* p, color_rgba* pPixels); void unpack_etc2_eac_r(const void *p, color_rgba* pPixels, uint32_t c); void unpack_etc2_eac_rg(const void* p, color_rgba* pPixels); - + // unpack_block() is primarily intended to unpack texture data created by the transcoder. // For some texture formats (like ETC2 RGB, PVRTC2, FXT1) it's not yet a complete implementation. // Unpacks LDR texture formats only. - bool unpack_block(texture_format fmt, const void *pBlock, color_rgba *pPixels); + bool unpack_block(texture_format fmt, const void *pBlock, color_rgba *pPixels, bool astc_srgb); // Unpacks HDR texture formats only. bool unpack_block_hdr(texture_format fmt, const void* pBlock, vec4F* pPixels); - + + bool read_astc_file(const uint8_t* pImage_data, size_t image_data_size, vector2D& blocks, uint32_t& block_width, uint32_t& block_height, uint32_t& width, uint32_t& height); + bool read_astc_file(const char* pFilename, vector2D& blocks, uint32_t& block_width, uint32_t& block_height, uint32_t& width, uint32_t& height); bool write_astc_file(const char* pFilename, const void* pBlocks, uint32_t block_width, uint32_t block_height, uint32_t dim_x, uint32_t dim_y); - + } // namespace basisu + diff --git a/external/basis_universal/encoder/basisu_math.h b/external/basis_universal/encoder/basisu_math.h index 66bb749a5b..24b83859ae 100644 --- a/external/basis_universal/encoder/basisu_math.h +++ b/external/basis_universal/encoder/basisu_math.h @@ -8,10 +8,10 @@ namespace bu_math // Would prefer using SSE1 etc. 
but that would require implementing multiple versions and platform divergence (needing more testing). BASISU_FORCE_INLINE float inv_sqrt(float v) { - union - { - float flt; - uint32_t ui; + union + { + float flt; + uint32_t ui; } un; un.flt = v; @@ -20,6 +20,16 @@ namespace bu_math return 0.703952253f * un.flt * (2.38924456f - v * (un.flt * un.flt)); } + inline float linstep(float edge0, float edge1, float x) + { + assert(edge1 != edge0); + + // Scale, and clamp x to 0..1 range + x = basisu::saturate((x - edge0) / (edge1 - edge0)); + + return x; + } + inline float smoothstep(float edge0, float edge1, float x) { assert(edge1 != edge0); @@ -1130,12 +1140,12 @@ namespace bu_math template Z& matrix_mul_helper(Z& result, const X& lhs, const Y& rhs) { - static_assert((int)Z::num_rows == (int)X::num_rows); - static_assert((int)Z::num_cols == (int)Y::num_cols); - static_assert((int)X::num_cols == (int)Y::num_rows); + static_assert(Z::num_rows == X::num_rows); + static_assert(Z::num_cols == Y::num_cols); + static_assert(X::num_cols == Y::num_rows); assert(((void*)&result != (void*)&lhs) && ((void*)&result != (void*)&rhs)); - for (int r = 0; r < X::num_rows; r++) - for (int c = 0; c < Y::num_cols; c++) + for (uint32_t r = 0; r < X::num_rows; r++) + for (uint32_t c = 0; c < Y::num_cols; c++) { typename Z::scalar_type s = lhs(r, 0) * rhs(0, c); for (uint32_t i = 1; i < X::num_cols; i++) @@ -1148,12 +1158,12 @@ namespace bu_math template Z& matrix_mul_helper_transpose_lhs(Z& result, const X& lhs, const Y& rhs) { - static_assert((int)Z::num_rows == (int)X::num_cols); - static_assert((int)Z::num_cols == (int)Y::num_cols); - static_assert((int)X::num_rows == (int)Y::num_rows); + static_assert(Z::num_rows == X::num_cols); + static_assert(Z::num_cols == Y::num_cols); + static_assert(X::num_rows == Y::num_rows); assert(((void*)&result != (void*)&lhs) && ((void*)&result != (void*)&rhs)); - for (int r = 0; r < X::num_cols; r++) - for (int c = 0; c < Y::num_cols; c++) + for (uint32_t r 
= 0; r < X::num_cols; r++) + for (uint32_t c = 0; c < Y::num_cols; c++) { typename Z::scalar_type s = lhs(0, r) * rhs(0, c); for (uint32_t i = 1; i < X::num_rows; i++) @@ -1166,12 +1176,12 @@ namespace bu_math template Z& matrix_mul_helper_transpose_rhs(Z& result, const X& lhs, const Y& rhs) { - static_assert((int)Z::num_rows == (int)X::num_rows); - static_assert((int)Z::num_cols == (int)Y::num_rows); - static_assert((int)X::num_cols == (int)Y::num_cols); + static_assert(Z::num_rows == X::num_rows); + static_assert(Z::num_cols == Y::num_rows); + static_assert(X::num_cols == Y::num_cols); assert(((void*)&result != (void*)&lhs) && ((void*)&result != (void*)&rhs)); - for (int r = 0; r < X::num_rows; r++) - for (int c = 0; c < Y::num_rows; c++) + for (uint32_t r = 0; r < X::num_rows; r++) + for (uint32_t c = 0; c < Y::num_rows; c++) { typename Z::scalar_type s = lhs(r, 0) * rhs(c, 0); for (uint32_t i = 1; i < X::num_cols; i++) @@ -1180,17 +1190,21 @@ namespace bu_math } return result; } - + template class matrix { public: typedef T scalar_type; + static const uint32_t num_rows = R; + static const uint32_t num_cols = C; +#if 0 enum { num_rows = R, num_cols = C }; +#endif typedef vec col_vec; typedef vec < (R > 1) ? 
(R - 1) : 0, T > subcol_vec; @@ -2144,7 +2158,7 @@ namespace bu_math static inline matrix make_tensor_product_matrix(const row_vec& v, const row_vec& w) { matrix ret; - for (int r = 0; r < num_rows; r++) + for (uint32_t r = 0; r < num_rows; r++) ret[r] = row_vec::mul_components(v.broadcast(r), w); return ret; } @@ -2485,6 +2499,31 @@ namespace basisu int64_t m_total2; }; + class tracked_stat_float + { + public: + tracked_stat_float() { clear(); } + + inline void clear() { m_num = 0; m_total = 0; m_total2 = 0; } + + inline void update(float val) { m_num++; m_total += val; m_total2 += val * val; } + + inline tracked_stat_float& operator += (float val) { update(val); return *this; } + + inline uint32_t get_number_of_values() { return m_num; } + inline float get_total() const { return m_total; } + inline float get_total2() const { return m_total2; } + + inline float get_average() const { return m_num ? m_total / (float)m_num : 0.0f; }; + inline float get_std_dev() const { return m_num ? sqrt((float)(m_num * m_total2 - m_total * m_total)) / m_num : 0.0f; } + inline float get_variance() const { float s = get_std_dev(); return s * s; } + + private: + uint32_t m_num; + float m_total; + float m_total2; + }; + class tracked_stat_dbl { public: @@ -2521,14 +2560,14 @@ namespace basisu FloatType m_mad; // mean absolute deviation FloatType m_min, m_max, m_range; // min and max values, and max-min FloatType m_len; // length of values as a vector (Euclidean norm or L2 norm) - FloatType m_coeff_of_var; // coefficient of variation (std_dev/mean), High CV: Indicates greater variability relative to the mean, meaning the data values are more spread out, + FloatType m_coeff_of_var; // coefficient of variation (std_dev/mean), High CV: Indicates greater variability relative to the mean, meaning the data values are more spread out, // Low CV : Indicates less variability relative to the mean, meaning the data values are more consistent. 
- - FloatType m_skewness; // Skewness = 0: The data is perfectly symmetric around the mean, - // Skewness > 0: The data is positively skewed (right-skewed), + + FloatType m_skewness; // Skewness = 0: The data is perfectly symmetric around the mean, + // Skewness > 0: The data is positively skewed (right-skewed), // Skewness < 0: The data is negatively skewed (left-skewed) // 0-.5 approx. symmetry, .5-1 moderate skew, >= 1 highly skewed - + FloatType m_kurtosis; // Excess Kurtosis: Kurtosis = 0: The distribution has normal kurtosis (mesokurtic) // Kurtosis > 0: The distribution is leptokurtic, with heavy tails and a sharp peak // Kurtosis < 0: The distribution is platykurtic, with light tails and a flatter peak @@ -2538,9 +2577,12 @@ namespace basisu FloatType m_median; uint32_t m_median_index; - stats() - { - clear(); + FloatType m_five_percent_lo; // avg of the lowest 5%, must calc median to be valid + FloatType m_five_percent_hi; // avg of the lowest 5%, must calc median to be valid + + stats() + { + clear(); } void clear() @@ -2557,9 +2599,12 @@ namespace basisu m_skewness = 0; m_kurtosis = 0; m_any_zero = false; - + m_median = 0; m_median_index = 0; + + m_five_percent_lo = 0; + m_five_percent_hi = 0; } template @@ -2588,13 +2633,26 @@ namespace basisu m_median = (m_median + vals[(n / 2) - 1].first) * .5f; m_median_index = vals[n / 2].second; + + // sum and avg low 5% and high 5% + const uint32_t p5_n = clamp((n + 10) / 20, 1u, n); + FloatType lo5_sum = 0, hi5_sum = 0; + + for (uint32_t i = 0; i < p5_n; i++) + { + lo5_sum += vals[i].first; + hi5_sum += vals[n - 1 - i].first; + } + + m_five_percent_lo = lo5_sum / FloatType(p5_n); + m_five_percent_hi = hi5_sum / FloatType(p5_n); } template void calc(uint32_t n, const T* pVals, uint32_t stride = 1, bool calc_median_flag = false) { clear(); - + if (!n) return; @@ -2609,10 +2667,10 @@ namespace basisu if (v == 0.0f) m_any_zero = true; - + m_total += v; m_total_sq += v * v; - + if (!i) { m_min = v; @@ -2634,12 
+2692,12 @@ namespace basisu m_avg = m_total / nd; m_avg_sq = m_total_sq / nd; m_rms = sqrt(m_avg_sq); - + for (uint32_t i = 0; i < n; i++) { FloatType v = (FloatType)pVals[i * stride]; FloatType d = v - m_avg; - + const FloatType d2 = d * d; const FloatType d3 = d2 * d; const FloatType d4 = d3 * d; @@ -2680,6 +2738,55 @@ namespace basisu m_total += v; } + + const FloatType nd = (FloatType)n; + + m_avg = m_total / nd; + + for (uint32_t i = 0; i < n; i++) + { + FloatType v = (FloatType)pVals[i * stride]; + FloatType d = v - m_avg; + + const FloatType d2 = d * d; + + m_var += d2; + } + + m_var /= nd; + m_std_dev = sqrt(m_var); + } + + // Only compute average, variance and standard deviation. + template + void calc_simplified_with_range(uint32_t n, const T* pVals, uint32_t stride = 1) + { + clear(); + + if (!n) + return; + + m_n = n; + + for (uint32_t i = 0; i < n; i++) + { + FloatType v = (FloatType)pVals[i * stride]; + + m_total += v; + + if (!i) + { + m_min = v; + m_max = v; + } + else + { + m_min = minimum(m_min, v); + m_max = maximum(m_max, v); + } + } + + m_range = m_max - m_min; const FloatType nd = (FloatType)n; @@ -2712,7 +2819,7 @@ namespace basisu FloatType m_euclidean_dist; // euclidean distance between values as vectors FloatType m_cosine_sim; // normalized dot products of values as vectors FloatType m_min_diff, m_max_diff; // minimum/maximum abs difference between values - + comparative_stats() { clear(); @@ -2738,7 +2845,7 @@ namespace basisu clear(); if (!n) return; - + stats temp_a_stats; if (!pA_stats) { @@ -2757,7 +2864,7 @@ namespace basisu { const FloatType fa = (FloatType)pA[i * a_stride]; const FloatType fb = (FloatType)pB[i * b_stride]; - + if ((pA_stats->m_min >= 0.0f) && (pB_stats->m_min >= 0.0f)) { const FloatType ld = log(fa + 1.0f) - log(fb + 1.0f); @@ -2766,7 +2873,7 @@ namespace basisu const FloatType diff = fa - fb; const FloatType abs_diff = fabs(diff); - + m_mse += diff * diff; m_mae += abs_diff; @@ -2781,7 +2888,7 @@ namespace basisu 
} const FloatType nd = (FloatType)n; - + m_euclidean_dist = sqrt(m_mse); m_mse /= nd; @@ -2790,7 +2897,7 @@ namespace basisu m_mae /= nd; m_cov /= nd; - + FloatType dv = (pA_stats->m_std_dev * pB_stats->m_std_dev); if (dv != 0.0f) m_pearson = m_cov / dv; @@ -2883,9 +2990,9 @@ namespace basisu const FloatType fb = (FloatType)pB[i * b_stride]; const FloatType diff = fa - fb; - + m_mse += diff * diff; - + const FloatType da = fa - pA_stats->m_avg; const FloatType db = fb - pB_stats->m_avg; m_cov += da * db; @@ -2897,7 +3004,7 @@ namespace basisu m_mse /= nd; m_rmse = sqrt(m_mse); - + m_cov /= nd; } @@ -2938,7 +3045,7 @@ namespace basisu m_cov /= nd; } }; - + class stat_history { public: @@ -3083,12 +3190,12 @@ namespace basisu uint32_t lowerBits = float_union.u & 0xFFFF; // Round to nearest or even - if ((lowerBits & 0x8000) && + if ((lowerBits & 0x8000) && ((lowerBits > 0x8000) || ((lowerBits == 0x8000) && (upperBits & 1))) ) { // Round up - upperBits += 1; + upperBits += 1; // Check for overflow in the exponent after rounding up if (((upperBits & 0x7F80) == 0x7F80) && ((upperBits & 0x007F) == 0)) @@ -3140,6 +3247,7 @@ namespace basisu return res; } - - + + } // namespace basisu + diff --git a/external/basis_universal/encoder/basisu_uastc_enc.cpp b/external/basis_universal/encoder/basisu_uastc_enc.cpp index 701534cc56..88448eef0b 100644 --- a/external/basis_universal/encoder/basisu_uastc_enc.cpp +++ b/external/basis_universal/encoder/basisu_uastc_enc.cpp @@ -1,5 +1,5 @@ // basisu_uastc_enc.cpp -// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2026 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -223,7 +223,7 @@ namespace basisu default: break; } -#endif +#endif uint32_t total_planes = 1; switch (result.m_uastc_mode) @@ -319,7 +319,15 @@ namespace basisu const uint32_t comp_plane = (total_comps == 2) ? 
c : ((c == result.m_astc.m_ccs) ? 1 : 0); if (comp_plane == plane_index) - std::swap(endpoints[c * 2 + 0], endpoints[c * 2 + 1]); + { + // shut up a useless gcc warning + assert((c * 2 + 1) < (int)sizeof(endpoints)); + + if ((c * 2 + 1) < (int)sizeof(endpoints)) + { + std::swap(endpoints[c * 2 + 0], endpoints[c * 2 + 1]); + } + } } } else @@ -456,7 +464,7 @@ namespace basisu printf("Total bits: %u, endpoint bits: %u, weight bits: %u\n", block_bit_offset, total_endpoint_bits, total_weight_bits); #endif } - + // MODE 0 // 0. DualPlane: 0, WeightRange: 8 (16), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 19 (192) MODE6 RGB // 18. DualPlane: 0, WeightRange: 11 (32), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 11 (32) MODE6 RGB @@ -507,7 +515,7 @@ namespace basisu astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1]; astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2]; astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2]; - + bool invert = false; if (pForce_selectors == nullptr) @@ -1128,7 +1136,7 @@ namespace basisu } // common_pattern } - // MODE 5 + // MODE 5 // DualPlane: 0, WeightRange: 5 (8), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 20 (256) BC7 MODE 6 (or MODE 1 1-subset) static void astc_mode5(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params) { @@ -1259,7 +1267,7 @@ namespace basisu ccell_results_rgb.m_pSelectors_temp = &ccell_result_selectors_temp[0]; uint64_t part_err_rgb = color_cell_compression(255, &ccell_params_rgb, &ccell_results_rgb, &comp_params); - + color_cell_compressor_params ccell_params_a; memset(&ccell_params_a, 0, sizeof(ccell_params_a)); @@ -1416,9 +1424,9 @@ namespace basisu for (uint32_t x = 0; x < 4; x++) { const uint32_t astc_part = bc7_convert_partition_index_3_to_2(g_bc7_partition3[16 * bc7_pattern + x + y * 4], common_pattern_k); -#ifdef _DEBUG +#ifdef _DEBUG assert((int)astc_part == 
astc_compute_texel_partition(astc_pattern, x, y, 0, 2, true)); -#endif +#endif part_pixel_index[y][x] = num_part_pixels[astc_part]; part_pixels[astc_part][num_part_pixels[astc_part]++] = block[y][x]; @@ -1583,7 +1591,7 @@ namespace basisu } #endif } - + // 9. DualPlane: 0, WeightRange: 2 (4), Subsets: 2, CEM: 12 (RGBA Direct), EndpointRange: 8 (16) - BC7 MODE 7 // 16. DualPlane: 0, WeightRange : 2 (4), Subsets : 2, CEM: 4 (LA Direct), EndpointRange : 20 (256) - BC7 MODE 7 static void astc_mode9_or_16(uint32_t mode, const color_rgba source_block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, uint32_t estimate_partition_list_size) @@ -2499,7 +2507,7 @@ namespace basisu total_results++; } } - + static void compute_block_error(const color_rgba block[4][4], const color_rgba decoded_block[4][4], uint64_t &total_rgb_err, uint64_t &total_rgba_err, uint64_t &total_la_err) { uint64_t total_err_r = 0, total_err_g = 0, total_err_b = 0, total_err_a = 0; @@ -2542,18 +2550,18 @@ namespace basisu color_rgba tblock_bc1[4][4]; dxt1_block tbc1_block[8]; basist::encode_bc1(tbc1_block, (const uint8_t*)&decoded_uastc_block[0][0], 0); - unpack_block(texture_format::cBC1, tbc1_block, &tblock_bc1[0][0]); + unpack_block(texture_format::cBC1, tbc1_block, &tblock_bc1[0][0], false); color_rgba tblock_hint0_bc1[4][4]; color_rgba tblock_hint1_bc1[4][4]; - + etc_block etc1_blk; memset(&etc1_blk, 0, sizeof(etc1_blk)); eac_a8_block etc2_blk; memset(&etc2_blk, 0, sizeof(etc2_blk)); etc2_blk.m_multiplier = 1; - + // Pack to UASTC, then unpack, because the endpoints may be swapped. 
uastc_block temp_ublock; @@ -2561,7 +2569,7 @@ namespace basisu unpacked_uastc_block temp_ublock_unpacked; unpack_uastc(temp_ublock, temp_ublock_unpacked, false); - + unpacked_uastc_block ublock; memset(&ublock, 0, sizeof(ublock)); ublock.m_mode = best_results.m_uastc_mode; @@ -2579,7 +2587,7 @@ namespace basisu { transcode_uastc_to_bc1_hint1(ublock, (color32 (*)[4]) decoded_uastc_block, &b, false); - unpack_block(texture_format::cBC1, &b, &tblock_hint1_bc1[0][0]); + unpack_block(texture_format::cBC1, &b, &tblock_hint1_bc1[0][0], false); } // HINT0 @@ -2590,8 +2598,8 @@ namespace basisu else { transcode_uastc_to_bc1_hint0(ublock, &b); - - unpack_block(texture_format::cBC1, &b, &tblock_hint0_bc1[0][0]); + + unpack_block(texture_format::cBC1, &b, &tblock_hint0_bc1[0][0], false); } // Compute block errors @@ -2612,7 +2620,7 @@ namespace basisu const float err_thresh0 = 1.075f; const float err_thresh1 = 1.075f; - + if ((g_uastc_mode_has_bc1_hint0[best_mode]) && (t_err_hint0 <= t_err * err_thresh0)) bc1_hint0 = true; @@ -2779,7 +2787,7 @@ namespace basisu uint32_t first_flip = 0, last_flip = 2; uint32_t first_individ = 0, last_individ = 2; - + if (flags & cPackUASTCETC1DisableFlipAndIndividual) { last_flip = 1; @@ -2791,7 +2799,7 @@ namespace basisu first_flip = 1; last_flip = first_flip + 1; } - + for (uint32_t flip = first_flip; flip < last_flip; flip++) { trial_block.set_flip_bit(flip != 0); @@ -2799,7 +2807,7 @@ namespace basisu for (uint32_t individ = first_individ; individ < last_individ; individ++) { const uint32_t mul = individ ? 
15 : 31; - + trial_block.set_diff_bit(individ == 0); color_rgba unbiased_block_colors[2]; @@ -2815,7 +2823,7 @@ namespace basisu { const etc_coord2 &c = g_etc1_pixel_coords[flip][subset][j]; const color_rgba& p = decoded_uastc_block[c.m_y][c.m_x]; - + avg_color[0] += p.r; avg_color[1] += p.g; avg_color[2] += p.b; @@ -2833,13 +2841,13 @@ namespace basisu unbiased_block_colors[subset][1] = (uint8_t)((avg_color[1] * mul + 1020) / (8 * 255)); unbiased_block_colors[subset][2] = (uint8_t)((avg_color[2] * mul + 1020) / (8 * 255)); unbiased_block_colors[subset][3] = 0; - + } // subset - + for (uint32_t bias_iter = 0; bias_iter < last_bias; bias_iter++) { const uint32_t bias = use_faster_bias_mode_table ? s_sorted_bias_modes[bias_iter] : bias_iter; - + color_rgba block_colors[2]; for (uint32_t subset = 0; subset < 2; subset++) block_colors[subset] = has_bias ? apply_etc1_bias((color32&)unbiased_block_colors[subset], bias, mul, subset) : unbiased_block_colors[subset]; @@ -2873,7 +2881,7 @@ namespace basisu uint64_t best_subset_err = UINT64_MAX; const uint32_t inten_table_limit = (level == cPackUASTCLevelVerySlow) ? 8 : ((range[subset] > 51) ? 8 : (range[subset] >= 7 ? 
4 : 2)); - + for (uint32_t inten_table = 0; inten_table < inten_table_limit; inten_table++) { trial_block.set_inten_table(subset, inten_table); @@ -3008,7 +3016,7 @@ namespace basisu uint32_t m_table; uint32_t m_multiplier; }; - + static uint64_t uastc_pack_eac_a8(uastc_pack_eac_a8_results& results, const uint8_t* pPixels, uint32_t num_pixels, uint32_t base_search_rad, uint32_t mul_search_rad, uint32_t table_mask) { assert(num_pixels <= 16); @@ -3152,7 +3160,7 @@ namespace basisu solid_results.m_common_pattern = 0; solid_results.m_solid_color = first_color; memset(&solid_results.m_astc, 0, sizeof(solid_results.m_astc)); - + etc_block etc1_blk; uint32_t etc1_bias = 0; @@ -3168,17 +3176,17 @@ namespace basisu return; } - + int level = flags & 7; const bool favor_uastc_error = (flags & cPackUASTCFavorUASTCError) != 0; const bool favor_bc7_error = !favor_uastc_error && ((flags & cPackUASTCFavorBC7Error) != 0); //const bool etc1_perceptual = true; - + // TODO: This uses 64KB of stack space! uastc_encode_results results[MAX_ENCODE_RESULTS]; - + level = clampi(level, cPackUASTCLevelFastest, cPackUASTCLevelVerySlow); - + // Set all options to slowest, then configure from there depending on the selected level. 
uint32_t mode_mask = UINT32_MAX; uint32_t uber_level = 6; @@ -3189,12 +3197,12 @@ namespace basisu uint32_t least_squares_passes = 2; bool bc1_hints = true; bool only_use_la_on_transparent_blocks = false; - + switch (level) { case cPackUASTCLevelFastest: { - mode_mask = (1 << 0) | (1 << 8) | + mode_mask = (1 << 0) | (1 << 8) | (1 << 11) | (1 << 12) | (1 << 15); always_try_alpha_modes = false; @@ -3220,7 +3228,7 @@ namespace basisu estimate_partition = true; break; } - case cPackUASTCLevelDefault: + case cPackUASTCLevelDefault: { mode_mask = (1 << 0) | (1 << 1) | (1 << 4) | (1 << 5) | (1 << 6) | (1 << 8) | (1 << 9) | (1 << 10) | (1 << 11) | (1 << 12) | (1 << 13) | @@ -3258,9 +3266,9 @@ namespace basisu // HACK HACK //mode_mask &= ~(1 << 18); //mode_mask = (1 << 18)| (1 << 10); - + uint32_t total_results = 0; - + if (only_use_la_on_transparent_blocks) { if ((is_la) && (!has_alpha)) @@ -3268,7 +3276,7 @@ namespace basisu } const bool try_alpha_modes = has_alpha || always_try_alpha_modes; - + bc7enc_compress_block_params comp_params; memset(&comp_params, 0, sizeof(comp_params)); comp_params.m_max_partitions_mode1 = 64; @@ -3343,7 +3351,7 @@ namespace basisu } assert(total_results); - + // Fix up the errors so we consistently have LA, RGB, or RGBA error. 
for (uint32_t i = 0; i < total_results; i++) { @@ -3377,7 +3385,7 @@ namespace basisu } } } - + unpacked_uastc_block unpacked_ublock; memset(&unpacked_ublock, 0, sizeof(unpacked_ublock)); @@ -3447,7 +3455,7 @@ namespace basisu encode_bc7_block(&bc7_data, &bc7_results); color_rgba decoded_bc7_block[4][4]; - unpack_block(texture_format::cBC7, &bc7_data, &decoded_bc7_block[0][0]); + unpack_block(texture_format::cBC7, &bc7_data, &decoded_bc7_block[0][0], false); // Compute BC7 error uint64_t total_bc7_la_err, total_bc7_rgb_err, total_bc7_rgba_err; @@ -3544,7 +3552,7 @@ namespace basisu const uastc_encode_results& best_results = results[best_index]; const uint32_t best_mode = best_results.m_uastc_mode; const astc_block_desc& best_astc_results = best_results.m_astc; - + color_rgba decoded_uastc_block[4][4]; bool success = unpack_uastc(best_mode, best_results.m_common_pattern, best_results.m_solid_color.get_color32(), best_astc_results, (basist::color32 *)&decoded_uastc_block[0][0], false); (void)success; @@ -3562,14 +3570,14 @@ namespace basisu basist::uastc_block temp_block; pack_uastc(temp_block, best_results, etc1_blk, 0, etc_eac_a8_blk, false, false); - + basist::color32 temp_block_unpacked[4][4]; success = basist::unpack_uastc(temp_block, (basist::color32 *)temp_block_unpacked, false); VALIDATE(success); // Now round trip to packed ASTC and back, then decode to pixels. 
uint32_t astc_data[4]; - + if (best_results.m_uastc_mode == UASTC_MODE_INDEX_SOLID_COLOR) pack_astc_solid_block(astc_data, (color32 &)best_results.m_solid_color); else @@ -3587,7 +3595,7 @@ namespace basisu for (uint32_t x = 0; x < 4; x++) { VALIDATE(decoded_astc_block[y][x] == decoded_uastc_block[y][x]); - + VALIDATE(temp_block_unpacked[y][x].c[0] == decoded_uastc_block[y][x].r); VALIDATE(temp_block_unpacked[y][x].c[1] == decoded_uastc_block[y][x].g); VALIDATE(temp_block_unpacked[y][x].c[2] == decoded_uastc_block[y][x].b); @@ -3601,7 +3609,7 @@ namespace basisu bool bc1_hint0 = false, bc1_hint1 = false; if (bc1_hints) compute_bc1_hints(bc1_hint0, bc1_hint1, best_results, block, decoded_uastc_block); - + eac_a8_block eac_a8_blk; if ((g_uastc_mode_has_alpha[best_mode]) && (best_mode != UASTC_MODE_INDEX_SOLID_COLOR)) { @@ -3613,7 +3621,7 @@ namespace basisu uastc_pack_eac_a8_results eac8_a8_results; memset(&eac8_a8_results, 0, sizeof(eac8_a8_results)); uastc_pack_eac_a8(eac8_a8_results, decoded_uastc_block_alpha, 16, 0, eac_a8_mul_search_rad, eac_a8_table_mask); - + // All we care about for hinting is the table and multiplier. 
eac_a8_blk.m_table = eac8_a8_results.m_table; eac_a8_blk.m_multiplier = eac8_a8_results.m_multiplier; @@ -3810,11 +3818,11 @@ namespace basisu { std::size_t operator()(selector_bitsequence const& s) const noexcept { - return hash_hsieh((const uint8_t*)&s, sizeof(s)); + return basist::hash_hsieh((const uint8_t*)&s, sizeof(s)); } }; - - static bool uastc_rdo_blocks(uint32_t first_index, uint32_t last_index, basist::uastc_block* pBlocks, const color_rgba* pBlock_pixels, const uastc_rdo_params& params, uint32_t flags, + + static bool uastc_rdo_blocks(uint32_t first_index, uint32_t last_index, basist::uastc_block* pBlocks, const color_rgba* pBlock_pixels, const uastc_rdo_params& params, uint32_t flags, uint32_t &total_skipped, uint32_t &total_refined, uint32_t &total_modified, uint32_t &total_smooth) { debug_printf("uastc_rdo_blocks: Processing blocks %u to %u\n", first_index, last_index); @@ -3823,7 +3831,7 @@ namespace basisu const bool perceptual = false; std::unordered_map selector_history; - + for (uint32_t block_index = first_index; block_index < last_index; block_index++) { const basist::uastc_block& blk = pBlocks[block_index]; @@ -3872,8 +3880,8 @@ namespace basisu basist::encode_bc7_block(&b7_block, &b7_results); color_rgba decoded_b7_blk[4][4]; - unpack_block(texture_format::cBC7, &b7_block, &decoded_b7_blk[0][0]); - + unpack_block(texture_format::cBC7, &b7_block, &decoded_b7_blk[0][0], false); + uint64_t bc7_err = 0; for (uint32_t i = 0; i < 16; i++) bc7_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_b7_blk)[i], true); @@ -3928,7 +3936,7 @@ namespace basisu float best_t = cur_ms_err * smooth_block_error_scale + cur_bits * params.m_lambda; - // Now scan through previous blocks, insert their selector bit patterns into the current block, and find + // Now scan through previous blocks, insert their selector bit patterns into the current block, and find // selector bit patterns which don't increase the overall block error too much. 
for (int prev_block_index = last_block_to_check; prev_block_index >= first_block_to_check; --prev_block_index) { @@ -3981,7 +3989,7 @@ namespace basisu basist::encode_bc7_block(&trial_b7_block, &trial_b7_results); color_rgba decoded_trial_b7_blk[4][4]; - unpack_block(texture_format::cBC7, &trial_b7_block, &decoded_trial_b7_blk[0][0]); + unpack_block(texture_format::cBC7, &trial_b7_block, &decoded_trial_b7_blk[0][0], false); uint64_t trial_bc7_err = 0; for (uint32_t i = 0; i < 16; i++) @@ -4050,7 +4058,7 @@ namespace basisu color_rgba decoded_trial_uastc_block[4][4]; bool success = unpack_uastc(results.m_uastc_mode, results.m_common_pattern, results.m_solid_color.get_color32(), results.m_astc, (basist::color32*) & decoded_trial_uastc_block[0][0], false); assert(success); - + BASISU_NOTE_UNUSED(success); uint64_t trial_uastc_err = 0; @@ -4077,7 +4085,7 @@ namespace basisu // Write the modified block pBlocks[block_index] = best_block; - + } // if (best_block_index != block_index) { @@ -4093,8 +4101,8 @@ namespace basisu return true; } - - // This function implements a basic form of rate distortion optimization (RDO) for UASTC. + + // This function implements a basic form of rate distortion optimization (RDO) for UASTC. // It only changes selectors and then updates the hints. It uses very approximate LZ bitprice estimation. // There's A LOT that can be done better in here, but it's a start. // One nice advantage of the method used here is that it works for any input, no matter which or how many modes it uses. 
@@ -4133,7 +4141,7 @@ namespace basisu { std::lock_guard lck(stat_mutex); - + all_succeeded = all_succeeded && status; total_skipped += job_skipped; total_modified += job_modified; @@ -4152,7 +4160,12 @@ namespace basisu } debug_printf("uastc_rdo: Total modified: %3.2f%%, total skipped: %3.2f%%, total refined: %3.2f%%, total smooth: %3.2f%%\n", total_modified * 100.0f / num_blocks, total_skipped * 100.0f / num_blocks, total_refined * 100.0f / num_blocks, total_smooth * 100.0f / num_blocks); - + return status; } } // namespace basisu + + + + + diff --git a/external/basis_universal/encoder/basisu_wasm_api.cpp b/external/basis_universal/encoder/basisu_wasm_api.cpp new file mode 100644 index 0000000000..d4db364d20 --- /dev/null +++ b/external/basis_universal/encoder/basisu_wasm_api.cpp @@ -0,0 +1,319 @@ +// File: basisu_wasm_api.cpp - Simplified compression API for WASM WASI modules and Python native support. +// Also useable by plain C callers. +#include "basisu_comp.h" +#include "basisu_wasm_api.h" + +using namespace basisu; + +static inline uint64_t wasm_offset(void* p) +{ + return (uint64_t)(uintptr_t)p; +} + +static inline uint8_t* wasm_ptr(uint64_t offset) +{ + return (uint8_t*)(uintptr_t)offset; +} + +BU_WASM_EXPORT("bu_get_version") +uint32_t bu_get_version() +{ + printf("Hello from basisu_wasm_api.cpp version %u\n", BASISU_LIB_VERSION); + + return BASISU_LIB_VERSION; +} + +BU_WASM_EXPORT("bu_enable_debug_printf") +void bu_enable_debug_printf(uint32_t flag) +{ + enable_debug_printf(flag != 0); +} + +BU_WASM_EXPORT("bu_init") +void bu_init() +{ + basisu_encoder_init(false, false); +} + +// Memory alloc/free — stubs +BU_WASM_EXPORT("bu_alloc") +uint64_t bu_alloc(uint64_t size) +{ + void* p = malloc((size_t)size); + return wasm_offset(p); +} + +BU_WASM_EXPORT("bu_free") +void bu_free(uint64_t ofs) +{ + free(wasm_ptr(ofs)); +} + +const uint32_t COMP_PARAMS_MAGIC = 0x43504D50; // "CPMP" + +struct comp_params +{ + uint32_t m_magic = COMP_PARAMS_MAGIC; + + 
comp_params() + { + clear(); + } + + void clear() + { + assert(m_magic == COMP_PARAMS_MAGIC); + + m_comp_data.clear(); + m_images.clear(); + m_imagesf.clear(); + + m_stats.clear(); + } + + uint8_vec m_comp_data; + + basisu::vector m_images; + basisu::vector m_imagesf; + + image_stats m_stats; +}; + +BU_WASM_EXPORT("bu_new_comp_params") +uint64_t bu_new_comp_params() +{ + comp_params* p = new comp_params; + return wasm_offset(p); +} + +BU_WASM_EXPORT("bu_delete_comp_params") +wasm_bool_t bu_delete_comp_params(uint64_t params_ofs) +{ + comp_params* p = (comp_params*)wasm_ptr(params_ofs); + if (!p) + return false; + + assert(p->m_magic == COMP_PARAMS_MAGIC); + if (p->m_magic != COMP_PARAMS_MAGIC) + return false; + + delete p; + + return true; +} + +BU_WASM_EXPORT("bu_comp_params_get_comp_data_size") +uint64_t bu_comp_params_get_comp_data_size(uint64_t params_ofs) +{ + comp_params* pParams = (comp_params*)wasm_ptr(params_ofs); + if (!pParams) + return 0; + + assert(pParams->m_magic == COMP_PARAMS_MAGIC); + if (pParams->m_magic != COMP_PARAMS_MAGIC) + return 0; + + return pParams->m_comp_data.size(); +} + +BU_WASM_EXPORT("bu_comp_params_get_comp_data_ofs") +uint64_t bu_comp_params_get_comp_data_ofs(uint64_t params_ofs) +{ + comp_params* pParams = (comp_params*)wasm_ptr(params_ofs); + if (!pParams) + return 0; + + assert(pParams->m_magic == COMP_PARAMS_MAGIC); + if (pParams->m_magic != COMP_PARAMS_MAGIC) + return 0; + + return wasm_offset(pParams->m_comp_data.get_ptr()); +} + +BU_WASM_EXPORT("bu_comp_params_clear") +wasm_bool_t bu_comp_params_clear(uint64_t params_ofs) +{ + comp_params* pParams = (comp_params*)wasm_ptr(params_ofs); + if (!pParams) + return false; + + assert(pParams->m_magic == COMP_PARAMS_MAGIC); + if (pParams->m_magic != COMP_PARAMS_MAGIC) + return false; + + pParams->clear(); + + return true; +} + +// Caller wants to give us a LDR/SDR 32bpp RGBA mipmap level (4 bytes per pixel) +BU_WASM_EXPORT("bu_comp_params_set_image_rgba32") +wasm_bool_t 
bu_comp_params_set_image_rgba32( + uint64_t params_ofs, + uint32_t image_index, + uint64_t img_data_ofs, + uint32_t width, uint32_t height, + uint32_t pitch_in_bytes) +{ + if ((!width) || (!height) || (!pitch_in_bytes)) + return false; + + comp_params* pParams = (comp_params*)wasm_ptr(params_ofs); + if (!pParams) + return false; + + assert(pParams->m_magic == COMP_PARAMS_MAGIC); + if (pParams->m_magic != COMP_PARAMS_MAGIC) + return false; + + const uint8_t* pImage = wasm_ptr(img_data_ofs); + if (!pImage) + return false; + + const uint32_t bytes_per_pixel = sizeof(color_rgba); + + if (pitch_in_bytes < width * bytes_per_pixel) + return false; + + if (image_index >= pParams->m_images.size()) + { + if (!pParams->m_images.try_resize(image_index + 1)) + return false; + } + + basisu::image& dst_img = pParams->m_images[image_index]; + + dst_img.resize(width, height); + + if (pitch_in_bytes == width * bytes_per_pixel) + { + memcpy(dst_img.get_ptr(), pImage, pitch_in_bytes * height); + } + else + { + for (uint32_t y = 0; y < height; y++) + { + const uint8_t* pSrc_row = pImage + y * pitch_in_bytes; + + uint8_t* pDst_row = (uint8_t *)&dst_img(0, y); + + memcpy(pDst_row, pSrc_row, width * bytes_per_pixel); + } // y + } + + return true; +} + +// Caller wants to give us a float RGBA mipmap level (4*4=16 bytes per pixel) +BU_WASM_EXPORT("bu_comp_params_set_image_float_rgba") +wasm_bool_t bu_comp_params_set_image_float_rgba( + uint64_t params_ofs, + uint32_t image_index, + uint64_t img_data_ofs, + uint32_t width, uint32_t height, + uint32_t pitch_in_bytes) +{ + if ((!width) || (!height) || (!pitch_in_bytes)) + return false; + + comp_params* pParams = (comp_params*)wasm_ptr(params_ofs); + if (!pParams) + return false; + + assert(pParams->m_magic == COMP_PARAMS_MAGIC); + if (pParams->m_magic != COMP_PARAMS_MAGIC) + return false; + + const uint8_t* pImage = wasm_ptr(img_data_ofs); + if (!pImage) + return false; + + const uint32_t bytes_per_pixel = sizeof(float) * 4; + + if 
(pitch_in_bytes < width * bytes_per_pixel) + return false; + + if (image_index >= pParams->m_images.size()) + { + if (!pParams->m_imagesf.try_resize(image_index + 1)) + return false; + } + + basisu::imagef& dst_img = pParams->m_imagesf[image_index]; + + dst_img.resize(width, height); + + if (pitch_in_bytes == width * bytes_per_pixel) + { + memcpy((void *)dst_img.get_ptr(), (const void *)pImage, pitch_in_bytes * height); + } + else + { + for (uint32_t y = 0; y < height; y++) + { + const uint8_t* pSrc_row = pImage + y * pitch_in_bytes; + + uint8_t* pDst_row = (uint8_t*)&dst_img(0, y); + + memcpy(pDst_row, pSrc_row, width * bytes_per_pixel); + } // y + } + + return true; +} + +BU_WASM_EXPORT("bu_compress_texture") +wasm_bool_t bu_compress_texture( + uint64_t params_ofs, + uint32_t desired_basis_tex_format, // basis_tex_format + int quality_level, int effort_level, + uint64_t flags_and_quality, float low_level_uastc_rdo_or_dct_quality) +{ + //enable_debug_printf((flags_and_quality & cFlagDebug) != 0); + + comp_params* pParams = (comp_params*)wasm_ptr(params_ofs); + if (!pParams) + return false; + + assert(pParams->m_magic == COMP_PARAMS_MAGIC); + if (pParams->m_magic != COMP_PARAMS_MAGIC) + return false; + + pParams->m_comp_data.clear(); + + if (desired_basis_tex_format >= (uint32_t)basist::basis_tex_format::cTotalFormats) + return false; + + if (!pParams->m_images.size() && !pParams->m_imagesf.size()) + return false; + if (pParams->m_images.size() && pParams->m_imagesf.size()) + return false; + + size_t comp_size = 0; + + void* pComp_data = basis_compress_internal( + (basist::basis_tex_format)desired_basis_tex_format, + pParams->m_images.size() ? &pParams->m_images : nullptr, + pParams->m_imagesf.size() ? 
&pParams->m_imagesf : nullptr, + (uint32_t)flags_and_quality, + low_level_uastc_rdo_or_dct_quality, + &comp_size, + &pParams->m_stats, + quality_level, + effort_level); + + if (!pComp_data) + return false; + + if (!pParams->m_comp_data.try_resize(comp_size)) + { + basis_free_data(pComp_data); + return false; + } + + memcpy(pParams->m_comp_data.get_ptr(), pComp_data, comp_size); + + basis_free_data(pComp_data); + + return true; +} diff --git a/external/basis_universal/encoder/basisu_wasm_api.h b/external/basis_universal/encoder/basisu_wasm_api.h new file mode 100644 index 0000000000..92266bc417 --- /dev/null +++ b/external/basis_universal/encoder/basisu_wasm_api.h @@ -0,0 +1,58 @@ +// File: basisu_wasm_api.h +#pragma once +#include "basisu_wasm_api_common.h" + +BU_WASM_EXPORT("bu_get_version") +uint32_t bu_get_version(); + +BU_WASM_EXPORT("bu_enable_debug_printf") +void bu_enable_debug_printf(uint32_t flag); + +BU_WASM_EXPORT("bu_init") +void bu_init(); + +BU_WASM_EXPORT("bu_alloc") +uint64_t bu_alloc(uint64_t size); + +BU_WASM_EXPORT("bu_free") +void bu_free(uint64_t ofs); + +BU_WASM_EXPORT("bu_new_comp_params") +uint64_t bu_new_comp_params(); + +BU_WASM_EXPORT("bu_delete_comp_params") +wasm_bool_t bu_delete_comp_params(uint64_t params_ofs); + +BU_WASM_EXPORT("bu_comp_params_get_comp_data_size") +uint64_t bu_comp_params_get_comp_data_size(uint64_t params_ofs); + +BU_WASM_EXPORT("bu_comp_params_get_comp_data_ofs") +uint64_t bu_comp_params_get_comp_data_ofs(uint64_t params_ofs); + +BU_WASM_EXPORT("bu_comp_params_clear") +wasm_bool_t bu_comp_params_clear(uint64_t params_ofs); + +BU_WASM_EXPORT("bu_comp_params_set_image_rgba32") +wasm_bool_t bu_comp_params_set_image_rgba32( + uint64_t params_ofs, + uint32_t image_index, + uint64_t img_data_ofs, + uint32_t width, uint32_t height, + uint32_t pitch_in_bytes); + +BU_WASM_EXPORT("bu_comp_params_set_image_float_rgba") +wasm_bool_t bu_comp_params_set_image_float_rgba( + uint64_t params_ofs, + uint32_t image_index, + uint64_t 
img_data_ofs, + uint32_t width, uint32_t height, + uint32_t pitch_in_bytes); + +BU_WASM_EXPORT("bu_compress_texture") +wasm_bool_t bu_compress_texture( + uint64_t params_ofs, + uint32_t desired_basis_tex_format, + int quality_level, int effort_level, + uint64_t flags_and_quality, + float low_level_uastc_rdo_or_dct_quality); + diff --git a/external/basis_universal/encoder/basisu_wasm_api_common.h b/external/basis_universal/encoder/basisu_wasm_api_common.h new file mode 100644 index 0000000000..d3fe1ae391 --- /dev/null +++ b/external/basis_universal/encoder/basisu_wasm_api_common.h @@ -0,0 +1,156 @@ +// File: basisu_wasm_api_common.h +#pragma once +#include "stdint.h" + +#if defined(__wasm__) + #if defined(__cplusplus) + #define BU_WASM_EXPORT(name) __attribute__((export_name(name))) extern "C" + #else + #define BU_WASM_EXPORT(name) __attribute__((export_name(name))) + #endif +#elif defined(__cplusplus) + #define BU_WASM_EXPORT(name) extern "C" +#else + #define BU_WASM_EXPORT(name) +#endif + +// wasm_bool_t is an alias for uint32_t +typedef uint32_t wasm_bool_t; + +// Compression constants + +#define BU_QUALITY_MIN 0 +#define BU_QUALITY_MAX 100 + +#define BU_EFFORT_MIN 0 +#define BU_EFFORT_MAX 10 +#define BU_EFFORT_SUPER_FAST = 0 +#define BU_EFFORT_FAST = 2 +#define BU_EFFORT_NORMAL = 5 +#define BU_EFFORT_DEFAULT = 2 +#define BU_EFFORT_SLOW = 8 +#define BU_EFFORT_VERY_SLOW = 10 + +#define BU_COMP_FLAGS_NONE (0) +#define BU_COMP_FLAGS_USE_OPENCL (1 << 8 ) +#define BU_COMP_FLAGS_THREADED (1 << 9 ) +#define BU_COMP_FLAGS_DEBUG_OUTPUT (1 << 10) +#define BU_COMP_FLAGS_KTX2_OUTPUT (1 << 11) +#define BU_COMP_FLAGS_KTX2_UASTC_ZSTD (1 << 12) +#define BU_COMP_FLAGS_SRGB (1 << 13) +#define BU_COMP_FLAGS_GEN_MIPS_CLAMP (1 << 14) +#define BU_COMP_FLAGS_GEN_MIPS_WRAP (1 << 15) +#define BU_COMP_FLAGS_Y_FLIP (1 << 16) +#define BU_COMP_FLAGS_PRINT_STATS (1 << 18) +#define BU_COMP_FLAGS_PRINT_STATUS (1 << 19) +#define BU_COMP_FLAGS_DEBUG_IMAGES (1 << 20) +#define BU_COMP_FLAGS_REC2020 
(1 << 21) +#define BU_COMP_FLAGS_VALIDATE_OUTPUT (1 << 22) + +#define BU_COMP_FLAGS_XUASTC_LDR_FULL_ARITH (0) +#define BU_COMP_FLAGS_XUASTC_LDR_HYBRID (1 << 23) +#define BU_COMP_FLAGS_XUASTC_LDR_FULL_ZSTD (2 << 23) +#define BU_COMP_FLAGS_XUASTC_LDR_SYNTAX_SHIFT (23) +#define BU_COMP_FLAGS_XUASTC_LDR_SYNTAX_MASK (3) + +#define BU_COMP_FLAGS_TEXTURE_TYPE_2D (0 << 25) +#define BU_COMP_FLAGS_TEXTURE_TYPE_2D_ARRAY (1 << 25) +#define BU_COMP_FLAGS_TEXTURE_TYPE_CUBEMAP_ARRAY (2 << 25) +#define BU_COMP_FLAGS_TEXTURE_TYPE_VIDEO_FRAMES (3 << 25) +#define BU_COMP_FLAGS_TEXTURE_TYPE_SHIFT (25) +#define BU_COMP_FLAGS_TEXTURE_TYPE_MASK (3) + +#define BU_COMP_FLAGS_VERBOSE (BU_COMP_FLAGS_DEBUG_OUTPUT | BU_COMP_FLAGS_PRINT_STATS | BU_COMP_FLAGS_PRINT_STATUS) + +// basist::basis_tex_format: the supported .ktx2 (and .basis) file format types +#define BTF_ETC1S 0 +#define BTF_UASTC_LDR_4X4 1 +#define BTF_UASTC_HDR_4X4 2 +#define BTF_ASTC_HDR_6X6 3 +#define BTF_UASTC_HDR_6X6 4 +#define BTF_XUASTC_LDR_4X4 5 +#define BTF_XUASTC_LDR_5X4 6 +#define BTF_XUASTC_LDR_5X5 7 +#define BTF_XUASTC_LDR_6X5 8 +#define BTF_XUASTC_LDR_6X6 9 +#define BTF_XUASTC_LDR_8X5 10 +#define BTF_XUASTC_LDR_8X6 11 +#define BTF_XUASTC_LDR_10X5 12 +#define BTF_XUASTC_LDR_10X6 13 +#define BTF_XUASTC_LDR_8X8 14 +#define BTF_XUASTC_LDR_10X8 15 +#define BTF_XUASTC_LDR_10X10 16 +#define BTF_XUASTC_LDR_12X10 17 +#define BTF_XUASTC_LDR_12X12 18 +#define BTF_ASTC_LDR_4X4 19 +#define BTF_ASTC_LDR_5X4 20 +#define BTF_ASTC_LDR_5X5 21 +#define BTF_ASTC_LDR_6X5 22 +#define BTF_ASTC_LDR_6X6 23 +#define BTF_ASTC_LDR_8X5 24 +#define BTF_ASTC_LDR_8X6 25 +#define BTF_ASTC_LDR_10X5 26 +#define BTF_ASTC_LDR_10X6 27 +#define BTF_ASTC_LDR_8X8 28 +#define BTF_ASTC_LDR_10X8 29 +#define BTF_ASTC_LDR_10X10 30 +#define BTF_ASTC_LDR_12X10 31 +#define BTF_ASTC_LDR_12X12 32 +#define BTF_TOTAL_FORMATS 33 + +// Transcoding constants + +// basist::transcoder_texture_format: the supported transcode GPU texture formats +#define TF_ETC1_RGB 0 +#define 
TF_ETC2_RGBA 1 +#define TF_BC1_RGB 2 +#define TF_BC3_RGBA 3 +#define TF_BC4_R 4 +#define TF_BC5_RG 5 +#define TF_BC7_RGBA 6 +#define TF_PVRTC1_4_RGB 8 +#define TF_PVRTC1_4_RGBA 9 +#define TF_ASTC_LDR_4X4_RGBA 10 +#define TF_ATC_RGB 11 +#define TF_ATC_RGBA 12 +#define TF_FXT1_RGB 17 +#define TF_PVRTC2_4_RGB 18 +#define TF_PVRTC2_4_RGBA 19 +#define TF_ETC2_EAC_R11 20 +#define TF_ETC2_EAC_RG11 21 +#define TF_BC6H 22 +#define TF_ASTC_HDR_4X4_RGBA 23 +#define TF_RGBA32 13 +#define TF_RGB565 14 +#define TF_BGR565 15 +#define TF_RGBA4444 16 +#define TF_RGB_HALF 24 +#define TF_RGBA_HALF 25 +#define TF_RGB_9E5 26 +#define TF_ASTC_HDR_6X6_RGBA 27 +#define TF_ASTC_LDR_5X4_RGBA 28 +#define TF_ASTC_LDR_5X5_RGBA 29 +#define TF_ASTC_LDR_6X5_RGBA 30 +#define TF_ASTC_LDR_6X6_RGBA 31 +#define TF_ASTC_LDR_8X5_RGBA 32 +#define TF_ASTC_LDR_8X6_RGBA 33 +#define TF_ASTC_LDR_10X5_RGBA 34 +#define TF_ASTC_LDR_10X6_RGBA 35 +#define TF_ASTC_LDR_8X8_RGBA 36 +#define TF_ASTC_LDR_10X8_RGBA 37 +#define TF_ASTC_LDR_10X10_RGBA 38 +#define TF_ASTC_LDR_12X10_RGBA 39 +#define TF_ASTC_LDR_12X12_RGBA 40 +#define TF_TOTAL_TEXTURE_FORMATS 41 + +// basist::basisu_decode_flags: Transcode decode flags (bt_ktx2_transcode_image_level decode_flags parameter, logically OR'd) +#define DECODE_FLAGS_PVRTC_DECODE_TO_NEXT_POW2 2 +#define DECODE_FLAGS_TRANSCODE_ALPHA_DATA_TO_OPAQUE_FORMATS 4 +#define DECODE_FLAGS_BC1_FORBID_THREE_COLOR_BLOCKS 8 +#define DECODE_FLAGS_OUTPUT_HAS_ALPHA_INDICES 16 +#define DECODE_FLAGS_HIGH_QUALITY 32 +#define DECODE_FLAGS_NO_ETC1S_CHROMA_FILTERING 64 +#define DECODE_FLAGS_NO_DEBLOCK_FILTERING 128 +#define DECODE_FLAGS_STRONGER_DEBLOCK_FILTERING 256 +#define DECODE_FLAGS_FORCE_DEBLOCK_FILTERING 512 +#define DECODE_FLAGS_XUASTC_LDR_DISABLE_FAST_BC7_TRANSCODING 1024 diff --git a/external/basis_universal/encoder/basisu_wasm_transcoder_api.cpp b/external/basis_universal/encoder/basisu_wasm_transcoder_api.cpp new file mode 100644 index 0000000000..ab46525b14 --- /dev/null +++ 
b/external/basis_universal/encoder/basisu_wasm_transcoder_api.cpp @@ -0,0 +1,1071 @@ +// basisu_wasm_transcoder_api.cpp - Transcoding API support for WASM WASI modules and Python native support. +// Also useable by plain C callers. +#include +#include +#include +#include "../transcoder/basisu_transcoder.h" +#include "basisu_wasm_transcoder_api.h" + +using namespace basisu; +using namespace basist; + +static inline uint64_t wasm_offset(void* p) +{ + return (uint64_t)(uintptr_t)p; +} + +static inline uint8_t* wasm_ptr(uint64_t offset) +{ + return (uint8_t*)(uintptr_t)offset; +} + +// High-level functions + +BU_WASM_EXPORT("bt_get_version") +uint32_t bt_get_version() +{ + printf("Hello from basisu_wasm_transcoder_api.cpp version %u\n", BASISD_LIB_VERSION); + + return BASISD_LIB_VERSION; +} + +BU_WASM_EXPORT("bt_enable_debug_printf") +void bt_enable_debug_printf(uint32_t flag) +{ + enable_debug_printf(flag != 0); +} + +BU_WASM_EXPORT("bt_init") +void bt_init() +{ + basisu_transcoder_init(); +} + +// Memory alloc/free — stubs +BU_WASM_EXPORT("bt_alloc") +uint64_t bt_alloc(uint64_t size) +{ + void* p = malloc((size_t)size); + return wasm_offset(p); +} + +BU_WASM_EXPORT("bt_free") +void bt_free(uint64_t mem_ofs) +{ + free(wasm_ptr(mem_ofs)); +} + +// basis_tex_format helpers + +BU_WASM_EXPORT("bt_basis_tex_format_is_xuastc_ldr") +wasm_bool_t bt_basis_tex_format_is_xuastc_ldr(uint32_t basis_tex_fmt_u32) +{ + assert(basis_tex_fmt_u32 < (uint32_t)basis_tex_format::cTotalFormats); + + basis_tex_format tex_fmt = static_cast(basis_tex_fmt_u32); + + return basis_tex_format_is_xuastc_ldr(tex_fmt); +} + +BU_WASM_EXPORT("bt_basis_tex_format_is_astc_ldr") +wasm_bool_t bt_basis_tex_format_is_astc_ldr(uint32_t basis_tex_fmt_u32) +{ + assert(basis_tex_fmt_u32 < (uint32_t)basis_tex_format::cTotalFormats); + + basis_tex_format tex_fmt = static_cast(basis_tex_fmt_u32); + + return basis_tex_format_is_astc_ldr(tex_fmt); +} + +BU_WASM_EXPORT("bt_basis_tex_format_get_block_width") +uint32_t 
bt_basis_tex_format_get_block_width(uint32_t basis_tex_fmt_u32) +{ + assert(basis_tex_fmt_u32 < (uint32_t)basis_tex_format::cTotalFormats); + + basis_tex_format tex_fmt = static_cast(basis_tex_fmt_u32); + + return basis_tex_format_get_block_width(tex_fmt); +} + +BU_WASM_EXPORT("bt_basis_tex_format_get_block_height") +uint32_t bt_basis_tex_format_get_block_height(uint32_t basis_tex_fmt_u32) +{ + assert(basis_tex_fmt_u32 < (uint32_t)basis_tex_format::cTotalFormats); + + basis_tex_format tex_fmt = static_cast(basis_tex_fmt_u32); + + return basis_tex_format_get_block_height(tex_fmt); +} + +BU_WASM_EXPORT("bt_basis_tex_format_is_hdr") +wasm_bool_t bt_basis_tex_format_is_hdr(uint32_t basis_tex_fmt_u32) +{ + assert(basis_tex_fmt_u32 < (uint32_t)basis_tex_format::cTotalFormats); + + basis_tex_format tex_fmt = static_cast(basis_tex_fmt_u32); + + return basis_tex_format_is_hdr(tex_fmt); +} + +BU_WASM_EXPORT("bt_basis_tex_format_is_ldr") +wasm_bool_t bt_basis_tex_format_is_ldr(uint32_t basis_tex_fmt_u32) +{ + assert(basis_tex_fmt_u32 < (uint32_t)basis_tex_format::cTotalFormats); + + basis_tex_format tex_fmt = static_cast(basis_tex_fmt_u32); + + return basis_tex_format_is_ldr(tex_fmt); +} + +// transcoder_texture_format helpers + +BU_WASM_EXPORT("bt_basis_get_bytes_per_block_or_pixel") +uint32_t bt_basis_get_bytes_per_block_or_pixel(uint32_t transcoder_texture_format_u32) +{ + assert(transcoder_texture_format_u32 < (uint32_t)transcoder_texture_format::cTFTotalTextureFormats); + + transcoder_texture_format fmt = static_cast(transcoder_texture_format_u32); + + return basis_get_bytes_per_block_or_pixel(fmt); +} + +BU_WASM_EXPORT("bt_basis_transcoder_format_has_alpha") +wasm_bool_t bt_basis_transcoder_format_has_alpha(uint32_t transcoder_texture_format_u32) +{ + assert(transcoder_texture_format_u32 < (uint32_t)transcoder_texture_format::cTFTotalTextureFormats); + + transcoder_texture_format fmt = static_cast(transcoder_texture_format_u32); + + return 
basis_transcoder_format_has_alpha(fmt); +} + +BU_WASM_EXPORT("bt_basis_transcoder_format_is_hdr") +wasm_bool_t bt_basis_transcoder_format_is_hdr(uint32_t transcoder_texture_format_u32) +{ + assert(transcoder_texture_format_u32 < (uint32_t)transcoder_texture_format::cTFTotalTextureFormats); + + transcoder_texture_format fmt = static_cast(transcoder_texture_format_u32); + + return basis_transcoder_format_is_hdr(fmt); +} + +BU_WASM_EXPORT("bt_basis_transcoder_format_is_ldr") +wasm_bool_t bt_basis_transcoder_format_is_ldr(uint32_t transcoder_texture_format_u32) +{ + assert(transcoder_texture_format_u32 < (uint32_t)transcoder_texture_format::cTFTotalTextureFormats); + + transcoder_texture_format fmt = static_cast(transcoder_texture_format_u32); + + return basis_transcoder_format_is_ldr(fmt); +} + +BU_WASM_EXPORT("bt_basis_transcoder_texture_format_is_astc") +wasm_bool_t bt_basis_transcoder_texture_format_is_astc(uint32_t transcoder_texture_format_u32) +{ + assert(transcoder_texture_format_u32 < (uint32_t)transcoder_texture_format::cTFTotalTextureFormats); + + transcoder_texture_format fmt = static_cast(transcoder_texture_format_u32); + + return basis_is_transcoder_texture_format_astc(fmt); +} + +BU_WASM_EXPORT("bt_basis_transcoder_format_is_uncompressed") +wasm_bool_t bt_basis_transcoder_format_is_uncompressed(uint32_t transcoder_texture_format_u32) +{ + assert(transcoder_texture_format_u32 < (uint32_t)transcoder_texture_format::cTFTotalTextureFormats); + + transcoder_texture_format fmt = static_cast(transcoder_texture_format_u32); + + return basis_transcoder_format_is_uncompressed(fmt); +} + +BU_WASM_EXPORT("bt_basis_get_uncompressed_bytes_per_pixel") +uint32_t bt_basis_get_uncompressed_bytes_per_pixel(uint32_t transcoder_texture_format_u32) +{ + assert(transcoder_texture_format_u32 < (uint32_t)transcoder_texture_format::cTFTotalTextureFormats); + + transcoder_texture_format fmt = static_cast(transcoder_texture_format_u32); + + return 
basis_get_uncompressed_bytes_per_pixel(fmt); +} + +BU_WASM_EXPORT("bt_basis_get_block_width") +uint32_t bt_basis_get_block_width(uint32_t transcoder_texture_format_u32) +{ + assert(transcoder_texture_format_u32 < (uint32_t)transcoder_texture_format::cTFTotalTextureFormats); + + transcoder_texture_format fmt = static_cast(transcoder_texture_format_u32); + + return basis_get_block_width(fmt); +} + +BU_WASM_EXPORT("bt_basis_get_block_height") +uint32_t bt_basis_get_block_height(uint32_t transcoder_texture_format_u32) +{ + assert(transcoder_texture_format_u32 < (uint32_t)transcoder_texture_format::cTFTotalTextureFormats); + + transcoder_texture_format fmt = static_cast(transcoder_texture_format_u32); + + return basis_get_block_height(fmt); +} + +BU_WASM_EXPORT("bt_basis_get_transcoder_texture_format_from_basis_tex_format") +uint32_t bt_basis_get_transcoder_texture_format_from_basis_tex_format(uint32_t basis_tex_format_u32) +{ + assert(basis_tex_format_u32 < (uint32_t)basis_tex_format::cTotalFormats); + + basis_tex_format fmt = static_cast(basis_tex_format_u32); + + return (uint32_t)basis_get_transcoder_texture_format_from_xuastc_or_astc_ldr_basis_tex_format(fmt); +} + +BU_WASM_EXPORT("bt_basis_is_format_supported") +wasm_bool_t bt_basis_is_format_supported(uint32_t transcoder_texture_format_u32, uint32_t basis_tex_format_u32) +{ + assert(transcoder_texture_format_u32 < (uint32_t)transcoder_texture_format::cTFTotalTextureFormats); + assert(basis_tex_format_u32 < (uint32_t)basis_tex_format::cTotalFormats); + + transcoder_texture_format transcoder_tex_fmt = static_cast(transcoder_texture_format_u32); + basis_tex_format basis_tex_fmt = static_cast(basis_tex_format_u32); + + return basis_is_format_supported(transcoder_tex_fmt, basis_tex_fmt); +} + +BU_WASM_EXPORT("bt_basis_compute_transcoded_image_size_in_bytes") +uint32_t bt_basis_compute_transcoded_image_size_in_bytes(uint32_t transcoder_texture_format_u32, uint32_t orig_width, uint32_t orig_height) +{ + 
assert(transcoder_texture_format_u32 < (uint32_t)transcoder_texture_format::cTFTotalTextureFormats); + + transcoder_texture_format transcoder_tex_fmt = static_cast(transcoder_texture_format_u32); + + return basis_compute_transcoded_image_size_in_bytes(transcoder_tex_fmt, orig_width, orig_height); +} + +// KTX2 inspection and transcoding helpers + +const uint32_t KTX2_HANDLE_MAGIC = 0xAB21EF20; + +struct ktx2_handle_t +{ + uint32_t m_magic = KTX2_HANDLE_MAGIC; + ktx2_transcoder m_transcoder; +}; + +BU_WASM_EXPORT("bt_ktx2_open") +uint64_t bt_ktx2_open(uint64_t data_mem_ofs, uint32_t data_len) +{ + if (!data_mem_ofs || (data_len < 4)) + return 0; + + ktx2_handle_t* pHandle = new ktx2_handle_t(); + + if (!pHandle->m_transcoder.init(wasm_ptr(data_mem_ofs), data_len)) + { + delete pHandle; + return 0; + } + + return wasm_offset(pHandle); +} + +BU_WASM_EXPORT("bt_ktx2_close") +void bt_ktx2_close(uint64_t handle) +{ + if (!handle) + return; + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return; + + delete pHandle; +} + +BU_WASM_EXPORT("bt_ktx2_get_width") +uint32_t bt_ktx2_get_width(uint64_t handle) +{ + if (!handle) + { + assert(0); + return 0; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return 0; + + return pHandle->m_transcoder.get_width(); +} + +BU_WASM_EXPORT("bt_ktx2_get_height") +uint32_t bt_ktx2_get_height(uint64_t handle) +{ + if (!handle) + { + assert(0); + return 0; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return 0; + + return pHandle->m_transcoder.get_height(); +} + +BU_WASM_EXPORT("bt_ktx2_get_levels") +uint32_t bt_ktx2_get_levels(uint64_t handle) +{ + if (!handle) + { + assert(0); + return 0; + } + + 
ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return 0; + + return pHandle->m_transcoder.get_levels(); +} + +BU_WASM_EXPORT("bt_ktx2_get_faces") +uint32_t bt_ktx2_get_faces(uint64_t handle) +{ + if (!handle) + { + assert(0); + return 0; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return 0; + + return pHandle->m_transcoder.get_faces(); +} + +BU_WASM_EXPORT("bt_ktx2_get_layers") +uint32_t bt_ktx2_get_layers(uint64_t handle) +{ + if (!handle) + { + assert(0); + return 0; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return 0; + + return pHandle->m_transcoder.get_layers(); +} + +BU_WASM_EXPORT("bt_ktx2_get_basis_tex_format") +uint32_t bt_ktx2_get_basis_tex_format(uint64_t handle) +{ + if (!handle) + { + assert(0); + return 0; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return 0; + + return (uint32_t)pHandle->m_transcoder.get_basis_tex_format(); +} + +BU_WASM_EXPORT("bt_ktx2_is_etc1s") +wasm_bool_t bt_ktx2_is_etc1s(uint64_t handle) +{ + if (!handle) + { + assert(0); + return false; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return false; + + return pHandle->m_transcoder.is_etc1s(); +} + +BU_WASM_EXPORT("bt_ktx2_is_uastc_ldr_4x4") +wasm_bool_t bt_ktx2_is_uastc_ldr_4x4(uint64_t handle) +{ + if (!handle) + { + assert(0); + return false; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != 
KTX2_HANDLE_MAGIC) + return false; + + return pHandle->m_transcoder.is_uastc(); +} + +BU_WASM_EXPORT("bt_ktx2_is_hdr") +wasm_bool_t bt_ktx2_is_hdr(uint64_t handle) +{ + if (!handle) + { + assert(0); + return false; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return false; + + return pHandle->m_transcoder.is_hdr(); +} + +BU_WASM_EXPORT("bt_ktx2_is_hdr_4x4") +wasm_bool_t bt_ktx2_is_hdr_4x4(uint64_t handle) +{ + if (!handle) + { + assert(0); + return false; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return false; + + return pHandle->m_transcoder.is_hdr_4x4(); +} + +BU_WASM_EXPORT("bt_ktx2_is_hdr_6x6") +wasm_bool_t bt_ktx2_is_hdr_6x6(uint64_t handle) +{ + if (!handle) + { + assert(0); + return false; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return false; + + return pHandle->m_transcoder.is_hdr_6x6(); +} + +BU_WASM_EXPORT("bt_ktx2_is_ldr") +wasm_bool_t bt_ktx2_is_ldr(uint64_t handle) +{ + if (!handle) + { + assert(0); + return false; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return false; + + return pHandle->m_transcoder.is_ldr(); +} + +BU_WASM_EXPORT("bt_ktx2_is_astc_ldr") +wasm_bool_t bt_ktx2_is_astc_ldr(uint64_t handle) +{ + if (!handle) + { + assert(0); + return false; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return false; + + return pHandle->m_transcoder.is_astc_ldr(); +} + +BU_WASM_EXPORT("bt_ktx2_is_xuastc_ldr") +wasm_bool_t 
bt_ktx2_is_xuastc_ldr(uint64_t handle) +{ + if (!handle) + { + assert(0); + return false; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return false; + + return pHandle->m_transcoder.is_xuastc_ldr(); +} + +BU_WASM_EXPORT("bt_ktx2_get_block_width") +uint32_t bt_ktx2_get_block_width(uint64_t handle) +{ + if (!handle) + { + assert(0); + return 0; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return 0; + + return pHandle->m_transcoder.get_block_width(); +} + +BU_WASM_EXPORT("bt_ktx2_get_block_height") +uint32_t bt_ktx2_get_block_height(uint64_t handle) +{ + if (!handle) + { + assert(0); + return 0; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return 0; + + return pHandle->m_transcoder.get_block_height(); +} + +BU_WASM_EXPORT("bt_ktx2_has_alpha") +wasm_bool_t bt_ktx2_has_alpha(uint64_t handle) +{ + if (!handle) + { + assert(0); + return false; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return false; + + return pHandle->m_transcoder.get_has_alpha(); +} + +BU_WASM_EXPORT("bt_ktx2_get_dfd_color_model") +uint32_t bt_ktx2_get_dfd_color_model(uint64_t handle) +{ + if (!handle) + { + assert(0); + return 0; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return 0; + + return pHandle->m_transcoder.get_dfd_color_model(); +} + +BU_WASM_EXPORT("bt_ktx2_get_dfd_color_primaries") +uint32_t bt_ktx2_get_dfd_color_primaries(uint64_t handle) +{ + if (!handle) + { + assert(0); + return 0; + } 
+ + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return 0; + + return pHandle->m_transcoder.get_dfd_color_primaries(); +} + +BU_WASM_EXPORT("bt_ktx2_get_dfd_transfer_func") +uint32_t bt_ktx2_get_dfd_transfer_func(uint64_t handle) +{ + if (!handle) + { + assert(0); + return 0; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return 0; + + return pHandle->m_transcoder.get_dfd_transfer_func(); +} + +BU_WASM_EXPORT("bt_ktx2_is_srgb") +wasm_bool_t bt_ktx2_is_srgb(uint64_t handle) +{ + if (!handle) + { + assert(0); + return false; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return false; + + return pHandle->m_transcoder.is_srgb(); +} + +BU_WASM_EXPORT("bt_ktx2_get_dfd_flags") +uint32_t bt_ktx2_get_dfd_flags(uint64_t handle) +{ + if (!handle) + { + assert(0); + return 0; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return 0; + + return pHandle->m_transcoder.get_dfd_flags(); +} + +BU_WASM_EXPORT("bt_ktx2_get_dfd_total_samples") +uint32_t bt_ktx2_get_dfd_total_samples(uint64_t handle) +{ + if (!handle) + { + assert(0); + return 0; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return 0; + + return pHandle->m_transcoder.get_dfd_total_samples(); +} + +BU_WASM_EXPORT("bt_ktx2_get_dfd_channel_id0") +uint32_t bt_ktx2_get_dfd_channel_id0(uint64_t handle) +{ + if (!handle) + { + assert(0); + return 0; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + 
assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return 0; + + return pHandle->m_transcoder.get_dfd_channel_id0(); +} + +BU_WASM_EXPORT("bt_ktx2_get_dfd_channel_id1") +uint32_t bt_ktx2_get_dfd_channel_id1(uint64_t handle) +{ + if (!handle) + { + assert(0); + return 0; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return 0; + + return pHandle->m_transcoder.get_dfd_channel_id1(); +} + +BU_WASM_EXPORT("bt_ktx2_is_video") +wasm_bool_t bt_ktx2_is_video(uint64_t handle) +{ + if (!handle) + { + assert(0); + return false; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return false; + + return pHandle->m_transcoder.is_video(); +} + +BU_WASM_EXPORT("bt_ktx2_get_ldr_hdr_upconversion_nit_multiplier") +float bt_ktx2_get_ldr_hdr_upconversion_nit_multiplier(uint64_t handle) +{ + if (!handle) + { + assert(0); + return 0.0f; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return 0.0f; + + return pHandle->m_transcoder.get_ldr_hdr_upconversion_nit_multiplier(); +} + +BU_WASM_EXPORT("bt_ktx2_get_level_orig_width") +uint32_t bt_ktx2_get_level_orig_width(uint64_t handle, uint32_t level_index, uint32_t layer_index, uint32_t face_index) +{ + if (!handle) + { + assert(0); + return 0; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return 0; + + // FIXME slow - most info is thrown away. 
+ ktx2_image_level_info level_info; + if (!pHandle->m_transcoder.get_image_level_info(level_info, level_index, layer_index, face_index)) + return 0; + + return level_info.m_orig_width; +} + +BU_WASM_EXPORT("bt_ktx2_get_level_orig_height") +uint32_t bt_ktx2_get_level_orig_height(uint64_t handle, uint32_t level_index, uint32_t layer_index, uint32_t face_index) +{ + if (!handle) + { + assert(0); + return 0; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return 0; + + // FIXME slow - most info is thrown away. + ktx2_image_level_info level_info; + if (!pHandle->m_transcoder.get_image_level_info(level_info, level_index, layer_index, face_index)) + return 0; + + return level_info.m_orig_height; +} + +BU_WASM_EXPORT("bt_ktx2_get_level_actual_width") +uint32_t bt_ktx2_get_level_actual_width(uint64_t handle, uint32_t level_index, uint32_t layer_index, uint32_t face_index) +{ + if (!handle) + { + assert(0); + return 0; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return 0; + + // FIXME slow - most info is thrown away. + ktx2_image_level_info level_info; + if (!pHandle->m_transcoder.get_image_level_info(level_info, level_index, layer_index, face_index)) + return 0; + + return level_info.m_width; +} + +BU_WASM_EXPORT("bt_ktx2_get_level_actual_height") +uint32_t bt_ktx2_get_level_actual_height(uint64_t handle, uint32_t level_index, uint32_t layer_index, uint32_t face_index) +{ + if (!handle) + { + assert(0); + return 0; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return 0; + + // FIXME slow - most info is thrown away. 
+ ktx2_image_level_info level_info; + if (!pHandle->m_transcoder.get_image_level_info(level_info, level_index, layer_index, face_index)) + return 0; + + return level_info.m_height; +} + +BU_WASM_EXPORT("bt_ktx2_get_level_num_blocks_x") +uint32_t bt_ktx2_get_level_num_blocks_x(uint64_t handle, uint32_t level_index, uint32_t layer_index, uint32_t face_index) +{ + if (!handle) + { + assert(0); + return 0; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return 0; + + // FIXME slow - most info is thrown away. + ktx2_image_level_info level_info; + if (!pHandle->m_transcoder.get_image_level_info(level_info, level_index, layer_index, face_index)) + return 0; + + return level_info.m_num_blocks_x; +} + +BU_WASM_EXPORT("bt_ktx2_get_level_num_blocks_y") +uint32_t bt_ktx2_get_level_num_blocks_y(uint64_t handle, uint32_t level_index, uint32_t layer_index, uint32_t face_index) +{ + if (!handle) + { + assert(0); + return 0; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return 0; + + // FIXME slow - most info is thrown away. + ktx2_image_level_info level_info; + if (!pHandle->m_transcoder.get_image_level_info(level_info, level_index, layer_index, face_index)) + return 0; + + return level_info.m_num_blocks_y; +} + +BU_WASM_EXPORT("bt_ktx2_get_level_total_blocks") +uint32_t bt_ktx2_get_level_total_blocks(uint64_t handle, uint32_t level_index, uint32_t layer_index, uint32_t face_index) +{ + if (!handle) + { + assert(0); + return 0; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return 0; + + // FIXME slow - most info is thrown away. 
+ ktx2_image_level_info level_info; + if (!pHandle->m_transcoder.get_image_level_info(level_info, level_index, layer_index, face_index)) + return 0; + + return level_info.m_total_blocks; +} + +BU_WASM_EXPORT("bt_ktx2_get_level_alpha_flag") +wasm_bool_t bt_ktx2_get_level_alpha_flag(uint64_t handle, uint32_t level_index, uint32_t layer_index, uint32_t face_index) +{ + if (!handle) + { + assert(0); + return false; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return false; + + // FIXME slow - most info is thrown away. + ktx2_image_level_info level_info; + if (!pHandle->m_transcoder.get_image_level_info(level_info, level_index, layer_index, face_index)) + return false; + + return level_info.m_alpha_flag; +} + +BU_WASM_EXPORT("bt_ktx2_get_level_iframe_flag") +wasm_bool_t bt_ktx2_get_level_iframe_flag(uint64_t handle, uint32_t level_index, uint32_t layer_index, uint32_t face_index) +{ + if (!handle) + { + assert(0); + return false; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return false; + + // FIXME slow - most info is thrown away. 
+ ktx2_image_level_info level_info; + if (!pHandle->m_transcoder.get_image_level_info(level_info, level_index, layer_index, face_index)) + return false; + + return level_info.m_iframe_flag; +} + +BU_WASM_EXPORT("bt_ktx2_start_transcoding") +wasm_bool_t bt_ktx2_start_transcoding(uint64_t handle) +{ + if (!handle) + { + assert(0); + return false; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != KTX2_HANDLE_MAGIC) + return false; + + return pHandle->m_transcoder.start_transcoding(); +} + +const uint32_t KTX2_TRANSCODE_STATE_MAGIC = 0x2B21CF21; + +struct ktx2_transcode_state_t +{ + uint32_t m_magic = KTX2_TRANSCODE_STATE_MAGIC; + + ktx2_transcoder_state m_state; +}; + +BU_WASM_EXPORT("bt_ktx2_create_transcode_state") +uint64_t bt_ktx2_create_transcode_state() +{ + return wasm_offset(new ktx2_transcode_state_t()); +} + +BU_WASM_EXPORT("bt_ktx2_destroy_transcode_state") +void bt_ktx2_destroy_transcode_state(uint64_t handle) +{ + if (!handle) + return; + + ktx2_transcode_state_t* pState = reinterpret_cast(wasm_ptr(handle)); + + assert(pState->m_magic == KTX2_TRANSCODE_STATE_MAGIC); + if (pState->m_magic != KTX2_TRANSCODE_STATE_MAGIC) + return; + + delete pState; +} + +BU_WASM_EXPORT("bt_ktx2_transcode_image_level") +wasm_bool_t bt_ktx2_transcode_image_level( + uint64_t ktx2_handle, + uint32_t level_index, uint32_t layer_index, uint32_t face_index, + uint64_t output_block_mem_ofs, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + uint32_t transcoder_texture_format_u32, + uint32_t decode_flags, + uint32_t output_row_pitch_in_blocks_or_pixels, + uint32_t output_rows_in_pixels, + int channel0, int channel1, + uint64_t state_handle) +{ + if ((!ktx2_handle) || (!output_block_mem_ofs)) + { + assert(0); + return false; + } + + ktx2_handle_t* pHandle = reinterpret_cast(wasm_ptr(ktx2_handle)); + + assert(pHandle->m_magic == KTX2_HANDLE_MAGIC); + if (pHandle->m_magic != 
KTX2_HANDLE_MAGIC) + return false; + + assert(transcoder_texture_format_u32 < (uint32_t)transcoder_texture_format::cTFTotalTextureFormats); + transcoder_texture_format tex_fmt = static_cast(transcoder_texture_format_u32); + + ktx2_transcode_state_t* pTranscode_state = nullptr; + + if (state_handle) + { + pTranscode_state = reinterpret_cast(wasm_ptr(state_handle)); + + assert(pTranscode_state->m_magic == KTX2_TRANSCODE_STATE_MAGIC); + if (pTranscode_state->m_magic != KTX2_TRANSCODE_STATE_MAGIC) + return false; + } + + return pHandle->m_transcoder.transcode_image_level( + level_index, layer_index, face_index, + wasm_ptr(output_block_mem_ofs), output_blocks_buf_size_in_blocks_or_pixels, + tex_fmt, + decode_flags, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels, channel0, channel1, + pTranscode_state ? &pTranscode_state->m_state : nullptr); +} diff --git a/external/basis_universal/encoder/basisu_wasm_transcoder_api.h b/external/basis_universal/encoder/basisu_wasm_transcoder_api.h new file mode 100644 index 0000000000..a7389acee9 --- /dev/null +++ b/external/basis_universal/encoder/basisu_wasm_transcoder_api.h @@ -0,0 +1,216 @@ +// File: basisu_wasm_transcoder_api.h - Transcoding API support for WASM WASI modules and Python native support. 
+#pragma once +#include "basisu_wasm_api_common.h" + +// High-level functions + +BU_WASM_EXPORT("bt_get_version") +uint32_t bt_get_version(); + +BU_WASM_EXPORT("bt_enable_debug_printf") +void bt_enable_debug_printf(uint32_t flag); + +BU_WASM_EXPORT("bt_init") +void bt_init(); + +BU_WASM_EXPORT("bt_alloc") +uint64_t bt_alloc(uint64_t size); + +BU_WASM_EXPORT("bt_free") +void bt_free(uint64_t ofs); + +// basis_tex_format helpers + +BU_WASM_EXPORT("bt_basis_tex_format_is_xuastc_ldr") +wasm_bool_t bt_basis_tex_format_is_xuastc_ldr(uint32_t basis_tex_fmt_u32); + +BU_WASM_EXPORT("bt_basis_tex_format_is_astc_ldr") +wasm_bool_t bt_basis_tex_format_is_astc_ldr(uint32_t basis_tex_fmt_u32); + +BU_WASM_EXPORT("bt_basis_tex_format_get_block_width") +uint32_t bt_basis_tex_format_get_block_width(uint32_t basis_tex_fmt_u32); + +BU_WASM_EXPORT("bt_basis_tex_format_get_block_height") +uint32_t bt_basis_tex_format_get_block_height(uint32_t basis_tex_fmt_u32); + +BU_WASM_EXPORT("bt_basis_tex_format_is_hdr") +wasm_bool_t bt_basis_tex_format_is_hdr(uint32_t basis_tex_format_u32); + +BU_WASM_EXPORT("bt_basis_tex_format_is_ldr") +wasm_bool_t bt_basis_tex_format_is_ldr(uint32_t basis_tex_format_u32); + +// transcoder_texture_format helpers + +BU_WASM_EXPORT("bt_basis_get_bytes_per_block_or_pixel") +uint32_t bt_basis_get_bytes_per_block_or_pixel(uint32_t transcoder_texture_format_u32); + +BU_WASM_EXPORT("bt_basis_transcoder_format_has_alpha") +wasm_bool_t bt_basis_transcoder_format_has_alpha(uint32_t transcoder_texture_format_u32); + +BU_WASM_EXPORT("bt_basis_transcoder_format_is_hdr") +wasm_bool_t bt_basis_transcoder_format_is_hdr(uint32_t transcoder_texture_format_u32); + +BU_WASM_EXPORT("bt_basis_transcoder_format_is_ldr") +wasm_bool_t bt_basis_transcoder_format_is_ldr(uint32_t transcoder_texture_format_u32); + +BU_WASM_EXPORT("bt_basis_transcoder_texture_format_is_astc") +wasm_bool_t bt_basis_transcoder_texture_format_is_astc(uint32_t transcoder_texture_format_u32); + 
+BU_WASM_EXPORT("bt_basis_transcoder_format_is_uncompressed") +wasm_bool_t bt_basis_transcoder_format_is_uncompressed(uint32_t transcoder_texture_format_u32); + +BU_WASM_EXPORT("bt_basis_get_uncompressed_bytes_per_pixel") +uint32_t bt_basis_get_uncompressed_bytes_per_pixel(uint32_t transcoder_texture_format_u32); + +BU_WASM_EXPORT("bt_basis_get_block_width") +uint32_t bt_basis_get_block_width(uint32_t transcoder_texture_format_u32); + +BU_WASM_EXPORT("bt_basis_get_block_height") +uint32_t bt_basis_get_block_height(uint32_t transcoder_texture_format_u32); + +BU_WASM_EXPORT("bt_basis_get_transcoder_texture_format_from_basis_tex_format") +uint32_t bt_basis_get_transcoder_texture_format_from_basis_tex_format(uint32_t basis_tex_format_u32); + +BU_WASM_EXPORT("bt_basis_is_format_supported") +wasm_bool_t bt_basis_is_format_supported(uint32_t transcoder_texture_format_u32, uint32_t basis_tex_format_u32); + +BU_WASM_EXPORT("bt_basis_compute_transcoded_image_size_in_bytes") +uint32_t bt_basis_compute_transcoded_image_size_in_bytes(uint32_t transcoder_texture_format_u32, uint32_t orig_width, uint32_t orig_height); + +// Transcoding +BU_WASM_EXPORT("bt_ktx2_open") +uint64_t bt_ktx2_open(uint64_t data_mem_ofs, uint32_t data_len); + +BU_WASM_EXPORT("bt_ktx2_close") +void bt_ktx2_close(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_width") +uint32_t bt_ktx2_get_width(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_height") +uint32_t bt_ktx2_get_height(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_levels") +uint32_t bt_ktx2_get_levels(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_faces") +uint32_t bt_ktx2_get_faces(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_layers") +uint32_t bt_ktx2_get_layers(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_basis_tex_format") +uint32_t bt_ktx2_get_basis_tex_format(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_is_etc1s") +wasm_bool_t bt_ktx2_is_etc1s(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_is_uastc_ldr_4x4") +wasm_bool_t 
bt_ktx2_is_uastc_ldr_4x4(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_is_hdr") +wasm_bool_t bt_ktx2_is_hdr(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_is_hdr_4x4") +wasm_bool_t bt_ktx2_is_hdr_4x4(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_is_hdr_6x6") +wasm_bool_t bt_ktx2_is_hdr_6x6(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_is_ldr") +wasm_bool_t bt_ktx2_is_ldr(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_is_astc_ldr") +wasm_bool_t bt_ktx2_is_astc_ldr(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_is_xuastc_ldr") +wasm_bool_t bt_ktx2_is_xuastc_ldr(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_block_width") +uint32_t bt_ktx2_get_block_width(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_block_height") +uint32_t bt_ktx2_get_block_height(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_has_alpha") +wasm_bool_t bt_ktx2_has_alpha(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_dfd_color_model") +uint32_t bt_ktx2_get_dfd_color_model(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_dfd_color_primaries") +uint32_t bt_ktx2_get_dfd_color_primaries(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_dfd_transfer_func") +uint32_t bt_ktx2_get_dfd_transfer_func(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_is_srgb") +wasm_bool_t bt_ktx2_is_srgb(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_dfd_flags") +uint32_t bt_ktx2_get_dfd_flags(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_dfd_total_samples") +uint32_t bt_ktx2_get_dfd_total_samples(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_dfd_channel_id0") +uint32_t bt_ktx2_get_dfd_channel_id0(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_dfd_channel_id1") +uint32_t bt_ktx2_get_dfd_channel_id1(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_is_video") +wasm_bool_t bt_ktx2_is_video(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_ldr_hdr_upconversion_nit_multiplier") +float bt_ktx2_get_ldr_hdr_upconversion_nit_multiplier(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_get_level_orig_width") +uint32_t 
bt_ktx2_get_level_orig_width(uint64_t handle, uint32_t level_index, uint32_t layer_index, uint32_t face_index); + +BU_WASM_EXPORT("bt_ktx2_get_level_orig_height") +uint32_t bt_ktx2_get_level_orig_height(uint64_t handle, uint32_t level_index, uint32_t layer_index, uint32_t face_index); + +BU_WASM_EXPORT("bt_ktx2_get_level_actual_width") +uint32_t bt_ktx2_get_level_actual_width(uint64_t handle, uint32_t level_index, uint32_t layer_index, uint32_t face_index); + +BU_WASM_EXPORT("bt_ktx2_get_level_actual_height") +uint32_t bt_ktx2_get_level_actual_height(uint64_t handle, uint32_t level_index, uint32_t layer_index, uint32_t face_index); + +BU_WASM_EXPORT("bt_ktx2_get_level_num_blocks_x") +uint32_t bt_ktx2_get_level_num_blocks_x(uint64_t handle, uint32_t level_index, uint32_t layer_index, uint32_t face_index); + +BU_WASM_EXPORT("bt_ktx2_get_level_num_blocks_y") +uint32_t bt_ktx2_get_level_num_blocks_y(uint64_t handle, uint32_t level_index, uint32_t layer_index, uint32_t face_index); + +BU_WASM_EXPORT("bt_ktx2_get_level_total_blocks") +uint32_t bt_ktx2_get_level_total_blocks(uint64_t handle, uint32_t level_index, uint32_t layer_index, uint32_t face_index); + +BU_WASM_EXPORT("bt_ktx2_get_level_alpha_flag") +wasm_bool_t bt_ktx2_get_level_alpha_flag(uint64_t handle, uint32_t level_index, uint32_t layer_index, uint32_t face_index); + +BU_WASM_EXPORT("bt_ktx2_get_level_iframe_flag") +wasm_bool_t bt_ktx2_get_level_iframe_flag(uint64_t handle, uint32_t level_index, uint32_t layer_index, uint32_t face_index); + +BU_WASM_EXPORT("bt_ktx2_start_transcoding") +wasm_bool_t bt_ktx2_start_transcoding(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_create_transcode_state") +uint64_t bt_ktx2_create_transcode_state(); + +BU_WASM_EXPORT("bt_ktx2_destroy_transcode_state") +void bt_ktx2_destroy_transcode_state(uint64_t handle); + +BU_WASM_EXPORT("bt_ktx2_transcode_image_level") +wasm_bool_t bt_ktx2_transcode_image_level( + uint64_t ktx2_handle, // handle to KTX2 file, see bt_ktx2_open() + 
uint32_t level_index, uint32_t layer_index, uint32_t face_index, // KTX2 level/layer/face to transcode + uint64_t output_block_mem_ofs, // allocate using bt_alloc() + uint32_t output_blocks_buf_size_in_blocks_or_pixels, + uint32_t transcoder_texture_format_u32, // target format, TF_ETC1_RGB etc. + uint32_t decode_flags, // DECODE_FLAGS_ + uint32_t output_row_pitch_in_blocks_or_pixels, // can be 0 + uint32_t output_rows_in_pixels, // can be 0 + int channel0, int channel1, // both default to -1 + uint64_t state_handle); // thread local state: can be 0, or bt_ktx2_create_transcode_state() + diff --git a/external/basis_universal/encoder/cppspmd_flow.h b/external/basis_universal/encoder/cppspmd_flow.h index 07b592455d..3e83e9eda0 100644 --- a/external/basis_universal/encoder/cppspmd_flow.h +++ b/external/basis_universal/encoder/cppspmd_flow.h @@ -48,7 +48,7 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_return() m_kernel_exec = andnot(m_exec, m_kernel_exec); m_exec = exec_mask::all_off(); } - + template CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_unmasked(const UnmaskedBody& unmaskedBody) { @@ -61,7 +61,7 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_unmasked(const UnmaskedBody& unmaske m_kernel_exec = m_kernel_exec & orig_kernel_exec; m_exec = m_exec & orig_exec; - + check_masks(); } @@ -69,9 +69,9 @@ struct scoped_unmasked_restorer { spmd_kernel *m_pKernel; exec_mask m_orig_exec, m_orig_kernel_exec; - - CPPSPMD_FORCE_INLINE scoped_unmasked_restorer(spmd_kernel *pKernel) : - m_pKernel(pKernel), + + CPPSPMD_FORCE_INLINE scoped_unmasked_restorer(spmd_kernel *pKernel) : + m_pKernel(pKernel), m_orig_exec(pKernel->m_exec), m_orig_kernel_exec(pKernel->m_kernel_exec) { @@ -79,15 +79,15 @@ struct scoped_unmasked_restorer pKernel->m_exec = exec_mask::all_on(); } - CPPSPMD_FORCE_INLINE ~scoped_unmasked_restorer() - { + CPPSPMD_FORCE_INLINE ~scoped_unmasked_restorer() + { m_pKernel->m_kernel_exec = m_pKernel->m_kernel_exec & m_orig_kernel_exec; m_pKernel->m_exec = 
m_pKernel->m_exec & m_orig_exec; m_pKernel->check_masks(); } }; -#define SPMD_UNMASKED_BEGIN { scoped_unmasked_restorer _unmasked_restorer(this); +#define SPMD_UNMASKED_BEGIN { scoped_unmasked_restorer _unmasked_restorer(this); #define SPMD_UNMASKED_END } #if 0 @@ -113,9 +113,9 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_if_break(const vbool& cond) #ifdef _DEBUG assert(m_in_loop); #endif - + exec_mask cond_exec(cond); - + m_exec = andnot(m_exec & cond_exec, m_exec); check_masks(); @@ -157,7 +157,7 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_sifelse(const vbool& cond, const IfB m_exec = em; elseBody(); } - + m_exec = orig_exec; } @@ -165,7 +165,7 @@ template CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_if(const vbool& cond, const IfBody& ifBody) { exec_mask cond_exec(cond); - + exec_mask pre_if_exec = cond_exec & m_exec; if (any(pre_if_exec)) @@ -188,7 +188,7 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_ifelse(const vbool& cond, const IfBo bool all_flag = false; exec_mask cond_exec(cond); - + { exec_mask pre_if_exec = cond_exec & m_exec; @@ -218,9 +218,10 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_ifelse(const vbool& cond, const IfBo exec_mask unexecuted_lanes = cond_exec & m_exec; m_exec = pre_if_exec; - ifBody(); + // 11/22/2025: changed to elseBody() here, simple bug, we use the macro variants of ifelse anyway + elseBody(); - // Propagate any lanes that got disabled inside the if body into the exec mask outside the if body, but turn on any lanes that didn't execute inside the if body. + // Propagate any lanes that got disabled inside the else body into the exec mask outside the else body, but turn on any lanes that didn't execute inside the else body. 
m_exec = m_exec | unexecuted_lanes; check_masks(); @@ -290,17 +291,17 @@ struct scoped_exec_restorer2 { spmd_kernel *m_pKernel; exec_mask m_unexecuted_lanes; - - CPPSPMD_FORCE_INLINE scoped_exec_restorer2(spmd_kernel *pKernel, const vbool &cond) : + + CPPSPMD_FORCE_INLINE scoped_exec_restorer2(spmd_kernel *pKernel, const vbool &cond) : m_pKernel(pKernel) - { + { exec_mask cond_exec(cond); m_unexecuted_lanes = andnot(cond_exec, pKernel->m_exec); pKernel->m_exec = cond_exec & pKernel->m_exec; } - CPPSPMD_FORCE_INLINE ~scoped_exec_restorer2() - { + CPPSPMD_FORCE_INLINE ~scoped_exec_restorer2() + { m_pKernel->m_exec = m_pKernel->m_exec | m_unexecuted_lanes; m_pKernel->check_masks(); } @@ -327,17 +328,17 @@ class scoped_exec_saver inline scoped_exec_saver(spmd_kernel *pKernel) : m_exec(pKernel->m_exec), m_kernel_exec(pKernel->m_kernel_exec), m_continue_mask(pKernel->m_continue_mask), m_pKernel(pKernel) - { + { #ifdef _DEBUG m_in_loop = pKernel->m_in_loop; #endif } - + inline ~scoped_exec_saver() - { - m_pKernel->m_exec = m_exec; - m_pKernel->m_continue_mask = m_continue_mask; - m_pKernel->m_kernel_exec = m_kernel_exec; + { + m_pKernel->m_exec = m_exec; + m_pKernel->m_continue_mask = m_continue_mask; + m_pKernel->m_kernel_exec = m_kernel_exec; #ifdef _DEBUG m_pKernel->m_in_loop = m_in_loop; m_pKernel->check_masks(); @@ -353,7 +354,7 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_foreach(int begin, int end, const Fo { if (begin == end) return; - + if (!any(m_exec)) return; @@ -362,12 +363,12 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_foreach(int begin, int end, const Fo std::swap(begin, end); exec_mask prev_continue_mask = m_continue_mask, prev_exec = m_exec; - + int total_full = (end - begin) / PROGRAM_COUNT; int total_partial = (end - begin) % PROGRAM_COUNT; lint_t loop_index = begin + program_index; - + const int total_loops = total_full + (total_partial ? 
1 : 0); m_continue_mask = exec_mask::all_off(); @@ -390,7 +391,7 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_foreach(int begin, int end, const Fo m_continue_mask = exec_mask::all_off(); check_masks(); - + store_all(loop_index, loop_index + PROGRAM_COUNT); } @@ -443,9 +444,9 @@ struct scoped_while_restorer #ifdef _DEBUG bool m_prev_in_loop; #endif - - CPPSPMD_FORCE_INLINE scoped_while_restorer(spmd_kernel *pKernel) : - m_pKernel(pKernel), + + CPPSPMD_FORCE_INLINE scoped_while_restorer(spmd_kernel *pKernel) : + m_pKernel(pKernel), m_orig_exec(pKernel->m_exec), m_orig_continue_mask(pKernel->m_continue_mask) { @@ -457,8 +458,8 @@ struct scoped_while_restorer #endif } - CPPSPMD_FORCE_INLINE ~scoped_while_restorer() - { + CPPSPMD_FORCE_INLINE ~scoped_while_restorer() + { m_pKernel->m_exec = m_orig_exec & m_pKernel->m_kernel_exec; m_pKernel->m_continue_mask = m_orig_continue_mask; #ifdef _DEBUG @@ -514,7 +515,7 @@ struct scoped_simple_while_restorer m_pKernel(pKernel), m_orig_exec(pKernel->m_exec) { - + #ifdef _DEBUG m_prev_in_loop = pKernel->m_in_loop; pKernel->m_in_loop = true; @@ -536,18 +537,18 @@ struct scoped_simple_while_restorer #define SPMD_SWHILE(cond) { scoped_simple_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); \ while(true) { \ exec_mask CPPSPMD_GLUER2(cond_exec, __LINE__) = exec_mask(vbool(cond)); m_exec = m_exec & CPPSPMD_GLUER2(cond_exec, __LINE__); if (!any(m_exec)) break; -#define SPMD_SWEND } } +#define SPMD_SWEND } } // Cannot use SPMD break, continue, or return inside simple do #define SPMD_SDO { scoped_simple_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); while(true) { -#define SPMD_SEND_DO(cond) exec_mask CPPSPMD_GLUER2(cond_exec, __LINE__) = exec_mask(vbool(cond)); m_exec = m_exec & CPPSPMD_GLUER2(cond_exec, __LINE__); if (!any(m_exec)) break; } } +#define SPMD_SEND_DO(cond) exec_mask CPPSPMD_GLUER2(cond_exec, __LINE__) = exec_mask(vbool(cond)); m_exec = m_exec & CPPSPMD_GLUER2(cond_exec, __LINE__); if 
(!any(m_exec)) break; } } #undef SPMD_FOR #undef SPMD_END_FOR #define SPMD_FOR(for_init, for_cond) { for_init; scoped_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); while(true) { exec_mask CPPSPMD_GLUER2(cond_exec, __LINE__) = exec_mask(vbool(for_cond)); \ m_exec = m_exec & CPPSPMD_GLUER2(cond_exec, __LINE__); if (!any(m_exec)) break; #define SPMD_END_FOR(for_inc) m_exec = m_exec | m_continue_mask; m_continue_mask = exec_mask::all_off(); check_masks(); for_inc; } } - + template CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_for(const ForInitBody& forInitBody, const ForCondBody& forCondBody, const ForIncrBody& forIncrBody, const ForBody& forBody) { @@ -576,7 +577,7 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_for(const ForInitBody& forInitBody, m_exec = m_exec | m_continue_mask; m_continue_mask = exec_mask::all_off(); check_masks(); - + forIncrBody(); } diff --git a/external/basis_universal/encoder/jpgd.cpp b/external/basis_universal/encoder/jpgd.cpp index 57c7ec7b68..9a534b3ee1 100644 --- a/external/basis_universal/encoder/jpgd.cpp +++ b/external/basis_universal/encoder/jpgd.cpp @@ -3,10 +3,11 @@ // Supports box and linear chroma upsampling. // // Released under two licenses. You are free to choose which license you want: -// License 1: +// License 1: // Public Domain // // License 2: +// Copyright (C) 2019-2026 Binomial LLC. All Rights Reserved. // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -24,6 +25,10 @@ // v2.00, March 20, 2020: Fuzzed with zzuf and afl. Fixed several issues, converted most assert()'s to run-time checks. Added chroma upsampling. Removed freq. domain upsampling. gcc/clang warnings. 
// +#if defined(__wasi__) +#pragma message("__wasi__ defined in jpgd.cpp: note if a decode error occurs, the app will exit because wasi doesn't support longjmp yet.") +#endif + #include "jpgd.h" #include #include @@ -138,7 +143,7 @@ namespace jpgd { { static void idct(int* pTemp, const jpgd_block_t* pSrc) { - (void)pTemp; + (void)pTemp; (void)pSrc; } }; @@ -253,10 +258,10 @@ namespace jpgd { 8,8,8,8,8,7,6,4, 8,8,8,8,8,7,6,5, 8,8,8,8,8,7,6,6, 8,8,8,8,8,7,7,6, 8,8,8,8,8,8,7,6, 8,8,8,8,8,8,8,6, 8,8,8,8,8,8,8,7, 8,8,8,8,8,8,8,8, }; - static const uint8 s_idct_col_table[] = - { - 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 + static const uint8 s_idct_col_table[] = + { + 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 }; // Scalar "fast pathing" IDCT. 
@@ -608,7 +613,14 @@ namespace jpgd { { m_error_code = status; free_all_blocks(); + +#ifdef __wasi__ + // HACK HACK for wasi's lack of longjmp support + fprintf(stderr, "jpeg_decoder::stop_decoding: JPEG decode failed with status: %i\n", (int)status); + exit(EXIT_FAILURE); +#else longjmp(m_jmp_state, status); +#endif } void* jpeg_decoder::alloc(size_t nSize, bool zero) @@ -2071,8 +2083,10 @@ namespace jpgd { int jpeg_decoder::decode_next_mcu_row() { +#ifndef __wasi__ if (setjmp(m_jmp_state)) return JPGD_FAILED; +#endif const bool chroma_y_filtering = (m_flags & cFlagLinearChromaFiltering) && ((m_scan_type == JPGD_YH2V2) || (m_scan_type == JPGD_YH1V2)) && (m_image_x_size >= 2) && (m_image_y_size >= 2); if (chroma_y_filtering) @@ -2987,8 +3001,10 @@ namespace jpgd { jpeg_decoder::jpeg_decoder(jpeg_decoder_stream* pStream, uint32_t flags) { +#ifndef __wasi__ if (setjmp(m_jmp_state)) return; +#endif decode_init(pStream, flags); } @@ -3000,8 +3016,10 @@ namespace jpgd { if (m_error_code) return JPGD_FAILED; +#ifndef __wasi__ if (setjmp(m_jmp_state)) return JPGD_FAILED; +#endif decode_start(); diff --git a/external/basis_universal/encoder/jpgd.h b/external/basis_universal/encoder/jpgd.h index 92e53335c6..bd1ad808c0 100644 --- a/external/basis_universal/encoder/jpgd.h +++ b/external/basis_universal/encoder/jpgd.h @@ -1,16 +1,18 @@ // jpgd.h - C++ class for JPEG decompression. -// Public domain, Rich Geldreich +// Dual licensed: Public domain, Rich Geldreich , or Apache 2.0 (see jpgd.cpp) #ifndef JPEG_DECODER_H #define JPEG_DECODER_H #include #include +#ifndef __wasi__ #include +#endif #include #include #ifdef _MSC_VER -#define JPGD_NORETURN __declspec(noreturn) +#define JPGD_NORETURN __declspec(noreturn) #elif defined(__GNUC__) #define JPGD_NORETURN __attribute__ ((noreturn)) #else @@ -140,7 +142,7 @@ namespace jpgd int begin_decoding(); // Returns the next scan line. 
- // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (get_bytes_per_pixel() will return 1). + // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (get_bytes_per_pixel() will return 1). // Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and get_bytes_per_pixel() will return 4). // Returns JPGD_SUCCESS if a scan line has been returned. // Returns JPGD_DONE if all scan lines have been returned. @@ -191,7 +193,10 @@ namespace jpgd char m_data[1]; }; + // TODO: we can get rid of longjmp entirely +#ifndef __wasi__ jmp_buf m_jmp_state; +#endif uint32_t m_flags; mem_block* m_pMem_blocks; int m_image_x_size; diff --git a/external/basis_universal/encoder_lib/encoder_lib.vcxproj b/external/basis_universal/encoder_lib/encoder_lib.vcxproj index 3ffa09af26..90daddeb04 100644 --- a/external/basis_universal/encoder_lib/encoder_lib.vcxproj +++ b/external/basis_universal/encoder_lib/encoder_lib.vcxproj @@ -31,6 +31,8 @@ + + @@ -59,6 +61,8 @@ + + @@ -92,12 +96,14 @@ + + @@ -119,40 +125,40 @@ StaticLibrary true Unicode - v143 + v145 StaticLibrary false true Unicode - v143 + v145 StaticLibrary true Unicode - v143 + v145 StaticLibrary true Unicode - v143 + v145 StaticLibrary false true Unicode - v143 + v145 StaticLibrary false true Unicode - v143 + v145 @@ -188,7 +194,7 @@ pch.h ..\OpenCL stdcpp17 - StreamingSIMDExtensions2 + AdvancedVectorExtensions @@ -207,7 +213,7 @@ NotUsing pch.h ..\OpenCL - StreamingSIMDExtensions2 + AdvancedVectorExtensions false false stdcpp17 @@ -227,7 +233,7 @@ true NotUsing pch.h - StreamingSIMDExtensions2 + AdvancedVectorExtensions ..\OpenCL Level4 stdcpp17 @@ -267,7 +273,7 @@ pch.h false false - StreamingSIMDExtensions2 + AdvancedVectorExtensions ..\OpenCL Full AnySuitable @@ -313,4 +319,4 @@ - + \ No newline at end of file diff --git a/external/basis_universal/encoder_lib/encoder_lib.vcxproj.filters 
b/external/basis_universal/encoder_lib/encoder_lib.vcxproj.filters index aaee9d98ac..093809c2df 100644 --- a/external/basis_universal/encoder_lib/encoder_lib.vcxproj.filters +++ b/external/basis_universal/encoder_lib/encoder_lib.vcxproj.filters @@ -1,4 +1,4 @@ - + @@ -99,6 +99,12 @@ Source Files\encoder + + Source Files\encoder + + + Source Files\encoder + @@ -227,6 +233,15 @@ Source Files\encoder + + Source Files\encoder + + + Source Files\encoder + + + Source Files\transcoder + @@ -256,5 +271,8 @@ Source Files\encoder + + Source Files\transcoder + - + \ No newline at end of file diff --git a/external/basis_universal/example/example.cpp b/external/basis_universal/example/example.cpp index 5062d3d209..2e24d173ba 100644 --- a/external/basis_universal/example/example.cpp +++ b/external/basis_universal/example/example.cpp @@ -1,15 +1,20 @@ // File: example.cpp // This minimal LDR/HDR encoding/transcoder example relies on encoder_lib. It shows how to use the encoder in a few different ways, and the transcoder. -// -// It should be compiled with the preprocessor macros BASISU_SUPPORT_SSE (typically 1) and BASISU_SUPPORT_OPENCL (typically 1). +// +// It should be compiled with the preprocessor macros BASISU_SUPPORT_SSE (typically 1) and BASISU_SUPPORT_OPENCL (typically 1). // They should be set to the same preprocesor options as the encoder. // If OpenCL is enabled, the "..\OpenCL" directory should be in your compiler's include path. Additionally, link against "..\OpenCL\lib\opencl64.lib". #include "../encoder/basisu_comp.h" #include "../transcoder/basisu_transcoder.h" #include "../encoder/basisu_gpu_texture.h" +#include "../encoder/basisu_astc_ldr_encode.h" #define USE_ENCODER (1) +#define ENABLE_DEBUG_PRINTF (0) + +//#define FORCE_SAN_FAILURE + const bool USE_OPENCL = false; // The encoder lives in the "basisu" namespace. 
@@ -17,7 +22,7 @@ const bool USE_OPENCL = false; using namespace basisu; // Quick function to create a visualization of the Mandelbrot set as an float HDR image. -static void create_mandelbrot(imagef& img) +static void create_mandelbrot(imagef& img) { const int width = 256; const int height = 256; @@ -25,30 +30,30 @@ static void create_mandelbrot(imagef& img) // Create a more interesting color palette uint8_t palette[256][3]; - for (int i = 0; i < 256; i++) + for (int i = 0; i < 256; i++) { - if (i < 64) + if (i < 64) { // Blue to cyan transition palette[i][0] = static_cast(0); // Red component palette[i][1] = static_cast(i * 4); // Green component palette[i][2] = static_cast(255); // Blue component } - else if (i < 128) + else if (i < 128) { // Cyan to green transition palette[i][0] = static_cast(0); // Red component palette[i][1] = static_cast(255); // Green component palette[i][2] = static_cast(255 - (i - 64) * 4); // Blue component } - else if (i < 192) + else if (i < 192) { // Green to yellow transition palette[i][0] = static_cast((i - 128) * 4); // Red component palette[i][1] = static_cast(255); // Green component palette[i][2] = static_cast(0); // Blue component } - else + else { // Yellow to red transition palette[i][0] = static_cast(255); // Red component @@ -58,9 +63,9 @@ static void create_mandelbrot(imagef& img) } // Iterate over each pixel in the image - for (int px = 0; px < width; px++) + for (int px = 0; px < width; px++) { - for (int py = 0; py < height; py++) + for (int py = 0; py < height; py++) { double x0 = (px - width / 2.0) * 4.0 / width; double y0 = (py - height / 2.0) * 4.0 / height; @@ -71,7 +76,7 @@ static void create_mandelbrot(imagef& img) double x_temp; int iter; - for (iter = 0; iter < max_iter; iter++) + for (iter = 0; iter < max_iter; iter++) { zx_squared = zx * zx; zy_squared = zy * zy; @@ -148,7 +153,7 @@ static bool encode_uastc_ldr() // basis_compress() is a simple wrapper around the basis_compressor_params and basis_compressor 
classes. void* pKTX2_data = basis_compress( - basist::basis_tex_format::cUASTC4x4, + basist::basis_tex_format::cUASTC_LDR_4x4, source_images, cFlagThreaded | cFlagPrintStats | cFlagDebug | cFlagPrintStatus, 0.0f, &file_size, @@ -173,7 +178,7 @@ static bool encode_uastc_ldr() static bool encode_uastc_hdr() { const uint32_t W = 256, H = 256; - + imagef img(W, H); #if 1 @@ -196,11 +201,11 @@ static bool encode_uastc_hdr() params.m_write_output_basis_or_ktx2_files = true; params.m_out_filename = "test_uastc_hdr.ktx2"; params.m_perceptual = true; - + #if 1 // Create a job pool containing 7 total threads (the calling thread plus 6 additional threads). - // A job pool must be created, even if threading is disabled. It's fine to pass in 0 for NUM_THREADS. - const uint32_t NUM_THREADS = 6; + // A job pool must be created, even if threading is disabled. + const uint32_t NUM_THREADS = 7; job_pool jp(NUM_THREADS); params.m_pJob_pool = &jp; params.m_multithreading = true; @@ -219,13 +224,13 @@ static bool encode_uastc_hdr() basisu::basis_compressor::error_code ec = comp.process(); if (ec != basisu::basis_compressor::cECSuccess) return false; - + return true; } // This example function loads a .KTX2 file and then transcodes it to various compressed/uncompressed texture formats. -// It writes .DDS and .ASTC files. -// ARM's astcenc tool can be used to unpack the .ASTC file: +// It writes .DDS and .ASTC files. +// ARM's astcenc tool can be used to unpack the .ASTC file: // astcenc-avx2.exe -dh test_uastc_hdr_astc.astc out.exr static bool transcode_hdr() { @@ -252,10 +257,10 @@ static bool transcode_hdr() // This example only transcodes UASTC HDR textures. if (!transcoder.is_hdr()) return false; - + // Begin transcoding (this will be a no-op with UASTC HDR textures, but you still need to do it. For ETC1S it'll unpack the global codebooks.) transcoder.start_transcoding(); - + // Transcode to BC6H and write a BC6H .DDS file. 
{ gpu_image tex(texture_format::cBC6HUnsigned, width, height); @@ -268,7 +273,7 @@ static bool transcode_hdr() gpu_image_vec tex_vec; tex_vec.push_back(tex); - if (!write_compressed_texture_file("test_uastc_hdr_bc6h.dds", tex_vec, true)) + if (!write_compressed_texture_file("test_uastc_hdr_bc6h.dds", tex_vec, false)) return false; } @@ -423,7 +428,7 @@ const uint32_t NUM_TEST_BLOCKS = (sizeof(g_test_blocks) / sizeof(g_test_blocks[0 static bool block_unpack_and_transcode_example(void) { printf("block_unpack_and_transcode_example:\n"); - + for (uint32_t test_block_iter = 0; test_block_iter < NUM_TEST_BLOCKS; test_block_iter++) { printf("-- Test block %u:\n", test_block_iter); @@ -431,7 +436,7 @@ static bool block_unpack_and_transcode_example(void) const uint8_t* pASTC_blk = &g_test_blocks[test_block_iter * 2 + 0][0]; const uint8_t* pBC6H_blk = &g_test_blocks[test_block_iter * 2 + 1][0]; - // Unpack the physical ASTC block to logical. + // Unpack the physical ASTC block to logical. // Note this is a full ASTC block unpack, and is not specific to UASTC. It does not verify that the block follows the UASTC HDR spec, only ASTC. 
astc_helpers::log_astc_block log_blk; bool status = astc_helpers::unpack_block(pASTC_blk, log_blk, 4, 4); @@ -470,7 +475,7 @@ static bool block_unpack_and_transcode_example(void) return false; } } // test_block_iter - + printf("Transcode test OK\n"); return true; @@ -492,7 +497,7 @@ static void fuzz_uastc_hdr_transcoder_test() for (uint32_t t = 0; t < NUM_TRIES; t++) { basist::astc_blk astc_blk; - + if (rg.frand(0.0f, 1.0f) < .3f) { // Fully random block @@ -564,6 +569,85 @@ static void fuzz_uastc_hdr_transcoder_test() printf("OK\n"); } +void wrap_image(const image& src, image& dst, int gridX, int gridY, float maxOffset, bool randomize, basisu::rand &rnd) +{ + if (gridX < 1) gridX = 1; + if (gridY < 1) gridY = 1; + + const int vxCountX = gridX + 1; + const int vxCountY = gridY + 1; + const int stride = vxCountX; + + const int w = src.get_width(); + const int h = src.get_height(); + + dst.resize(w, h); + + dst.set_all(g_black_color); + + basisu::vector verts(vxCountX * vxCountY); + basisu::vector uvs(vxCountX * vxCountY); + basisu::vector cols(vxCountX * vxCountY); + + for (int gy = 0; gy <= gridY; ++gy) + { + for (int gx = 0; gx <= gridX; ++gx) + { + float x = (gx / float(gridX)) * (w - 1); + float y = (gy / float(gridY)) * (h - 1); + + float rx = x; + float ry = y; + + if (randomize) + { + rx += rnd.frand(-maxOffset, maxOffset); + ry += rnd.frand(-maxOffset, maxOffset); + } + + verts[gy * stride + gx] = { rx, ry }; + + float u = gx / float(gridX); + float v = gy / float(gridY); + + u = std::max(0.0f, std::min(1.0f, u)); + v = std::max(0.0f, std::min(1.0f, v)); + + uvs[gy * stride + gx] = { u, v }; + + color_rgba c(g_white_color); + + cols[gy * stride + gx] = c; + } + } + + for (int gy = 0; gy < gridY; ++gy) + { + for (int gx = 0; gx < gridX; ++gx) + { + int i0 = gy * stride + gx; + int i1 = i0 + 1; + int i2 = i0 + stride; + int i3 = i2 + 1; + + tri2 tA; + tA.p0 = verts[i0]; tA.p1 = verts[i1]; tA.p2 = verts[i3]; + tA.t0 = uvs[i0]; tA.t1 = uvs[i1]; tA.t2 = uvs[i3]; 
+ tA.c0 = cols[i0]; tA.c1 = cols[i1]; tA.c2 = cols[i3]; + + draw_tri2(dst, &src, tA, randomize); + + tri2 tB; + tB.p0 = verts[i0]; tB.p1 = verts[i3]; tB.p2 = verts[i2]; + tB.t0 = uvs[i0]; tB.t1 = uvs[i3]; tB.t2 = uvs[i2]; + tB.c0 = cols[i0]; tB.c1 = cols[i3]; tB.c2 = cols[i2]; + + draw_tri2(dst, &src, tB, randomize); + } // gx + } // by + +} + enum class codec_class { cETC1S = 0, @@ -571,20 +655,28 @@ enum class codec_class cUASTC_HDR_4x4 = 2, cASTC_HDR_6x6 = 3, cUASTC_HDR_6x6 = 4, + cASTC_LDR = 5, + cXUASTC_LDR = 6, cTOTAL }; -// The main point of this test is to exercise lots of internal compressor code paths, and transcoder code paths. -bool random_compression_fuzz_test() +// The main point of this test is to exercise lots of internal code paths. +bool random_compress_test() { printf("Random XUASTC/ASTC LDR 4x4-12x12 compression test:\n"); - //const uint32_t N = 256; - const uint32_t N = 64; - const uint32_t MAX_WIDTH = 1024, MAX_HEIGHT = 1024; + const uint32_t num_images = 18; + image test_images[num_images + 1]; + for (uint32_t i = 0; i < num_images; i++) + load_png(fmt_string("../test_files/kodim{02}.png", 1 + i).c_str(), test_images[i]); + + const uint32_t N = 16; + //const uint32_t N = 5000; + const uint32_t MAX_WIDTH = 1024, MAX_HEIGHT = 1024; + basisu::rand rnd; - + float lowest_psnr1 = BIG_FLOAT_VAL, lowest_psnr2 = BIG_FLOAT_VAL; struct result @@ -598,9 +690,11 @@ bool random_compression_fuzz_test() for (uint32_t i = 0; i < N; i++) { - uint32_t seed = 0x2603455 + i; + uint32_t seed = 166136844 + i; - //seed = 23082246; // ETC1S perceptual colorspace error overflow test + //seed = 23082246; // etc1s 1-bit SSE overflow + //seed = 56636601; // UASTC HDR 4x4 assert tol + //seed = 56636744; // HDR 6x6 float overflow fmt_printf("------------------------------ Seed: {}\n", seed); rnd.seed(seed); @@ -609,7 +703,7 @@ bool random_compression_fuzz_test() const uint32_t h = rnd.irand(1, MAX_HEIGHT); const bool mips = rnd.bit(); const bool use_a = rnd.bit(); - + 
fmt_printf("Trying {}x{}, mips: {}, use_a: {}\n", w, h, mips, use_a); // Chose a random codec/block size to test @@ -618,7 +712,10 @@ bool random_compression_fuzz_test() bool is_hdr = false; uint32_t rnd_codec_class = rnd.irand(0, (uint32_t)codec_class::cTOTAL - 1); - + + // TODO - make this a command line + //rnd_codec_class = rnd.bit() ? (uint32_t)codec_class::cXUASTC_LDR : (uint32_t)codec_class::cASTC_LDR; + //rnd_codec_class = (uint32_t)codec_class::cXUASTC_LDR; //rnd_codec_class = (uint32_t)codec_class::cETC1S; switch (rnd_codec_class) @@ -630,7 +727,7 @@ bool random_compression_fuzz_test() } case (uint32_t)codec_class::cUASTC_LDR_4x4: { - tex_mode = basist::basis_tex_format::cUASTC4x4; + tex_mode = basist::basis_tex_format::cUASTC_LDR_4x4; break; } case (uint32_t)codec_class::cUASTC_HDR_4x4: @@ -647,10 +744,24 @@ bool random_compression_fuzz_test() } case (uint32_t)codec_class::cUASTC_HDR_6x6: { - tex_mode = basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE; + tex_mode = basist::basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE; is_hdr = true; break; } + case (uint32_t)codec_class::cASTC_LDR: + { + // ASTC LDR 4x4-12x12 + const uint32_t block_variant = rnd.irand(0, astc_helpers::NUM_ASTC_BLOCK_SIZES - 1); + tex_mode = (basist::basis_tex_format)((uint32_t)basist::basis_tex_format::cASTC_LDR_4x4 + block_variant); + break; + } + case (uint32_t)codec_class::cXUASTC_LDR: + { + // XUASTC LDR 4x4-12x12 + const uint32_t block_variant = rnd.irand(0, astc_helpers::NUM_ASTC_BLOCK_SIZES - 1); + tex_mode = (basist::basis_tex_format)((uint32_t)basist::basis_tex_format::cXUASTC_LDR_4x4 + block_variant); + break; + } default: assert(0); tex_mode = basist::basis_tex_format::cETC1S; @@ -658,9 +769,9 @@ bool random_compression_fuzz_test() } fmt_printf("Testing basis_tex_format={}\n", (uint32_t)tex_mode); - + size_t comp_size = 0; - + // Create random LDR source image to compress image src_img; src_img.resize(w, h, w, color_rgba(rnd.byte(), rnd.byte(), rnd.byte(), use_a ? 
rnd.byte() : 255)); @@ -668,7 +779,7 @@ bool random_compression_fuzz_test() if (rnd.irand(0, 7) >= 1) { const uint32_t nt = rnd.irand(0, 1000); - + for (uint32_t k = 0; k < nt; k++) { color_rgba c(rnd.byte(), rnd.byte(), rnd.byte(), use_a ? rnd.byte() : 255); @@ -680,7 +791,7 @@ bool random_compression_fuzz_test() uint32_t xe = rnd.irand(0, w - 1); if (xs > xe) std::swap(xs, xe); - + uint32_t ys = rnd.irand(0, h - 1); uint32_t ye = rnd.irand(0, h - 1); if (ys > ye) @@ -695,7 +806,7 @@ bool random_compression_fuzz_test() uint32_t ys = rnd.irand(0, h - 1); uint32_t ye = rnd.irand(0, h - 1); - + basisu::draw_line(src_img, xs, ys, xe, ye, c); } else if (r == 6) @@ -712,9 +823,9 @@ bool random_compression_fuzz_test() uint32_t y = rnd.irand(0, h - 1); uint32_t sx = rnd.irand(1, 3); uint32_t sy = rnd.irand(1, 3); - + uint32_t l = rnd.irand(1, 10); - + char buf[32] = {}; for (uint32_t j = 0; j < l; j++) buf[j] = (char)rnd.irand(32, 127); @@ -725,13 +836,13 @@ bool random_compression_fuzz_test() { uint32_t xs = rnd.irand(0, w - 1); uint32_t ys = rnd.irand(0, h - 1); - + uint32_t xl = rnd.irand(1, 100); uint32_t yl = rnd.irand(1, 100); uint32_t xe = minimum(xs + xl - 1, w - 1); uint32_t ye = minimum(ys + yl - 1, h - 1); - + color_rgba cols[4]; cols[0] = c; for (uint32_t j = 1; j < 4; j++) @@ -778,24 +889,101 @@ bool random_compression_fuzz_test() } else src_img(x, y) = q; - } // x + } // x } // y } + else if ((r < 20) && (num_images)) + { + uint32_t image_index = rnd.irand(0, num_images - 1); + + const image& img = test_images[image_index]; + if (img.get_width()) + { + float tw = (float)rnd.irand(1, minimum(128, img.get_width())); + float th = (float)rnd.irand(1, minimum(128, img.get_height())); + + float u = (float)rnd.irand(0, img.get_width() - (int)tw); + float v = (float)rnd.irand(0, img.get_height() - (int)th); + + u /= (float)img.get_width(); + v /= (float)img.get_height(); + + tw /= (float)img.get_width(); + th /= (float)img.get_height(); + + float dx = 
(float)rnd.irand(0, src_img.get_width() - 1); + float dy = (float)rnd.irand(0, src_img.get_height() - 1); + + float dw = (float)rnd.irand(1, minimum(256, img.get_width())); + float dh = (float)rnd.irand(1, minimum(256, img.get_height())); + + tri2 tri; + tri.p0.set(dx, dy); + tri.t0.set(u, v); + + tri.p1.set(dx + dw, dy); + tri.t1.set(u + tw, v); + + tri.p2.set(dx + dw, dy + dh); + tri.t2.set(u + tw, v + th); + + bool alpha_blend = rnd.bit(); + + if (alpha_blend) + { + tri.c0.set(rnd.irand(100, 255), rnd.irand(100, 255), rnd.irand(100, 255), rnd.irand(1, 255)); + tri.c1.set(rnd.irand(100, 255), rnd.irand(100, 255), rnd.irand(100, 255), rnd.irand(1, 255)); + tri.c2.set(rnd.irand(100, 255), rnd.irand(100, 255), rnd.irand(100, 255), rnd.irand(1, 255)); + } + else + { + tri.c0 = g_white_color; + tri.c1 = g_white_color; + tri.c2 = g_white_color; + } + + draw_tri2(src_img, &img, tri, alpha_blend); + + tri.p0.set(dx, dy); + tri.t0.set(u, v); + + tri.p1.set(dx + dw, dy + dh); + tri.t1.set(u + tw, v + th); + tri.c1 = tri.c2; + + tri.p2.set(dx, dy + dh); + tri.t2.set(u, v + th); + tri.c2.set(rnd.irand(100, 255), rnd.irand(100, 255), rnd.irand(100, 255), rnd.irand(1, 255)); + + draw_tri2(src_img, &img, tri, alpha_blend); + } + } else { src_img(rnd.irand(0, w - 1), rnd.irand(0, h - 1)) = c; } } } - + if ((use_a) && (rnd.irand(0, 3) >= 2)) { const uint32_t nt = rnd.irand(0, 1000); - + for (uint32_t k = 0; k < nt; k++) src_img(rnd.irand(0, w - 1), rnd.irand(0, h - 1)).a = rnd.byte(); } + if (rnd.bit()) + { + int gridX = rnd.irand(8, 24); + int gridY = rnd.irand(8, 24); + float maxOffset = rnd.frand(0.0f, (float)maximum(gridX, gridY)); + + image tmp_img; + wrap_image(src_img, tmp_img, gridX, gridY, maxOffset, true, rnd); + src_img.swap(tmp_img); + } + if (!use_a) { for (uint32_t y = 0; y < h; y++) @@ -805,12 +993,12 @@ bool random_compression_fuzz_test() //save_png("test.png", src_img); //fmt_printf("Has alpha: {}\n", src_img.has_alpha()); - + // Choose randomized codec 
parameters uint32_t flags = cFlagPrintStats | cFlagValidateOutput | cFlagPrintStatus; - - //flags |= cFlagDebug; - + + flags |= cFlagDebug; + flags |= cFlagThreaded; if (rnd.bit()) @@ -826,16 +1014,16 @@ bool random_compression_fuzz_test() flags |= cFlagREC2020; float quality = 0.0f; - + switch (rnd_codec_class) { case (uint32_t)codec_class::cETC1S: { // ETC1S - + // Choose random ETC1S quality level flags |= rnd.irand(1, 255); - + break; } case (uint32_t)codec_class::cUASTC_LDR_4x4: @@ -846,7 +1034,6 @@ bool random_compression_fuzz_test() { // Choose random RDO lambda quality = rnd.frand(0.0, 10.0f); - flags |= cFlagUASTCRDO; } // Choose random effort level @@ -857,7 +1044,7 @@ bool random_compression_fuzz_test() case (uint32_t)codec_class::cUASTC_HDR_4x4: { // UASTC HDR 4x4 - + // Choose random effort level. flags |= rnd.irand(uastc_hdr_4x4_codec_options::cMinLevel, uastc_hdr_4x4_codec_options::cMaxLevel); @@ -867,7 +1054,7 @@ bool random_compression_fuzz_test() case (uint32_t)codec_class::cUASTC_HDR_6x6: { // RDO ASTC HDR 6x6 or UASTC HDR 6x6 - + // Chose random effort level flags |= rnd.irand(0, astc_6x6_hdr::ASTC_HDR_6X6_MAX_USER_COMP_LEVEL); @@ -879,12 +1066,33 @@ bool random_compression_fuzz_test() break; } + case (uint32_t)codec_class::cASTC_LDR: + case (uint32_t)codec_class::cXUASTC_LDR: + { + // ASTC/XUASTC LDR 4x4-12x12 + + // Choose random profile + uint32_t xuastc_ldr_syntax = rnd.irand(0, (uint32_t)basist::astc_ldr_t::xuastc_ldr_syntax::cTotal - 1); + flags |= (xuastc_ldr_syntax << cFlagXUASTCLDRSyntaxShift); + + // Choose random effort + uint32_t effort = rnd.irand(basisu::astc_ldr::EFFORT_LEVEL_MIN, basisu::astc_ldr::EFFORT_LEVEL_MAX); + flags |= effort; + + // Choose random weight grid DCT quality + quality = (float)rnd.frand(1.0f, 100.0f); + + if (rnd.irand(0, 7) == 0) + quality = 0.0f; // sometimes disable DCT + + break; + } default: { assert(0); } } - + void* pComp_data = nullptr; image_stats stats; @@ -892,7 +1100,7 @@ bool 
random_compression_fuzz_test() { basisu::vector hdr_source_images; imagef hdr_src_img(src_img.get_width(), src_img.get_height()); - + const float max_y = rnd.frand(.000125f, 30000.0f) / 255.0f; for (uint32_t y = 0; y < src_img.get_height(); y++) @@ -907,7 +1115,7 @@ bool random_compression_fuzz_test() } //write_exr("test.exr", hdr_src_img, 3, 0); - + hdr_source_images.push_back(hdr_src_img); pComp_data = basisu::basis_compress(tex_mode, hdr_source_images, flags, quality, &comp_size, &stats); } @@ -917,7 +1125,8 @@ bool random_compression_fuzz_test() ldr_source_images.push_back(src_img); //save_png("test.png", src_img); - + //save_png(fmt_string("test_{}.png", seed), src_img); + pComp_data = basisu::basis_compress(tex_mode, ldr_source_images, flags, quality, &comp_size, &stats); } @@ -941,39 +1150,789 @@ bool random_compression_fuzz_test() psnr2 }); } // i - + printf("PSNR Results:\n"); - + for (uint32_t i = 0; i < results.size(); i++) fmt_printf("{},{},{},{}\n", results[i].m_seed, (uint32_t)results[i].m_fmt, results[i].m_psnr1, results[i].m_psnr2); - + printf("\n"); for (uint32_t i = 0; i < results.size(); i++) fmt_printf("seed={} tex_mode={}, psnr1={}, psnr2={}\n", results[i].m_seed, (uint32_t)results[i].m_fmt, results[i].m_psnr1, results[i].m_psnr2); - + // Success here is essentially not crashing or asserting or SAN'ing earlier printf("Success\n"); + + return true; +} + +static bool test_compress_etc1s() +{ + printf("test_compress_etc1s:\n"); + + const uint32_t W = 256, H = 256; + + image img(W, H); + for (uint32_t y = 0; y < H; y++) + for (uint32_t x = 0; x < W; x++) + img(x, y).set(((x ^ y) & 1) ? 255 : 0); + + basis_compressor_params params; + + // Set the format to ETC1S using the recommended unified method. + params.set_format_mode_and_quality_effort(basist::basis_tex_format::cETC1S, 75, 3); + + // Input is sRGB + params.set_srgb_options(true); + + // Provide the HDR source image. 
+ params.m_source_images.push_back(img); + + // Enable debug/status output and statistics. + params.m_debug = true; + params.m_status_output = true; + params.m_compute_stats = true; + + // Write a .KTX2 file to disk. + params.m_create_ktx2_file = true; + params.m_write_output_basis_or_ktx2_files = true; + params.m_out_filename = "test_etc1s.ktx2"; + + // Create a job pool. A job pool MUST always be created, even if threading is disabled. + // num_total_threads is the TOTAL thread count: 1 = calling thread only, 7 = calling thread + 6 extra. + const uint32_t NUM_THREADS = 7; + job_pool jp(NUM_THREADS); + params.m_pJob_pool = &jp; + params.m_multithreading = true; + + // Initialize and run the compressor. + basis_compressor comp; + if (!comp.init(params)) + return false; + + basisu::basis_compressor::error_code ec = comp.process(); + if (ec != basisu::basis_compressor::cECSuccess) + return false; + + return true; +} + +static bool test_compress_uastc_ldr_4x4() +{ + printf("test_compress_uastc_ldr_4x4:\n"); + + const uint32_t W = 256, H = 256; + + image img(W, H); + for (uint32_t y = 0; y < H; y++) + for (uint32_t x = 0; x < W; x++) + img(x, y).set(((x ^ y) & 1) ? 255 : 0); + + basis_compressor_params params; + + // Set the format to UASTC LDR 4x4 using the recommended unified method. + params.set_format_mode_and_quality_effort(basist::basis_tex_format::cUASTC_LDR_4x4, 75, 3); + + // Input is sRGB + params.set_srgb_options(true); + + // Provide the HDR source image. + params.m_source_images.push_back(img); + + // Enable debug/status output and statistics. + params.m_debug = true; + params.m_status_output = true; + params.m_compute_stats = true; + + // Write a .KTX2 file to disk. + params.m_create_ktx2_file = true; + params.m_write_output_basis_or_ktx2_files = true; + params.m_out_filename = "test_uastc_ldr_4x4.ktx2"; + + // Create a job pool. A job pool MUST always be created, even if threading is disabled. 
+ // num_total_threads is the TOTAL thread count: 1 = calling thread only, 7 = calling thread + 6 extra. + const uint32_t NUM_THREADS = 7; + job_pool jp(NUM_THREADS); + params.m_pJob_pool = &jp; + params.m_multithreading = true; + + // Initialize and run the compressor. + basis_compressor comp; + if (!comp.init(params)) + return false; + + basisu::basis_compressor::error_code ec = comp.process(); + if (ec != basisu::basis_compressor::cECSuccess) + return false; + + return true; +} + +static bool test_compress_uastc_hdr_4x4() +{ + printf("test_compress_uastc_hdr_4x4:\n"); + + const uint32_t W = 256, H = 256; + + imagef img(W, H); + for (uint32_t y = 0; y < H; y++) + for (uint32_t x = 0; x < W; x++) + img(x, y).set(((x ^ y) & 1) ? basist::ASTC_HDR_MAX_VAL : 1000.0f); + + basis_compressor_params params; + + // Set the format to UASTC HDR 4x4 using the recommended unified method. + params.set_format_mode_and_quality_effort(basist::basis_tex_format::cUASTC_HDR_4x4, 100, 8); + + // This sets the low-level UASTC HDR 4x4 codec quality level directly (overriding set_format_mode_and_quality_effort()'s unified effort level set previously). + //params.m_uastc_hdr_4x4_options.set_quality_level(3); + + // Use perceptual channel weights (2,3,1) for RGB error metrics instead of uniform (1,1,1). + params.set_srgb_options(true); + + // Provide the HDR source image. + params.m_source_images_hdr.push_back(img); + + // Enable debug/status output and statistics. + params.m_debug = true; + params.m_status_output = true; + params.m_compute_stats = true; + + // Write a .KTX2 file to disk. + params.m_create_ktx2_file = true; + params.m_write_output_basis_or_ktx2_files = true; + params.m_out_filename = "test_uastc_hdr_4x4.ktx2"; + + // Create a job pool. A job pool MUST always be created, even if threading is disabled. + // num_total_threads is the TOTAL thread count: 1 = calling thread only, 7 = calling thread + 6 extra. 
+ const uint32_t NUM_THREADS = 7; + job_pool jp(NUM_THREADS); + params.m_pJob_pool = &jp; + params.m_multithreading = true; + + // Initialize and run the compressor. + basis_compressor comp; + if (!comp.init(params)) + return false; + + basisu::basis_compressor::error_code ec = comp.process(); + if (ec != basisu::basis_compressor::cECSuccess) + return false; + + return true; +} + +// Compresses a 256x256 float checkerboard (ASTC_HDR_MAX_VAL/1000.0f) to ASTC HDR 6x6 and writes test_astc_hdr_6x6.ktx2. Returns false if init() or process() fails. +static bool test_compress_astc_hdr_6x6() +{ + printf("test_compress_astc_hdr_6x6:\n"); + + const uint32_t W = 256, H = 256; + + imagef img(W, H); + for (uint32_t y = 0; y < H; y++) + for (uint32_t x = 0; x < W; x++) + img(x, y).set(((x ^ y) & 1) ? basist::ASTC_HDR_MAX_VAL : 1000.0f); + + basis_compressor_params params; + + // Set the format to ASTC HDR 6x6 using the recommended unified method. + params.set_format_mode_and_quality_effort(basist::basis_tex_format::cASTC_HDR_6x6, 100, 8); + + params.set_srgb_options(true); + + // Provide the HDR source image. + params.m_source_images_hdr.push_back(img); + + // Enable debug/status output and statistics. + params.m_debug = true; + params.m_status_output = true; + params.m_compute_stats = true; + + // Write a .KTX2 file to disk. + params.m_create_ktx2_file = true; + params.m_write_output_basis_or_ktx2_files = true; + params.m_out_filename = "test_astc_hdr_6x6.ktx2"; + + // Create a job pool. A job pool MUST always be created, even if threading is disabled. + // num_total_threads is the TOTAL thread count: 1 = calling thread only, 7 = calling thread + 6 extra. + const uint32_t NUM_THREADS = 7; + job_pool jp(NUM_THREADS); + params.m_pJob_pool = &jp; + params.m_multithreading = true; + + // Initialize and run the compressor. 
+ basis_compressor comp; + if (!comp.init(params)) + return false; + + basisu::basis_compressor::error_code ec = comp.process(); + if (ec != basisu::basis_compressor::cECSuccess) + return false; + + return true; +} + +// Compresses a 256x256 float checkerboard (ASTC_HDR_MAX_VAL/1000.0f) to UASTC HDR 6x6 intermediate and writes test_uastc_hdr_6x6i.ktx2. Returns false if init() or process() fails. +static bool test_compress_uastc_hdr_6x6i() +{ + printf("test_compress_uastc_hdr_6x6i:\n"); + + const uint32_t W = 256, H = 256; + + imagef img(W, H); + for (uint32_t y = 0; y < H; y++) + for (uint32_t x = 0; x < W; x++) + img(x, y).set(((x ^ y) & 1) ? basist::ASTC_HDR_MAX_VAL : 1000.0f); + + basis_compressor_params params; + + // Set the format to UASTC HDR 6x6i using the recommended unified method. + params.set_format_mode_and_quality_effort(basist::basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE, 75, 8); + + params.set_srgb_options(true); + + // Provide the HDR source image. + params.m_source_images_hdr.push_back(img); + + // Enable debug/status output and statistics. + params.m_debug = true; + params.m_status_output = true; + params.m_compute_stats = true; + + // Write a .KTX2 file to disk. + params.m_create_ktx2_file = true; + params.m_write_output_basis_or_ktx2_files = true; + params.m_out_filename = "test_uastc_hdr_6x6i.ktx2"; + + // Create a job pool. A job pool MUST always be created, even if threading is disabled. + // num_total_threads is the TOTAL thread count: 1 = calling thread only, 7 = calling thread + 6 extra. + const uint32_t NUM_THREADS = 7; + job_pool jp(NUM_THREADS); + params.m_pJob_pool = &jp; + params.m_multithreading = true; + + // Initialize and run the compressor. + basis_compressor comp; + if (!comp.init(params)) + return false; + + basisu::basis_compressor::error_code ec = comp.process(); + if (ec != basisu::basis_compressor::cECSuccess) + return false; + + return true; +} + +// Compresses a 256x256 one-pixel checkerboard (255/0) to ASTC LDR 6x6 with Zstd supercompression and generated mipmaps, writing test_astc_ldr_6x6.ktx2. Returns false if init() or process() fails. +static bool test_compress_astc_ldr_6x6() +{ + printf("test_compress_astc_ldr_6x6:\n"); + + const uint32_t W = 256, H = 256; + + image img(W, H); + for (uint32_t y = 0; y < H; y++) + for (uint32_t x = 0; x < W; x++) + img(x, y).set(((x ^ y) & 1) ? 
255 : 0); + + basis_compressor_params params; + + // Set the format to ASTC LDR 6x6 using the recommended unified method. + params.set_format_mode_and_quality_effort(basist::basis_tex_format::cASTC_LDR_6x6, 75, 3); + + // Input is sRGB + params.set_srgb_options(true); + + // Provide the LDR source image. + params.m_source_images.push_back(img); + + // Enable debug/status output and statistics. + params.m_debug = true; + params.m_status_output = true; + params.m_compute_stats = true; + + // Write a .KTX2 file to disk. + params.m_create_ktx2_file = true; + params.m_write_output_basis_or_ktx2_files = true; + params.m_out_filename = "test_astc_ldr_6x6.ktx2"; + + // enable Zstd supercompression + params.m_ktx2_uastc_supercompression = basist::KTX2_SS_ZSTANDARD; + // enable automatic mipmap generation + params.m_mip_gen = true; + + // Create a job pool. A job pool MUST always be created, even if threading is disabled. + // num_total_threads is the TOTAL thread count: 1 = calling thread only, 7 = calling thread + 6 extra. + const uint32_t NUM_THREADS = 7; + job_pool jp(NUM_THREADS); + params.m_pJob_pool = &jp; + params.m_multithreading = true; + + // Initialize and run the compressor. + basis_compressor comp; + if (!comp.init(params)) + return false; + + basisu::basis_compressor::error_code ec = comp.process(); + if (ec != basisu::basis_compressor::cECSuccess) + return false; + + return true; +} + +// Compresses a 256x256 one-pixel checkerboard (255/0) to XUASTC LDR 6x6 with generated mipmaps, writing test_xuastc_ldr_6x6.ktx2. Returns false if init() or process() fails. +static bool test_compress_xuastc_ldr_6x6() +{ + printf("test_compress_xuastc_ldr_6x6:\n"); + + const uint32_t W = 256, H = 256; + + image img(W, H); + for (uint32_t y = 0; y < H; y++) + for (uint32_t x = 0; x < W; x++) + img(x, y).set(((x ^ y) & 1) ? 255 : 0); + + basis_compressor_params params; + + // Set the format to XUASTC LDR 6x6 using the recommended unified method. + params.set_format_mode_and_quality_effort(basist::basis_tex_format::cXUASTC_LDR_6x6, 75, 3); + + // Input is sRGB + params.set_srgb_options(true); + + // Provide the LDR source image. 
+ params.m_source_images.push_back(img); + + // Enable debug/status output and statistics. + params.m_debug = true; + params.m_status_output = true; + params.m_compute_stats = true; + + // Write a .KTX2 file to disk. + params.m_create_ktx2_file = true; + params.m_write_output_basis_or_ktx2_files = true; + params.m_out_filename = "test_xuastc_ldr_6x6.ktx2"; + + // enable automatic mipmap generation + params.m_mip_gen = true; + + // Create a job pool. A job pool MUST always be created, even if threading is disabled. + // num_total_threads is the TOTAL thread count: 1 = calling thread only, 7 = calling thread + 6 extra. + const uint32_t NUM_THREADS = 7; + job_pool jp(NUM_THREADS); + params.m_pJob_pool = &jp; + params.m_multithreading = true; + + // Initialize and run the compressor. + basis_compressor comp; + if (!comp.init(params)) + return false; + + basisu::basis_compressor::error_code ec = comp.process(); + if (ec != basisu::basis_compressor::cECSuccess) + return false; return true; } +// View the resulting texture video .basis file using the webgl/video_test WebGL sample. +// write_ktx2_flag: write a .ktx2 file instead of a .basis file. gen_mips_flag: copied to params.m_mip_gen and selects the *_mips output filename. +static bool test_compress_etc1s_texture_video(bool write_ktx2_flag, bool gen_mips_flag) +{ + printf("test_compress_etc1s_texture_video:\n"); + + const uint32_t NUM_FRAMES = 50; + const uint32_t W = 384, H = 256; + + basis_compressor_params params; + + // Set the format to ETC1S using the recommended unified method. + params.set_format_mode_and_quality_effort(basist::basis_tex_format::cETC1S, 75, 3); + + // Input is sRGB + params.set_srgb_options(true); + + params.m_mip_gen = gen_mips_flag; + + // Create the frames to compress + for (uint32_t frame_index = 0; frame_index < NUM_FRAMES; frame_index++) + { + image img(W, H); + + img.debug_text(frame_index, 20, 1, 1, g_white_color, &g_black_color, false, fmt_string("Frame {}", frame_index).c_str()); + + // Provide the LDR source image for this frame. + params.m_source_images.push_back(img); + } + + // Enable debug/status output and statistics. 
+ params.m_debug = true; + params.m_status_output = true; + params.m_compute_stats = true; + + // Set the texture type to video frames, which will cause the compressor to treat the images as a texture video sequence (using skip blocks). + params.m_tex_type = basist::basis_texture_type::cBASISTexTypeVideoFrames; + + // Write a .basis file to disk. (.KTX2 supports texture video too, but our current texture video WebGL sample only supports .basis.) + // When write_ktx2_flag is set, a .ktx2 file is written instead. + params.m_write_output_basis_or_ktx2_files = true; + if (write_ktx2_flag) + { + params.m_create_ktx2_file = true; + params.m_out_filename = gen_mips_flag ? "test_etc1s_texture_video_mips.ktx2" : "test_etc1s_texture_video.ktx2"; + } + else + { + params.m_out_filename = gen_mips_flag ? "test_etc1s_texture_video_mips.basis" : "test_etc1s_texture_video.basis"; + } + + // Create a job pool. A job pool MUST always be created, even if threading is disabled. + // num_total_threads is the TOTAL thread count: 1 = calling thread only, 7 = calling thread + 6 extra. + const uint32_t NUM_THREADS = 7; + job_pool jp(NUM_THREADS); + params.m_pJob_pool = &jp; + params.m_multithreading = true; + + // Initialize and run the compressor. + basis_compressor comp; + if (!comp.init(params)) + return false; + + basisu::basis_compressor::error_code ec = comp.process(); + if (ec != basisu::basis_compressor::cECSuccess) + return false; + + return true; +} + +// View the resulting texture video .basis file using the webgl/video_test WebGL sample. +// write_ktx2_flag: write a .ktx2 file instead of a .basis file. gen_mips_flag: copied to params.m_mip_gen and selects the *_mips output filename. +static bool test_compress_xuastc_ldr_texture_video(bool write_ktx2_flag, bool gen_mips_flag) +{ + printf("test_compress_xuastc_ldr_texture_video:\n"); + + const uint32_t NUM_FRAMES = 50; + const uint32_t W = 384, H = 256; + + basis_compressor_params params; + + // Set the format to XUASTC LDR using the recommended unified method. 
+ params.set_format_mode_and_quality_effort(basist::basis_tex_format::cXUASTC_LDR_8x8, 75, 3); + + // Input is sRGB + params.set_srgb_options(true); + + params.m_mip_gen = gen_mips_flag; + + // Create the frames to compress + for (uint32_t frame_index = 0; frame_index < NUM_FRAMES; frame_index++) + { + image img(W, H); + + img.debug_text(frame_index, 20, 1, 1, g_white_color, &g_black_color, false, fmt_string("Frame {}", frame_index).c_str()); + + // Provide the LDR source image for this frame. + params.m_source_images.push_back(img); + } + + // Enable debug/status output and statistics. + params.m_debug = true; + params.m_status_output = true; + params.m_compute_stats = true; + + // Set the texture type to video frames, which will cause the compressor to treat the images as a texture video sequence (using skip blocks). + params.m_tex_type = basist::basis_texture_type::cBASISTexTypeVideoFrames; + + // Write a .basis file to disk. (.KTX2 supports texture video too, but our current texture video WebGL sample only supports .basis.) + // When write_ktx2_flag is set, a .ktx2 file is written instead. + params.m_write_output_basis_or_ktx2_files = true; + if (write_ktx2_flag) + { + params.m_create_ktx2_file = true; + params.m_out_filename = gen_mips_flag ? "test_xuastc_ldr_texture_video_mips.ktx2" : "test_xuastc_ldr_texture_video.ktx2"; + } + else + { + params.m_out_filename = gen_mips_flag ? "test_xuastc_ldr_texture_video_mips.basis" : "test_xuastc_ldr_texture_video.basis"; + } + + // Create a job pool. A job pool MUST always be created, even if threading is disabled. + // num_total_threads is the TOTAL thread count: 1 = calling thread only, 7 = calling thread + 6 extra. + const uint32_t NUM_THREADS = 7; + job_pool jp(NUM_THREADS); + params.m_pJob_pool = &jp; + params.m_multithreading = true; + + // Initialize and run the compressor. 
+ basis_compressor comp; + if (!comp.init(params)) + return false; + + basisu::basis_compressor::error_code ec = comp.process(); + if (ec != basisu::basis_compressor::cECSuccess) + return false; + + return true; +} + +// Builds a 2-slice 384x256 HDR texture array whose mip levels are supplied by the caller (m_mip_gen is not set), compresses it to UASTC HDR 6x6 intermediate and writes test_uastc_hdr_6x6i_array_custom_mips.ktx2. Returns false if init() or process() fails. +static bool test_compress_uastc_hdr_6x6i_array_custom_mipmap() +{ + printf("test_compress_uastc_hdr_6x6i_array_custom_mipmap:\n"); + + const uint32_t ARRAY_SIZE = 2; + const uint32_t W = 384, H = 256; + + basis_compressor_params params; + + // Set the format to UASTC HDR 6x6i using the recommended unified method. + params.set_format_mode_and_quality_effort(basist::basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE, 75, 3); + + // Input is linear + params.set_srgb_options(false); + + // Create the array slices + for (uint32_t array_index = 0; array_index < ARRAY_SIZE; array_index++) + { + int cur_w = W, cur_h = H; + int mip_index = 0; + + // Create the mipmaps + params.m_source_images_hdr.enlarge(1); + params.m_source_mipmap_images_hdr.enlarge(1); + + // NOTE(review): the loop exits once both dimensions have been halved to 1, so no 1x1 level is appended - confirm that a truncated mip chain is intended. + do + { + image img(cur_w, cur_h); + + img.debug_text(0, 10, 1, 1, g_white_color, &g_black_color, false, fmt_string("{} {} {}x{}", array_index, mip_index, cur_w, cur_h).c_str()); + + // Upconvert LDR to HDR + imagef hdr_img; + convert_ldr_to_hdr_image(hdr_img, img, true, 100.0f, 0.0f); + + // Add the source image for this mip level. The first mip level goes in m_source_images_hdr, and the remaining mip levels go in m_source_mipmap_images_hdr. + if (!mip_index) + params.m_source_images_hdr[array_index] = hdr_img; + else + params.m_source_mipmap_images_hdr[array_index].push_back(hdr_img); + + ++mip_index; + + cur_w = maximum(1, cur_w / 2); + cur_h = maximum(1, cur_h / 2); + + } while ((cur_w > 1) || (cur_h > 1)); + + } // array_index + + // Enable debug/status output and statistics. + params.m_debug = true; + params.m_status_output = true; + params.m_compute_stats = true; + + // Set the texture type to a 2D texture array. 
+ params.m_tex_type = basist::basis_texture_type::cBASISTexType2DArray; + + // Write a .ktx2 file to disk. + params.m_create_ktx2_file = true; + params.m_write_output_basis_or_ktx2_files = true; + params.m_out_filename = "test_uastc_hdr_6x6i_array_custom_mips.ktx2"; + + // Create a job pool. A job pool MUST always be created, even if threading is disabled. + // num_total_threads is the TOTAL thread count: 1 = calling thread only, 7 = calling thread + 6 extra. + const uint32_t NUM_THREADS = 7; + job_pool jp(NUM_THREADS); + params.m_pJob_pool = &jp; + params.m_multithreading = true; + + // Initialize and run the compressor. + basis_compressor comp; + if (!comp.init(params)) + return false; + + basisu::basis_compressor::error_code ec = comp.process(); + if (ec != basisu::basis_compressor::cECSuccess) + return false; + + return true; +} + +// Creates a KTX2 ETC1S cubemap texture file +// tex_array_flag: when true, 3 layers of 6 faces are supplied and the output is test_etc1s_cubemap_array.ktx2; otherwise a single 6-face cubemap is written to test_etc1s_cubemap.ktx2. +static bool test_compress_etc1s_cubemap(bool tex_array_flag) +{ + printf("test_compress_etc1s_cubemap:\n"); + + // In KTX2, cubemap faces must be square. + const uint32_t W = 256, H = 256; + + basis_compressor_params params; + + // Set the format to ETC1S using the recommended unified method. + params.set_format_mode_and_quality_effort(basist::basis_tex_format::cETC1S, 75, 3); + + // Input is sRGB + params.set_srgb_options(true); + + // Standard face order + static const char* s_pFace_names[6] = { "+X", "-X", "+Y", "-Y", "+Z", "-Z" }; + + const uint32_t num_layers = tex_array_flag ? 3 : 1; + + // Create the 6 faces to compress (for arrays, feed in 6*X) + for (uint32_t layer_index = 0; layer_index < num_layers; layer_index++) + { + for (uint32_t face_index = 0; face_index < 6; face_index++) + { + image img(W, H); + + img.debug_text(0, 10, 1, 1, g_white_color, &g_black_color, false, fmt_string("Layer {}, Face {} {}", layer_index, face_index, s_pFace_names[face_index]).c_str()); + + // Provide the LDR source image for this face. 
+ params.m_source_images.push_back(img); + } + } + + // Enable debug/status output and statistics. + params.m_debug = true; + params.m_status_output = true; + params.m_compute_stats = true; + + // Set the texture type to cubemap/cubemap array + params.m_tex_type = basist::basis_texture_type::cBASISTexTypeCubemapArray; + + params.m_mip_gen = true; + + // Write a .ktx2 file to disk. + params.m_write_output_basis_or_ktx2_files = true; + params.m_create_ktx2_file = true; + params.m_out_filename = tex_array_flag ? "test_etc1s_cubemap_array.ktx2" : "test_etc1s_cubemap.ktx2"; + + // Create a job pool. A job pool MUST always be created, even if threading is disabled. + // num_total_threads is the TOTAL thread count: 1 = calling thread only, 7 = calling thread + 6 extra. + const uint32_t NUM_THREADS = 7; + job_pool jp(NUM_THREADS); + params.m_pJob_pool = &jp; + params.m_multithreading = true; + + // Initialize and run the compressor. + basis_compressor comp; + if (!comp.init(params)) + return false; + + basisu::basis_compressor::error_code ec = comp.process(); + if (ec != basisu::basis_compressor::cECSuccess) + return false; + + return true; +} + +// Runs every test_compress_* case in sequence and stops at the first failure. Returns true only if all of them succeed. +static bool lowlevel_compression_tests() +{ + // basisu_encoder_init() MUST have been called before this point. 
+ basisu_encoder_init(); + + if (!test_compress_etc1s()) + return false; + + if (!test_compress_uastc_ldr_4x4()) + return false; + + if (!test_compress_uastc_hdr_4x4()) + return false; + + if (!test_compress_astc_hdr_6x6()) + return false; + + if (!test_compress_uastc_hdr_6x6i()) + return false; + + if (!test_compress_astc_ldr_6x6()) + return false; + + if (!test_compress_xuastc_ldr_6x6()) + return false; + + // Texture video: every combination of (.basis/.ktx2) x (no mips/mips), for both ETC1S and XUASTC LDR. + for (uint32_t ktx2_iter = 0; ktx2_iter < 2; ktx2_iter++) + { + for (uint32_t mips_iter = 0; mips_iter < 2; mips_iter++) + { + if (!test_compress_etc1s_texture_video(ktx2_iter != 0, mips_iter != 0)) + return false; + + if (!test_compress_xuastc_ldr_texture_video(ktx2_iter != 0, mips_iter != 0)) + return false; + + } // mips_iter + + } // ktx2_iter + + if (!test_compress_uastc_hdr_6x6i_array_custom_mipmap()) + return false; + + if (!test_compress_etc1s_cubemap(false)) + return false; + + if (!test_compress_etc1s_cubemap(true)) + return false; + + printf("lowlevel_compression_tests: Compression OK\n"); + + return true; +} + +#ifdef FORCE_SAN_FAILURE +// Deliberately reads past the end of a stack array; only compiled when FORCE_SAN_FAILURE is defined. +static void force_san_failure() +{ + // Purposely do things that should trigger the address sanitizer + int arr[5] = { 0, 1, 2, 3, 4 }; + printf("Out of bounds element: %d\n", arr[10]); + + //uint8_t* p = (uint8_t *)malloc(10); + //p[10] = 99; + + //uint8_t* p = (uint8_t *)malloc(10); + //free(p); + //p[0] = 99; +} +#endif // FORCE_SAN_FAILURE + int main(int arg_c, char* arg_v[]) { BASISU_NOTE_UNUSED(arg_c); BASISU_NOTE_UNUSED(arg_v); +// Report a debug build when either DEBUG or _DEBUG is defined (logical OR of the two defined() tests). +#if defined(DEBUG) || defined(_DEBUG) + printf("DEBUG\n"); +#endif +#ifdef __SANITIZE_ADDRESS__ + printf("__SANITIZE_ADDRESS__\n"); +#endif + +#ifdef FORCE_SAN_FAILURE + force_san_failure(); +#endif + #if USE_ENCODER basisu_encoder_init(USE_OPENCL, false); - if (!random_compression_fuzz_test()) +#if ENABLE_DEBUG_PRINTF + enable_debug_printf(true); +#endif + + if (!lowlevel_compression_tests()) + { + fprintf(stderr, "lowlevel_compression_tests() failed!\n"); return EXIT_FAILURE; + } + if 
(!random_compress_test()) + { + fprintf(stderr, "random_compress_test() failed!\n"); + return EXIT_FAILURE; + } + if (!block_unpack_and_transcode_example()) + { + fprintf(stderr, "block_unpack_and_transcode_example() failed!\n"); return EXIT_FAILURE; + } fuzz_uastc_hdr_transcoder_test(); - + if (!encode_etc1s()) { fprintf(stderr, "encode_etc1s() failed!\n"); diff --git a/external/basis_universal/example/example.vcxproj b/external/basis_universal/example/example.vcxproj index bd10772210..d33702aa81 100644 --- a/external/basis_universal/example/example.vcxproj +++ b/external/basis_universal/example/example.vcxproj @@ -38,40 +38,40 @@ Application true Unicode - v143 + v145 Application false true Unicode - v143 + v145 Application true Unicode - v143 + v145 Application true Unicode - v143 + v145 Application false true Unicode - v143 + v145 Application false true Unicode - v143 + v145 @@ -100,10 +100,21 @@ $(SolutionDir)\bin\ - + + $(SolutionDir)\bin\ + $(SolutionDir)\bin\ + + $(SolutionDir)\bin\ + + + $(SolutionDir)\bin\ + + + $(SolutionDir)\bin\ + Level4 @@ -112,7 +123,7 @@ true ..\OpenCL stdcpp17 - StreamingSIMDExtensions2 + AdvancedVectorExtensions Console @@ -130,7 +141,7 @@ WIN32;NDEBUG;_HAS_EXCEPTIONS=0;_CONSOLE;%(PreprocessorDefinitions);BASISU_SUPPORT_SSE=1;BASISU_SUPPORT_OPENCL=1 true ..\OpenCL - StreamingSIMDExtensions2 + AdvancedVectorExtensions false false stdcpp17 @@ -152,8 +163,8 @@ StreamingSIMDExtensions2 false ..\OpenCL - Level4 stdcpp17 + Level4 Console @@ -243,4 +254,4 @@ - + \ No newline at end of file diff --git a/external/basis_universal/example_capi/example_capi.c b/external/basis_universal/example_capi/example_capi.c new file mode 100644 index 0000000000..73ff25de77 --- /dev/null +++ b/external/basis_universal/example_capi/example_capi.c @@ -0,0 +1,707 @@ +// example_capi.c - Plain C API examples +// Compresses a procedurally generated 32bpp 512x512 test image to a XUASTC LDR 8x5 .ktx2 file with mipmaps and writes a .ktx2 file. 
+// The .ktx2 file is then opened by the transcoder module, examined and unpacked to RGBA 32bpp and ASTC textures which are saved to disk as .tga and .astc files. +// The .tga image files can be viewed by many common image editors/viewers. +// The standard .astc texture files can be unpacked to .PNG using ARM's astcenc tool, using a command line like this: astcenc-avx2.exe -ds transcoded_0_0_0.astc 0.png + +#include +#include +#include +#include +#include +#include + +typedef int BOOL; +#define TRUE (1) +#define FALSE (0) + +// Include compressor and transcoder C API definitions +#include "../encoder/basisu_wasm_api.h" +#include "../encoder/basisu_wasm_transcoder_api.h" + +// Write a blob of data in memory to a file +int write_blob_to_file(const char* pFilename, const void* pData, size_t len) +{ + assert(pFilename != NULL); + assert(pData != NULL); + + if (!pFilename || !pData) + return FALSE; + + FILE* f = fopen(pFilename, "wb"); + if (!f) + return FALSE; + + /* Write the data */ + size_t written = fwrite(pData, 1, len, f); + if (written != len) + { + fclose(f); + return FALSE; + } + + if (fclose(f) != 0) + return FALSE; + + return TRUE; /* success */ +} + +// Writes 24/32bpp .TGA image files +int write_tga_image(const char* pFilename, int w, int h, int has_alpha, const uint8_t* pPixelsRGBA) +{ + assert(pFilename != NULL); + assert(pPixelsRGBA != NULL); + assert(w > 0); + assert(h > 0); + assert((has_alpha == 0) || (has_alpha == 1)); + + /* Runtime argument validation */ + if ((!pFilename) || (!pPixelsRGBA) || (w <= 0) || (h <= 0)) + return -1; // invalid argument + + FILE* pFile = fopen(pFilename, "wb"); + if (!pFile) + return -2; // cannot open file + + uint8_t header[18] = { 0 }; + header[2] = 2; // uncompressed true-color + header[12] = (uint8_t)(w & 0xFF); + header[13] = (uint8_t)((w >> 8) & 0xFF); + header[14] = (uint8_t)(h & 0xFF); + header[15] = (uint8_t)((h >> 8) & 0xFF); + header[16] = has_alpha ? 
32 : 24; + + /* Classic TGA: bottom-left origin */ + header[17] = has_alpha ? 8 : 0; + + if (fwrite(header, 1, 18, pFile) != 18) + { + fclose(pFile); + return -3; // header write failed + } + + uint64_t bytes_per_pixel = has_alpha ? 4ULL : 3ULL; + uint64_t pixel_bytes_u64 = (uint64_t)w * (uint64_t)h * bytes_per_pixel; + size_t pixel_bytes = (size_t)pixel_bytes_u64; + + if ((uint64_t)pixel_bytes != pixel_bytes_u64) + return -6; // overflow bogus dimensions + + /* allocate one scanline for BGRA/BGR output */ + size_t row_bytes = (size_t)((size_t)w * bytes_per_pixel); + uint8_t* pRow = (uint8_t*)malloc(row_bytes); + if (!pRow) + { + fclose(pFile); + return -7; // out of memory + } + + /* TGA expects rows in bottom-to-top order */ + for (int y = 0; y < h; y++) + { + const uint8_t* pSrcRow = pPixelsRGBA + (size_t)(h - 1 - y) * w * bytes_per_pixel; + + /* Convert RGBA->BGRA or RGB->BGR for this row */ + if (has_alpha) + { + /* 4 bytes per pixel */ + for (int x = 0; x < w; x++) + { + const uint8_t* s = &pSrcRow[x * 4]; + uint8_t* d = &pRow[x * 4]; + + d[0] = s[2]; // B + d[1] = s[1]; // G + d[2] = s[0]; // R + d[3] = s[3]; // A + } + } + else + { + /* 3 bytes per pixel */ + for (int x = 0; x < w; x++) + { + const uint8_t* s = &pSrcRow[x * 3]; + uint8_t* d = &pRow[x * 3]; + + d[0] = s[2]; // B + d[1] = s[1]; // G + d[2] = s[0]; // R + } + } + + if (fwrite(pRow, 1, row_bytes, pFile) != row_bytes) + { + free(pRow); + fclose(pFile); + return -4; // pixel write failed + } + } + + free(pRow); + + if (fclose(pFile) != 0) + return -5; // close failed + + return 0; // success +} + +// Write standard ARM .ASTC format texture files +int write_astc_file(const char* pFilename, + const void* pBlocks, // pointer to ASTC blocks + uint32_t block_width, // in texels [4,12] + uint32_t block_height, // in texels [4,12] + uint32_t dim_x, // image actual dimension in texels + uint32_t dim_y) // image actual dimension in texels +{ + assert(pFilename != NULL); + assert(pBlocks != NULL); + 
assert(dim_x > 0); + assert(dim_y > 0); + assert((block_width >= 4) && (block_width <= 12)); + assert((block_height >= 4) && (block_height <= 12)); + + FILE* f = fopen(pFilename, "wb"); + if (!f) + return 0; + + /* Helper macro for writing single bytes with error check */ +#define PUTB(v) do { if (fputc((int)(v), f) == EOF) { fclose(f); return 0; } } while (0) + + /* Magic */ + PUTB(0x13); + PUTB(0xAB); + PUTB(0xA1); + PUTB(0x5C); + + /* Block dimensions: x, y, z = 1 */ + PUTB((uint8_t)block_width); + PUTB((uint8_t)block_height); + PUTB(1); /* block depth */ + + /* dim_x (24-bit little endian) */ + PUTB((uint8_t)(dim_x & 0xFF)); + PUTB((uint8_t)((dim_x >> 8) & 0xFF)); + PUTB((uint8_t)((dim_x >> 16) & 0xFF)); + + /* dim_y (24-bit little endian) */ + PUTB((uint8_t)(dim_y & 0xFF)); + PUTB((uint8_t)((dim_y >> 8) & 0xFF)); + PUTB((uint8_t)((dim_y >> 16) & 0xFF)); + + /* dim_z = 1 (24-bit LE) */ + PUTB(1); + PUTB(0); + PUTB(0); + + /* Compute block count and total bytes */ + uint32_t num_blocks_x = (dim_x + block_width - 1) / block_width; + uint32_t num_blocks_y = (dim_y + block_height - 1) / block_height; + + uint64_t total_bytes_u64 = + (uint64_t)num_blocks_x * (uint64_t)num_blocks_y * 16ULL; + + size_t total_bytes = (size_t)total_bytes_u64; + + if ((uint64_t)total_bytes != total_bytes_u64) + { + fclose(f); + return 0; /* overflow → fail */ + } + + /* Write block data directly */ + size_t written = fwrite(pBlocks, 1, total_bytes, f); + if (written != total_bytes) + { + fclose(f); /* still close even if error */ + return 0; + } + + if (fclose(f) != 0) + return 0; + + return 1; /* success */ + +#undef PUTB +} + +// Procedurally create a simple test image in memory +uint8_t* create_pretty_rgba_pattern(int w, int h, float q) +{ + if (w <= 0 || h <= 0) + return NULL; + + uint8_t* pImage = (uint8_t*)malloc((size_t)w * h * 4); + if (!pImage) + return NULL; + + for (int y = 0; y < h; y++) + { + for (int x = 0; x < w; x++) + { + /* normalized coordinates 0..1 */ + float fx = 
(float)x / (float)w; + float fy = (float)y / (float)h; + + /* --- Extra coordinate warping when q != 0 --- */ + if (q != 0.0f) { + float warp = sinf((fx + fy) * 10.0f * q); + fx += 0.15f * q * warp; + fy += 0.15f * q * sinf((fx - fy) * 8.0f * q); + } + + /* Original plasma formula */ + float v = sinf(fx * 12.0f + fy * 4.0f); + v += sinf(fy * 9.0f - fx * 6.0f); + v += sinf((fx + fy) * 7.0f); + + /* Extra variation term — contributes only when q != 0 */ + if (q != 0.0f) + { + v += q * 0.7f * sinf((fx * fx + fy) * 20.0f); + v += q * 0.4f * cosf((fx - fy) * 18.0f); + } + + /* scale to 0..1 */ + v = v * 0.25f + 0.5f; + + float L = 1.5f; + + /* Convert to RGB colors */ + int r = (int)roundf(255.0f * sinf(v * 6.28f) * L); + int g = (int)roundf(255.0f * (1.0f - v) * L); + int b = (int)roundf(255.0f * v * L); + + /* clamp */ + if (r < 0) r = 0; else if (r > 255) r = 255; + if (g < 0) g = 0; else if (g > 255) g = 255; + if (b < 0) b = 0; else if (b > 255) b = 255; + + /* write RGBA - index in size_t, matching the allocation size, so large images cannot overflow int */ + uint8_t* p = &pImage[((size_t)y * (size_t)w + (size_t)x) * 4]; + p[0] = (uint8_t)r; + p[1] = (uint8_t)g; + p[2] = (uint8_t)b; + p[3] = 255; + } + } + + return pImage; +} + +// Takes a KTX2 file in memory and displays info about it, then transcodes it to RGBA32 and ASTC, writing .tga/.astc files to disk +// Returns TRUE on success, FALSE on any failure. pDesc is embedded in the output filenames. +int transcode_ktx2_file(const void* pKTX2_data, size_t ktx2_data_size, const char *pDesc) +{ + printf("------ transcode_ktx2_file(): ktx2 size: %zu, desc: %s\n", ktx2_data_size, pDesc); + + if (!pKTX2_data || !ktx2_data_size) + return FALSE; + + if ((uint32_t)ktx2_data_size != ktx2_data_size) + return FALSE; + + uint64_t ktx2_data_ofs = bt_alloc(ktx2_data_size); + if (!ktx2_data_ofs) + return FALSE; + + memcpy((void*)ktx2_data_ofs, pKTX2_data, ktx2_data_size); + + uint64_t ktx2_handle = bt_ktx2_open(ktx2_data_ofs, (uint32_t)ktx2_data_size); + if (!ktx2_handle) + { + bt_free(ktx2_data_ofs); + return FALSE; + } + + // Just testing LDR here for now + if (!bt_ktx2_is_ldr(ktx2_handle)) + { + bt_ktx2_close(ktx2_handle); + 
bt_free(ktx2_data_ofs); + return FALSE; + } + + if (!bt_ktx2_start_transcoding(ktx2_handle)) + { + bt_ktx2_close(ktx2_handle); + bt_free(ktx2_data_ofs); + return FALSE; + } + + uint32_t width = bt_ktx2_get_width(ktx2_handle), height = bt_ktx2_get_height(ktx2_handle); + uint32_t levels = bt_ktx2_get_levels(ktx2_handle); // number of mipmap levels, must be >= 1 + uint32_t faces = bt_ktx2_get_faces(ktx2_handle); // 1 or 6 + uint32_t layers = bt_ktx2_get_layers(ktx2_handle); // 0 or array size + + uint32_t basis_tex_format = bt_ktx2_get_basis_tex_format(ktx2_handle); + uint32_t block_width = bt_ktx2_get_block_width(ktx2_handle); + uint32_t block_height = bt_ktx2_get_block_height(ktx2_handle); + uint32_t is_srgb = bt_ktx2_is_srgb(ktx2_handle); + uint32_t is_video = bt_ktx2_is_video(ktx2_handle); // only reliably set after calling bt_ktx2_start_transcoding() + + printf("KTX2 Dimensions: %ux%u, Levels: %u, Faces: %u, Layers: %u\n", width, height, levels, faces, layers); + printf("basis_tex_format: %u\n", basis_tex_format); + printf("Block dimensions: %ux%u\n", block_width, block_height); + printf("is sRGB: %u\n", is_srgb); + printf("is video: %u\n", is_video); + + assert((width >= 1) && (height >= 1)); + assert(levels >= 1); + assert((faces == 6) || (faces == 1)); + + // If layers==0 it's not a texture array + if (layers < 1) + layers = 1; + + // Create our transcoding state handle (which contains thread-local state) + // This is actually optional, and only needed for thread-safe transcoding, but we'll test it here. 
+ uint64_t transcode_state_handle = bt_ktx2_create_transcode_state(); + + for (uint32_t level_index = 0; level_index < levels; level_index++) + { + for (uint32_t layer_index = 0; layer_index < layers; layer_index++) + { + for (uint32_t face_index = 0; face_index < faces; face_index++) + { + printf("- Level: %u, layer: %u, face: %u\n", level_index, layer_index, face_index); + + uint32_t orig_width = bt_ktx2_get_level_orig_width(ktx2_handle, level_index, layer_index, face_index); + uint32_t orig_height = bt_ktx2_get_level_orig_height(ktx2_handle, level_index, layer_index, face_index); + + printf(" Orig dimensions: %ux%u, actual: %ux%u\n", + orig_width, orig_height, + bt_ktx2_get_level_actual_width(ktx2_handle, level_index, layer_index, face_index), bt_ktx2_get_level_actual_height(ktx2_handle, level_index, layer_index, face_index)); + + printf(" Block dimensions: %ux%u, total blocks: %u\n", + bt_ktx2_get_level_num_blocks_x(ktx2_handle, level_index, layer_index, face_index), + bt_ktx2_get_level_num_blocks_y(ktx2_handle, level_index, layer_index, face_index), + bt_ktx2_get_level_total_blocks(ktx2_handle, level_index, layer_index, face_index)); + + printf(" Alpha flag: %u, iframe flag: %u\n", + bt_ktx2_get_level_alpha_flag(ktx2_handle, level_index, layer_index, face_index), + bt_ktx2_get_level_iframe_flag(ktx2_handle, level_index, layer_index, face_index)); + + // First transcode level to uncompressed RGBA32 and write a .tga file + { + char tga_filename[256]; + snprintf(tga_filename, sizeof(tga_filename), "transcoded_%s_L%u_Y%u_F%u.tga", pDesc, level_index, layer_index, face_index); + + uint32_t transcode_buf_size = bt_basis_compute_transcoded_image_size_in_bytes(TF_RGBA32, orig_width, orig_height); + assert(transcode_buf_size); + + uint64_t transcode_buf_ofs = bt_alloc(transcode_buf_size); + + uint32_t decode_flags = 0; + + if (!bt_ktx2_transcode_image_level(ktx2_handle, level_index, layer_index, face_index, + transcode_buf_ofs, transcode_buf_size / sizeof(uint32_t), // 
it wants blocks or pixels, not bytes + TF_RGBA32, + decode_flags, + 0, 0, -1, -1, transcode_state_handle)) + { + bt_free(transcode_buf_ofs); + bt_ktx2_destroy_transcode_state(transcode_state_handle); + bt_ktx2_close(ktx2_handle); + bt_free(ktx2_data_ofs); + return FALSE; + } + + write_tga_image(tga_filename, orig_width, orig_height, TRUE, (uint8_t*)transcode_buf_ofs); + printf("Wrote file %s\n", tga_filename); + + bt_free(transcode_buf_ofs); + transcode_buf_ofs = 0; + } + + // Now transcode to ASTC and write a .astc file + { + char astc_filename[256]; + snprintf(astc_filename, sizeof(astc_filename), "transcoded_%s_L%u_Y%u_F%u.astc", pDesc, level_index, layer_index, face_index); + + // Determine the correct ASTC transcode texture format from the ktx2 format + uint32_t target_transcode_fmt = bt_basis_get_transcoder_texture_format_from_basis_tex_format(basis_tex_format); + + uint32_t transcode_buf_size = bt_basis_compute_transcoded_image_size_in_bytes(target_transcode_fmt, orig_width, orig_height); + assert(transcode_buf_size); + + uint64_t transcode_buf_ofs = bt_alloc(transcode_buf_size); + + uint32_t decode_flags = 0; + + if (!bt_ktx2_transcode_image_level(ktx2_handle, level_index, layer_index, face_index, + transcode_buf_ofs, transcode_buf_size / 16, // API wants blocks or pixels, not bytes - ASTC is always 16 bytes per block + target_transcode_fmt, + decode_flags, + 0, 0, -1, -1, transcode_state_handle)) + { + bt_free(transcode_buf_ofs); + bt_ktx2_destroy_transcode_state(transcode_state_handle); + bt_ktx2_close(ktx2_handle); + bt_free(ktx2_data_ofs); + return FALSE; + } + + write_astc_file(astc_filename, (void*)transcode_buf_ofs, block_width, block_height, orig_width, orig_height); + printf("Wrote .astc file %s\n", astc_filename); + + bt_free(transcode_buf_ofs); + transcode_buf_ofs = 0; + } + + } // face_index + + } // layer_index + + } // level_index + + bt_ktx2_destroy_transcode_state(transcode_state_handle); + transcode_state_handle = 0; + + 
bt_ktx2_close(ktx2_handle); + ktx2_handle = 0; + + bt_free(ktx2_data_ofs); + ktx2_data_ofs = 0; + + return TRUE; +} + +// Simple 2D test +int test_2D() +{ + printf("------ test_2D():\n"); + + // Generate a test image + int W = 512, H = 512; + + uint8_t* pSrc_image = create_pretty_rgba_pattern(W, H, 0.0f); + + // Save the test image to a .tga file + write_tga_image("test_image.tga", W, H, TRUE, pSrc_image); + printf("Wrote file test_image.tga\n"); + + // Compress it to .ktx2 + uint64_t comp_params = bu_new_comp_params(); + + // Allocate memory + uint64_t img_ofs = bu_alloc(W * H * 4); + if (!img_ofs) + { + fprintf(stderr, "bu_alloc() failed\n"); + return EXIT_FAILURE; + } + + // Copy the test image into the allocated memory + memcpy((void*)img_ofs, pSrc_image, W * H * 4); + + // Supply the image to the compressor - it'll immediately make a copy of the data + if (!bu_comp_params_set_image_rgba32(comp_params, 0, img_ofs, W, H, W * 4)) + { + fprintf(stderr, "bu_comp_params_set_image_rgba32() failed\n"); + return EXIT_FAILURE; + } + + bu_free(img_ofs); + img_ofs = 0; + + // Now compress it to XUASTC LDR 8x5 with weight grid DCT + uint32_t basis_tex_format = BTF_XUASTC_LDR_8X5; + //uint32_t basis_tex_format = BTF_ASTC_LDR_8X5; + //uint32_t basis_tex_format = BTF_ETC1S; + //uint32_t basis_tex_format = BTF_UASTC_LDR_4X4; + + uint32_t quality_level = 85; + uint32_t effort_level = 2; + + uint32_t flags = BU_COMP_FLAGS_KTX2_OUTPUT | BU_COMP_FLAGS_SRGB | + BU_COMP_FLAGS_THREADED | BU_COMP_FLAGS_GEN_MIPS_CLAMP | + BU_COMP_FLAGS_PRINT_STATS | BU_COMP_FLAGS_PRINT_STATUS; + + if (!bu_compress_texture(comp_params, basis_tex_format, quality_level, effort_level, flags, 0.0f)) + { + fprintf(stderr, "bu_compress_texture() failed\n"); + return EXIT_FAILURE; + } + + // Retrieve the compressed .KTX2 file data + uint64_t comp_size = bu_comp_params_get_comp_data_size(comp_params); + if (!comp_size) + { + fprintf(stderr, "bu_comp_params_get_comp_data_size() failed\n"); + return EXIT_FAILURE; 
+ } + + void* pComp_data = (void*)bu_comp_params_get_comp_data_ofs(comp_params); + if (!pComp_data) + { + fprintf(stderr, "bu_comp_params_get_comp_data_ofs() failed\n"); + return EXIT_FAILURE; + } + + // Write the data to disk + write_blob_to_file("test.ktx2", pComp_data, (size_t)comp_size); + printf("Wrote file test.ktx2\n"); + + // Now inspect and transcode the .KTX2 data to png/astc files + if (!transcode_ktx2_file(pComp_data, (size_t)comp_size, "2D")) + { + fprintf(stderr, "transcode_ktx2_file() failed\n"); + return EXIT_FAILURE; + } + + bu_delete_comp_params(comp_params); + + free(pSrc_image); + return EXIT_SUCCESS; +} + +// 2D array/texture video test +int test_2D_array(BOOL tex_video_flag, int L, BOOL mipmap_flag) +{ + printf("------ test_2D_array() %i %i %i:\n", tex_video_flag, L, mipmap_flag); + + // Generate a test image + int W = 256, H = 256; + + // Compress it to .ktx2 + uint64_t comp_params = bu_new_comp_params(); + + const char* pDesc = tex_video_flag ? "video" : "array"; + + char filename_buf[256]; + + for (int layer = 0; layer < L; layer++) + { + uint8_t* pSrc_image = create_pretty_rgba_pattern(W, H, (float)layer * .05f); + + // Save the test image to a .tga file + snprintf(filename_buf, sizeof(filename_buf), "test_%s_layer_%u.tga", pDesc, layer); + + write_tga_image(filename_buf, W, H, TRUE, pSrc_image); + printf("Wrote file %s\n", filename_buf); + + // Allocate memory + uint64_t img_ofs = bu_alloc(W * H * 4); + if (!img_ofs) + { + fprintf(stderr, "bu_alloc() failed\n"); + return EXIT_FAILURE; + } + + // Copy the test image into the allocated memory + memcpy((void*)img_ofs, pSrc_image, W * H * 4); + + // Supply the image to the compressor - it'll immediately make a copy of the data + if (!bu_comp_params_set_image_rgba32(comp_params, layer, img_ofs, W, H, W * 4)) + { + fprintf(stderr, "bu_comp_params_set_image_rgba32() failed\n"); + return EXIT_FAILURE; + } + + bu_free(img_ofs); + img_ofs = 0; + + free(pSrc_image); + + } // layer + + // ETC1S has 
special optimizations for texture video (basic p-frames with skip blocks). + uint32_t basis_tex_format = tex_video_flag ? BTF_ETC1S : BTF_XUASTC_LDR_4X4; + + uint32_t quality_level = 100; + uint32_t effort_level = 4; + + uint32_t flags = BU_COMP_FLAGS_KTX2_OUTPUT | BU_COMP_FLAGS_SRGB | + BU_COMP_FLAGS_THREADED | + BU_COMP_FLAGS_PRINT_STATS | BU_COMP_FLAGS_PRINT_STATUS; + + if (tex_video_flag) + flags |= BU_COMP_FLAGS_TEXTURE_TYPE_VIDEO_FRAMES; + else + flags |= BU_COMP_FLAGS_TEXTURE_TYPE_2D_ARRAY; + + if (mipmap_flag) + flags |= BU_COMP_FLAGS_GEN_MIPS_CLAMP; + + if (!bu_compress_texture(comp_params, basis_tex_format, quality_level, effort_level, flags, 0.0f)) + { + fprintf(stderr, "bu_compress_texture() failed\n"); + return EXIT_FAILURE; + } + + // Retrieve the compressed .KTX2 file data + uint64_t comp_size = bu_comp_params_get_comp_data_size(comp_params); + if (!comp_size) + { + fprintf(stderr, "bu_comp_params_get_comp_data_size() failed\n"); + return EXIT_FAILURE; + } + + void* pComp_data = (void*)bu_comp_params_get_comp_data_ofs(comp_params); + if (!pComp_data) + { + fprintf(stderr, "bu_comp_params_get_comp_data_ofs() failed\n"); + return EXIT_FAILURE; + } + + // Write the data to disk + snprintf(filename_buf, sizeof(filename_buf), "test_%s.ktx2", pDesc); + write_blob_to_file(filename_buf, pComp_data, (size_t)comp_size); + printf("Wrote file %s\n", filename_buf); + + // Now inspect and transcode the .KTX2 data to png/astc files + if (!transcode_ktx2_file(pComp_data, (size_t)comp_size, pDesc)) + { + fprintf(stderr, "transcode_ktx2_file() failed\n"); + return EXIT_FAILURE; + } + + bu_delete_comp_params(comp_params); + + return EXIT_SUCCESS; +} + +int main(int argc, char **argv) +{ + (void)argc; + (void)argv; + printf("example_capi.c:\n"); + + // Initialize the encoder (which initializers the transcoder for us) + printf("bu_init:\n"); + bu_init(); + + // bu_init() already does this for us, but it's harmless to call again. 
+ printf("bt_init:\n"); + bt_init(); + + // Control debug output from the compressor + bu_enable_debug_printf(FALSE); + + // simple 2D + if (test_2D() != EXIT_SUCCESS) + { + fprintf(stderr, "test_2D() failed!\n"); + return EXIT_FAILURE; + } + + // 2D array + if (test_2D_array(FALSE, 8, FALSE) != EXIT_SUCCESS) + { + fprintf(stderr, "test_2D_array() (array mode) failed!\n"); + return EXIT_FAILURE; + } + + // texture video + if (test_2D_array(TRUE, 8, TRUE) != EXIT_SUCCESS) + { + fprintf(stderr, "test_2D_array() (texture video mode) failed!\n"); + return EXIT_FAILURE; + } + + printf("Success\n"); + + return EXIT_SUCCESS; +} + + diff --git a/external/basis_universal/example_capi/example_capi.vcxproj b/external/basis_universal/example_capi/example_capi.vcxproj new file mode 100644 index 0000000000..758b712957 --- /dev/null +++ b/external/basis_universal/example_capi/example_capi.vcxproj @@ -0,0 +1,238 @@ + + + + + Debug + ARM64EC + + + Debug + Win32 + + + Release + ARM64EC + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 18.0 + Win32Proj + {be889347-e4fd-47dd-bbf4-81f98faa8ba9} + examplecapi + 10.0 + + + + Application + true + v145 + Unicode + + + Application + false + v145 + true + Unicode + + + Application + true + v145 + Unicode + + + Application + true + v145 + Unicode + + + Application + false + v145 + true + Unicode + + + Application + false + v145 + true + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)\bin\ + + + $(SolutionDir)\bin\ + + + $(SolutionDir)\bin\ + + + $(SolutionDir)\bin\ + + + $(SolutionDir)\bin\ + + + $(SolutionDir)\bin\ + + + + Level3 + true + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + stdcpp20 + ..\OpenCL + + + Console + true + ..\OpenCL\lib + opencl.lib;$(CoreLibraryDependencies);%(AdditionalDependencies) + + + + + Level3 + true + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + stdcpp20 + ..\OpenCL + + + Console + true + ..\OpenCL\lib + 
opencl.lib;$(CoreLibraryDependencies);%(AdditionalDependencies) + + + + + Level3 + true + _DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + stdcpp20 + ..\OpenCL + + + Console + true + ..\OpenCL\lib + opencl64.lib;$(CoreLibraryDependencies);%(AdditionalDependencies) + + + + + Level3 + true + _DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + stdcpp20 + ..\OpenCL + + + Console + true + ..\OpenCL\lib + opencl64.lib;softintrin.lib;%(AdditionalDependencies) + + + + + Level3 + true + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + stdcpp20 + ..\OpenCL + + + Console + true + ..\OpenCL\lib + opencl64.lib;$(CoreLibraryDependencies);%(AdditionalDependencies) + + + + + Level3 + true + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + stdcpp20 + ..\OpenCL + + + Console + true + ..\OpenCL\lib + opencl64.lib;softintrin.lib;%(AdditionalDependencies) + + + + + + + + + + + + + + + {97c34996-f458-4030-a402-b32c581872f1} + + + + + + \ No newline at end of file diff --git a/external/basis_universal/example_capi/example_capi.vcxproj.filters b/external/basis_universal/example_capi/example_capi.vcxproj.filters new file mode 100644 index 0000000000..aa9303e4e8 --- /dev/null +++ b/external/basis_universal/example_capi/example_capi.vcxproj.filters @@ -0,0 +1,39 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + Source Files + + + + + Source Files + + + Source Files + + + Source Files + + + \ No newline at end of file diff --git a/external/basis_universal/example_transcoding/dds_defs.h b/external/basis_universal/example_transcoding/dds_defs.h new file mode 100644 index 0000000000..cbca0a5a21 --- /dev/null +++ 
b/external/basis_universal/example_transcoding/dds_defs.h @@ -0,0 +1,286 @@ +// File: dds_defs.h +// DX9/10 .DDS file header definitions. +#pragma once + +#define PIXEL_FMT_FOURCC(a, b, c, d) ((a) | ((b) << 8U) | ((c) << 16U) | ((d) << 24U)) + +enum pixel_format +{ + PIXEL_FMT_INVALID = 0, + + PIXEL_FMT_DXT1 = PIXEL_FMT_FOURCC('D', 'X', 'T', '1'), + PIXEL_FMT_DXT2 = PIXEL_FMT_FOURCC('D', 'X', 'T', '2'), + PIXEL_FMT_DXT3 = PIXEL_FMT_FOURCC('D', 'X', 'T', '3'), + PIXEL_FMT_DXT4 = PIXEL_FMT_FOURCC('D', 'X', 'T', '4'), + PIXEL_FMT_DXT5 = PIXEL_FMT_FOURCC('D', 'X', 'T', '5'), + PIXEL_FMT_3DC = PIXEL_FMT_FOURCC('A', 'T', 'I', '2'), // DXN_YX + PIXEL_FMT_DXN = PIXEL_FMT_FOURCC('A', '2', 'X', 'Y'), // DXN_XY + PIXEL_FMT_DXT5A = PIXEL_FMT_FOURCC('A', 'T', 'I', '1'), // ATI1N, http://developer.amd.com/media/gpu_assets/Radeon_X1x00_Programming_Guide.pdf + + // Non-standard formats (some of these are supported by ATI's Compressonator) + PIXEL_FMT_DXT5_CCxY = PIXEL_FMT_FOURCC('C', 'C', 'x', 'Y'), + PIXEL_FMT_DXT5_xGxR = PIXEL_FMT_FOURCC('x', 'G', 'x', 'R'), + PIXEL_FMT_DXT5_xGBR = PIXEL_FMT_FOURCC('x', 'G', 'B', 'R'), + PIXEL_FMT_DXT5_AGBR = PIXEL_FMT_FOURCC('A', 'G', 'B', 'R'), + + PIXEL_FMT_DXT1A = PIXEL_FMT_FOURCC('D', 'X', '1', 'A'), + PIXEL_FMT_ETC1 = PIXEL_FMT_FOURCC('E', 'T', 'C', '1'), + + PIXEL_FMT_R8G8B8 = PIXEL_FMT_FOURCC('R', 'G', 'B', 'x'), + PIXEL_FMT_L8 = PIXEL_FMT_FOURCC('L', 'x', 'x', 'x'), + PIXEL_FMT_A8 = PIXEL_FMT_FOURCC('x', 'x', 'x', 'A'), + PIXEL_FMT_A8L8 = PIXEL_FMT_FOURCC('L', 'x', 'x', 'A'), + PIXEL_FMT_A8R8G8B8 = PIXEL_FMT_FOURCC('R', 'G', 'B', 'A') +}; + +const uint32_t cDDSMaxImageDimensions = 8192U; + +// Total size of header is sizeof(uint32)+cDDSSizeofDDSurfaceDesc2; +const uint32_t cDDSSizeofDDSurfaceDesc2 = 124; + +// "DDS " +const uint32_t cDDSFileSignature = 0x20534444; + +struct DDCOLORKEY +{ + uint32_t dwUnused0; + uint32_t dwUnused1; +}; + +struct DDPIXELFORMAT +{ + uint32_t dwSize; + uint32_t dwFlags; + uint32_t dwFourCC; + uint32_t 
dwRGBBitCount; // ATI compressonator will place a FOURCC code here for swizzled/cooked DXTn formats + uint32_t dwRBitMask; + uint32_t dwGBitMask; + uint32_t dwBBitMask; + uint32_t dwRGBAlphaBitMask; +}; + +struct DDSCAPS2 +{ + uint32_t dwCaps; + uint32_t dwCaps2; + uint32_t dwCaps3; + uint32_t dwCaps4; +}; + +struct DDSURFACEDESC2 +{ + uint32_t dwSize; + uint32_t dwFlags; + uint32_t dwHeight; + uint32_t dwWidth; + union + { + int32_t lPitch; + uint32_t dwLinearSize; + }; + uint32_t dwBackBufferCount; + uint32_t dwMipMapCount; + uint32_t dwAlphaBitDepth; + uint32_t dwUnused0; + uint32_t lpSurface; + DDCOLORKEY unused0; + DDCOLORKEY unused1; + DDCOLORKEY unused2; + DDCOLORKEY unused3; + DDPIXELFORMAT ddpfPixelFormat; + DDSCAPS2 ddsCaps; + uint32_t dwUnused1; +}; + +const uint32_t DDSD_CAPS = 0x00000001; +const uint32_t DDSD_HEIGHT = 0x00000002; +const uint32_t DDSD_WIDTH = 0x00000004; +const uint32_t DDSD_PITCH = 0x00000008; + +const uint32_t DDSD_BACKBUFFERCOUNT = 0x00000020; +const uint32_t DDSD_ZBUFFERBITDEPTH = 0x00000040; +const uint32_t DDSD_ALPHABITDEPTH = 0x00000080; + +const uint32_t DDSD_LPSURFACE = 0x00000800; + +const uint32_t DDSD_PIXELFORMAT = 0x00001000; +const uint32_t DDSD_CKDESTOVERLAY = 0x00002000; +const uint32_t DDSD_CKDESTBLT = 0x00004000; +const uint32_t DDSD_CKSRCOVERLAY = 0x00008000; + +const uint32_t DDSD_CKSRCBLT = 0x00010000; +const uint32_t DDSD_MIPMAPCOUNT = 0x00020000; +const uint32_t DDSD_REFRESHRATE = 0x00040000; +const uint32_t DDSD_LINEARSIZE = 0x00080000; + +const uint32_t DDSD_TEXTURESTAGE = 0x00100000; +const uint32_t DDSD_FVF = 0x00200000; +const uint32_t DDSD_SRCVBHANDLE = 0x00400000; +const uint32_t DDSD_DEPTH = 0x00800000; + +const uint32_t DDSD_ALL = 0x00fff9ee; + +const uint32_t DDPF_ALPHAPIXELS = 0x00000001; +const uint32_t DDPF_ALPHA = 0x00000002; +const uint32_t DDPF_FOURCC = 0x00000004; +const uint32_t DDPF_PALETTEINDEXED8 = 0x00000020; +const uint32_t DDPF_RGB = 0x00000040; +const uint32_t DDPF_LUMINANCE = 0x00020000; 
+ +const uint32_t DDSCAPS_COMPLEX = 0x00000008; +const uint32_t DDSCAPS_TEXTURE = 0x00001000; +const uint32_t DDSCAPS_MIPMAP = 0x00400000; + +const uint32_t DDSCAPS2_CUBEMAP = 0x00000200; +const uint32_t DDSCAPS2_CUBEMAP_POSITIVEX = 0x00000400; +const uint32_t DDSCAPS2_CUBEMAP_NEGATIVEX = 0x00000800; + +const uint32_t DDSCAPS2_CUBEMAP_POSITIVEY = 0x00001000; +const uint32_t DDSCAPS2_CUBEMAP_NEGATIVEY = 0x00002000; +const uint32_t DDSCAPS2_CUBEMAP_POSITIVEZ = 0x00004000; +const uint32_t DDSCAPS2_CUBEMAP_NEGATIVEZ = 0x00008000; + +const uint32_t DDSCAPS2_VOLUME = 0x00200000; + +typedef enum DXGI_FORMAT +{ + DXGI_FORMAT_UNKNOWN = 0, + DXGI_FORMAT_R32G32B32A32_TYPELESS = 1, + DXGI_FORMAT_R32G32B32A32_FLOAT = 2, + DXGI_FORMAT_R32G32B32A32_UINT = 3, + DXGI_FORMAT_R32G32B32A32_SINT = 4, + DXGI_FORMAT_R32G32B32_TYPELESS = 5, + DXGI_FORMAT_R32G32B32_FLOAT = 6, + DXGI_FORMAT_R32G32B32_UINT = 7, + DXGI_FORMAT_R32G32B32_SINT = 8, + DXGI_FORMAT_R16G16B16A16_TYPELESS = 9, + DXGI_FORMAT_R16G16B16A16_FLOAT = 10, + DXGI_FORMAT_R16G16B16A16_UNORM = 11, + DXGI_FORMAT_R16G16B16A16_UINT = 12, + DXGI_FORMAT_R16G16B16A16_SNORM = 13, + DXGI_FORMAT_R16G16B16A16_SINT = 14, + DXGI_FORMAT_R32G32_TYPELESS = 15, + DXGI_FORMAT_R32G32_FLOAT = 16, + DXGI_FORMAT_R32G32_UINT = 17, + DXGI_FORMAT_R32G32_SINT = 18, + DXGI_FORMAT_R32G8X24_TYPELESS = 19, + DXGI_FORMAT_D32_FLOAT_S8X24_UINT = 20, + DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS = 21, + DXGI_FORMAT_X32_TYPELESS_G8X24_UINT = 22, + DXGI_FORMAT_R10G10B10A2_TYPELESS = 23, + DXGI_FORMAT_R10G10B10A2_UNORM = 24, + DXGI_FORMAT_R10G10B10A2_UINT = 25, + DXGI_FORMAT_R11G11B10_FLOAT = 26, + DXGI_FORMAT_R8G8B8A8_TYPELESS = 27, + DXGI_FORMAT_R8G8B8A8_UNORM = 28, + DXGI_FORMAT_R8G8B8A8_UNORM_SRGB = 29, + DXGI_FORMAT_R8G8B8A8_UINT = 30, + DXGI_FORMAT_R8G8B8A8_SNORM = 31, + DXGI_FORMAT_R8G8B8A8_SINT = 32, + DXGI_FORMAT_R16G16_TYPELESS = 33, + DXGI_FORMAT_R16G16_FLOAT = 34, + DXGI_FORMAT_R16G16_UNORM = 35, + DXGI_FORMAT_R16G16_UINT = 36, + DXGI_FORMAT_R16G16_SNORM = 
37, + DXGI_FORMAT_R16G16_SINT = 38, + DXGI_FORMAT_R32_TYPELESS = 39, + DXGI_FORMAT_D32_FLOAT = 40, + DXGI_FORMAT_R32_FLOAT = 41, + DXGI_FORMAT_R32_UINT = 42, + DXGI_FORMAT_R32_SINT = 43, + DXGI_FORMAT_R24G8_TYPELESS = 44, + DXGI_FORMAT_D24_UNORM_S8_UINT = 45, + DXGI_FORMAT_R24_UNORM_X8_TYPELESS = 46, + DXGI_FORMAT_X24_TYPELESS_G8_UINT = 47, + DXGI_FORMAT_R8G8_TYPELESS = 48, + DXGI_FORMAT_R8G8_UNORM = 49, + DXGI_FORMAT_R8G8_UINT = 50, + DXGI_FORMAT_R8G8_SNORM = 51, + DXGI_FORMAT_R8G8_SINT = 52, + DXGI_FORMAT_R16_TYPELESS = 53, + DXGI_FORMAT_R16_FLOAT = 54, + DXGI_FORMAT_D16_UNORM = 55, + DXGI_FORMAT_R16_UNORM = 56, + DXGI_FORMAT_R16_UINT = 57, + DXGI_FORMAT_R16_SNORM = 58, + DXGI_FORMAT_R16_SINT = 59, + DXGI_FORMAT_R8_TYPELESS = 60, + DXGI_FORMAT_R8_UNORM = 61, + DXGI_FORMAT_R8_UINT = 62, + DXGI_FORMAT_R8_SNORM = 63, + DXGI_FORMAT_R8_SINT = 64, + DXGI_FORMAT_A8_UNORM = 65, + DXGI_FORMAT_R1_UNORM = 66, + DXGI_FORMAT_R9G9B9E5_SHAREDEXP = 67, + DXGI_FORMAT_R8G8_B8G8_UNORM = 68, + DXGI_FORMAT_G8R8_G8B8_UNORM = 69, + DXGI_FORMAT_BC1_TYPELESS = 70, + DXGI_FORMAT_BC1_UNORM = 71, + DXGI_FORMAT_BC1_UNORM_SRGB = 72, + DXGI_FORMAT_BC2_TYPELESS = 73, + DXGI_FORMAT_BC2_UNORM = 74, + DXGI_FORMAT_BC2_UNORM_SRGB = 75, + DXGI_FORMAT_BC3_TYPELESS = 76, + DXGI_FORMAT_BC3_UNORM = 77, + DXGI_FORMAT_BC3_UNORM_SRGB = 78, + DXGI_FORMAT_BC4_TYPELESS = 79, + DXGI_FORMAT_BC4_UNORM = 80, + DXGI_FORMAT_BC4_SNORM = 81, + DXGI_FORMAT_BC5_TYPELESS = 82, + DXGI_FORMAT_BC5_UNORM = 83, + DXGI_FORMAT_BC5_SNORM = 84, + DXGI_FORMAT_B5G6R5_UNORM = 85, + DXGI_FORMAT_B5G5R5A1_UNORM = 86, + DXGI_FORMAT_B8G8R8A8_UNORM = 87, + DXGI_FORMAT_B8G8R8X8_UNORM = 88, + DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM = 89, + DXGI_FORMAT_B8G8R8A8_TYPELESS = 90, + DXGI_FORMAT_B8G8R8A8_UNORM_SRGB = 91, + DXGI_FORMAT_B8G8R8X8_TYPELESS = 92, + DXGI_FORMAT_B8G8R8X8_UNORM_SRGB = 93, + DXGI_FORMAT_BC6H_TYPELESS = 94, + DXGI_FORMAT_BC6H_UF16 = 95, + DXGI_FORMAT_BC6H_SF16 = 96, + DXGI_FORMAT_BC7_TYPELESS = 97, + DXGI_FORMAT_BC7_UNORM = 
98, + DXGI_FORMAT_BC7_UNORM_SRGB = 99, + DXGI_FORMAT_AYUV = 100, + DXGI_FORMAT_Y410 = 101, + DXGI_FORMAT_Y416 = 102, + DXGI_FORMAT_NV12 = 103, + DXGI_FORMAT_P010 = 104, + DXGI_FORMAT_P016 = 105, + DXGI_FORMAT_420_OPAQUE = 106, + DXGI_FORMAT_YUY2 = 107, + DXGI_FORMAT_Y210 = 108, + DXGI_FORMAT_Y216 = 109, + DXGI_FORMAT_NV11 = 110, + DXGI_FORMAT_AI44 = 111, + DXGI_FORMAT_IA44 = 112, + DXGI_FORMAT_P8 = 113, + DXGI_FORMAT_A8P8 = 114, + DXGI_FORMAT_B4G4R4A4_UNORM = 115, + DXGI_FORMAT_P208 = 130, + DXGI_FORMAT_V208 = 131, + DXGI_FORMAT_V408 = 132, + DXGI_FORMAT_FORCE_UINT = 0xffffffff +} DXGI_FORMAT; + +enum D3D10_RESOURCE_DIMENSION +{ + D3D10_RESOURCE_DIMENSION_UNKNOWN = 0, + D3D10_RESOURCE_DIMENSION_BUFFER = 1, + D3D10_RESOURCE_DIMENSION_TEXTURE1D = 2, + D3D10_RESOURCE_DIMENSION_TEXTURE2D = 3, + D3D10_RESOURCE_DIMENSION_TEXTURE3D = 4 +}; + +struct DDS_HEADER_DXT10 +{ + DXGI_FORMAT dxgiFormat; + D3D10_RESOURCE_DIMENSION resourceDimension; + uint32_t miscFlag; + uint32_t arraySize; + uint32_t miscFlags2; +}; + diff --git a/external/basis_universal/example_transcoding/example_transcoding.cpp b/external/basis_universal/example_transcoding/example_transcoding.cpp new file mode 100644 index 0000000000..32335e4345 --- /dev/null +++ b/external/basis_universal/example_transcoding/example_transcoding.cpp @@ -0,0 +1,100 @@ +// example_transcoding.cpp: Very simple transcoding-only example. Does not depend on the basisu encoder library at all, just basisu_transcoder.cpp. +// You can use AMD Compressonator or Microsoft's DirectXTex tools on github to view the written DX10 .DDS file. +#include +#include + +// for testing +//#define BASISD_SUPPORT_XUASTC (0) +//#define BASISD_SUPPORT_KTX2_ZSTD (0) + +#include "../transcoder/basisu_transcoder.h" +#include "utils.h" + +int main() +{ + basist::basisu_transcoder_init(); + + // Read the .KTX2 file's data into memory. 
+ utils::uint8_vec ktx2_file_data; + if (!utils::read_file("../test_files/base_xuastc_arith.ktx2", ktx2_file_data)) + { + if (!utils::read_file("base_xuastc_arith.ktx2", ktx2_file_data)) + { + fprintf(stderr, "Can't read file ../test_files/base_xuastc_arith.ktx2 or base_xuastc_arith.ktx2\n"); + return EXIT_FAILURE; + } + } + + printf("Read file base_xuastc_arith.ktx2\n"); + + if (ktx2_file_data.size() > UINT32_MAX) + { + fprintf(stderr, "KTX2 file too large\n"); + return EXIT_FAILURE; + } + + basist::ktx2_transcoder transcoder; + + // Initialize the transcoder. + if (!transcoder.init(ktx2_file_data.data(), (uint32_t)ktx2_file_data.size())) + return EXIT_FAILURE; + + const uint32_t width = transcoder.get_width(); + const uint32_t height = transcoder.get_height(); + const uint32_t num_levels = transcoder.get_levels(); + const bool is_srgb = transcoder.is_srgb(); + + printf("KTX2 dimensions: %ux%u, num mip levels: %u, sRGB: %u\n", width, height, num_levels, is_srgb); + + // Can't transcode HDR to LDR formats. + if (transcoder.is_hdr()) + { + fprintf(stderr, "Expected LDR KTX2 file\n"); + return EXIT_FAILURE; + } + + // Ensure BC7 support was enabled at compilation time (it will be enabled by default). + const basist::transcoder_texture_format tex_fmt = basist::transcoder_texture_format::cTFBC7_RGBA; + if (!basist::basis_is_format_supported(tex_fmt, transcoder.get_basis_tex_format())) + { + printf("BC7 was disabled in the transcoder at compilation\n"); + return EXIT_FAILURE; + } + + // Begin transcoding (this will be a no-op with UASTC HDR textures, but you still need to do it. For ETC1S it'll unpack the global codebooks). + transcoder.start_transcoding(); + + // Transcode to BC7 and write a BC7 .DDS file. 
+ + // Bytes per block (8 or 16 for BC1-7) + const uint32_t bytes_per_block = basist::basis_get_bytes_per_block_or_pixel(tex_fmt); + // Compute total bytes needed to transcode the slice + const uint32_t total_bytes = basist::basis_compute_transcoded_image_size_in_bytes(tex_fmt, width, height); + // Derive the total number of blocks the output buffer can hold. The transcoder will use this to verify the buffer is large enough. + const uint32_t total_blocks = total_bytes / bytes_per_block; + + // Allocate the buffer to hold the blocks + utils::uint8_vec tex_buffer(total_bytes); + + // Transcode the level + bool status = transcoder.transcode_image_level(0, 0, 0, + tex_buffer.data(), total_blocks, + tex_fmt, 0); + + if (!status) + { + fprintf(stderr, "transcoder.transcode_image_level() failed\n"); + return EXIT_FAILURE; + } + + // Write an sRGB DX10-style .DDS file. + if (!utils::save_dds("out.dds", width, height, tex_buffer.data(), 8, DXGI_FORMAT_BC7_UNORM_SRGB, true, true)) + { + fprintf(stderr, "save_dds() failed\n"); + return EXIT_FAILURE; + } + + printf("Wrote out.dds\n"); + + return EXIT_SUCCESS; +} diff --git a/external/basis_universal/example_transcoding/example_transcoding.manifest b/external/basis_universal/example_transcoding/example_transcoding.manifest new file mode 100644 index 0000000000..b4baf6b96d --- /dev/null +++ b/external/basis_universal/example_transcoding/example_transcoding.manifest @@ -0,0 +1,10 @@ + + + + + + UTF-8 + + + + diff --git a/external/basis_universal/example_transcoding/example_transcoding.vcxproj b/external/basis_universal/example_transcoding/example_transcoding.vcxproj new file mode 100644 index 0000000000..2cd44e1412 --- /dev/null +++ b/external/basis_universal/example_transcoding/example_transcoding.vcxproj @@ -0,0 +1,202 @@ + + + + + Debug + ARM64EC + + + Debug + Win32 + + + Release + ARM64EC + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 17.0 + Win32Proj + {13333092-fcfe-4d74-8e76-f10c6037593c} + 
exampletranscoding + 10.0 + + + + Application + true + Unicode + v145 + + + Application + false + true + Unicode + v145 + + + Application + true + v145 + Unicode + + + Application + true + v145 + Unicode + + + Application + false + v145 + true + Unicode + + + Application + false + v145 + true + Unicode + + + + + + + + + + + + + + + + + + + + + + + + + + + + Level4 + true + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + stdcpp17 + AdvancedVectorExtensions + + + Console + true + + + + + Level4 + true + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + stdcpp17 + AdvancedVectorExtensions + + + Console + true + + + + + true + _DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + stdcpp17 + Level4 + + + Console + true + + + + + true + _DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + stdcpp17 + Level4 + + + Console + true + + + + + Level4 + true + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + stdcpp17 + + + Console + true + + + + + Level4 + true + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + stdcpp17 + + + Console + true + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/external/basis_universal/example_transcoding/example_transcoding.vcxproj.filters b/external/basis_universal/example_transcoding/example_transcoding.vcxproj.filters new file mode 100644 index 0000000000..563e6b9ffa --- /dev/null +++ b/external/basis_universal/example_transcoding/example_transcoding.vcxproj.filters @@ -0,0 +1,47 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + {db43163f-6d1b-46cf-90ad-24650d502e6a} + + + + + Source Files + + + Source Files + + + Source Files\utils + + + Source Files\utils + + + + + Source Files 
+ + + + + Source Files\utils + + + Source Files\utils + + + \ No newline at end of file diff --git a/external/basis_universal/example_transcoding/utils.cpp b/external/basis_universal/example_transcoding/utils.cpp new file mode 100644 index 0000000000..e42e71feeb --- /dev/null +++ b/external/basis_universal/example_transcoding/utils.cpp @@ -0,0 +1,948 @@ +// File: utils.cpp +#include "utils.h" +//#include "lodepng.h" +//#include "miniz.h" + +namespace utils +{ + +#define FLOOD_PUSH(y, xl, xr, dy) if (((y + (dy)) >= 0) && ((y + (dy)) < (int)m_height)) { stack.push_back(fill_segment(y, xl, xr, dy)); } + +// See http://www.realtimerendering.com/resources/GraphicsGems/gems/SeedFill.c +uint32_t image_u8::flood_fill(int x, int y, const color_quad_u8& c, const color_quad_u8& b, std::vector* pSet_pixels) +{ + uint32_t total_set = 0; + + if (!flood_fill_is_inside(x, y, b)) + return 0; + + std::vector stack; + stack.reserve(64); + + FLOOD_PUSH(y, x, x, 1); + FLOOD_PUSH(y + 1, x, x, -1); + + while (stack.size()) + { + fill_segment s = stack.back(); + stack.pop_back(); + + int x1 = s.m_xl, x2 = s.m_xr, dy = s.m_dy; + y = s.m_y + s.m_dy; + + for (x = x1; (x >= 0) && flood_fill_is_inside(x, y, b); x--) + { + (*this)(x, y) = c; + total_set++; + if (pSet_pixels) + pSet_pixels->push_back(pixel_coord(x, y)); + } + + int l; + + if (x >= x1) + goto skip; + + l = x + 1; + if (l < x1) + FLOOD_PUSH(y, l, x1 - 1, -dy); + + x = x1 + 1; + + do + { + for (; x <= ((int)m_width - 1) && flood_fill_is_inside(x, y, b); x++) + { + (*this)(x, y) = c; + total_set++; + if (pSet_pixels) + pSet_pixels->push_back(pixel_coord(x, y)); + } + FLOOD_PUSH(y, l, x - 1, dy); + + if (x > (x2 + 1)) + FLOOD_PUSH(y, x2 + 1, x - 1, -dy); + + skip: + for (x++; x <= x2 && !flood_fill_is_inside(x, y, b); x++) + ; + + l = x; + } while (x <= x2); + } + + return total_set; +} + +void image_u8::draw_line(int xs, int ys, int xe, int ye, const color_quad_u8& color) +{ + if (xs > xe) + { + std::swap(xs, xe); + std::swap(ys, 
ye); + } + + int dx = xe - xs, dy = ye - ys; + if (!dx) + { + if (ys > ye) + std::swap(ys, ye); + for (int i = ys; i <= ye; i++) + set_pixel_clipped(xs, i, color); + } + else if (!dy) + { + for (int i = xs; i < xe; i++) + set_pixel_clipped(i, ys, color); + } + else if (dy > 0) + { + if (dy <= dx) + { + int e = 2 * dy - dx, e_no_inc = 2 * dy, e_inc = 2 * (dy - dx); + rasterize_line(xs, ys, xe, ye, 0, 1, e, e_inc, e_no_inc, color); + } + else + { + int e = 2 * dx - dy, e_no_inc = 2 * dx, e_inc = 2 * (dx - dy); + rasterize_line(xs, ys, xe, ye, 1, 1, e, e_inc, e_no_inc, color); + } + } + else + { + dy = -dy; + if (dy <= dx) + { + int e = 2 * dy - dx, e_no_inc = 2 * dy, e_inc = 2 * (dy - dx); + rasterize_line(xs, ys, xe, ye, 0, -1, e, e_inc, e_no_inc, color); + } + else + { + int e = 2 * dx - dy, e_no_inc = (2 * dx), e_inc = 2 * (dx - dy); + rasterize_line(xe, ye, xs, ys, 1, -1, e, e_inc, e_no_inc, color); + } + } +} + +void image_u8::rasterize_line(int xs, int ys, int xe, int ye, int pred, int inc_dec, int e, int e_inc, int e_no_inc, const color_quad_u8& color) +{ + int start, end, var; + + if (pred) + { + start = ys; + end = ye; + var = xs; + for (int i = start; i <= end; i++) + { + set_pixel_clipped(var, i, color); + if (e < 0) + e += e_no_inc; + else + { + var += inc_dec; + e += e_inc; + } + } + } + else + { + start = xs; + end = xe; + var = ys; + for (int i = start; i <= end; i++) + { + set_pixel_clipped(i, var, color); + if (e < 0) + e += e_no_inc; + else + { + var += inc_dec; + e += e_inc; + } + } + } +} + +#if 0 +bool load_png(const char* pFilename, image_u8& img) +{ + img.clear(); + + std::vector pixels; + unsigned int w = 0, h = 0; + unsigned int e = lodepng::decode(pixels, w, h, pFilename); + if (e != 0) + { + fprintf(stderr, "Failed loading PNG file %s\n", pFilename); + return false; + } + + img.init(w, h); + memcpy(&img.get_pixels()[0], &pixels[0], w * h * sizeof(uint32_t)); + + return true; +} + +bool save_png(const char* pFilename, const image_u8& img, 
bool save_alpha) +{ + const uint32_t w = img.width(); + const uint32_t h = img.height(); + + std::vector pixels; + if (save_alpha) + { + pixels.resize(w * h * sizeof(color_quad_u8)); + memcpy(&pixels[0], &img.get_pixels()[0], w * h * sizeof(color_quad_u8)); + } + else + { + pixels.resize(w * h * 3); + unsigned char* pDst = &pixels[0]; + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++, pDst += 3) + pDst[0] = img(x, y)[0], pDst[1] = img(x, y)[1], pDst[2] = img(x, y)[2]; + } + + return lodepng::encode(pFilename, pixels, w, h, save_alpha ? LCT_RGBA : LCT_RGB) == 0; +} +#endif + +static float gauss(int x, int y, float sigma_sqr) +{ + float pow = expf(-((x * x + y * y) / (2.0f * sigma_sqr))); + float g = (1.0f / (sqrtf((float)(2.0f * M_PI * sigma_sqr)))) * pow; + return g; +} + +// size_x/y should be odd +void compute_gaussian_kernel(float* pDst, int size_x, int size_y, float sigma_sqr, uint32_t flags) +{ + assert(size_x & size_y & 1); + + if (!(size_x | size_y)) + return; + + int mid_x = size_x / 2; + int mid_y = size_y / 2; + + double sum = 0; + for (int x = 0; x < size_x; x++) + { + for (int y = 0; y < size_y; y++) + { + float g; + if ((x > mid_x) && (y < mid_y)) + g = pDst[(size_x - x - 1) + y * size_x]; + else if ((x < mid_x) && (y > mid_y)) + g = pDst[x + (size_y - y - 1) * size_x]; + else if ((x > mid_x) && (y > mid_y)) + g = pDst[(size_x - x - 1) + (size_y - y - 1) * size_x]; + else + g = gauss(x - mid_x, y - mid_y, sigma_sqr); + + pDst[x + y * size_x] = g; + sum += g; + } + } + + if (flags & cComputeGaussianFlagNormalizeCenterToOne) + { + sum = pDst[mid_x + mid_y * size_x]; + } + + if (flags & (cComputeGaussianFlagNormalizeCenterToOne | cComputeGaussianFlagNormalize)) + { + double one_over_sum = 1.0f / sum; + for (int i = 0; i < size_x * size_y; i++) + pDst[i] = static_cast(pDst[i] * one_over_sum); + + if (flags & cComputeGaussianFlagNormalizeCenterToOne) + pDst[mid_x + mid_y * size_x] = 1.0f; + } + + if (flags & cComputeGaussianFlagPrint) + { 
+ printf("{\n"); + for (int y = 0; y < size_y; y++) + { + printf(" "); + for (int x = 0; x < size_x; x++) + { + printf("%f, ", pDst[x + y * size_x]); + } + printf("\n"); + } + printf("}"); + } +} + +void gaussian_filter(imagef& dst, const imagef& orig_img, uint32_t odd_filter_width, float sigma_sqr, bool wrapping, uint32_t width_divisor, uint32_t height_divisor) +{ + assert(odd_filter_width && (odd_filter_width & 1)); + odd_filter_width |= 1; + + std::vector kernel(odd_filter_width * odd_filter_width); + compute_gaussian_kernel(&kernel[0], odd_filter_width, odd_filter_width, sigma_sqr, cComputeGaussianFlagNormalize); + + const int dst_width = orig_img.get_width() / width_divisor; + const int dst_height = orig_img.get_height() / height_divisor; + + const int H = odd_filter_width / 2; + const int L = -H; + + dst.crop(dst_width, dst_height); + +//#pragma omp parallel for + for (int oy = 0; oy < dst_height; oy++) + { + for (int ox = 0; ox < dst_width; ox++) + { + vec4F c(0.0f); + + for (int yd = L; yd <= H; yd++) + { + int y = oy * height_divisor + (height_divisor >> 1) + yd; + + for (int xd = L; xd <= H; xd++) + { + int x = ox * width_divisor + (width_divisor >> 1) + xd; + + const vec4F& p = orig_img.get_clamped_or_wrapped(x, y, wrapping, wrapping); + + float w = kernel[(xd + H) + (yd + H) * odd_filter_width]; + c[0] += p[0] * w; + c[1] += p[1] * w; + c[2] += p[2] * w; + c[3] += p[3] * w; + } + } + + dst(ox, oy).set(c[0], c[1], c[2], c[3]); + } + } +} + +static void pow_image(const imagef& src, imagef& dst, const vec4F& power) +{ + dst.resize(src); + +//#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F& p = src(x, y); + + if ((power[0] == 2.0f) && (power[1] == 2.0f) && (power[2] == 2.0f) && (power[3] == 2.0f)) + dst(x, y).set(p[0] * p[0], p[1] * p[1], p[2] * p[2], p[3] * p[3]); + else + dst(x, y).set(powf(p[0], power[0]), powf(p[1], power[1]), powf(p[2], power[2]), 
powf(p[3], power[3])); + } + } +} + +#if 0 +static void mul_image(const imagef& src, imagef& dst, const vec4F& mul) +{ + dst.resize(src); + +//#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F& p = src(x, y); + dst(x, y).set(p[0] * mul[0], p[1] * mul[1], p[2] * mul[2], p[3] * mul[3]); + } + } +} +#endif + +static void scale_image(const imagef& src, imagef& dst, const vec4F& scale, const vec4F& shift) +{ + dst.resize(src); + +//#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F& p = src(x, y); + + vec4F d; + + for (uint32_t c = 0; c < 4; c++) + d[c] = scale[c] * p[c] + shift[c]; + + dst(x, y).set(d[0], d[1], d[2], d[3]); + } + } +} + +static void add_weighted_image(const imagef& src1, const vec4F& alpha, const imagef& src2, const vec4F& beta, const vec4F& gamma, imagef& dst) +{ + dst.resize(src1); + +//#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F& s1 = src1(x, y); + const vec4F& s2 = src2(x, y); + + dst(x, y).set( + s1[0] * alpha[0] + s2[0] * beta[0] + gamma[0], + s1[1] * alpha[1] + s2[1] * beta[1] + gamma[1], + s1[2] * alpha[2] + s2[2] * beta[2] + gamma[2], + s1[3] * alpha[3] + s2[3] * beta[3] + gamma[3]); + } + } +} + +static void add_image(const imagef& src1, const imagef& src2, imagef& dst) +{ + dst.resize(src1); + +//#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F& s1 = src1(x, y); + const vec4F& s2 = src2(x, y); + + dst(x, y).set(s1[0] + s2[0], s1[1] + s2[1], s1[2] + s2[2], s1[3] + s2[3]); + } + } +} + +static void adds_image(const imagef& src, const vec4F& value, imagef& dst) +{ + dst.resize(src); + +//#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) 
+ { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F& p = src(x, y); + + dst(x, y).set(p[0] + value[0], p[1] + value[1], p[2] + value[2], p[3] + value[3]); + } + } +} + +static void mul_image(const imagef& src1, const imagef& src2, imagef& dst, const vec4F& scale) +{ + dst.resize(src1); + +//#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F& s1 = src1(x, y); + const vec4F& s2 = src2(x, y); + + vec4F d; + + for (uint32_t c = 0; c < 4; c++) + { + float v1 = s1[c]; + float v2 = s2[c]; + d[c] = v1 * v2 * scale[c]; + } + + dst(x, y) = d; + } + } +} + +static void div_image(const imagef& src1, const imagef& src2, imagef& dst, const vec4F& scale) +{ + dst.resize(src1); + +//#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F& s1 = src1(x, y); + const vec4F& s2 = src2(x, y); + + vec4F d; + + for (uint32_t c = 0; c < 4; c++) + { + float v = s2[c]; + if (v == 0.0f) + d[c] = 0.0f; + else + d[c] = (s1[c] * scale[c]) / v; + } + + dst(x, y) = d; + } + } +} + +static vec4F avg_image(const imagef& src) +{ + vec4F avg(0.0f); + + for (uint32_t y = 0; y < src.get_height(); y++) + { + for (uint32_t x = 0; x < src.get_width(); x++) + { + const vec4F& s = src(x, y); + + avg += vec4F(s[0], s[1], s[2], s[3]); + } + } + + avg /= static_cast(src.get_total_pixels()); + + return avg; +} + +// Reference: https://ece.uwaterloo.ca/~z70wang/research/ssim/index.html +vec4F compute_ssim(const imagef& a, const imagef& b) +{ + imagef axb, a_sq, b_sq, mu1, mu2, mu1_sq, mu2_sq, mu1_mu2, s1_sq, s2_sq, s12, smap, t1, t2, t3; + + const float C1 = 6.50250f, C2 = 58.52250f; + + pow_image(a, a_sq, vec4F(2)); + pow_image(b, b_sq, vec4F(2)); + mul_image(a, b, axb, vec4F(1.0f)); + + gaussian_filter(mu1, a, 11, 1.5f * 1.5f); + gaussian_filter(mu2, b, 11, 1.5f * 1.5f); + + pow_image(mu1, mu1_sq, vec4F(2)); + 
pow_image(mu2, mu2_sq, vec4F(2)); + mul_image(mu1, mu2, mu1_mu2, vec4F(1.0f)); + + gaussian_filter(s1_sq, a_sq, 11, 1.5f * 1.5f); + add_weighted_image(s1_sq, vec4F(1), mu1_sq, vec4F(-1), vec4F(0), s1_sq); + + gaussian_filter(s2_sq, b_sq, 11, 1.5f * 1.5f); + add_weighted_image(s2_sq, vec4F(1), mu2_sq, vec4F(-1), vec4F(0), s2_sq); + + gaussian_filter(s12, axb, 11, 1.5f * 1.5f); + add_weighted_image(s12, vec4F(1), mu1_mu2, vec4F(-1), vec4F(0), s12); + + scale_image(mu1_mu2, t1, vec4F(2), vec4F(0)); + adds_image(t1, vec4F(C1), t1); + + scale_image(s12, t2, vec4F(2), vec4F(0)); + adds_image(t2, vec4F(C2), t2); + + mul_image(t1, t2, t3, vec4F(1)); + + add_image(mu1_sq, mu2_sq, t1); + adds_image(t1, vec4F(C1), t1); + + add_image(s1_sq, s2_sq, t2); + adds_image(t2, vec4F(C2), t2); + + mul_image(t1, t2, t1, vec4F(1)); + + div_image(t3, t1, smap, vec4F(1)); + + return avg_image(smap); +} + +vec4F compute_ssim(const image_u8& a, const image_u8& b, bool luma) +{ + image_u8 ta(a), tb(b); + + if ((ta.width() != tb.width()) || (ta.height() != tb.height())) + { + fprintf(stderr, "compute_ssim: Cropping input images to equal dimensions\n"); + + const uint32_t w = std::min(a.width(), b.width()); + const uint32_t h = std::min(a.height(), b.height()); + ta.crop(w, h); + tb.crop(w, h); + } + + if (!ta.width() || !ta.height()) + { + assert(0); + return vec4F(0); + } + + if (luma) + { + for (uint32_t y = 0; y < ta.height(); y++) + { + for (uint32_t x = 0; x < ta.width(); x++) + { + ta(x, y).set((uint8_t)ta(x, y).get_luma(), ta(x, y).a); + tb(x, y).set((uint8_t)tb(x, y).get_luma(), tb(x, y).a); + } + } + } + + imagef fta, ftb; + + fta.set(ta); + ftb.set(tb); + + return compute_ssim(fta, ftb); +} + +bool save_dds(const char* pFilename, uint32_t width, uint32_t height, const void* pBlocks, uint32_t pixel_format_bpp, DXGI_FORMAT dxgi_format, bool srgb, bool force_dx10_header) +{ + (void)srgb; + + FILE* pFile = NULL; +#ifdef _MSC_VER + fopen_s(&pFile, pFilename, "wb"); +#else + pFile = 
fopen(pFilename, "wb"); +#endif + if (!pFile) + { + fprintf(stderr, "Failed creating file %s!\n", pFilename); + return false; + } + + fwrite("DDS ", 4, 1, pFile); + + DDSURFACEDESC2 desc; + memset(&desc, 0, sizeof(desc)); + + desc.dwSize = sizeof(desc); + desc.dwFlags = DDSD_WIDTH | DDSD_HEIGHT | DDSD_PIXELFORMAT | DDSD_CAPS; + + desc.dwWidth = width; + desc.dwHeight = height; + + desc.ddsCaps.dwCaps = DDSCAPS_TEXTURE; + desc.ddpfPixelFormat.dwSize = sizeof(desc.ddpfPixelFormat); + + desc.ddpfPixelFormat.dwFlags |= DDPF_FOURCC; + + desc.lPitch = (((desc.dwWidth + 3) & ~3) * ((desc.dwHeight + 3) & ~3) * pixel_format_bpp) >> 3; + desc.dwFlags |= DDSD_LINEARSIZE; + + desc.ddpfPixelFormat.dwRGBBitCount = 0; + + if ((!force_dx10_header) && + ((dxgi_format == DXGI_FORMAT_BC1_UNORM) || + (dxgi_format == DXGI_FORMAT_BC3_UNORM) || + (dxgi_format == DXGI_FORMAT_BC4_UNORM) || + (dxgi_format == DXGI_FORMAT_BC5_UNORM))) + { + if (dxgi_format == DXGI_FORMAT_BC1_UNORM) + desc.ddpfPixelFormat.dwFourCC = (uint32_t)PIXEL_FMT_FOURCC('D', 'X', 'T', '1'); + else if (dxgi_format == DXGI_FORMAT_BC3_UNORM) + desc.ddpfPixelFormat.dwFourCC = (uint32_t)PIXEL_FMT_FOURCC('D', 'X', 'T', '5'); + else if (dxgi_format == DXGI_FORMAT_BC4_UNORM) + desc.ddpfPixelFormat.dwFourCC = (uint32_t)PIXEL_FMT_FOURCC('A', 'T', 'I', '1'); + else if (dxgi_format == DXGI_FORMAT_BC5_UNORM) + desc.ddpfPixelFormat.dwFourCC = (uint32_t)PIXEL_FMT_FOURCC('A', 'T', 'I', '2'); + + fwrite(&desc, sizeof(desc), 1, pFile); + } + else + { + desc.ddpfPixelFormat.dwFourCC = (uint32_t)PIXEL_FMT_FOURCC('D', 'X', '1', '0'); + + fwrite(&desc, sizeof(desc), 1, pFile); + + DDS_HEADER_DXT10 hdr10; + memset(&hdr10, 0, sizeof(hdr10)); + + // Not all tools support DXGI_FORMAT_BC7_UNORM_SRGB (like NVTT), but ddsview in DirectXTex pays attention to it. So not sure what to do here. + // For best compatibility just write DXGI_FORMAT_BC7_UNORM. + //hdr10.dxgiFormat = srgb ? 
DXGI_FORMAT_BC7_UNORM_SRGB : DXGI_FORMAT_BC7_UNORM; + hdr10.dxgiFormat = dxgi_format; // DXGI_FORMAT_BC7_UNORM; + hdr10.resourceDimension = D3D10_RESOURCE_DIMENSION_TEXTURE2D; + hdr10.arraySize = 1; + + fwrite(&hdr10, sizeof(hdr10), 1, pFile); + } + + fwrite(pBlocks, desc.lPitch, 1, pFile); + + if (fclose(pFile) == EOF) + { + fprintf(stderr, "Failed writing to DDS file %s!\n", pFilename); + return false; + } + + return true; +} + +void strip_extension(std::string& s) +{ + for (int32_t i = (int32_t)s.size() - 1; i >= 0; i--) + { + if (s[i] == '.') + { + s.resize(i); + break; + } + } +} + +void strip_path(std::string& s) +{ + for (int32_t i = (int32_t)s.size() - 1; i >= 0; i--) + { + if ((s[i] == '/') || (s[i] == ':') || (s[i] == '\\')) + { + s.erase(0, i + 1); + break; + } + } +} + +uint32_t hash_hsieh(const uint8_t* pBuf, size_t len) +{ + if (!pBuf || !len) + return 0; + + uint32_t h = static_cast(len); + + const uint32_t bytes_left = len & 3; + len >>= 2; + + while (len--) + { + const uint16_t* pWords = reinterpret_cast(pBuf); + + h += pWords[0]; + + const uint32_t t = (pWords[1] << 11) ^ h; + h = (h << 16) ^ t; + + pBuf += sizeof(uint32_t); + + h += h >> 11; + } + + switch (bytes_left) + { + case 1: + h += *reinterpret_cast(pBuf); + h ^= h << 10; + h += h >> 1; + break; + case 2: + h += *reinterpret_cast(pBuf); + h ^= h << 11; + h += h >> 17; + break; + case 3: + h += *reinterpret_cast(pBuf); + h ^= h << 16; + h ^= (static_cast(pBuf[sizeof(uint16_t)])) << 18; + h += h >> 11; + break; + default: + break; + } + + h ^= h << 3; + h += h >> 5; + h ^= h << 4; + h += h >> 17; + h ^= h << 25; + h += h >> 6; + + return h; +} + +float compute_block_max_std_dev(const color_quad_u8* pPixels, uint32_t block_width, uint32_t block_height, uint32_t num_comps) +{ + tracked_stat comp_stats[4]; + + for (uint32_t y = 0; y < block_height; y++) + { + for (uint32_t x = 0; x < block_width; x++) + { + const color_quad_u8* pPixel = pPixels + x + y * block_width; + + for (uint32_t c = 0; c 
< num_comps; c++) + comp_stats[c].update(pPixel->m_c[c]); + } + } + + float max_std_dev = 0.0f; + for (uint32_t i = 0; i < num_comps; i++) + max_std_dev = std::max(max_std_dev, comp_stats[i].get_std_dev()); + return max_std_dev; +} + +const uint32_t ASTC_SIG = 0x5CA1AB13; + +#pragma pack(push, 1) +struct astc_header +{ + uint32_t m_sig; + uint8_t m_block_x; + uint8_t m_block_y; + uint8_t m_block_z; + uint8_t m_width[3]; + uint8_t m_height[3]; + uint8_t m_depth[3]; +}; +#pragma pack(pop) + +bool save_astc_file(const char* pFilename, block16_vec& blocks, uint32_t width, uint32_t height, uint32_t block_width, uint32_t block_height) +{ + FILE* pFile = nullptr; + +#ifdef _MSC_VER + fopen_s(&pFile, pFilename, "wb"); +#else + pFile = fopen(pFilename, "wb"); +#endif + + if (!pFile) + return false; + + astc_header hdr; + memset(&hdr, 0, sizeof(hdr)); + + hdr.m_sig = ASTC_SIG; + hdr.m_block_x = (uint8_t)block_width; + hdr.m_block_y = (uint8_t)block_height; + hdr.m_block_z = 1; + hdr.m_width[0] = (uint8_t)(width); + hdr.m_width[1] = (uint8_t)(width >> 8); + hdr.m_width[2] = (uint8_t)(width >> 16); + hdr.m_height[0] = (uint8_t)(height); + hdr.m_height[1] = (uint8_t)(height >> 8); + hdr.m_height[2] = (uint8_t)(height >> 16); + hdr.m_depth[0] = 1; + fwrite(&hdr, sizeof(hdr), 1, pFile); + + fwrite(blocks.data(), 16, blocks.size(), pFile); + if (fclose(pFile) == EOF) + return false; + + return true; +} + +bool load_astc_file(const char* pFilename, block16_vec& blocks, uint32_t& width, uint32_t& height, uint32_t& block_width, uint32_t& block_height) +{ + FILE* pFile = nullptr; + +#ifdef _MSC_VER + fopen_s(&pFile, pFilename, "rb"); +#else + pFile = fopen(pFilename, "rb"); +#endif + + if (!pFile) + return false; + + astc_header hdr; + if (fread(&hdr, sizeof(hdr), 1, pFile) != 1) + { + fclose(pFile); + return false; + } + + if (hdr.m_sig != ASTC_SIG) + { + fclose(pFile); + return false; + } + + width = hdr.m_width[0] + (hdr.m_width[1] << 8) + (hdr.m_width[2] << 16); + height = 
hdr.m_height[0] + (hdr.m_height[1] << 8) + (hdr.m_height[2] << 16); + uint32_t depth = hdr.m_depth[0] + (hdr.m_depth[1] << 8) + (hdr.m_depth[2] << 16); + + if ((width < 1) || (width > 32768) || (height < 1) || (height > 32768)) + return false; + if ((hdr.m_block_z != 1) || (depth != 1)) + return false; + + block_width = hdr.m_block_x; + block_height = hdr.m_block_y; + + if ((block_width < 4) || (block_width > 12) || (block_height < 4) || (block_height > 12)) + return false; + + uint32_t blocks_x = (width + block_width - 1) / block_width; + uint32_t blocks_y = (height + block_height - 1) / block_height; + uint32_t total_blocks = blocks_x * blocks_y; + + blocks.resize(total_blocks); + + if (fread(blocks.data(), 16, total_blocks, pFile) != total_blocks) + { + fclose(pFile); + return false; + } + + fclose(pFile); + return true; +} + +#if 0 +uint32_t get_deflate_size(const void* pData, size_t data_size) +{ + size_t comp_size = 0; + void* pPre_RDO_Comp_data = tdefl_compress_mem_to_heap(pData, data_size, &comp_size, TDEFL_MAX_PROBES_MASK);// TDEFL_DEFAULT_MAX_PROBES); + mz_free(pPre_RDO_Comp_data); + + if (comp_size > UINT32_MAX) + return UINT32_MAX; + + return (uint32_t)comp_size; +} +#endif + +bool read_file(const char* pFilename, uint8_vec& buf) +{ + buf.resize(0); + + FILE* pFile = nullptr; +#if _MSC_VER + fopen_s(&pFile, pFilename, "rb"); +#else + pFile = fopen(pFilename, "rb"); +#endif + if (!pFile) + return false; + + fseek(pFile, 0, SEEK_END); + + long file_end_ofs = ftell(pFile); + if (file_end_ofs <= 0) + { + fclose(pFile); + return false; + } + + size_t sz = static_cast(file_end_ofs); + if (sz != (unsigned long)file_end_ofs) + { + fclose(pFile); + return false; + } + + fseek(pFile, 0, SEEK_SET); + + buf.resize(sz); + + if (fread(buf.data(), sizeof(uint8_t), sz, pFile) != sz) + { + fclose(pFile); + return false; + } + + fclose(pFile); + return true; +} + +} // namespace utils diff --git a/external/basis_universal/example_transcoding/utils.h 
b/external/basis_universal/example_transcoding/utils.h new file mode 100644 index 0000000000..d161e5ff27 --- /dev/null +++ b/external/basis_universal/example_transcoding/utils.h @@ -0,0 +1,2621 @@ +// File: utils.h +#pragma once +#ifdef _MSC_VER +#pragma warning (push) +#pragma warning (disable:4127) // conditional expression is constant +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "dds_defs.h" + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +#define ASSUME(c) static_assert(c, #c) +#define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0])) + +#define VECTOR_TEXT_LINE_SIZE (30.0f) +#define VECTOR_TEXT_CORE_LINE_SIZE (21.0f) + +#define UNUSED(x) (void)x + +namespace utils +{ +typedef std::vector uint8_vec; + +extern const uint32_t g_pretty_colors[]; +extern const uint32_t g_num_pretty_colors; + +const float cDegToRad = 0.01745329252f; +const float cRadToDeg = 57.29577951f; + +enum eClear { cClear }; +enum eZero { cZero }; +enum eInitExpand { cInitExpand }; + +inline int iabs(int i) { if (i < 0) i = -i; return i; } +inline uint8_t clamp255(int32_t i) { return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i); } +template inline S clamp(S value, S low, S high) { return (value < low) ? low : ((value > high) ? high : value); } +template inline F lerp(F a, F b, F s) { return a + (b - a) * s; } +template inline F square(F a) { return a * a; } + +template +inline T prev_wrap(T i, T n) +{ + T temp = i - 1; + if (temp < 0) + temp = n - 1; + return temp; +} + +template +inline T next_wrap(T i, T n) +{ + T temp = i + 1; + if (temp >= n) + temp = 0; + return temp; +} + +inline int posmod(int x, int y) +{ + if (x >= 0) + return (x < y) ? x : (x % y); + int m = (-x) % y; + return (m != 0) ? 
(y - m) : m; +} + +inline float deg_to_rad(float f) +{ + return f * cDegToRad; +}; + +inline float rad_to_deg(float f) +{ + return f * cRadToDeg; +}; + +template +struct rel_ops +{ + friend bool operator!=(const T& x, const T& y) + { + return (!(x == y)); + } + friend bool operator>(const T& x, const T& y) + { + return (y < x); + } + friend bool operator<=(const T& x, const T& y) + { + return (!(y < x)); + } + friend bool operator>=(const T& x, const T& y) + { + return (!(x < y)); + } +}; + +template +class vec : public rel_ops > +{ +public: + typedef T scalar_type; + enum + { + num_elements = N + }; + + inline vec() + { + } + + inline vec(eClear) + { + clear(); + } + + inline vec(const vec& other) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = other.m_s[i]; + } + + template + inline vec(const vec& other) + { + set(other); + } + + template + inline vec(const vec& other, T w) + { + *this = other; + m_s[N - 1] = w; + } + + explicit inline vec(T val) + { + set(val); + } + + inline vec(T val0, T val1) + { + set(val0, val1); + } + + inline vec(T val0, T val1, T val2) + { + set(val0, val1, val2); + } + + inline vec(T val0, T val1, T val2, T val3) + { + set(val0, val1, val2, val3); + } + + inline vec(T val0, T val1, T val2, T val3, T val4, T val5) + { + set(val0, val1, val2, val3, val4, val5); + } + + inline vec( + T val0, T val1, T val2, T val3, + T val4, T val5, T val6, T val7, + T val8, T val9, T val10, T val11, + T val12, T val13, T val14, T val15) + { + set(val0, val1, val2, val3, + val4, val5, val6, val7, + val8, val9, val10, val11, + val12, val13, val14, val15); + } + + inline vec( + T val0, T val1, T val2, T val3, + T val4, T val5, T val6, T val7, + T val8, T val9, T val10, T val11, + T val12, T val13, T val14, T val15, + T val16, T val17, T val18, T val19) + { + set(val0, val1, val2, val3, + val4, val5, val6, val7, + val8, val9, val10, val11, + val12, val13, val14, val15, + val16, val17, val18, val19); + } + + inline vec( + T val0, T val1, T val2, T val3, + T 
val4, T val5, T val6, T val7, + T val8, T val9, T val10, T val11, + T val12, T val13, T val14, T val15, + T val16, T val17, T val18, T val19, + T val20, T val21, T val22, T val23, + T val24) + { + set(val0, val1, val2, val3, + val4, val5, val6, val7, + val8, val9, val10, val11, + val12, val13, val14, val15, + val16, val17, val18, val19, + val20, val21, val22, val23, + val24); + } + + inline void clear() + { + if (N > 4) + memset(m_s, 0, sizeof(m_s)); + else + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = 0; + } + } + + template + inline vec& set(const vec& other) + { + if ((void*)this == (void*)&other) + return *this; + const uint32_t m = std::min(N, ON); + uint32_t i; + for (i = 0; i < m; i++) + m_s[i] = static_cast(other[i]); + for (; i < N; i++) + m_s[i] = 0; + return *this; + } + + inline vec& set_component(uint32_t index, T val) + { + assert(index < N); + m_s[index] = val; + return *this; + } + + inline vec& set(T val) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = val; + return *this; + } + + inline vec& set(T val0, T val1) + { + m_s[0] = val0; + if (N >= 2) + { + m_s[1] = val1; + + for (uint32_t i = 2; i < N; i++) + m_s[i] = 0; + } + return *this; + } + + inline vec& set(T val0, T val1, T val2) + { + m_s[0] = val0; + if (N >= 2) + { + m_s[1] = val1; + + if (N >= 3) + { + m_s[2] = val2; + + for (uint32_t i = 3; i < N; i++) + m_s[i] = 0; + } + } + return *this; + } + + inline vec& set(T val0, T val1, T val2, T val3) + { + m_s[0] = val0; + if (N >= 2) + { + m_s[1] = val1; + + if (N >= 3) + { + m_s[2] = val2; + + if (N >= 4) + { + m_s[3] = val3; + + for (uint32_t i = 4; i < N; i++) + m_s[i] = 0; + } + } + } + return *this; + } + + inline vec& set(T val0, T val1, T val2, T val3, T val4, T val5) + { + m_s[0] = val0; + if (N >= 2) + { + m_s[1] = val1; + + if (N >= 3) + { + m_s[2] = val2; + + if (N >= 4) + { + m_s[3] = val3; + + if (N >= 5) + { + m_s[4] = val4; + + if (N >= 6) + { + m_s[5] = val5; + + for (uint32_t i = 6; i < N; i++) + m_s[i] = 0; + } + } + } + 
} + } + return *this; + } + + inline vec& set( + T val0, T val1, T val2, T val3, + T val4, T val5, T val6, T val7, + T val8, T val9, T val10, T val11, + T val12, T val13, T val14, T val15) + { + m_s[0] = val0; + if (N >= 2) + m_s[1] = val1; + if (N >= 3) + m_s[2] = val2; + if (N >= 4) + m_s[3] = val3; + + if (N >= 5) + m_s[4] = val4; + if (N >= 6) + m_s[5] = val5; + if (N >= 7) + m_s[6] = val6; + if (N >= 8) + m_s[7] = val7; + + if (N >= 9) + m_s[8] = val8; + if (N >= 10) + m_s[9] = val9; + if (N >= 11) + m_s[10] = val10; + if (N >= 12) + m_s[11] = val11; + + if (N >= 13) + m_s[12] = val12; + if (N >= 14) + m_s[13] = val13; + if (N >= 15) + m_s[14] = val14; + if (N >= 16) + m_s[15] = val15; + + for (uint32_t i = 16; i < N; i++) + m_s[i] = 0; + + return *this; + } + + inline vec& set( + T val0, T val1, T val2, T val3, + T val4, T val5, T val6, T val7, + T val8, T val9, T val10, T val11, + T val12, T val13, T val14, T val15, + T val16, T val17, T val18, T val19) + { + m_s[0] = val0; + if (N >= 2) + m_s[1] = val1; + if (N >= 3) + m_s[2] = val2; + if (N >= 4) + m_s[3] = val3; + + if (N >= 5) + m_s[4] = val4; + if (N >= 6) + m_s[5] = val5; + if (N >= 7) + m_s[6] = val6; + if (N >= 8) + m_s[7] = val7; + + if (N >= 9) + m_s[8] = val8; + if (N >= 10) + m_s[9] = val9; + if (N >= 11) + m_s[10] = val10; + if (N >= 12) + m_s[11] = val11; + + if (N >= 13) + m_s[12] = val12; + if (N >= 14) + m_s[13] = val13; + if (N >= 15) + m_s[14] = val14; + if (N >= 16) + m_s[15] = val15; + + if (N >= 17) + m_s[16] = val16; + if (N >= 18) + m_s[17] = val17; + if (N >= 19) + m_s[18] = val18; + if (N >= 20) + m_s[19] = val19; + + for (uint32_t i = 20; i < N; i++) + m_s[i] = 0; + + return *this; + } + + inline vec& set( + T val0, T val1, T val2, T val3, + T val4, T val5, T val6, T val7, + T val8, T val9, T val10, T val11, + T val12, T val13, T val14, T val15, + T val16, T val17, T val18, T val19, + T val20, T val21, T val22, T val23, + T val24) + { + m_s[0] = val0; + if (N >= 2) + m_s[1] = val1; 
+ if (N >= 3) + m_s[2] = val2; + if (N >= 4) + m_s[3] = val3; + + if (N >= 5) + m_s[4] = val4; + if (N >= 6) + m_s[5] = val5; + if (N >= 7) + m_s[6] = val6; + if (N >= 8) + m_s[7] = val7; + + if (N >= 9) + m_s[8] = val8; + if (N >= 10) + m_s[9] = val9; + if (N >= 11) + m_s[10] = val10; + if (N >= 12) + m_s[11] = val11; + + if (N >= 13) + m_s[12] = val12; + if (N >= 14) + m_s[13] = val13; + if (N >= 15) + m_s[14] = val14; + if (N >= 16) + m_s[15] = val15; + + if (N >= 17) + m_s[16] = val16; + if (N >= 18) + m_s[17] = val17; + if (N >= 19) + m_s[18] = val18; + if (N >= 20) + m_s[19] = val19; + + if (N >= 21) + m_s[20] = val20; + if (N >= 22) + m_s[21] = val21; + if (N >= 23) + m_s[22] = val22; + if (N >= 24) + m_s[23] = val23; + + if (N >= 25) + m_s[24] = val24; + + for (uint32_t i = 25; i < N; i++) + m_s[i] = 0; + + return *this; + } + + inline vec& set(const T* pValues) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = pValues[i]; + return *this; + } + + template + inline vec& swizzle_set(const vec& other, uint32_t i) + { + return set(static_cast(other[i])); + } + + template + inline vec& swizzle_set(const vec& other, uint32_t i, uint32_t j) + { + return set(static_cast(other[i]), static_cast(other[j])); + } + + template + inline vec& swizzle_set(const vec& other, uint32_t i, uint32_t j, uint32_t k) + { + return set(static_cast(other[i]), static_cast(other[j]), static_cast(other[k])); + } + + template + inline vec& swizzle_set(const vec& other, uint32_t i, uint32_t j, uint32_t k, uint32_t l) + { + return set(static_cast(other[i]), static_cast(other[j]), static_cast(other[k]), static_cast(other[l])); + } + + inline vec& operator=(const vec& rhs) + { + if (this != &rhs) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = rhs.m_s[i]; + } + return *this; + } + + template + inline vec& operator=(const vec& other) + { + if ((void*)this == (void*)&other) + return *this; + + uint32_t s = std::min(N, O); + + uint32_t i; + for (i = 0; i < s; i++) + m_s[i] = 
static_cast(other[i]); + + for (; i < N; i++) + m_s[i] = 0; + + return *this; + } + + inline bool operator==(const vec& rhs) const + { + for (uint32_t i = 0; i < N; i++) + if (!(m_s[i] == rhs.m_s[i])) + return false; + return true; + } + + inline bool operator<(const vec& rhs) const + { + for (uint32_t i = 0; i < N; i++) + { + if (m_s[i] < rhs.m_s[i]) + return true; + else if (!(m_s[i] == rhs.m_s[i])) + return false; + } + + return false; + } + + inline T operator[](uint32_t i) const + { + assert(i < N); + return m_s[i]; + } + + inline T& operator[](uint32_t i) + { + assert(i < N); + return m_s[i]; + } + + template + inline uint64_t get_component_as_uint() const + { + ASSUME(index < N); + if (sizeof(T) == sizeof(float)) + return *reinterpret_cast(&m_s[index]); + else + return *reinterpret_cast(&m_s[index]); + } + + inline T get_x(void) const + { + return m_s[0]; + } + inline T get_y(void) const + { + ASSUME(N >= 2); + return m_s[1]; + } + inline T get_z(void) const + { + ASSUME(N >= 3); + return m_s[2]; + } + inline T get_w(void) const + { + ASSUME(N >= 4); + return m_s[3]; + } + + inline vec get_x_vector() const + { + return broadcast<0>(); + } + inline vec get_y_vector() const + { + return broadcast<1>(); + } + inline vec get_z_vector() const + { + return broadcast<2>(); + } + inline vec get_w_vector() const + { + return broadcast<3>(); + } + + inline T get_component(uint32_t i) const + { + return (*this)[i]; + } + + inline vec& set_x(T v) + { + m_s[0] = v; + return *this; + } + inline vec& set_y(T v) + { + ASSUME(N >= 2); + m_s[1] = v; + return *this; + } + inline vec& set_z(T v) + { + ASSUME(N >= 3); + m_s[2] = v; + return *this; + } + inline vec& set_w(T v) + { + ASSUME(N >= 4); + m_s[3] = v; + return *this; + } + + inline const T* get_ptr() const + { + return reinterpret_cast(&m_s[0]); + } + inline T* get_ptr() + { + return reinterpret_cast(&m_s[0]); + } + + inline vec as_point() const + { + vec result(*this); + result[N - 1] = 1; + return result; + } + + 
inline vec as_dir() const + { + vec result(*this); + result[N - 1] = 0; + return result; + } + + inline vec<2, T> select2(uint32_t i, uint32_t j) const + { + assert((i < N) && (j < N)); + return vec<2, T>(m_s[i], m_s[j]); + } + + inline vec<3, T> select3(uint32_t i, uint32_t j, uint32_t k) const + { + assert((i < N) && (j < N) && (k < N)); + return vec<3, T>(m_s[i], m_s[j], m_s[k]); + } + + inline vec<4, T> select4(uint32_t i, uint32_t j, uint32_t k, uint32_t l) const + { + assert((i < N) && (j < N) && (k < N) && (l < N)); + return vec<4, T>(m_s[i], m_s[j], m_s[k], m_s[l]); + } + + inline bool is_dir() const + { + return m_s[N - 1] == 0; + } + inline bool is_vector() const + { + return is_dir(); + } + inline bool is_point() const + { + return m_s[N - 1] == 1; + } + + inline vec project() const + { + vec result(*this); + if (result[N - 1]) + result /= result[N - 1]; + return result; + } + + inline vec broadcast(unsigned i) const + { + return vec((*this)[i]); + } + + template + inline vec broadcast() const + { + return vec((*this)[i]); + } + + inline vec swizzle(uint32_t i, uint32_t j) const + { + return vec((*this)[i], (*this)[j]); + } + + inline vec swizzle(uint32_t i, uint32_t j, uint32_t k) const + { + return vec((*this)[i], (*this)[j], (*this)[k]); + } + + inline vec swizzle(uint32_t i, uint32_t j, uint32_t k, uint32_t l) const + { + return vec((*this)[i], (*this)[j], (*this)[k], (*this)[l]); + } + + inline vec operator-() const + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = -m_s[i]; + return result; + } + + inline vec operator+() const + { + return *this; + } + + inline vec& operator+=(const vec& other) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] += other.m_s[i]; + return *this; + } + + inline vec& operator-=(const vec& other) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] -= other.m_s[i]; + return *this; + } + + inline vec& operator*=(const vec& other) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] *= other.m_s[i]; + return *this; 
+ } + + inline vec& operator/=(const vec& other) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] /= other.m_s[i]; + return *this; + } + + inline vec& operator*=(T s) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] *= s; + return *this; + } + + inline vec& operator/=(T s) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] /= s; + return *this; + } + + // component-wise multiply (not a dot product like in previous versions) + // just remarking it out because it's too ambiguous, use dot() or mul_components() instead +#if 0 + friend inline vec operator*(const vec& lhs, const vec& rhs) + { + return vec::mul_components(lhs, rhs); + } +#endif + + friend inline vec operator*(const vec& lhs, T val) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = lhs.m_s[i] * val; + return result; + } + + friend inline vec operator*(T val, const vec& rhs) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = val * rhs.m_s[i]; + return result; + } + + friend inline vec operator/(const vec& lhs, const vec& rhs) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = lhs.m_s[i] / rhs.m_s[i]; + return result; + } + + friend inline vec operator/(const vec& lhs, T val) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = lhs.m_s[i] / val; + return result; + } + + friend inline vec operator+(const vec& lhs, const vec& rhs) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = lhs.m_s[i] + rhs.m_s[i]; + return result; + } + + friend inline vec operator-(const vec& lhs, const vec& rhs) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = lhs.m_s[i] - rhs.m_s[i]; + return result; + } + + static inline vec<3, T> cross2(const vec& a, const vec& b) + { + ASSUME(N >= 2); + return vec<3, T>(0, 0, a[0] * b[1] - a[1] * b[0]); + } + + inline vec<3, T> cross2(const vec& b) const + { + return cross2(*this, b); + } + + static inline vec<3, T> cross3(const vec& a, const vec& b) + { + ASSUME(N >= 3); + return 
vec<3, T>(a[1] * b[2] - a[2] * b[1], a[2] * b[0] - a[0] * b[2], a[0] * b[1] - a[1] * b[0]); + } + + inline vec<3, T> cross3(const vec& b) const + { + return cross3(*this, b); + } + + static inline vec<3, T> cross(const vec& a, const vec& b) + { + ASSUME(N >= 2); + + if (N == 2) + return cross2(a, b); + else + return cross3(a, b); + } + + inline vec<3, T> cross(const vec& b) const + { + ASSUME(N >= 2); + return cross(*this, b); + } + + inline T dot(const vec& rhs) const + { + return dot(*this, rhs); + } + + inline vec dot_vector(const vec& rhs) const + { + return vec(dot(*this, rhs)); + } + + static inline T dot(const vec& lhs, const vec& rhs) + { + T result = lhs.m_s[0] * rhs.m_s[0]; + for (uint32_t i = 1; i < N; i++) + result += lhs.m_s[i] * rhs.m_s[i]; + return result; + } + + inline T dot2(const vec& rhs) const + { + ASSUME(N >= 2); + return m_s[0] * rhs.m_s[0] + m_s[1] * rhs.m_s[1]; + } + + inline T dot3(const vec& rhs) const + { + ASSUME(N >= 3); + return m_s[0] * rhs.m_s[0] + m_s[1] * rhs.m_s[1] + m_s[2] * rhs.m_s[2]; + } + + inline T dot4(const vec& rhs) const + { + ASSUME(N >= 4); + return m_s[0] * rhs.m_s[0] + m_s[1] * rhs.m_s[1] + m_s[2] * rhs.m_s[2] + m_s[3] * rhs.m_s[3]; + } + + inline T norm(void) const + { + T sum = m_s[0] * m_s[0]; + for (uint32_t i = 1; i < N; i++) + sum += m_s[i] * m_s[i]; + return sum; + } + + inline T length(void) const + { + return sqrt(norm()); + } + + inline T squared_distance(const vec& rhs) const + { + T dist2 = 0; + for (uint32_t i = 0; i < N; i++) + { + T d = m_s[i] - rhs.m_s[i]; + dist2 += d * d; + } + return dist2; + } + + inline T squared_distance(const vec& rhs, T early_out) const + { + T dist2 = 0; + for (uint32_t i = 0; i < N; i++) + { + T d = m_s[i] - rhs.m_s[i]; + dist2 += d * d; + if (dist2 > early_out) + break; + } + return dist2; + } + + inline T distance(const vec& rhs) const + { + T dist2 = 0; + for (uint32_t i = 0; i < N; i++) + { + T d = m_s[i] - rhs.m_s[i]; + dist2 += d * d; + } + return sqrt(dist2); + } + 
+ inline vec inverse() const + { + vec result; + for (uint32_t i = 0; i < N; i++) + result[i] = m_s[i] ? (1.0f / m_s[i]) : 0; + return result; + } + + // returns squared length (norm) + inline double normalize(const vec* pDefaultVec = NULL) + { + double n = m_s[0] * m_s[0]; + for (uint32_t i = 1; i < N; i++) + n += m_s[i] * m_s[i]; + + if (n != 0) + *this *= static_cast(1.0f / sqrt(n)); + else if (pDefaultVec) + *this = *pDefaultVec; + return n; + } + + inline double normalize3(const vec* pDefaultVec = NULL) + { + ASSUME(N >= 3); + + double n = m_s[0] * m_s[0] + m_s[1] * m_s[1] + m_s[2] * m_s[2]; + + if (n != 0) + *this *= static_cast((1.0f / sqrt(n))); + else if (pDefaultVec) + *this = *pDefaultVec; + return n; + } + + inline vec& normalize_in_place(const vec* pDefaultVec = NULL) + { + normalize(pDefaultVec); + return *this; + } + + inline vec& normalize3_in_place(const vec* pDefaultVec = NULL) + { + normalize3(pDefaultVec); + return *this; + } + + inline vec get_normalized(const vec* pDefaultVec = NULL) const + { + vec result(*this); + result.normalize(pDefaultVec); + return result; + } + + inline vec get_normalized3(const vec* pDefaultVec = NULL) const + { + vec result(*this); + result.normalize3(pDefaultVec); + return result; + } + + inline vec& clamp(T l, T h) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = static_cast(clamp(m_s[i], l, h)); + return *this; + } + + inline vec& saturate() + { + return clamp(0.0f, 1.0f); + } + + inline vec& clamp(const vec& l, const vec& h) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = static_cast(clamp(m_s[i], l[i], h[i])); + return *this; + } + + inline bool is_within_bounds(const vec& l, const vec& h) const + { + for (uint32_t i = 0; i < N; i++) + if ((m_s[i] < l[i]) || (m_s[i] > h[i])) + return false; + + return true; + } + + inline bool is_within_bounds(T l, T h) const + { + for (uint32_t i = 0; i < N; i++) + if ((m_s[i] < l) || (m_s[i] > h)) + return false; + + return true; + } + + inline uint32_t get_major_axis(void) 
const + { + T m = fabs(m_s[0]); + uint32_t r = 0; + for (uint32_t i = 1; i < N; i++) + { + const T c = fabs(m_s[i]); + if (c > m) + { + m = c; + r = i; + } + } + return r; + } + + inline uint32_t get_minor_axis(void) const + { + T m = fabs(m_s[0]); + uint32_t r = 0; + for (uint32_t i = 1; i < N; i++) + { + const T c = fabs(m_s[i]); + if (c < m) + { + m = c; + r = i; + } + } + return r; + } + + inline void get_projection_axes(uint32_t& u, uint32_t& v) const + { + const int axis = get_major_axis(); + if (m_s[axis] < 0.0f) + { + v = next_wrap(axis, N); + u = next_wrap(v, N); + } + else + { + u = next_wrap(axis, N); + v = next_wrap(u, N); + } + } + + inline T get_absolute_minimum(void) const + { + T result = fabs(m_s[0]); + for (uint32_t i = 1; i < N; i++) + result = std::min(result, fabs(m_s[i])); + return result; + } + + inline T get_absolute_maximum(void) const + { + T result = fabs(m_s[0]); + for (uint32_t i = 1; i < N; i++) + result = std::max(result, fabs(m_s[i])); + return result; + } + + inline T get_minimum(void) const + { + T result = m_s[0]; + for (uint32_t i = 1; i < N; i++) + result = std::min(result, m_s[i]); + return result; + } + + inline T get_maximum(void) const + { + T result = m_s[0]; + for (uint32_t i = 1; i < N; i++) + result = std::max(result, m_s[i]); + return result; + } + + inline vec& remove_unit_direction(const vec& dir) + { + *this -= (dot(dir) * dir); + return *this; + } + + inline vec get_remove_unit_direction(const vec& dir) const + { + return *this - (dot(dir) * dir); + } + + inline bool all_less(const vec& b) const + { + for (uint32_t i = 0; i < N; i++) + if (m_s[i] >= b.m_s[i]) + return false; + return true; + } + + inline bool all_less_equal(const vec& b) const + { + for (uint32_t i = 0; i < N; i++) + if (m_s[i] > b.m_s[i]) + return false; + return true; + } + + inline bool all_greater(const vec& b) const + { + for (uint32_t i = 0; i < N; i++) + if (m_s[i] <= b.m_s[i]) + return false; + return true; + } + + inline bool 
all_greater_equal(const vec& b) const + { + for (uint32_t i = 0; i < N; i++) + if (m_s[i] < b.m_s[i]) + return false; + return true; + } + + inline vec negate_xyz() const + { + vec ret; + + ret[0] = -m_s[0]; + if (N >= 2) + ret[1] = -m_s[1]; + if (N >= 3) + ret[2] = -m_s[2]; + + for (uint32_t i = 3; i < N; i++) + ret[i] = m_s[i]; + + return ret; + } + + inline vec& invert() + { + for (uint32_t i = 0; i < N; i++) + if (m_s[i] != 0.0f) + m_s[i] = 1.0f / m_s[i]; + return *this; + } + + inline scalar_type perp_dot(const vec& b) const + { + ASSUME(N == 2); + return m_s[0] * b.m_s[1] - m_s[1] * b.m_s[0]; + } + + inline vec perp() const + { + ASSUME(N == 2); + return vec(-m_s[1], m_s[0]); + } + + inline vec get_floor() const + { + vec result; + for (uint32_t i = 0; i < N; i++) + result[i] = floor(m_s[i]); + return result; + } + + inline vec get_ceil() const + { + vec result; + for (uint32_t i = 0; i < N; i++) + result[i] = ceil(m_s[i]); + return result; + } + + // static helper methods + + static inline vec mul_components(const vec& lhs, const vec& rhs) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result[i] = lhs.m_s[i] * rhs.m_s[i]; + return result; + } + + static inline vec mul_add_components(const vec& a, const vec& b, const vec& c) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result[i] = a.m_s[i] * b.m_s[i] + c.m_s[i]; + return result; + } + + static inline vec make_axis(uint32_t i) + { + vec result; + result.clear(); + result[i] = 1; + return result; + } + + static inline vec equals_mask(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret[i] = (a[i] == b[i]); + return ret; + } + + static inline vec not_equals_mask(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret[i] = (a[i] != b[i]); + return ret; + } + + static inline vec less_mask(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret[i] = (a[i] < b[i]); + return ret; + } + + static inline vec 
less_equals_mask(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret[i] = (a[i] <= b[i]); + return ret; + } + + static inline vec greater_equals_mask(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret[i] = (a[i] >= b[i]); + return ret; + } + + static inline vec greater_mask(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret[i] = (a[i] > b[i]); + return ret; + } + + static inline vec component_max(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret.m_s[i] = std::max(a.m_s[i], b.m_s[i]); + return ret; + } + + static inline vec component_min(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret.m_s[i] = std::min(a.m_s[i], b.m_s[i]); + return ret; + } + + static inline vec lerp(const vec& a, const vec& b, float t) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret.m_s[i] = a.m_s[i] + (b.m_s[i] - a.m_s[i]) * t; + return ret; + } + + static inline bool equal_tol(const vec& a, const vec& b, float t) + { + for (uint32_t i = 0; i < N; i++) + if (!equal_tol(a.m_s[i], b.m_s[i], t)) + return false; + return true; + } + + inline bool equal_tol(const vec& b, float t) const + { + return equal_tol(*this, b, t); + } + +protected: + T m_s[N]; +}; + +typedef vec<1, double> vec1D; +typedef vec<2, double> vec2D; +typedef vec<3, double> vec3D; +typedef vec<4, double> vec4D; + +typedef vec<1, float> vec1F; + +typedef vec<2, float> vec2F; +typedef std::vector vec2F_array; + +typedef vec<3, float> vec3F; +typedef std::vector vec3F_array; + +typedef vec<4, float> vec4F; +typedef std::vector vec4F_array; + +typedef vec<2, uint32_t> vec2U; +typedef vec<3, uint32_t> vec3U; +typedef vec<2, int> vec2I; +typedef vec<3, int> vec3I; +typedef vec<4, int> vec4I; + +typedef vec<2, int16_t> vec2I16; +typedef vec<3, int16_t> vec3I16; + +inline vec2F rotate_point(const vec2F& p, float rad) +{ + float c = cos(rad); + float s = sin(rad); + + 
float x = p[0]; + float y = p[1]; + + return vec2F(x * c - y * s, x * s + y * c); +} + +class rect +{ +public: + inline rect() + { + } + + inline rect(eClear) + { + clear(); + } + + inline rect(eInitExpand) + { + init_expand(); + } + + // up to, but not including right/bottom + inline rect(int left, int top, int right, int bottom) + { + set(left, top, right, bottom); + } + + inline rect(const vec2I& lo, const vec2I& hi) + { + m_corner[0] = lo; + m_corner[1] = hi; + } + + inline rect(const vec2I& point) + { + m_corner[0] = point; + m_corner[1].set(point[0] + 1, point[1] + 1); + } + + inline bool operator==(const rect& r) const + { + return (m_corner[0] == r.m_corner[0]) && (m_corner[1] == r.m_corner[1]); + } + + inline bool operator<(const rect& r) const + { + for (uint32_t i = 0; i < 2; i++) + { + if (m_corner[i] < r.m_corner[i]) + return true; + else if (!(m_corner[i] == r.m_corner[i])) + return false; + } + + return false; + } + + inline void clear() + { + m_corner[0].clear(); + m_corner[1].clear(); + } + + inline void set(int left, int top, int right, int bottom) + { + m_corner[0].set(left, top); + m_corner[1].set(right, bottom); + } + + inline void set(const vec2I& lo, const vec2I& hi) + { + m_corner[0] = lo; + m_corner[1] = hi; + } + + inline void set(const vec2I& point) + { + m_corner[0] = point; + m_corner[1].set(point[0] + 1, point[1] + 1); + } + + inline uint32_t get_width() const + { + return m_corner[1][0] - m_corner[0][0]; + } + inline uint32_t get_height() const + { + return m_corner[1][1] - m_corner[0][1]; + } + + inline int get_left() const + { + return m_corner[0][0]; + } + inline int get_top() const + { + return m_corner[0][1]; + } + inline int get_right() const + { + return m_corner[1][0]; + } + inline int get_bottom() const + { + return m_corner[1][1]; + } + + inline bool is_empty() const + { + return (m_corner[1][0] <= m_corner[0][0]) || (m_corner[1][1] <= m_corner[0][1]); + } + + inline uint32_t get_dimension(uint32_t axis) const + { + return 
m_corner[1][axis] - m_corner[0][axis]; + } + inline uint32_t get_area() const + { + return get_dimension(0) * get_dimension(1); + } + + inline const vec2I& operator[](uint32_t i) const + { + assert(i < 2); + return m_corner[i]; + } + inline vec2I& operator[](uint32_t i) + { + assert(i < 2); + return m_corner[i]; + } + + inline rect& translate(int x_ofs, int y_ofs) + { + m_corner[0][0] += x_ofs; + m_corner[0][1] += y_ofs; + m_corner[1][0] += x_ofs; + m_corner[1][1] += y_ofs; + return *this; + } + + inline rect& init_expand() + { + m_corner[0].set(INT_MAX); + m_corner[1].set(INT_MIN); + return *this; + } + + inline rect& expand(int x, int y) + { + m_corner[0][0] = std::min(m_corner[0][0], x); + m_corner[0][1] = std::min(m_corner[0][1], y); + m_corner[1][0] = std::max(m_corner[1][0], x + 1); + m_corner[1][1] = std::max(m_corner[1][1], y + 1); + return *this; + } + + inline rect& expand(const rect& r) + { + m_corner[0][0] = std::min(m_corner[0][0], r[0][0]); + m_corner[0][1] = std::min(m_corner[0][1], r[0][1]); + m_corner[1][0] = std::max(m_corner[1][0], r[1][0]); + m_corner[1][1] = std::max(m_corner[1][1], r[1][1]); + return *this; + } + + inline bool touches(const rect& r) const + { + for (uint32_t i = 0; i < 2; i++) + { + if (r[1][i] <= m_corner[0][i]) + return false; + else if (r[0][i] >= m_corner[1][i]) + return false; + } + + return true; + } + + inline bool fully_within(const rect& r) const + { + for (uint32_t i = 0; i < 2; i++) + { + if (m_corner[0][i] < r[0][i]) + return false; + else if (m_corner[1][i] > r[1][i]) + return false; + } + + return true; + } + + inline bool intersect(const rect& r) + { + if (!touches(r)) + { + clear(); + return false; + } + + for (uint32_t i = 0; i < 2; i++) + { + m_corner[0][i] = std::max(m_corner[0][i], r[0][i]); + m_corner[1][i] = std::min(m_corner[1][i], r[1][i]); + } + + return true; + } + + inline bool contains(int x, int y) const + { + return (x >= m_corner[0][0]) && (x < m_corner[1][0]) && + (y >= m_corner[0][1]) && (y < 
m_corner[1][1]); + } + + inline bool contains(const vec2I& p) const + { + return contains(p[0], p[1]); + } + +private: + vec2I m_corner[2]; +}; + +inline rect make_rect(uint32_t width, uint32_t height) +{ + return rect(0, 0, width, height); +} + +struct color_quad_u8 +{ +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4201) +#endif + union + { + uint8_t m_c[4]; + struct + { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + }; + }; +#ifdef _MSC_VER +#pragma warning(pop) +#endif + + inline color_quad_u8(eClear) : color_quad_u8(0, 0, 0, 0) { } + + inline color_quad_u8(uint8_t cr, uint8_t cg, uint8_t cb, uint8_t ca) + { + set(cr, cg, cb, ca); + } + + inline color_quad_u8(uint8_t cy = 0, uint8_t ca = 255) + { + set(cy, ca); + } + + inline void clear() + { + set(0, 0, 0, 0); + } + + inline color_quad_u8& set(uint8_t cy, uint8_t ca = 255) + { + m_c[0] = cy; + m_c[1] = cy; + m_c[2] = cy; + m_c[3] = ca; + return *this; + } + + inline color_quad_u8& set(uint8_t cr, uint8_t cg, uint8_t cb, uint8_t ca) + { + m_c[0] = cr; + m_c[1] = cg; + m_c[2] = cb; + m_c[3] = ca; + return *this; + } + + inline color_quad_u8& set_clamped(int cr, int cg, int cb, int ca) + { + m_c[0] = (uint8_t)clamp(cr, 0, 255); + m_c[1] = (uint8_t)clamp(cg, 0, 255); + m_c[2] = (uint8_t)clamp(cb, 0, 255); + m_c[3] = (uint8_t)clamp(ca, 0, 255); + return *this; + } + + color_quad_u8& set_alpha(int ca) { a = (uint8_t)clamp(ca, 0, 255); return *this; } + + inline uint8_t& operator[] (uint32_t i) { assert(i < 4); return m_c[i]; } + inline uint8_t operator[] (uint32_t i) const { assert(i < 4); return m_c[i]; } + + inline int get_luma() const { return (13938U * m_c[0] + 46869U * m_c[1] + 4729U * m_c[2] + 32768U) >> 16U; } // REC709 weightings + + inline bool operator== (const color_quad_u8& other) const + { + return (m_c[0] == other.m_c[0]) && (m_c[1] == other.m_c[1]) && (m_c[2] == other.m_c[2]) && (m_c[3] == other.m_c[3]); + } + + inline bool operator!= (const color_quad_u8& other) const + { + 
return !(*this == other); + } + + inline uint32_t squared_distance(const color_quad_u8& c, bool alpha = true) const + { + return square(r - c.r) + square(g - c.g) + square(b - c.b) + (alpha ? square(a - c.a) : 0); + } + + inline bool rgb_equals(const color_quad_u8& rhs) const + { + return (r == rhs.r) && (g == rhs.g) && (b == rhs.b); + } +}; +typedef std::vector color_quad_u8_vec; + +inline uint32_t color_distance(bool perceptual, const color_quad_u8& e1, const color_quad_u8& e2, bool alpha) +{ + if (perceptual) + { + const float l1 = e1.r * .2126f + e1.g * .715f + e1.b * .0722f; + const float cr1 = e1.r - l1; + const float cb1 = e1.b - l1; + + const float l2 = e2.r * .2126f + e2.g * .715f + e2.b * .0722f; + const float cr2 = e2.r - l2; + const float cb2 = e2.b - l2; + + const float dl = l1 - l2; + const float dcr = cr1 - cr2; + const float dcb = cb1 - cb2; + + uint32_t d = static_cast( + 32.0f * 4.0f * dl * dl + + 32.0f * 2.0f * (.5f / (1.0f - .2126f)) * (.5f / (1.0f - .2126f)) * dcr * dcr + + 32.0f * .25f * (.5f / (1.0f - .0722f)) * (.5f / (1.0f - .0722f)) * dcb * dcb); + + if (alpha) + { + int da = (int)e1.a - (int)e2.a; + + d += static_cast(128.0f * da * da); + } + + return d; + } + else + return e1.squared_distance(e2, alpha); +} + +extern color_quad_u8 g_white_color_u8, g_black_color_u8, g_red_color_u8, g_green_color_u8, g_blue_color_u8, g_yellow_color_u8, g_purple_color_u8, g_magenta_color_u8, g_cyan_color_u8; + +class image_u8 +{ +public: + image_u8() : + m_width(0), m_height(0), + m_clip_rect(cClear) + { + } + + image_u8(uint32_t width, uint32_t height) : + m_width(width), m_height(height), + m_clip_rect(0, 0, width, height) + { + m_pixels.resize(width * height); + } + + inline const color_quad_u8_vec& get_pixels() const { return m_pixels; } + inline color_quad_u8_vec& get_pixels() { return m_pixels; } + + inline uint32_t width() const { return m_width; } + inline uint32_t height() const { return m_height; } + inline uint32_t total_pixels() const { return 
m_width * m_height; } + + inline const rect& get_clip_rect() const { return m_clip_rect; } + + inline void set_clip_rect(const rect& r) + { + assert((r.get_left() >= 0) && (r.get_top() >= 0) && (r.get_right() <= (int)m_width) && (r.get_bottom() <= (int)m_height)); + + m_clip_rect = r; + } + + inline void clear_clip_rect() { m_clip_rect.set(0, 0, m_width, m_height); } + + inline bool is_clipped(int x, int y) const { return !m_clip_rect.contains(x, y); } + + inline rect get_bounds() const { return rect(0, 0, m_width, m_height); } + + inline color_quad_u8& operator()(uint32_t x, uint32_t y) { assert((x < m_width) && (y < m_height)); return m_pixels[x + m_width * y]; } + inline const color_quad_u8& operator()(uint32_t x, uint32_t y) const { assert((x < m_width) && (y < m_height)); return m_pixels[x + m_width * y]; } + + image_u8& clear() + { + m_width = m_height = 0; + m_clip_rect.clear(); + m_pixels.clear(); + return *this; + } + + image_u8& init(uint32_t width, uint32_t height) + { + clear(); + + m_width = width; + m_height = height; + m_clip_rect.set(0, 0, width, height); + m_pixels.resize(width * height); + return *this; + } + + image_u8& set_all(const color_quad_u8& p) + { + for (uint32_t i = 0; i < m_pixels.size(); i++) + m_pixels[i] = p; + return *this; + } + + inline const color_quad_u8& get_clamped(int x, int y) const { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } + inline color_quad_u8& get_clamped(int x, int y) { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } + + inline image_u8& set_pixel_clipped(int x, int y, const color_quad_u8& c) + { + if (!is_clipped(x, y)) + (*this)(x, y) = c; + return *this; + } + + inline image_u8& fill_box(int x, int y, int w, int h, const color_quad_u8& c) + { + for (int y_ofs = 0; y_ofs < h; y_ofs++) + for (int x_ofs = 0; x_ofs < w; x_ofs++) + set_pixel_clipped(x + x_ofs, y + y_ofs, c); + return *this; + } + + void invert_box(int inX, int inY, int inW, int inH) + { + for (int y = 
0; y < inH; y++) + { + const uint32_t yy = inY + y; + + for (int x = 0; x < inW; x++) + { + const uint32_t xx = inX + x; + + if (is_clipped(xx, yy)) + continue; + + color_quad_u8 c((*this)(xx, yy)); + + c.r = 255 - c.r; + c.g = 255 - c.g; + c.b = 255 - c.b; + + set_pixel_clipped(xx, yy, c); + } + } + } + + image_u8& crop_dup_borders(uint32_t w, uint32_t h) + { + const uint32_t orig_w = m_width, orig_h = m_height; + + crop(w, h); + + if (orig_w && orig_h) + { + if (m_width > orig_w) + { + for (uint32_t x = orig_w; x < m_width; x++) + for (uint32_t y = 0; y < m_height; y++) + set_pixel_clipped(x, y, get_clamped(std::min(x, orig_w - 1U), std::min(y, orig_h - 1U))); + } + + if (m_height > orig_h) + { + for (uint32_t y = orig_h; y < m_height; y++) + for (uint32_t x = 0; x < m_width; x++) + set_pixel_clipped(x, y, get_clamped(std::min(x, orig_w - 1U), std::min(y, orig_h - 1U))); + } + } + return *this; + } + + image_u8& crop(uint32_t new_width, uint32_t new_height) + { + if ((m_width == new_width) && (m_height == new_height)) + return *this; + + image_u8 new_image(new_width, new_height); + + const uint32_t w = std::min(m_width, new_width); + const uint32_t h = std::min(m_height, new_height); + + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + new_image(x, y) = (*this)(x, y); + + return swap(new_image); + } + + image_u8& swap(image_u8& other) + { + std::swap(m_width, other.m_width); + std::swap(m_height, other.m_height); + std::swap(m_pixels, other.m_pixels); + std::swap(m_clip_rect, other.m_clip_rect); + return *this; + } + + // No clipping + inline void get_block(uint32_t bx, uint32_t by, uint32_t width, uint32_t height, color_quad_u8* pPixels) const + { + assert((bx * width + width) <= m_width); + assert((by * height + height) <= m_height); + + for (uint32_t y = 0; y < height; y++) + memcpy(pPixels + y * width, &(*this)(bx * width, by * height + y), width * sizeof(color_quad_u8)); + } + + inline void get_block_clamped(uint32_t bx, uint32_t by, 
uint32_t width, uint32_t height, color_quad_u8* pPixels) const + { + for (uint32_t y = 0; y < height; y++) + for (uint32_t x = 0; x < width; x++) + pPixels[x + y * width] = get_clamped(bx * width + x, by * height + y); + } + + // No clipping + inline void set_block(uint32_t bx, uint32_t by, uint32_t width, uint32_t height, const color_quad_u8* pPixels) + { + assert((bx * width + width) <= m_width); + assert((by * height + height) <= m_height); + + for (uint32_t y = 0; y < height; y++) + memcpy(&(*this)(bx * width, by * height + y), pPixels + y * width, width * sizeof(color_quad_u8)); + } + + image_u8& swizzle(uint32_t r, uint32_t g, uint32_t b, uint32_t a) + { + assert((r | g | b | a) <= 3); + for (uint32_t y = 0; y < m_height; y++) + { + for (uint32_t x = 0; x < m_width; x++) + { + color_quad_u8 tmp((*this)(x, y)); + (*this)(x, y).set(tmp[r], tmp[g], tmp[b], tmp[a]); + } + } + + return *this; + } + + struct pixel_coord + { + uint16_t m_x, m_y; + pixel_coord() { } + pixel_coord(uint32_t x, uint32_t y) : m_x((uint16_t)x), m_y((uint16_t)y) { } + }; + + uint32_t flood_fill(int x, int y, const color_quad_u8& c, const color_quad_u8& b, std::vector* pSet_pixels = nullptr); + + void draw_line(int xs, int ys, int xe, int ye, const color_quad_u8& color); + + inline void set_pixel_clipped_alphablend(int x, int y, const color_quad_u8& c) + { + if (is_clipped(x, y)) + return; + + color_quad_u8 ct(m_pixels[x + y * m_width]); + + ct.r = static_cast(ct.r + ((c.r - ct.r) * c.a) / 255); + ct.g = static_cast(ct.g + ((c.g - ct.g) * c.a) / 255); + ct.b = static_cast(ct.b + ((c.b - ct.b) * c.a) / 255); + + m_pixels[x + y * m_width] = ct; + } + +private: + color_quad_u8_vec m_pixels; + uint32_t m_width, m_height; + rect m_clip_rect; + + struct fill_segment + { + int16_t m_y, m_xl, m_xr, m_dy; + + fill_segment(int y, int xl, int xr, int dy) : + m_y((int16_t)y), m_xl((int16_t)xl), m_xr((int16_t)xr), m_dy((int16_t)dy) + { + } + }; + + inline bool flood_fill_is_inside(int x, int y, const 
color_quad_u8& b) const + { + if (is_clipped(x, y)) + return false; + + return (*this)(x, y) == b; + } + + void rasterize_line(int xs, int ys, int xe, int ye, int pred, int inc_dec, int e, int e_inc, int e_no_inc, const color_quad_u8& color); + + void draw_aaline_pixel(int x, int y, int a, color_quad_u8 color) + { + color.a = static_cast(255 - a); + set_pixel_clipped_alphablend(x, y, color); + } +}; + +//bool load_png(const char* pFilename, image_u8& img); + +//bool save_png(const char* pFilename, const image_u8& img, bool save_alpha); + +class image_metrics +{ +public: + double m_max, m_mean, m_mean_squared, m_root_mean_squared, m_peak_snr; + + image_metrics() + { + clear(); + } + + void clear() + { + memset(this, 0, sizeof(*this)); + } + + void compute(const image_u8& a, const image_u8& b, uint32_t first_channel, uint32_t num_channels) + { + const bool average_component_error = true; + + const uint32_t width = std::min(a.width(), b.width()); + const uint32_t height = std::min(a.height(), b.height()); + + assert((first_channel < 4U) && (first_channel + num_channels <= 4U)); + + // Histogram approach originally due to Charles Bloom. 
+ double hist[256]; + memset(hist, 0, sizeof(hist)); + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + const color_quad_u8& ca = a(x, y); + const color_quad_u8& cb = b(x, y); + + if (!num_channels) + hist[iabs(ca.get_luma() - cb.get_luma())]++; + else + { + for (uint32_t c = 0; c < num_channels; c++) + hist[iabs(ca[first_channel + c] - cb[first_channel + c])]++; + } + } + } + + m_max = 0; + double sum = 0.0f, sum2 = 0.0f; + for (uint32_t i = 0; i < 256; i++) + { + if (!hist[i]) + continue; + + m_max = std::max(m_max, i); + + double x = i * hist[i]; + + sum += x; + sum2 += i * x; + } + + // See http://richg42.blogspot.com/2016/09/how-to-compute-psnr-from-old-berkeley.html + double total_values = width * height; + + if (average_component_error) + total_values *= clamp(num_channels, 1, 4); + + m_mean = clamp(sum / total_values, 0.0f, 255.0f); + m_mean_squared = clamp(sum2 / total_values, 0.0f, 255.0f * 255.0f); + + m_root_mean_squared = sqrt(m_mean_squared); + + if (!m_root_mean_squared) + m_peak_snr = 100.0f; + else + m_peak_snr = clamp(log10(255.0f / m_root_mean_squared) * 20.0f, 0.0f, 100.0f); + } +}; + +class imagef +{ +public: + imagef() : + m_width(0), m_height(0), m_pitch(0) + { + } + + imagef(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) : + m_width(0), m_height(0), m_pitch(0) + { + resize(w, h, p); + } + + imagef(const imagef& other) : + m_width(0), m_height(0), m_pitch(0) + { + *this = other; + } + + imagef& swap(imagef& other) + { + std::swap(m_width, other.m_width); + std::swap(m_height, other.m_height); + std::swap(m_pitch, other.m_pitch); + m_pixels.swap(other.m_pixels); + return *this; + } + + imagef& operator= (const imagef& rhs) + { + if (this != &rhs) + { + m_width = rhs.m_width; + m_height = rhs.m_height; + m_pitch = rhs.m_pitch; + m_pixels = rhs.m_pixels; + } + return *this; + } + + imagef& clear() + { + m_width = 0; + m_height = 0; + m_pitch = 0; + m_pixels.resize(0); + return *this; + } + + imagef& 
set(const image_u8& src, const vec4F& scale = vec4F(1), const vec4F& bias = vec4F(0)) + { + const uint32_t width = src.width(); + const uint32_t height = src.height(); + + resize(width, height); + + for (int y = 0; y < (int)height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + const color_quad_u8& src_pixel = src(x, y); + (*this)(x, y).set((float)src_pixel.r * scale[0] + bias[0], (float)src_pixel.g * scale[1] + bias[1], (float)src_pixel.b * scale[2] + bias[2], (float)src_pixel.a * scale[3] + bias[3]); + } + } + + return *this; + } + + imagef& resize(const imagef& other, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0, 0, 0, 1)) + { + return resize(other.get_width(), other.get_height(), p, background); + } + + imagef& resize(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0, 0, 0, 1)) + { + return crop(w, h, p, background); + } + + imagef& set_all(const vec4F& c) + { + for (uint32_t i = 0; i < m_pixels.size(); i++) + m_pixels[i] = c; + return *this; + } + + imagef& fill_box(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const vec4F& c) + { + for (uint32_t iy = 0; iy < h; iy++) + for (uint32_t ix = 0; ix < w; ix++) + set_pixel_clipped(x + ix, y + iy, c); + return *this; + } + + imagef& crop(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0, 0, 0, 1)) + { + if (p == UINT32_MAX) + p = w; + + if ((w == m_width) && (m_height == h) && (m_pitch == p)) + return *this; + + if ((!w) || (!h) || (!p)) + { + clear(); + return *this; + } + + vec4F_array cur_state; + cur_state.swap(m_pixels); + + m_pixels.resize(p * h); + + for (uint32_t y = 0; y < h; y++) + { + for (uint32_t x = 0; x < w; x++) + { + if ((x < m_width) && (y < m_height)) + m_pixels[x + y * p] = cur_state[x + y * m_pitch]; + else + m_pixels[x + y * p] = background; + } + } + + m_width = w; + m_height = h; + m_pitch = p; + + return *this; + } + + inline const vec4F& operator() (uint32_t x, uint32_t y) const { assert(x < m_width&& y 
< m_height); return m_pixels[x + y * m_pitch]; } + inline vec4F& operator() (uint32_t x, uint32_t y) { assert(x < m_width&& y < m_height); return m_pixels[x + y * m_pitch]; } + + inline const vec4F& get_clamped(int x, int y) const { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } + inline vec4F& get_clamped(int x, int y) { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } + + inline const vec4F& get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) const + { + x = wrap_u ? posmod(x, m_width) : clamp(x, 0, m_width - 1); + y = wrap_v ? posmod(y, m_height) : clamp(y, 0, m_height - 1); + return m_pixels[x + y * m_pitch]; + } + + inline vec4F& get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) + { + x = wrap_u ? posmod(x, m_width) : clamp(x, 0, m_width - 1); + y = wrap_v ? posmod(y, m_height) : clamp(y, 0, m_height - 1); + return m_pixels[x + y * m_pitch]; + } + + inline imagef& set_pixel_clipped(int x, int y, const vec4F& c) + { + if ((static_cast(x) < m_width) && (static_cast(y) < m_height)) + (*this)(x, y) = c; + return *this; + } + + // Very straightforward blit with full clipping. Not fast, but it works. 
+ imagef& blit(const imagef& src, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y) + { + for (int y = 0; y < src_h; y++) + { + const int sy = src_y + y; + if (sy < 0) + continue; + else if (sy >= (int)src.get_height()) + break; + + for (int x = 0; x < src_w; x++) + { + const int sx = src_x + x; + if (sx < 0) + continue; + else if (sx >= (int)src.get_height()) + break; + + set_pixel_clipped(dst_x + x, dst_y + y, src(sx, sy)); + } + } + + return *this; + } + + const imagef& extract_block_clamped(vec4F* pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h) const + { + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + *pDst++ = get_clamped(src_x + x, src_y + y); + return *this; + } + + imagef& set_block_clipped(const vec4F* pSrc, uint32_t dst_x, uint32_t dst_y, uint32_t w, uint32_t h) + { + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + set_pixel_clipped(dst_x + x, dst_y + y, *pSrc++); + return *this; + } + + inline uint32_t get_width() const { return m_width; } + inline uint32_t get_height() const { return m_height; } + inline uint32_t get_pitch() const { return m_pitch; } + inline uint32_t get_total_pixels() const { return m_width * m_height; } + + inline uint32_t get_block_width(uint32_t w) const { return (m_width + (w - 1)) / w; } + inline uint32_t get_block_height(uint32_t h) const { return (m_height + (h - 1)) / h; } + inline uint32_t get_total_blocks(uint32_t w, uint32_t h) const { return get_block_width(w) * get_block_height(h); } + + inline const vec4F_array& get_pixels() const { return m_pixels; } + inline vec4F_array& get_pixels() { return m_pixels; } + + inline const vec4F* get_ptr() const { return &m_pixels[0]; } + inline vec4F* get_ptr() { return &m_pixels[0]; } + +private: + uint32_t m_width, m_height, m_pitch; // all in pixels + vec4F_array m_pixels; +}; + +enum +{ + cComputeGaussianFlagNormalize = 1, + cComputeGaussianFlagPrint = 2, + cComputeGaussianFlagNormalizeCenterToOne = 4 +}; + 
+// size_x/y should be odd +void compute_gaussian_kernel(float* pDst, int size_x, int size_y, float sigma_sqr, uint32_t flags); + +void gaussian_filter(imagef& dst, const imagef& orig_img, uint32_t odd_filter_width, float sigma_sqr, bool wrapping = false, uint32_t width_divisor = 1, uint32_t height_divisor = 1); + +vec4F compute_ssim(const imagef& a, const imagef& b); + +vec4F compute_ssim(const image_u8& a, const image_u8& b, bool luma); + +struct block8 +{ + uint64_t m_vals[1]; +}; + +typedef std::vector block8_vec; + +struct block16 +{ + uint64_t m_vals[2]; +}; + +typedef std::vector block16_vec; + +bool save_dds(const char* pFilename, uint32_t width, uint32_t height, const void* pBlocks, uint32_t pixel_format_bpp, DXGI_FORMAT dxgi_format, bool srgb, bool force_dx10_header); + +void strip_extension(std::string& s); +void strip_path(std::string& s); + +uint32_t hash_hsieh(const uint8_t* pBuf, size_t len); + +// https://www.johndcook.com/blog/standard_deviation/ +// This class is for small numbers of integers, so precision shouldn't be an issue. +class tracked_stat +{ +public: + tracked_stat() { clear(); } + + void clear() { m_num = 0; m_total = 0; m_total2 = 0; } + + void update(uint32_t val) { m_num++; m_total += val; m_total2 += val * val; } + + tracked_stat& operator += (uint32_t val) { update(val); return *this; } + + uint32_t get_number_of_values() const { return m_num; } + uint64_t get_total() const { return m_total; } + uint64_t get_total2() const { return m_total2; } + + float get_mean() const { return m_num ? (float)m_total / m_num : 0.0f; }; + + float get_variance() const { return m_num ? ((float)(m_num * m_total2 - m_total * m_total)) / (m_num * m_num) : 0.0f; } + float get_std_dev() const { return m_num ? sqrtf((float)(m_num * m_total2 - m_total * m_total)) / m_num : 0.0f; } + + float get_sample_variance() const { return (m_num > 1) ? 
((float)(m_num * m_total2 - m_total * m_total)) / (m_num * (m_num - 1)) : 0.0f; } + float get_sample_std_dev() const { return (m_num > 1) ? sqrtf(get_sample_variance()) : 0.0f; } + +private: + uint32_t m_num; + uint64_t m_total; + uint64_t m_total2; +}; + +inline float compute_covariance(const float* pA, const float* pB, const tracked_stat& a, const tracked_stat& b, bool sample) +{ + const uint32_t n = a.get_number_of_values(); + assert(n == b.get_number_of_values()); + + if (!n) + { + assert(0); + return 0.0f; + } + if ((sample) && (n == 1)) + { + assert(0); + return 0; + } + + const float mean_a = a.get_mean(); + const float mean_b = b.get_mean(); + + float total = 0.0f; + for (uint32_t i = 0; i < n; i++) + total += (pA[i] - mean_a) * (pB[i] - mean_b); + + return total / (sample ? (n - 1) : n); +} + +inline float compute_correlation_coefficient(const float* pA, const float* pB, const tracked_stat& a, const tracked_stat& b, float c, bool sample) +{ + if (!a.get_number_of_values()) + return 1.0f; + + float covar = compute_covariance(pA, pB, a, b, sample); + float std_dev_a = sample ? a.get_sample_std_dev() : a.get_std_dev(); + float std_dev_b = sample ? 
b.get_sample_std_dev() : b.get_std_dev(); + float denom = std_dev_a * std_dev_b + c; + + if (denom < .0000125f) + return 1.0f; + + float result = (covar + c) / denom; + + return clamp(result, -1.0f, 1.0f); +} + +float compute_block_max_std_dev(const color_quad_u8* pPixels, uint32_t block_width, uint32_t block_height, uint32_t num_comps); + +class rand +{ + std::mt19937 m_mt; + +public: + rand() { } + + rand(uint32_t s) { seed(s); } + void seed(uint32_t s) { m_mt.seed(s); } + + // between [l,h] + int irand(int l, int h) { std::uniform_int_distribution d(l, h); return d(m_mt); } + + uint32_t urand32() { return static_cast(irand(INT32_MIN, INT32_MAX)); } + + bool bit() { return irand(0, 1) == 1; } + + uint8_t byte() { return static_cast(urand32()); } + + // between [l,h) + float frand(float l, float h) { std::uniform_real_distribution d(l, h); return d(m_mt); } + + float gaussian(float mean, float stddev) { std::normal_distribution d(mean, stddev); return d(m_mt); } +}; + +bool save_astc_file(const char* pFilename, block16_vec& blocks, uint32_t width, uint32_t height, uint32_t block_width, uint32_t block_height); +bool load_astc_file(const char* pFilename, block16_vec& blocks, uint32_t& width, uint32_t& height, uint32_t& block_width, uint32_t& block_height); + +class value_stats +{ +public: + value_stats() + { + clear(); + } + + void clear() + { + m_sum = 0; + m_sum2 = 0; + m_num = 0; + m_min = 1e+39; + m_max = -1e+39; + m_vals.clear(); + } + + void add(double val) + { + m_sum += val; + m_sum2 += val * val; + + m_num++; + + m_min = std::min(m_min, val); + m_max = std::max(m_max, val); + + m_vals.push_back(val); + } + + void add(int val) + { + add(static_cast(val)); + } + + void add(uint32_t val) + { + add(static_cast(val)); + } + + void add(int64_t val) + { + add(static_cast(val)); + } + + void add(uint64_t val) + { + add(static_cast(val)); + } + + void print(const char* pPrefix = "") + { + if (!m_vals.size()) + printf("%s: Empty\n", pPrefix); + else + printf("%s: 
Samples: %llu, Total: %f, Avg: %f, Std Dev: %f, Min: %f, Max: %f, Mean: %f\n", + pPrefix, (unsigned long long)get_num(), get_total(), get_average(), get_std_dev(), get_min(), get_max(), get_mean()); + } + + double get_total() const + { + return m_sum; + } + + double get_average() const + { + return m_num ? (m_sum / m_num) : 0.0f; + } + + double get_min() const + { + return m_min; + } + + double get_max() const + { + return m_max; + } + + uint64_t get_num() const + { + return m_num; + } + + double get_val(uint32_t index) const + { + return m_vals[index]; + } + + // Returns population standard deviation + double get_std_dev() const + { + if (!m_num) + return 0.0f; + + // TODO: FP precision + return sqrt((m_sum2 - ((m_sum * m_sum) / m_num)) / m_num); + } + + double get_mean() const + { + if (!m_num) + return 0.0f; + + std::vector sorted_vals(m_vals); + std::sort(sorted_vals.begin(), sorted_vals.end()); + + return sorted_vals[sorted_vals.size() / 2]; + } + +private: + double m_sum; + double m_sum2; + + uint64_t m_num; + + double m_min; + double m_max; + + mutable std::vector m_vals; +}; + +//uint32_t get_deflate_size(const void* pData, size_t data_size); + +bool read_file(const char* pFilename, uint8_vec& buf); + +} // namespace utils + +#ifdef _MSC_VER +#pragma warning (pop) +#endif \ No newline at end of file diff --git a/external/basis_universal/khronos/CMakeLists.txt b/external/basis_universal/khronos/CMakeLists.txt new file mode 100644 index 0000000000..0f5af88e07 --- /dev/null +++ b/external/basis_universal/khronos/CMakeLists.txt @@ -0,0 +1,300 @@ +# Important: The Basis Universal encoder and transcoder libraries must be compiled with -fno-strict-aliasing (MSVC's default, and also the Linux kernel). +# It should also work without this option, but we do not test with it. 
+cmake_minimum_required(VERSION 3.20) + +if (NOT CMAKE_OSX_DEPLOYMENT_TARGET) + # Needed otherwise Xcode builds with the default installed SDK which can often be + # more recent than the macOS version being used. + set(CMAKE_OSX_DEPLOYMENT_TARGET "11.0" CACHE STRING "macOS Deployment Target") +endif() + +project(basisu C CXX) + +option(BASISU_TOOL "Include basisu tool in build" TRUE) +option(BASISU_EXAMPLES "Include examples in build" TRUE) + +option(BASISU_STATIC "static linking" TRUE) +option(BASISU_SAN "sanitize" FALSE) + +# Using a generator expression here prevents multi-config generators (VS, Xcode, Ninja Multi-Config) +# from appending a per-configuration subdirectory. NOTE: This means the output could be overwritten +# by a subsequent build for a different configuration. +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY $<1:${CMAKE_CURRENT_SOURCE_DIR}/../bin>) + +# For MSVC builds default to SSE enabled, and determine if it's a 64-bit (-A x64) vs. 32-bit (-A Win32) build. +if (MSVC) + # TODO: Fix me for Windows ARM + option(BASISU_SSE "SSE 4.1 support" TRUE) + if ( CMAKE_GENERATOR_PLATFORM STREQUAL "Win32" ) + set(BASISU_BUILD_X64 0) + else() + set(BASISU_BUILD_X64 1) + endif() + add_compile_options(/W4) +else() + option(BASISU_SSE "SSE 4.1 support" FALSE) + option(BASISU_BUILD_X64 "build 64-bit" TRUE) +endif() + +option(BASISU_ZSTD "ZSTD support for KTX2 transcoding/encoding" TRUE) +option(BASISU_OPENCL "OpenCL support in encoder" FALSE) + +message("Initial BASISU_BUILD_X64=${BASISU_BUILD_X64}") +message("Initial CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}") +message("Initial BASISU_SSE=${BASISU_SSE}") +message("Initial BASISU_ZSTD=${BASISU_ZSTD}") +message("Initial BASISU_OPENCL=${BASISU_OPENCL}") +message("Initial BASISU_SAN=${BASISU_SAN}") +message("initial BASISU_TOOL=${BASISU_TOOL}") +message("initial BASISU_EXAMPLES=${BASISU_EXAMPLES}") + +if(MINGW) + # Check if the Threads package is provided; if using Mingw it MIGHT be + find_package(Threads) +elseif(LINUX) + 
find_package(Threads REQUIRED) +endif() + +if ((NOT WIN32) AND BASISU_OPENCL) + # For Windows builds we use the Khronos lib/include files in the project's "OpenCL" directory, to completely avoid requiring fiddly to install vendor SDK's. + # Otherwise we use the system's (if any). + find_package(OpenCL REQUIRED) + message(STATUS "OpenCL found: ${OPENCL_FOUND}") + message(STATUS "OpenCL includes: ${OpenCL_INCLUDE_DIRS}") + message(STATUS "OpenCL libraries: ${OpenCL_LIBRARIES}") +endif() + +if( NOT CMAKE_BUILD_TYPE ) + set( CMAKE_BUILD_TYPE Release ) +endif() + +message(${PROJECT_NAME} " build type: " ${CMAKE_BUILD_TYPE}) + +if (BASISU_BUILD_X64) + message("Building 64-bit") +else() + message("Building 32-bit") +endif() + +if (BASISU_SSE) + message("SSE enabled") +else() + message("SSE disabled") +endif() + +if (BASISU_ZSTD) + message("Zstandard enabled") +else() + message("Zstandard disabled") +endif() + +if (NOT MSVC) + add_compile_options($<$:-g>) + # If you want to set an optimization option for non-debug too, use this instead. + #add_compile_options($,-g,-O3>) + + if (BASISU_SAN) + message("Enabling SAN") + + add_compile_options(-fsanitize=address -fno-omit-frame-pointer -fsanitize=undefined -fno-sanitize=alignment) + endif() + + # Add common non-MSVC flags excluding -fPIC. + add_compile_options(-fvisibility=hidden -fno-strict-aliasing -D_LARGEFILE64_SOURCE=1 + -D_FILE_OFFSET_BITS=64 -Wall -Wextra -Wno-unused-local-typedefs + -Wno-unused-value -Wno-unused-parameter -Wno-unused-variable + -Wno-misleading-indentation + -Wno-maybe-uninitialized -Wno-unused-function + -Wno-stringop-overflow -Wno-unknown-warning-option) + # Add -fPIC ONLY on non-Windows platforms + if (NOT WIN32) + add_compile_options(-fPIC) + endif() + + # AppleClang 14 raises this warning in zstd.cpp. 
+ add_compile_options("$<$,$,17>>:-Wno-bitwise-instead-of-logical>") + + add_compile_options("$<$:-Wno-reorder;-Wno-class-memaccess;-Wno-deprecated-copy>") + + add_compile_options($<$>:-m32>) + add_compile_definitions($<$:_DEBUG>) + if (EMSCRIPTEN) + add_link_options("SHELL:-s ALLOW_MEMORY_GROWTH=1") + endif() +else() + add_compile_options("$<$:-Wno-unused-variable;-Wno-unused-function>") +endif() + +# Define the source files for the static library +set(ENCODER_LIB_SRC_LIST + ../encoder/basisu_backend.cpp + ../encoder/basisu_basis_file.cpp + ../encoder/basisu_comp.cpp + ../encoder/basisu_enc.cpp + ../encoder/basisu_etc.cpp + ../encoder/basisu_frontend.cpp + ../encoder/basisu_gpu_texture.cpp + ../encoder/basisu_pvrtc1_4.cpp + ../encoder/basisu_resampler.cpp + ../encoder/basisu_resample_filters.cpp + ../encoder/basisu_ssim.cpp + ../encoder/basisu_uastc_enc.cpp + ../encoder/basisu_bc7enc.cpp + ../encoder/jpgd.cpp + ../encoder/basisu_kernels_sse.cpp + ../encoder/basisu_opencl.cpp + ../encoder/pvpngreader.cpp + ../encoder/basisu_uastc_hdr_4x4_enc.cpp + ../encoder/basisu_astc_hdr_6x6_enc.cpp + ../encoder/basisu_astc_hdr_common.cpp + ../encoder/basisu_astc_ldr_common.cpp + ../encoder/basisu_astc_ldr_encode.cpp + ../encoder/3rdparty/android_astc_decomp.cpp + ../encoder/3rdparty/tinyexr.cpp + ../transcoder/basisu_transcoder.cpp +) + +set(ENCODER_LIB_HDR_LIST + ../encoder/basisu_astc_hdr_6x6_enc.h + ../encoder/basisu_astc_hdr_common.h + ../encoder/basisu_astc_ldr_encode.h + ../encoder/basisu_backend.h + ../encoder/basisu_basis_file.h + ../encoder/basisu_bc7enc.h + ../encoder/basisu_comp.h + ../encoder/basisu_enc.h + ../encoder/basisu_etc.h + ../encoder/basisu_frontend.h + ../encoder/basisu_gpu_texture.h + ../encoder/basisu_kernels_declares.h + ../encoder/basisu_kernels_imp.h + ../encoder/basisu_math.h + ../encoder/basisu_miniz.h + ../encoder/basisu_ocl_kernels.h + ../encoder/basisu_opencl.h + ../encoder/basisu_pvrtc1_4.h + ../encoder/basisu_resampler_filters.h + 
../encoder/basisu_resampler.h + ../encoder/basisu_ssim.h + ../encoder/basisu_uastc_enc.h + ../encoder/basisu_uastc_hdr_4x4_enc.h + ../encoder/cppspmd_flow.h + ../encoder/cppspmd_math_declares.h + ../encoder/cppspmd_math.h + ../encoder/cppspmd_sse.h + ../encoder/cppspmd_type_aliases.h + ../encoder/jpgd.h + ../encoder/pvpngreader.h + ../encoder/3rdparty/android_astc_decomp.h + ../encoder/3rdparty/qoi.h + ../encoder/3rdparty/tinyexr.h + ../transcoder/basisu_astc_hdr_core.h + ../transcoder/basisu_astc_helpers.h + ../transcoder/basisu_containers_impl.h + ../transcoder/basisu_containers.h + ../transcoder/basisu_file_headers.h + ../transcoder/basisu_transcoder_internal.h + ../transcoder/basisu_transcoder_uastc.h + ../transcoder/basisu_transcoder.h + ../transcoder/basisu.h + ../transcoder/basisu_idct.h +) + +if (BASISU_ZSTD) + set(ENCODER_LIB_SRC_LIST ${ENCODER_LIB_SRC_LIST} ../zstd/zstd.c) + set(ENCODER_LIB_HDR_LIST ${ENCODER_LIB_HDR_LIST} ../zstd/zstd.h) +endif() + +# Create the static library +add_library(basisu_encoder STATIC ${ENCODER_LIB_SRC_LIST} ${ENCODER_LIB_HDR_LIST}) + +target_include_directories(basisu_encoder +INTERFACE + $ + $ # So KTX-Software can use it. +) +# PUBLIC so it will be exported to dependent programs. +target_compile_features(basisu_encoder PUBLIC cxx_std_17) + +if (EMSCRIPTEN) + target_compile_definitions(basisu_encoder PUBLIC BASISU_SUPPORT_SSE=0) +else() + target_compile_definitions(basisu_encoder PUBLIC + BASISU_SUPPORT_SSE=$,1,0> + ) + target_compile_options(basisu_encoder PRIVATE + "$<$,$>:-msse4.1>" + ) +endif() + +target_compile_definitions(basisu_encoder PRIVATE "BASISD_SUPPORT_KTX2_ZSTD=$,1,0>") +if (BASISU_OPENCL) + # basisu uses this to confirm the library has been compiled with OpenCL support hence PUBLIC. + target_compile_definitions(basisu_encoder PUBLIC BASISU_SUPPORT_OPENCL=1) + if (NOT WIN32) # True when the target system is Windows. + # For Non-Windows builds, use the system OpenCL headers/libs, if cmake found them. 
+ target_include_directories(basisu_encoder PRIVATE ${OpenCL_INCLUDE_DIRS}) + target_link_libraries(basisu_encoder PRIVATE ${OpenCL_LIBRARIES}) + else() + # For Windows builds, we use our local copies of the OpenCL import lib and Khronos headers. + target_include_directories(basisu_encoder PRIVATE "../OpenCL") + if (BASISU_BUILD_X64) + target_link_libraries(basisu_encoder PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../OpenCL/lib/OpenCL64.lib") + else() + target_link_libraries(basisu_encoder PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../OpenCL/lib/OpenCL.lib") + endif() + endif() +else() + target_compile_definitions(basisu_encoder PUBLIC BASISU_SUPPORT_OPENCL=0) +endif() + +if (NOT MSVC) + # Only link 'm' on non-Windows platforms (Linux, macOS) + if (NOT WIN32) + target_link_libraries(basisu_encoder INTERFACE m) + endif() + if(Threads_FOUND AND CMAKE_USE_PTHREADS_INIT) + target_link_libraries(basisu_encoder INTERFACE Threads::Threads) + elseif(LINUX) + target_link_libraries(basisu_encoder INTERFACE dl Threads::Threads) + endif() + if (BASISU_STATIC AND MINGW) + target_link_options(basisu_encoder INTERFACE -static-libgcc -static-libstdc++ -static) + endif() +endif() + +macro(set_common_executable_properties target) + #if (MSVC) + target_sources(${target} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../basisu.manifest") + #endif() + target_link_libraries(${target} PRIVATE basisu_encoder) + if (NOT BASISU_STATIC AND NOT EMSCRIPTEN AND NOT WIN32) + target_link_options(${target} PUBLIC -Wl,-rpath .) 
+ endif() +endmacro() + +if (BASISU_TOOL) + # Create the basisu executable and link against the static library + add_executable(basisu ../basisu_tool.cpp) + set_common_executable_properties(basisu) +endif() + +if (BASISU_EXAMPLES) + # Create the new example executable and link against the static library + add_executable(examples ../example/example.cpp) + set_common_executable_properties(examples) +endif() + +if (BASISU_TOOL AND NOT EMSCRIPTEN) + if (UNIX) + if (CMAKE_BUILD_TYPE STREQUAL "Release") + if (APPLE) + add_custom_command(TARGET basisu POST_BUILD COMMAND strip -X -x ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/basisu) + #message("strip command: strip -X -x ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/basisu") + else() + add_custom_command(TARGET basisu POST_BUILD COMMAND strip -g -X -x ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/basisu) + #message("strip command: strip -g -X -x ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/basisu") + endif() + endif() + endif() +endif() diff --git a/external/basis_universal/python/README.md b/external/basis_universal/python/README.md new file mode 100644 index 0000000000..1b07677833 --- /dev/null +++ b/external/basis_universal/python/README.md @@ -0,0 +1,90 @@ +Python support is still new and coming online, but is entirely functional. +The library's pure C (WASM friendly) API's are completely exposed to Python. Our next goal is to work on official [Wheels](https://pythonwheels.com/), once the API is settled and more examples are written. + +The Python integration first tries to use native .so's in the basisu_py +directory. If they don't exist, it tries the slower and single threaded WASM +fallbacks under basisu_py/wasm, which requires wasmtime for Python to be +installed. Some tests require an input.ktx2 or test.ktx2 to be in the current +directory. + +Building: + +Under the repo's root directory - build the native SO's: + +``` +mkdir build_python +cd build_python +cmake -DBASISU_BUILD_PYTHON=ON .. 
+make +``` + +Build the WASM modules (see README_WASI.md file for instructions on how to +install the WASI SDK, which is required): + +``` +mkdir build_wasm_st +cd build_wasm_st +cmake .. -DCMAKE_TOOLCHAIN_FILE=$WASI_SDK_PATH/share/cmake/wasi-sdk.cmake -DCMAKE_BUILD_TYPE=Release -DBASISU_WASM_THREADING=OFF +make +``` + +--- + +Running Tests +------------- + +The tests assume the current directory is "python". Under Windows we've tested with Python v3.12.10, and under Linux v3.12.13. + +Higher-level tests: + +- python3 -m tests.test_backend_loading +- python3 -m tests.test_basic_wasm_selection +- python3 -m tests.test_basic_backend_selection +- python3 -m tests.test_basic_decode +- python3 -m tests.test_basic_transcode +- python3 -m tests.test_compress_swirl +- python3 -m tests.test_compress_swirl_hdr +- python3 -m tests.test_transcoder_astc +- python3 -m tests.test_transcoder_backend_loading +- python3 -m tests.test_transcoder_end_to_end +- python3 -m tests.test_transcoder_end_to_end_hdr +- python3 -m tests.test_transcoder_helpers + +Low-level tests (used while bringing up the codec): + +- python3 -m lowlevel_test_native.basic_test +- python3 -m lowlevel_test_native.test_transcoder_basic +- python3 -m lowlevel_test_native.example_capi_python +- python3 -m lowlevel_test_wasm.basic_test +- python3 -m lowlevel_test_wasm.compress_test +- python3 -m lowlevel_test_wasm.compress_test_float + +Example output: + +``` +richg@ryzen9:/mnt/c/dev/bu_1_22_snapshot2/basis_universal-master/python$ python3 -m tests.test_backend_loading +========== BACKEND LOADING TEST ========== + +Testing native backend... +[Encoder] Using native backend + [OK] Native backend loaded +Hello from basisu_wasm_api.cpp version 200 + Native get_version() ? 200 + Native alloc() returned ptr = 685784256 + Native free() OK + [OK] Native basic operations working. + + +Testing WASM backend... 
+[WASM Encoder] Loaded: /mnt/c/dev/bu_1_22_snapshot2/basis_universal-master/python/basisu_py/wasm/basisu_module_st.wasm +[Encoder] Using WASM backend + [OK] WASM backend loaded +Hello from basisu_wasm_api.cpp version 200 + WASM get_version() ? 200 + WASM alloc() returned ptr = 26920160 + WASM free() OK + [OK] WASM basic operations working. + + +========== DONE ========== +``` diff --git a/external/basis_universal/python/README_win.md b/external/basis_universal/python/README_win.md new file mode 100644 index 0000000000..74a312b1fa --- /dev/null +++ b/external/basis_universal/python/README_win.md @@ -0,0 +1,85 @@ +Windows Native Python Build Instructions +======================================== + +This project uses pybind11 to build Python .pyd extension modules on Windows. +Because Windows installs multiple Python versions, and pybind11 currently only +supports up to Python 3.12, you must follow these steps exactly. + +Requirements +------------ +- Visual Studio Developer Command Prompt (VS C++ Build Tools installed) +- Python 3.12 (pybind11 does NOT support 3.13+ at the time of writing) +- pybind11 installed into Python 3.12 + +Check installed Python versions: + py -0 + +If Python 3.12 is missing: + winget install Python.Python.3.12 + +Install pybind11 for Python 3.12: + py -3.12 -m pip install pybind11 + +IMPORTANT: +You must build AND run with the same Python interpreter version (3.12). + +Building the .pyd Modules +------------------------- +Open the "Developer Command Prompt for Visual Studio". + +From the project root: + + mkdir build_python_win + cd build_python_win + +Run CMake using the exact path to python.exe for Python 3.12: + + cmake -G "Visual Studio 17 2022" -A x64 -DBASISU_BUILD_PYTHON=ON -DBASISU_BUILD_WASM=OFF -DPYTHON_EXECUTABLE="C:\Users\\AppData\Local\Programs\Python\Python312\python.exe" .. + +Build: + + cmake --build . 
--config Release + +Output files will be created in: + + python/basisu_py/basisu_python.pyd + python/basisu_py/basisu_transcoder_python.pyd + +Running the Modules +------------------- +Always run using Python 3.12: + + py -3.12 + +Inside Python: + + import basisu_py + print("Modules loaded OK.") + +While in the "python" directory: + + py -m tests.test_backend_loading + +WASM Backend (Optional) +----------------------- +Install wasmtime: + + py -3.12 -m pip install wasmtime + +Ensure these files exist: + + python/basisu_py/wasm/*.wasm + +Common Problems +--------------- +1. "pybind11 not found" + -> Installed into wrong Python version. Use: + py -3.12 -m pip install pybind11 + +2. "Python config failure" + -> You are using Python 3.13 or 3.14. Must use Python 3.12. + +3. Modules not loading + -> You must run them with the same interpreter used to build them: + py -3.12 + diff --git a/external/basis_universal/python/astc_writer.py b/external/basis_universal/python/astc_writer.py new file mode 100644 index 0000000000..5752620d72 --- /dev/null +++ b/external/basis_universal/python/astc_writer.py @@ -0,0 +1,83 @@ +# astc_writer.py +# +# Minimal ASTC writer that mirrors the C/C++ write_astc_file() logic from example_capi.c. +# Writes a valid single-slice 2D ASTC texture file (no array slices, no 3D, no mips). +# +# Usage: +# from astc_writer import write_astc_file +# write_astc_file("output.astc", blocks, block_width, block_height, width, height) +# +# "blocks" must be a bytes-like object containing the full ASTC block data +# using 16 bytes per block (standard ASTC block size). + + +def write_astc_file( + filename: str, + blocks: bytes, + block_width: int, + block_height: int, + width: int, + height: int +) -> None: + """ + Write an ASTC file to disk. + + Parameters: + filename : Output filename ("something.astc") + blocks : Bytes-like object containing ASTC blocks (16 bytes per block) + block_width : ASTC block width (e.g. 4-12) + block_height : ASTC block height (e.g. 
4-12) + width : Original image width in pixels + height : Original image height in pixels + + Notes: + - ASTC files use 2D blocks; depth is always 1. + - Block layout goes row-major: (num_blocks_y num_blocks_x) blocks. + - No mipmaps are stored in this format. + """ + + # Validate block dimensions + if block_width < 4 or block_width > 12: + raise ValueError(f"ASTC block_width {block_width} out of range (412)") + if block_height < 4 or block_height > 12: + raise ValueError(f"ASTC block_height {block_height} out of range (412)") + + # Compute block grid + num_blocks_x = (width + block_width - 1) // block_width + num_blocks_y = (height + block_height - 1) // block_height + total_blocks = num_blocks_x * num_blocks_y + expected_size = total_blocks * 16 # 16 bytes per ASTC block (always) + + if len(blocks) != expected_size: + raise ValueError( + f"ASTC block buffer incorrect size: expected {expected_size}, got {len(blocks)}" + ) + + # Write file + with open(filename, "wb") as f: + # ASTC magic number (0x13AB A15C) + f.write(bytes([0x13, 0xAB, 0xA1, 0x5C])) + + # Block dims: x, y, z (z=1) + f.write(bytes([ + block_width & 0xFF, + block_height & 0xFF, + 1 + ])) + + # ASTC stores width/height/depth as 24-bit LE + def write_24bit_le(v: int): + f.write(bytes([ + v & 0xFF, + (v >> 8) & 0xFF, + (v >> 16) & 0xFF + ])) + + write_24bit_le(width) + write_24bit_le(height) + write_24bit_le(1) # depth + + # Write actual block payload + f.write(blocks) + + print(f"[ASTC Writer] Wrote: {filename} ({width}x{height}, {block_width}x{block_height} blocks)") diff --git a/external/basis_universal/python/basisu_encoder_pybind11.cpp b/external/basis_universal/python/basisu_encoder_pybind11.cpp new file mode 100644 index 0000000000..c6a698892b --- /dev/null +++ b/external/basis_universal/python/basisu_encoder_pybind11.cpp @@ -0,0 +1,109 @@ +// File: basisu_encoder_pybind11.cpp +// pybind11 native bindings for the compressor's pure C API basisu_wasm_api.h +#include +#include +#include + +// 
include the basisu compression plain C API +#include "../encoder/basisu_wasm_api.h" + +namespace py = pybind11; + +// Convert wasm_bool_t (uint32_t) ? Python bool +static inline bool to_bool(uint32_t v) { return v != 0; } + +PYBIND11_MODULE(basisu_python, m) { + m.doc() = "Native Basis Universal encoder (pybind11 binding over basisu_wasm_api)"; + + // + // Initialization / Version + // + m.def("init", &bu_init, "Initialize the BasisU codec library"); + m.def("get_version", &bu_get_version, "Return BASISU_LIB_VERSION"); + + // + // Memory allocation helpers + // + m.def("alloc", &bu_alloc, + "Allocate memory inside native heap and return pointer as uint64"); + m.def("free", &bu_free, + "Free previously allocated pointer"); + + // + // Compression params handles + // + m.def("new_params", &bu_new_comp_params, + "Create a new comp_params struct inside native heap"); + m.def("delete_params", + [](uint64_t h) { return to_bool(bu_delete_comp_params(h)); }, + "Destroy a comp_params struct"); + + m.def("params_clear", + [](uint64_t h) { return to_bool(bu_comp_params_clear(h)); }, + "Clear comp_params struct"); + + // + // Image upload API + // + m.def("set_image_rgba32", + [](uint64_t params, uint32_t index, + uint64_t img_ptr, uint32_t w, uint32_t h, uint32_t pitch) { + return to_bool(bu_comp_params_set_image_rgba32( + params, index, img_ptr, w, h, pitch)); + }, + "Set 8-bit RGBA32 image into parameters"); + + m.def("set_image_float_rgba", + [](uint64_t params, uint32_t index, + uint64_t img_ptr, uint32_t w, uint32_t h, uint32_t pitch) { + return to_bool(bu_comp_params_set_image_float_rgba( + params, index, img_ptr, w, h, pitch)); + }, + "Set float32 RGBA image into parameters"); + + // + // Compression + // + m.def("compress", + [](uint64_t params, + int tex_format, + int quality, + int effort, + uint64_t flags, + float rdo_quality) + { + return to_bool(bu_compress_texture( + params, tex_format, quality, effort, flags, rdo_quality)); + }, + py::arg("params"), + 
py::arg("tex_format"), + py::arg("quality"), + py::arg("effort"), + py::arg("flags"), + py::arg("rdo_quality") = 0.0f + ); + + // + // Output blob access + // + m.def("get_comp_data_size", + &bu_comp_params_get_comp_data_size, + "Return size (bytes) of compressed output"); + m.def("get_comp_data_ofs", + &bu_comp_params_get_comp_data_ofs, + "Return pointer (uint64) to compressed output buffer"); + + // Memory read/write + m.def("read_memory", + [](uint64_t ptr, uint32_t size) { + return py::bytes((const char*)ptr, size); + }, + "Read `size` bytes starting at native memory address `ptr`"); + + m.def("write_memory", + [](uint64_t dest_ptr, py::buffer src) { + py::buffer_info info = src.request(); + memcpy((void*)dest_ptr, info.ptr, info.size * info.itemsize); + }, + "Write bytes/buffer-like object into native memory at address `ptr`"); +} diff --git a/external/basis_universal/python/basisu_py/MANIFEST.in b/external/basis_universal/python/basisu_py/MANIFEST.in new file mode 100644 index 0000000000..993a897b05 --- /dev/null +++ b/external/basis_universal/python/basisu_py/MANIFEST.in @@ -0,0 +1,2 @@ +recursive-include basisu_py *.py *.so *.wasm +include README.md diff --git a/external/basis_universal/python/basisu_py/READMD.md b/external/basis_universal/python/basisu_py/READMD.md new file mode 100644 index 0000000000..ca5adf873d --- /dev/null +++ b/external/basis_universal/python/basisu_py/READMD.md @@ -0,0 +1,5 @@ +This is the Python support directory for the Basis Universal KTX2 compressor +and transcoder modules. + +License: Apache 2.0 + diff --git a/external/basis_universal/python/basisu_py/__init__.py b/external/basis_universal/python/basisu_py/__init__.py new file mode 100644 index 0000000000..2186700a29 --- /dev/null +++ b/external/basis_universal/python/basisu_py/__init__.py @@ -0,0 +1,35 @@ +""" +basisu_py +========= +Python bindings for the Basis Universal encoder and transcoder, with +automatic fallback between native C++ extensions and WASM modules. 
+ +Main entry points: + - Transcoder : basisu_py.transcoder.Transcoder + - Encoder : basisu_py.codec.Encoder + - constants : basisu_py.constants +""" + +from .codec import Encoder +from .transcoder import Transcoder, KTX2Handle +from .constants import ( + BasisTexFormat, + BasisQuality, + BasisEffort, + BasisFlags, + TranscoderTextureFormat, + TranscodeDecodeFlags, +) + +# What the package publicly exposes +__all__ = [ + "Encoder", + "Transcoder", + "KTX2Handle", + "BasisTexFormat", + "BasisQuality", + "BasisEffort", + "BasisFlags", + "TranscoderTextureFormat", + "TranscodeDecodeFlags", +] diff --git a/external/basis_universal/python/basisu_py/basisu_python.cpython-312-x86_64-linux-gnu.so b/external/basis_universal/python/basisu_py/basisu_python.cpython-312-x86_64-linux-gnu.so new file mode 100644 index 0000000000..5211d53530 Binary files /dev/null and b/external/basis_universal/python/basisu_py/basisu_python.cpython-312-x86_64-linux-gnu.so differ diff --git a/external/basis_universal/python/basisu_py/basisu_python.pyd b/external/basis_universal/python/basisu_py/basisu_python.pyd new file mode 100644 index 0000000000..cf69d4b76f Binary files /dev/null and b/external/basis_universal/python/basisu_py/basisu_python.pyd differ diff --git a/external/basis_universal/python/basisu_py/basisu_transcoder_python.cpython-312-x86_64-linux-gnu.so b/external/basis_universal/python/basisu_py/basisu_transcoder_python.cpython-312-x86_64-linux-gnu.so new file mode 100644 index 0000000000..b00bf6dffc Binary files /dev/null and b/external/basis_universal/python/basisu_py/basisu_transcoder_python.cpython-312-x86_64-linux-gnu.so differ diff --git a/external/basis_universal/python/basisu_py/basisu_transcoder_python.pyd b/external/basis_universal/python/basisu_py/basisu_transcoder_python.pyd new file mode 100644 index 0000000000..c1c5d586ba Binary files /dev/null and b/external/basis_universal/python/basisu_py/basisu_transcoder_python.pyd differ diff --git 
a/external/basis_universal/python/basisu_py/codec.py b/external/basis_universal/python/basisu_py/codec.py new file mode 100644 index 0000000000..e12b22cf0d --- /dev/null +++ b/external/basis_universal/python/basisu_py/codec.py @@ -0,0 +1,222 @@ +# basisu_py/codec.py + +import importlib +import numpy as np +from PIL import Image +import ctypes + +from .constants import BasisTexFormat, BasisQuality, BasisEffort, BasisFlags +from pathlib import Path + +class EncoderBackend: + NATIVE = "native" + WASM = "wasm" + AUTO = "auto" + +class Encoder: + + def __init__(self, backend=EncoderBackend.AUTO): + self.backend = backend + self._native = None + self._wasm = None + self.backend_name = None + + # ------------------------------------------------------------------ + # Try native first (AUTO or NATIVE modes) + # ------------------------------------------------------------------ + if backend in (EncoderBackend.AUTO, EncoderBackend.NATIVE): + try: + import basisu_py.basisu_python as native_encoder + native_encoder.init() + + self._native = native_encoder + self._wasm = None + self.backend_name = "NATIVE" + + print("[Encoder] Using native backend") + return + + except Exception as e: + if backend == EncoderBackend.NATIVE: + raise RuntimeError( + f"[Encoder] Native backend requested but unavailable: {e}" + ) + print("[Encoder] Native unavailable; falling back to WASM:", e) + + # ------------------------------------------------------------------ + # Fallback to WASM (AUTO or explicitly WASM) + # ------------------------------------------------------------------ + try: + from basisu_py.wasm.wasm_encoder import BasisuWasmEncoder + except Exception as e: + raise RuntimeError( + f"[Encoder] WASM backend cannot be imported: {e}\n" + "Make sure wasmtime is installed and basisu_py/wasm/*.wasm exist." 
+ ) + + wasm_path = Path(__file__).parent / "wasm" / "basisu_module_st.wasm" + self._wasm = BasisuWasmEncoder(str(wasm_path)) + self._wasm.load() + self._native = None + self.backend_name = "WASM" + + print("[Encoder] Using WASM backend") + + + # ------------------------------------------------------ + # Public API + # ------------------------------------------------------ + def compress(self, + image, + format=-1, + quality=BasisQuality.MAX, + effort=BasisEffort.DEFAULT, + flags=BasisFlags.KTX2_OUTPUT | BasisFlags.SRGB | BasisFlags.THREADED | BasisFlags.XUASTC_LDR_FULL_ZSTD): + + rgba_bytes, w, h, is_hdr = self._convert_input_to_rgba_bytes(image) + + # Auto-select format if user passed -1 + if format == -1: + if is_hdr: + format = BasisTexFormat.cUASTC_HDR_6x6 + else: + format = BasisTexFormat.cXUASTC_LDR_6x6 + + if self._native: + return self._compress_native(rgba_bytes, w, h, format, quality, effort, flags, is_hdr) + else: + return self._compress_wasm(rgba_bytes, w, h, format, quality, effort, flags, is_hdr) + + def compress_float32(self, arr, **kwargs): + if not isinstance(arr, np.ndarray) or arr.dtype != np.float32: + raise ValueError("compress_float32 requires float32 NumPy HxWx4 array") + + return self.compress(arr, **kwargs) + + # ------------------------------------------------------ + # Native backend + # ------------------------------------------------------ + def _compress_native(self, bytes_data, w, h, fmt, quality, effort, flags, is_hdr=False): + enc = self._native + + params = enc.new_params() + + try: + buf_ptr = enc.alloc(len(bytes_data)) + + # Write raw bytes (uint8 or float32) + ctypes.memmove(buf_ptr, bytes_data, len(bytes_data)) + + if is_hdr: + ok = enc.set_image_float_rgba(params, 0, buf_ptr, w, h, w * 16) # 4 floats = 16 bytes per pixel + else: + ok = enc.set_image_rgba32(params, 0, buf_ptr, w, h, w * 4) + + if not ok: + raise RuntimeError("Native encoder: set_image failed (HDR or LDR)") + + ok = enc.compress(params, fmt, quality, effort, 
flags, 0.0) + if not ok: + raise RuntimeError("Native encoder: compress() failed") + + size = enc.get_comp_data_size(params) + ofs = enc.get_comp_data_ofs(params) + blob = enc.read_memory(ofs, size) + return blob + + finally: + enc.delete_params(params) + if buf_ptr: + enc.free(buf_ptr) + + # ------------------------------------------------------ + # WASM backend + # ------------------------------------------------------ + def _compress_wasm(self, bytes_data, w, h, fmt, quality, effort, flags, is_hdr=False): + enc = self._wasm + + params = enc.new_params() + + try: + buf_ptr = enc.alloc(len(bytes_data)) + enc.write_bytes(buf_ptr, bytes_data) + + if is_hdr: + ok = enc.set_image_float_rgba(params, 0, buf_ptr, w, h, w * 16) + else: + ok = enc.set_image_rgba32(params, 0, buf_ptr, w, h, w * 4) + + if not ok: + raise RuntimeError("WASM encoder: set_image failed (HDR or LDR)") + + ok = enc.compress(params, fmt, quality, effort, flags, 0.0) + if not ok: + raise RuntimeError("WASM encoder: compress() failed") + + size = enc.get_comp_data_size(params) + ofs = enc.get_comp_data_ofs(params) + blob = enc.read_bytes(ofs, size) + return blob + + finally: + enc.delete_params(params) + if buf_ptr: + enc.free(buf_ptr) + + # ------------------------------------------------------ + # Image conversion + # ------------------------------------------------------ + def _convert_input_to_rgba_bytes(self, image): + """ + Accept: + - Pillow Image (LDR) -> returns uint8 bytes + - NumPy uint8 LDR -> returns uint8 bytes + - NumPy float32 HDR -> returns float32 bytes + Returns (bytes, width, height, is_hdr) + """ + + # Pillow image -> LDR + if isinstance(image, Image.Image): + image = image.convert("RGBA") + arr = np.array(image, dtype=np.uint8) + h, w = arr.shape[:2] + return arr.tobytes(), w, h, False + + # NumPy array + elif isinstance(image, np.ndarray): + + # HDR float32 image + if image.dtype == np.float32: + if image.ndim != 3 or image.shape[2] not in (3,4): + raise ValueError("HDR NumPy 
image must be HxWx3 or HxWx4 float32") + + h, w, c = image.shape + + # Expand RGB -> RGBA if needed + if c == 3: + alpha = np.ones((h, w, 1), dtype=np.float32) + arr = np.concatenate([image, alpha], axis=2) + else: + arr = image + + return arr.tobytes(), w, h, True + + # LDR uint8 image + if image.dtype == np.uint8: + if image.ndim != 3 or image.shape[2] not in (3,4): + raise ValueError("LDR NumPy image must be HxWx3 or HxWx4 uint8") + + h, w, c = image.shape + + if c == 3: + alpha = np.full((h, w, 1), 255, dtype=np.uint8) + arr = np.concatenate([image, alpha], axis=2) + else: + arr = image + + return arr.tobytes(), w, h, False + + raise ValueError("NumPy image must be uint8 (LDR) or float32 (HDR)") + + else: + raise TypeError("compress() expects Pillow Image or NumPy array") diff --git a/external/basis_universal/python/basisu_py/constants.py b/external/basis_universal/python/basisu_py/constants.py new file mode 100644 index 0000000000..04be0b0137 --- /dev/null +++ b/external/basis_universal/python/basisu_py/constants.py @@ -0,0 +1,183 @@ +# basisu_constants.py + +# ============================================================ +# .KTX2/.basis file types +# basist::basis_tex_format +# ============================================================ +class BasisTexFormat: + # Original LDR formats + cETC1S = 0 + cUASTC_LDR_4x4 = 1 + + # HDR + cUASTC_HDR_4x4 = 2 + cASTC_HDR_6x6 = 3 + cUASTC_HDR_6x6 = 4 + + # XUASTC supercompressed LDR formats + cXUASTC_LDR_4x4 = 5 + cXUASTC_LDR_5x4 = 6 + cXUASTC_LDR_5x5 = 7 + cXUASTC_LDR_6x5 = 8 + cXUASTC_LDR_6x6 = 9 + cXUASTC_LDR_8x5 = 10 + cXUASTC_LDR_8x6 = 11 + cXUASTC_LDR_10x5 = 12 + cXUASTC_LDR_10x6 = 13 + cXUASTC_LDR_8x8 = 14 + cXUASTC_LDR_10x8 = 15 + cXUASTC_LDR_10x10= 16 + cXUASTC_LDR_12x10= 17 + cXUASTC_LDR_12x12= 18 + + # Standard ASTC LDR + cASTC_LDR_4x4 = 19 + cASTC_LDR_5x4 = 20 + cASTC_LDR_5x5 = 21 + cASTC_LDR_6x5 = 22 + cASTC_LDR_6x6 = 23 + cASTC_LDR_8x5 = 24 + cASTC_LDR_8x6 = 25 + cASTC_LDR_10x5 = 26 + cASTC_LDR_10x6 = 27 + 
cASTC_LDR_8x8 = 28 + cASTC_LDR_10x8 = 29 + cASTC_LDR_10x10= 30 + cASTC_LDR_12x10= 31 + cASTC_LDR_12x12= 32 + +# ============================================================ +# Unified quality level: 1-100 (higher=better quality, 100 disables some codec options) +# ============================================================ +class BasisQuality: + MIN = 1 + MAX = 100 + +# ============================================================ +# Unified effort level: 0-10 (0=fastest, 10=very slow, higher=slower but higher potential quality/more features utilized) +# ============================================================ +class BasisEffort: + MIN = 0 + MAX = 10 + + SUPER_FAST = 0 + FAST = 2 + NORMAL = 5 + DEFAULT = 2 + SLOW = 8 + VERY_SLOW = 10 + +# ============================================================ +# C-style API flags +# ============================================================ +class BasisFlags: + NONE = 0 + USE_OPENCL = 1 << 8 + THREADED = 1 << 9 + DEBUG_OUTPUT = 1 << 10 + + KTX2_OUTPUT = 1 << 11 + KTX2_UASTC_ZSTD = 1 << 12 + + SRGB = 1 << 13 + GEN_MIPS_CLAMP = 1 << 14 + GEN_MIPS_WRAP = 1 << 15 + + Y_FLIP = 1 << 16 + + PRINT_STATS = 1 << 18 + PRINT_STATUS = 1 << 19 + + DEBUG_IMAGES = 1 << 20 + + REC2020 = 1 << 21 + VALIDATE_OUTPUT = 1 << 22 + + XUASTC_LDR_FULL_ARITH = 0 + XUASTC_LDR_HYBRID = 1 << 23 + XUASTC_LDR_FULL_ZSTD = 2 << 23 + XUASTC_LDR_SYNTAX_SHIFT = 23 + XUASTC_LDR_SYNTAX_MASK = 3 + + TEXTURE_TYPE_2D = 0 << 25 + TEXTURE_TYPE_2D_ARRAY = 1 << 25 + TEXTURE_TYPE_CUBEMAP_ARRAY = 2 << 25 + TEXTURE_TYPE_VIDEO_FRAMES = 3 << 25 + TEXTURE_TYPE_SHIFT = 25 + TEXTURE_TYPE_MASK = 3 + + VERBOSE = PRINT_STATS | PRINT_STATUS + MIPMAP_CLAMP = GEN_MIPS_CLAMP + MIPMAP_WRAP = GEN_MIPS_WRAP + +# ============================================================ +# Transcoder Texture Formats (GPU block formats) +# basist::transcoder_texture_format +# ============================================================ +class TranscoderTextureFormat: + TF_ETC1_RGB = 0 + TF_ETC2_RGBA 
= 1 + TF_BC1_RGB = 2 + TF_BC3_RGBA = 3 + TF_BC4_R = 4 + TF_BC5_RG = 5 + TF_BC7_RGBA = 6 + + TF_PVRTC1_4_RGB = 8 + TF_PVRTC1_4_RGBA = 9 + + TF_ASTC_LDR_4X4_RGBA = 10 + TF_ATC_RGB = 11 + TF_ATC_RGBA = 12 + + # Uncompressed + TF_RGBA32 = 13 + TF_RGB565 = 14 + TF_BGR565 = 15 + TF_RGBA4444 = 16 + + TF_FXT1_RGB = 17 + TF_PVRTC2_4_RGB = 18 + TF_PVRTC2_4_RGBA = 19 + + TF_ETC2_EAC_R11 = 20 + TF_ETC2_EAC_RG11 = 21 + TF_BC6H = 22 + + TF_ASTC_HDR_4X4_RGBA = 23 + + TF_RGB_HALF = 24 + TF_RGBA_HALF = 25 + TF_RGB_9E5 = 26 + TF_ASTC_HDR_6X6_RGBA = 27 + + TF_ASTC_LDR_5X4_RGBA = 28 + TF_ASTC_LDR_5X5_RGBA = 29 + TF_ASTC_LDR_6X5_RGBA = 30 + TF_ASTC_LDR_6X6_RGBA = 31 + TF_ASTC_LDR_8X5_RGBA = 32 + TF_ASTC_LDR_8X6_RGBA = 33 + TF_ASTC_LDR_10X5_RGBA = 34 + TF_ASTC_LDR_10X6_RGBA = 35 + TF_ASTC_LDR_8X8_RGBA = 36 + TF_ASTC_LDR_10X8_RGBA = 37 + TF_ASTC_LDR_10X10_RGBA= 38 + TF_ASTC_LDR_12X10_RGBA= 39 + TF_ASTC_LDR_12X12_RGBA= 40 + + TOTAL = 41 + +# ============================================================ +# Transcoder Decode Flags +# ============================================================ +class TranscodeDecodeFlags: + PVRTC_DECODE_TO_NEXT_POW2 = 2 + TRANSCODE_ALPHA_TO_OPAQUE = 4 + BC1_FORBID_THREE_COLOR_BLOCKS = 8 + OUTPUT_HAS_ALPHA_INDICES = 16 + HIGH_QUALITY = 32 + NO_ETC1S_CHROMA_FILTERING = 64 + NO_DEBLOCK_FILTERING = 128 + STRONGER_DEBLOCK_FILTERING = 256 + FORCE_DEBLOCK_FILTERING = 512 + XUASTC_LDR_DISABLE_FAST_BC7_TRANSCODING = 1024 diff --git a/external/basis_universal/python/basisu_py/transcoder.py b/external/basis_universal/python/basisu_py/transcoder.py new file mode 100644 index 0000000000..948b516c0e --- /dev/null +++ b/external/basis_universal/python/basisu_py/transcoder.py @@ -0,0 +1,735 @@ +# basisu_py/transcoder.py +import numpy as np +from dataclasses import dataclass +from pathlib import Path + +from basisu_py.constants import ( + TranscoderTextureFormat, +) + +import importlib +import ctypes + + +# 
--------------------------------------------------------------------------- +# Enum to select backend +# --------------------------------------------------------------------------- +class TranscoderBackend: + NATIVE = "native" + WASM = "wasm" + AUTO = "auto" + + +# --------------------------------------------------------------------------- +# Wrapper class storing pointer+handle +# --------------------------------------------------------------------------- +@dataclass +class KTX2Handle: + ptr: int + handle: int + + +# --------------------------------------------------------------------------- +# Main Transcoder class +# --------------------------------------------------------------------------- +class Transcoder: + def __init__(self, backend=TranscoderBackend.AUTO): + self._native = None + self._wasm = None + self.backend_name = None + self.backend = None + + use_native = False + + # ------------------------------------------------------------------ + # Try native backend first if AUTO or NATIVE + # ------------------------------------------------------------------ + if backend in (TranscoderBackend.AUTO, TranscoderBackend.NATIVE): + try: + native_mod = importlib.import_module("basisu_py.basisu_transcoder_python") + native_mod.init() + self._native = native_mod + self.backend = native_mod + self.backend_name = "NATIVE" + use_native = True + print("[Transcoder] Using native backend") + except Exception as e: + if backend == TranscoderBackend.NATIVE: + # Caller explicitly requested native - fail hard + raise RuntimeError(f"Native transcoder backend failed: {e}") + print("[Transcoder] Native backend unavailable, reason:", e) + self._native = None + + # ------------------------------------------------------------------ + # Fallback to WASM if native is not being used + # ------------------------------------------------------------------ + if not use_native: + try: + from basisu_py.wasm.wasm_transcoder import BasisuWasmTranscoder + except Exception as e: + raise 
RuntimeError( + f"WASM backend cannot be imported: {e}\n" + "Ensure that:\n" + " - 'wasmtime' is installed\n" + " - basisu_py/wasm/*.wasm files are present in the install\n" + ) + + wasm_path = Path(__file__).parent / "wasm" / "basisu_transcoder_module_st.wasm" + self._wasm = BasisuWasmTranscoder(str(wasm_path)) + self._wasm.load() + self.backend = self._wasm + self.backend_name = "WASM" + print("[Transcoder] Using WASM backend") + + # Finally, bind the unified API to whichever backend we chose + self._bind_backend(self.backend) + + # ----------------------------------------------------------------------- + # Unified backend binding (native or wasm) + # ----------------------------------------------------------------------- + def _bind_backend(self, b): + self.backend = b + + # ------------------ memory operations ------------------ + memory_mapping = [ + ("_alloc", "alloc"), + ("_free", "free"), + ("_write", "write_memory"), + ("_read", "read_memory"), + ] + + # ------------------ KTX2 core ------------------ + basis_mapping = [ + # basis_tex_format helpers + ("basis_tex_format_is_xuastc_ldr", "basis_tex_format_is_xuastc_ldr"), + ("basis_tex_format_is_astc_ldr", "basis_tex_format_is_astc_ldr"), + ("basis_tex_format_get_block_width", "basis_tex_format_get_block_width"), + ("basis_tex_format_get_block_height", "basis_tex_format_get_block_height"), + ("basis_tex_format_is_hdr", "basis_tex_format_is_hdr"), + ("basis_tex_format_is_ldr", "basis_tex_format_is_ldr"), + + # transcoder_texture_format helpers + ("basis_get_bytes_per_block_or_pixel", "basis_get_bytes_per_block_or_pixel"), + ("basis_transcoder_format_has_alpha", "basis_transcoder_format_has_alpha"), + ("basis_transcoder_format_is_hdr", "basis_transcoder_format_is_hdr"), + ("basis_transcoder_format_is_ldr", "basis_transcoder_format_is_ldr"), + ("basis_transcoder_texture_format_is_astc", "basis_transcoder_texture_format_is_astc"), + ("basis_transcoder_format_is_uncompressed", 
"basis_transcoder_format_is_uncompressed"), + ("basis_get_uncompressed_bytes_per_pixel", "basis_get_uncompressed_bytes_per_pixel"), + ("basis_get_block_width", "basis_get_block_width"), + ("basis_get_block_height", "basis_get_block_height"), + ("basis_get_transcoder_texture_format_from_basis_tex_format","basis_get_transcoder_texture_format_from_basis_tex_format"), + ("basis_is_format_supported", "basis_is_format_supported"), + ("basis_compute_transcoded_image_size_in_bytes","basis_compute_transcoded_image_size_in_bytes"), + ] + + ktx2_mapping = [ + + ("ktx2_open", "ktx2_open"), + ("ktx2_close", "ktx2_close"), + + ("ktx2_get_width", "ktx2_get_width"), + ("ktx2_get_height", "ktx2_get_height"), + ("ktx2_get_levels", "ktx2_get_levels"), + ("ktx2_get_faces", "ktx2_get_faces"), + ("ktx2_get_layers", "ktx2_get_layers"), + + ("ktx2_get_basis_tex_format", "ktx2_get_basis_tex_format"), + + ("ktx2_get_block_width", "ktx2_get_block_width"), + ("ktx2_get_block_height", "ktx2_get_block_height"), + + ("ktx2_has_alpha", "ktx2_has_alpha"), + + # flags + ("ktx2_is_hdr", "ktx2_is_hdr"), + ("ktx2_is_hdr_4x4", "ktx2_is_hdr_4x4"), + ("ktx2_is_hdr_6x6", "ktx2_is_hdr_6x6"), + ("ktx2_is_ldr", "ktx2_is_ldr"), + ("ktx2_is_srgb", "ktx2_is_srgb"), + ("ktx2_is_etc1s", "ktx2_is_etc1s"), + ("ktx2_is_uastc_ldr_4x4", "ktx2_is_uastc_ldr_4x4"), + ("ktx2_is_xuastc_ldr", "ktx2_is_xuastc_ldr"), + ("ktx2_is_astc_ldr", "ktx2_is_astc_ldr"), + ("ktx2_is_video", "ktx2_is_video"), + ("ktx2_get_ldr_hdr_upconversion_nit_multiplier", "ktx2_get_ldr_hdr_upconversion_nit_multiplier"), + + # DFD access + ("ktx2_get_dfd_flags", "ktx2_get_dfd_flags"), + ("ktx2_get_dfd_total_samples", "ktx2_get_dfd_total_samples"), + ("ktx2_get_dfd_channel_id0", "ktx2_get_dfd_channel_id0"), + ("ktx2_get_dfd_channel_id1", "ktx2_get_dfd_channel_id1"), + ("ktx2_get_dfd_color_model", "ktx2_get_dfd_color_model"), + ("ktx2_get_dfd_color_primaries", "ktx2_get_dfd_color_primaries"), + ("ktx2_get_dfd_transfer_func", 
"ktx2_get_dfd_transfer_func"), + + # per-level info + ("ktx2_get_level_orig_width", "ktx2_get_level_orig_width"), + ("ktx2_get_level_orig_height", "ktx2_get_level_orig_height"), + ("ktx2_get_level_actual_width", "ktx2_get_level_actual_width"), + ("ktx2_get_level_actual_height", "ktx2_get_level_actual_height"), + + ("ktx2_get_level_num_blocks_x", "ktx2_get_level_num_blocks_x"), + ("ktx2_get_level_num_blocks_y", "ktx2_get_level_num_blocks_y"), + ("ktx2_get_level_total_blocks", "ktx2_get_level_total_blocks"), + + ("ktx2_get_level_alpha_flag", "ktx2_get_level_alpha_flag"), + ("ktx2_get_level_iframe_flag", "ktx2_get_level_iframe_flag"), + + # transcoding + ("ktx2_start_transcoding", "ktx2_start_transcoding"), + ("ktx2_transcode_image_level", "ktx2_transcode_image_level"), + + # version + ("get_version_fn", "get_version"), + ] + + # Apply all mappings + for public_name, backend_name in (memory_mapping + ktx2_mapping + basis_mapping): + setattr(self, public_name, getattr(b, backend_name)) + + # ----------------------------------------------------------------------- + # Public version query + # ----------------------------------------------------------------------- + def get_version(self): + return self.get_version_fn() + + # ----------------------------------------------------------------------- + # Enable library debug printing to stdout (also set BASISU_FORCE_DEVEL_MESSAGES to 1 in transcoder/basisu.h) + # ----------------------------------------------------------------------- + def enable_debug_printf(self, flag: bool = True): + return self.backend.enable_debug_printf(flag) + + # ----------------------------------------------------------------------- + # KTX2 Handle API: open/close + all queries + # ----------------------------------------------------------------------- + def open(self, ktx2_bytes: bytes) -> KTX2Handle: + ptr = self._alloc(len(ktx2_bytes)) + self._write(ptr, ktx2_bytes) + handle = self.ktx2_open(ptr, len(ktx2_bytes)) + return KTX2Handle(ptr, handle) + 
+ def close(self, ktx2_handle: KTX2Handle): + self.ktx2_close(ktx2_handle.handle) + self._free(ktx2_handle.ptr) + + # ---- Basic queries ---- + def get_width(self, ktx2_handle: KTX2Handle): + return self.ktx2_get_width(ktx2_handle.handle) + + def get_height(self, ktx2_handle: KTX2Handle): + return self.ktx2_get_height(ktx2_handle.handle) + + def get_levels(self, ktx2_handle: KTX2Handle): + return self.ktx2_get_levels(ktx2_handle.handle) + + def get_faces(self, ktx2_handle: KTX2Handle): + return self.ktx2_get_faces(ktx2_handle.handle) + + def get_layers(self, ktx2_handle: KTX2Handle): + return self.ktx2_get_layers(ktx2_handle.handle) + + def get_basis_tex_format(self, ktx2_handle: KTX2Handle): + return self.ktx2_get_basis_tex_format(ktx2_handle.handle) + + def has_alpha(self, ktx2_handle: KTX2Handle) -> bool: + """ + Return true if the KTX2 container has alpha. + """ + return bool(self.ktx2_has_alpha(ktx2_handle.handle)) + + # ---- Format flags ---- + def is_hdr(self, ktx2_handle): return bool(self.ktx2_is_hdr(ktx2_handle.handle)) + def is_hdr_4x4(self, ktx2_handle): return bool(self.ktx2_is_hdr_4x4(ktx2_handle.handle)) + def is_hdr_6x6(self, ktx2_handle): return bool(self.ktx2_is_hdr_6x6(ktx2_handle.handle)) + def is_ldr(self, ktx2_handle): return bool(self.ktx2_is_ldr(ktx2_handle.handle)) + def is_srgb(self, ktx2_handle): return bool(self.ktx2_is_srgb(ktx2_handle.handle)) + def is_video(self, ktx2_handle): return bool(self.ktx2_is_video(ktx2_handle.handle)) + def get_ldr_hdr_upconversion_nit_multiplier(self, ktx2_handle): return self.ktx2_get_ldr_hdr_upconversion_nit_multiplier(ktx2_handle.handle) + def is_etc1s(self, ktx2_handle): return bool(self.ktx2_is_etc1s(ktx2_handle.handle)) + def is_uastc_ldr_4x4(self, ktx2_handle): return bool(self.ktx2_is_uastc_ldr_4x4(ktx2_handle.handle)) + def is_xuastc_ldr(self, ktx2_handle): return bool(self.ktx2_is_xuastc_ldr(ktx2_handle.handle)) + def is_astc_ldr(self, ktx2_handle): return 
bool(self.ktx2_is_astc_ldr(ktx2_handle.handle)) + + # ---- DFD access + def get_dfd_flags(self, ktx2_handle): return self.ktx2_get_dfd_flags(ktx2_handle.handle) + def get_dfd_total_samples(self, ktx2_handle): return self.ktx2_get_dfd_total_samples(ktx2_handle.handle) + def get_dfd_color_model(self, ktx2_handle): return self.ktx2_get_dfd_color_model(ktx2_handle.handle) + def get_dfd_color_primaries(self, ktx2_handle): return self.ktx2_get_dfd_color_primaries(ktx2_handle.handle) + def get_dfd_transfer_func(self, ktx2_handle): return self.ktx2_get_dfd_transfer_func(ktx2_handle.handle) + def get_dfd_channel_id0(self, ktx2_handle): return self.ktx2_get_dfd_channel_id0(ktx2_handle.handle) + def get_dfd_channel_id1(self, ktx2_handle): return self.ktx2_get_dfd_channel_id1(ktx2_handle.handle) + + # ---- Block dimensions ---- + def get_block_width(self, ktx2_handle): return self.ktx2_get_block_width(ktx2_handle.handle) + def get_block_height(self, ktx2_handle): return self.ktx2_get_block_height(ktx2_handle.handle) + + # ----------------------------------------------------------------------- + # Explicit: start transcoding on an already-open KTX2 file + # ----------------------------------------------------------------------- + def start_transcoding(self, ktx2_handle: KTX2Handle): + """ + Must be called before per-level iframe flags become valid. 
+ """ + ok = self.ktx2_start_transcoding(ktx2_handle.handle) + if not ok: + raise RuntimeError("start_transcoding() failed") + return True + + # ---- Level info ---- + def get_level_orig_width(self, ktx2_handle, level, layer=0, face=0): + return self.ktx2_get_level_orig_width(ktx2_handle.handle, level, layer, face) + + def get_level_orig_height(self, ktx2_handle, level, layer=0, face=0): + return self.ktx2_get_level_orig_height(ktx2_handle.handle, level, layer, face) + + def get_level_actual_width(self, ktx2_handle, level, layer=0, face=0): + return self.ktx2_get_level_actual_width(ktx2_handle.handle, level, layer, face) + + def get_level_actual_height(self, ktx2_handle, level, layer=0, face=0): + return self.ktx2_get_level_actual_height(ktx2_handle.handle, level, layer, face) + + def get_level_num_blocks_x(self, ktx2_handle, level, layer=0, face=0): + return self.ktx2_get_level_num_blocks_x(ktx2_handle.handle, level, layer, face) + + def get_level_num_blocks_y(self, ktx2_handle, level, layer=0, face=0): + return self.ktx2_get_level_num_blocks_y(ktx2_handle.handle, level, layer, face) + + def get_level_total_blocks(self, ktx2_handle, level, layer=0, face=0): + return self.ktx2_get_level_total_blocks(ktx2_handle.handle, level, layer, face) + + def get_level_alpha_flag(self, ktx2_handle, level, layer=0, face=0): + return bool(self.ktx2_get_level_alpha_flag(ktx2_handle.handle, level, layer, face)) + + def get_level_iframe_flag(self, ktx2_handle, level, layer=0, face=0): + return bool(self.ktx2_get_level_iframe_flag(ktx2_handle.handle, level, layer, face)) + + # ----------------------------------------------------------------------- + # Low-level: Decode RGBA8 from an already-open KTX2 handle + # ----------------------------------------------------------------------- + def decode_rgba_handle(self, ktx2_handle: KTX2Handle, level=0, layer=0, face=0): + """ + Low-level fast decode. Requires an already-open KTX2Handle. + Returns HxWx4 uint8 NumPy array. 
+ """ + w = self.ktx2_get_level_orig_width(ktx2_handle.handle, level, layer, face) + h = self.ktx2_get_level_orig_height(ktx2_handle.handle, level, layer, face) + + out_size = w * h * 4 + out_ptr = self._alloc(out_size) + + # MUST start transcoding before ANY decode + ok = self.ktx2_start_transcoding(ktx2_handle.handle) + if not ok: + self._free(out_ptr) + raise RuntimeError("start_transcoding failed") + + ok = self.ktx2_transcode_image_level( + ktx2_handle.handle, + level, layer, face, + out_ptr, + out_size // 4, + TranscoderTextureFormat.TF_RGBA32, + 0, 0, 0, -1, -1, 0 + ) + if not ok: + self._free(out_ptr) + raise RuntimeError("transcode_image_level failed") + + raw_bytes = self._read(out_ptr, out_size) + self._free(out_ptr) + + arr = np.frombuffer(raw_bytes, dtype=np.uint8) + return arr.reshape((h, w, 4)) + + # ----------------------------------------------------------------------- + # High-level: Decode RGBA8 directly from KTX2 file data + # ----------------------------------------------------------------------- + def decode_rgba(self, ktx2_bytes: bytes, level=0, layer=0, face=0): + """ + High-level convenience decode. Opens the KTX2 file bytes for you. + """ + ktx2_handle = self.open(ktx2_bytes) + try: + return self.decode_rgba_handle(ktx2_handle, level, layer, face) + finally: + self.close(ktx2_handle) + + # ----------------------------------------------------------------------- + # Low-level: Decode HDR (RGBA float32) from open KTX2 + # ----------------------------------------------------------------------- + def decode_rgba_hdr_handle(self, ktx2_handle: KTX2Handle, level=0, layer=0, face=0): + """ + Low-level HDR decode. Returns HxWx4 float32 NumPy array. 
+ """ + w = self.ktx2_get_level_orig_width(ktx2_handle.handle, level, layer, face) + h = self.ktx2_get_level_orig_height(ktx2_handle.handle, level, layer, face) + + bytes_per_pixel = 8 # 4 * half-float + out_size = w * h * bytes_per_pixel + out_ptr = self._alloc(out_size) + + ok = self.ktx2_start_transcoding(ktx2_handle.handle) + if not ok: + self._free(out_ptr) + raise RuntimeError("start_transcoding failed") + + ok = self.ktx2_transcode_image_level( + ktx2_handle.handle, + level, layer, face, + out_ptr, + out_size // bytes_per_pixel, + TranscoderTextureFormat.TF_RGBA_HALF, + 0, 0, 0, -1, -1, 0 + ) + if not ok: + self._free(out_ptr) + raise RuntimeError("transcode_image_level failed") + + raw_bytes = self._read(out_ptr, out_size) + self._free(out_ptr) + + arr = np.frombuffer(raw_bytes, dtype=np.float16).astype(np.float32) + return arr.reshape((h, w, 4)) + + # ----------------------------------------------------------------------- + # High-level: Decode HDR (RGBA float32) from KTX2 file data + # ----------------------------------------------------------------------- + def decode_rgba_hdr(self, ktx2_bytes: bytes, level=0, layer=0, face=0): + """ + High-level convenience HDR decode. Opens the KTX2 file bytes for you. + """ + ktx2_handle = self.open(ktx2_bytes) + try: + return self.decode_rgba_hdr_handle(ktx2_handle, level, layer, face) + finally: + self.close(ktx2_handle) + + # ----------------------------------------------------------------------- + # Low-level: General-purpose transcode using a chosen TranscoderTextureFormat format + # ----------------------------------------------------------------------- + def transcode_tfmt_handle(self, ktx2_handle: KTX2Handle, tfmt: int, + level=0, layer=0, face=0, decode_flags=0, + channel0=-1, channel1=-1): + """ + Low-level direct transcoding from an already-open KTX2 handle. 
+ + Parameters: + ktx2_handle: KTX2Handle -> already-open KTX2 + tfmt: int -> TranscoderTextureFormat to transcode to (for ASTC: block size and LDR/HDR MUST match the KTX2 file, for HDR: must be a HDR texture format) + level/layer/face: int -> which image slice to decode + decode_flags: int -> basist::decode_flags + row_pitch, rows_in_pixels, channel0, channel1 -> advanced options + + Returns: bytes (transcoded GPU texture data or uncompressed image) + """ + + # Determine actual output size in bytes + ow = self.ktx2_get_level_orig_width(ktx2_handle.handle, level, layer, face) + oh = self.ktx2_get_level_orig_height(ktx2_handle.handle, level, layer, face) + + out_size = self.basis_compute_transcoded_image_size_in_bytes(tfmt, ow, oh) + if out_size == 0: + raise RuntimeError("basis_compute_transcoded_image_size_in_bytes returned 0") + + # print(f"*** ow={ow}, oh={oh}, out_size={out_size}") + + out_ptr = self._alloc(out_size) + + # Call transcoder + ok = self.ktx2_start_transcoding(ktx2_handle.handle) + if not ok: + self._free(out_ptr) + raise RuntimeError("start_transcoding failed") + + ok = self.ktx2_transcode_image_level( + ktx2_handle.handle, + level, layer, face, + out_ptr, + out_size // self.basis_get_bytes_per_block_or_pixel(tfmt), + tfmt, + decode_flags, + 0, + 0, + channel0, channel1, + 0 # no per-thread state object + ) + if not ok: + self._free(out_ptr) + raise RuntimeError("ktx2_transcode_image_level failed") + + # Extract bytes + raw_bytes = self._read(out_ptr, out_size) + + self._free(out_ptr) + return raw_bytes + + # ----------------------------------------------------------------------- + # High-level: General-purpose transcode (opens the KTX2 for you) + # tfmt: the TranscoderTextureFormat to transcode too + # ----------------------------------------------------------------------- + def transcode_tfmt(self, ktx2_bytes: bytes, tfmt: int, + level=0, layer=0, face=0, decode_flags=0, + channel0=-1, channel1=-1): + """ + High-level convenience wrapper for 
transcode_tfmt_handle(). + Automatically opens/closes the KTX2 file. + """ + ktx2_handle = self.open(ktx2_bytes) + try: + return self.transcode_tfmt_handle( + ktx2_handle, tfmt, + level=level, + layer=layer, + face=face, + decode_flags=decode_flags, + channel0=channel0, + channel1=channel1 + ) + finally: + self.close(ktx2_handle) + + # ----------------------------------------------------------------------- + # Low-level: choose a specific transcoder_texture_format from a family string + # ----------------------------------------------------------------------- + def choose_transcoder_format(self, ktx2_handle: KTX2Handle, family: str) -> int: + """ + Given an already-opened KTX2 and a desired family string, choose a concrete + TranscoderTextureFormat enum. + + family: one of: + "ASTC", "BC1", "BC3", "BC4", "BC5", "BC6H", "BC7", + "PVRTC1", "PVRTC2", + "ETC1", "ETC2", "ETC2_EAC_R11", "ETC2_EAC_RG11", + "ATC", "FXT1", + "RGBA32", "RGB_HALF", "RGBA_HALF", "RGB_FLOAT", "RGBA_FLOAT", + "RGB_9E5" + + Returns: + int: TranscoderTextureFormat value + """ + + s = family.strip().upper().replace(" ", "") + hdr_tex = self.is_hdr(ktx2_handle) + has_alpha = self.has_alpha(ktx2_handle) + basis_fmt = self.get_basis_tex_format(ktx2_handle) + + tfmt = None + + # ------------------------------------------------------------------- + # Uncompressed families + # ------------------------------------------------------------------- + if s in ("RGBA32", "RGBA8", "UNCOMPRESSED"): + tfmt = TranscoderTextureFormat.TF_RGBA32 + + elif s in ("RGBHALF", "RGB16F", "RGB_FLOAT", "RGBFLOAT"): + tfmt = TranscoderTextureFormat.TF_RGB_HALF + + elif s in ("RGBAHALF", "RGBA16F", "RGBA_FLOAT", "RGBAFLOAT"): + tfmt = TranscoderTextureFormat.TF_RGBA_HALF + + elif s in ("RGB9E5", "RGB_9E5"): + tfmt = TranscoderTextureFormat.TF_RGB_9E5 + + # ------------------------------------------------------------------- + # BC families + # ------------------------------------------------------------------- + elif s == "BC1": 
+ tfmt = TranscoderTextureFormat.TF_BC1_RGB + elif s == "BC3": + tfmt = TranscoderTextureFormat.TF_BC3_RGBA + elif s == "BC4": + tfmt = TranscoderTextureFormat.TF_BC4_R + elif s == "BC5": + tfmt = TranscoderTextureFormat.TF_BC5_RG + elif s == "BC6H": + tfmt = TranscoderTextureFormat.TF_BC6H + elif s == "BC7": + tfmt = TranscoderTextureFormat.TF_BC7_RGBA + + # ------------------------------------------------------------------- + # PVRTC families + # ------------------------------------------------------------------- + elif s == "PVRTC1": + tfmt = (TranscoderTextureFormat.TF_PVRTC1_4_RGBA + if has_alpha else TranscoderTextureFormat.TF_PVRTC1_4_RGB) + + elif s == "PVRTC2": + tfmt = (TranscoderTextureFormat.TF_PVRTC2_4_RGBA + if has_alpha else TranscoderTextureFormat.TF_PVRTC2_4_RGB) + + # ------------------------------------------------------------------- + # ETC / EAC families + # ------------------------------------------------------------------- + elif s == "ETC1": + tfmt = TranscoderTextureFormat.TF_ETC1_RGB + + elif s == "ETC2": + tfmt = TranscoderTextureFormat.TF_ETC2_RGBA + + elif s in ("ETC2_EAC_R11", "EAC_R11"): + tfmt = TranscoderTextureFormat.TF_ETC2_EAC_R11 + + elif s in ("ETC2_EAC_RG11", "EAC_RG11"): + tfmt = TranscoderTextureFormat.TF_ETC2_EAC_RG11 + + # ------------------------------------------------------------------- + # ATC / FXT + # ------------------------------------------------------------------- + elif s == "ATC": + tfmt = (TranscoderTextureFormat.TF_ATC_RGBA + if has_alpha else TranscoderTextureFormat.TF_ATC_RGB) + + elif s == "FXT1": + tfmt = TranscoderTextureFormat.TF_FXT1_RGB + + # ------------------------------------------------------------------- + # ASTC family + # ------------------------------------------------------------------- + elif s == "ASTC": + # Let BasisU decide correct ASTC format (block size + LDR/HDR) + tfmt = self.basis_get_transcoder_texture_format_from_basis_tex_format(basis_fmt) + + else: + # Unknown family: choose a 
safe uncompressed default + if hdr_tex: + tfmt = TranscoderTextureFormat.TF_RGBA_HALF + else: + tfmt = TranscoderTextureFormat.TF_RGBA32 + + # ------------------------------------------------------------------- + # Validate HDR/LDR compatibility (optional but recommended) + # ------------------------------------------------------------------- + # Use helpers to ensure we don't do HDR->LDR or LDR->HDR accidentally. + is_tfmt_hdr = self.basis_transcoder_format_is_hdr(tfmt) + if hdr_tex and not is_tfmt_hdr: + raise ValueError(f"Requested {family} (LDR transcoder format) for HDR KTX2.") + if not hdr_tex and is_tfmt_hdr: + raise ValueError(f"Requested {family} (HDR transcoder format) for LDR KTX2.") + + return tfmt + + # ----------------------------------------------------------------------- + # Low-level: General-purpose transcode using a family string + # from an already opened ktx2 file. + # Returns: + # (data_bytes, chosen_tfmt, block_width, block_height) + # ----------------------------------------------------------------------- + def transcode_handle( + self, + ktx2_handle: KTX2Handle, + family: str, + level=0, + layer=0, + face=0, + decode_flags=0, + channel0=-1, + channel1=-1 + ): + """ + Low-level direct transcoding from an already-open KTX2 handle, + using a high-level family string such as: + "BC7", "BC3", "BC1", "ETC1", "ETC2", "ASTC", "PVRTC1", + "RGBA32", "RGB_HALF", "RGBA_HALF", "RGB_9E5", etc. + See choose_transcoder_format(). + Returns: + (data_bytes, tfmt, block_width, block_height) + """ + + # Decide the exact transcoder format (BC1/BC7/etc.) 
+ tfmt = self.choose_transcoder_format(ktx2_handle, family) + + # Get original dims of the requested slice + ow = self.get_level_orig_width(ktx2_handle, level, layer, face) + oh = self.get_level_orig_height(ktx2_handle, level, layer, face) + + # Compute correct output size for the chosen format + out_size = self.basis_compute_transcoded_image_size_in_bytes(tfmt, ow, oh) + if out_size == 0: + raise RuntimeError( + f"Computed output size is 0 for tfmt={tfmt}, dims={ow}x{oh}" + ) + + # Allocate output buffer + out_ptr = self._alloc(out_size) + + # Ensure transcoding tables are ready + ok = self.ktx2_start_transcoding(ktx2_handle.handle) + if not ok: + self._free(out_ptr) + raise RuntimeError("start_transcoding failed") + + # Perform the transcode + ok = self.ktx2_transcode_image_level( + ktx2_handle.handle, + level, layer, face, + out_ptr, + out_size // self.basis_get_bytes_per_block_or_pixel(tfmt), + tfmt, + decode_flags, + 0, # row_pitch_in_blocks_or_pixels + 0, # rows_in_pixels + channel0, + channel1, + 0 # no thread-local state + ) + if not ok: + self._free(out_ptr) + raise RuntimeError("ktx2_transcode_image_level failed") + + # Extract bytes from native/WASM memory + data_bytes = self._read(out_ptr, out_size) + + # Free the output buffer + self._free(out_ptr) + + # Determine block dims for this texture format + if self.basis_transcoder_format_is_uncompressed(tfmt): + bw = None + bh = None + else: + bw = self.basis_get_block_width(tfmt) + bh = self.basis_get_block_height(tfmt) + + return data_bytes, tfmt, bw, bh + + # ----------------------------------------------------------------------- + # High-level: one-shot transcode using a family string + # directly from ktx2 file data. (Slower if you're transcoding multiple + # levels/faces/layers.) 
+ # ----------------------------------------------------------------------- + def transcode( + self, + ktx2_bytes: bytes, + family: str, + level=0, + layer=0, + face=0, + decode_flags=0, + channel0=-1, + channel1=-1 + ): + """ + High-level version of transcode_handle(). + Calls transcode_handle() internally. + + Returns: + (data_bytes, tfmt, block_width, block_height) + """ + ktx2_handle = self.open(ktx2_bytes) + try: + return self.transcode_handle( + ktx2_handle, + family, + level=level, + layer=layer, + face=face, + decode_flags=decode_flags, + channel0=channel0, + channel1=channel1 + ) + finally: + self.close(ktx2_handle) + + def tfmt_name(self, tfmt: int): + return TranscoderTextureFormat(tfmt).name diff --git a/external/basis_universal/python/basisu_py/wasm/__init__.py b/external/basis_universal/python/basisu_py/wasm/__init__.py new file mode 100644 index 0000000000..76d8f38a1b --- /dev/null +++ b/external/basis_universal/python/basisu_py/wasm/__init__.py @@ -0,0 +1 @@ +# Purposely empty diff --git a/external/basis_universal/python/basisu_py/wasm/basisu_module_mt.wasm b/external/basis_universal/python/basisu_py/wasm/basisu_module_mt.wasm new file mode 100644 index 0000000000..ae2b436293 Binary files /dev/null and b/external/basis_universal/python/basisu_py/wasm/basisu_module_mt.wasm differ diff --git a/external/basis_universal/python/basisu_py/wasm/basisu_module_st.wasm b/external/basis_universal/python/basisu_py/wasm/basisu_module_st.wasm new file mode 100644 index 0000000000..5b88cbc3a5 Binary files /dev/null and b/external/basis_universal/python/basisu_py/wasm/basisu_module_st.wasm differ diff --git a/external/basis_universal/python/basisu_py/wasm/basisu_transcoder_module_mt.wasm b/external/basis_universal/python/basisu_py/wasm/basisu_transcoder_module_mt.wasm new file mode 100644 index 0000000000..311c940759 Binary files /dev/null and b/external/basis_universal/python/basisu_py/wasm/basisu_transcoder_module_mt.wasm differ diff --git 
a/external/basis_universal/python/basisu_py/wasm/basisu_transcoder_module_st.wasm b/external/basis_universal/python/basisu_py/wasm/basisu_transcoder_module_st.wasm new file mode 100644 index 0000000000..3c56386550 Binary files /dev/null and b/external/basis_universal/python/basisu_py/wasm/basisu_transcoder_module_st.wasm differ diff --git a/external/basis_universal/python/basisu_py/wasm/wasm_encoder.py b/external/basis_universal/python/basisu_py/wasm/wasm_encoder.py new file mode 100644 index 0000000000..e6d3516ab5 --- /dev/null +++ b/external/basis_universal/python/basisu_py/wasm/wasm_encoder.py @@ -0,0 +1,126 @@ +# basisu_py/wasm/wasm_encoder.py + +import wasmtime +import ctypes + +from ..constants import BasisTexFormat, BasisQuality, BasisEffort, BasisFlags + + +class BasisuWasmEncoder: + def __init__(self, wasm_path): + self.wasm_path = wasm_path + self.engine = None + self.store = None + self.memory = None + self.exports = None + + # ------------------------------------------------------ + # Initialize WASM + WASI + # ------------------------------------------------------ + def _init_engine(self): + self.engine = wasmtime.Engine() + self.store = wasmtime.Store(self.engine) + + wasi = wasmtime.WasiConfig() + wasi.argv = ["basisu-wasm"] + wasi.inherit_stdout() + wasi.inherit_stderr() + self.store.set_wasi(wasi) + + def load(self): + self._init_engine() + + module = wasmtime.Module.from_file(self.engine, self.wasm_path) + linker = wasmtime.Linker(self.engine) + linker.define_wasi() + + instance = linker.instantiate(self.store, module) + self.exports = instance.exports(self.store) + self.memory = self.exports["memory"] + + # Initialize if present + if "bu_init" in self.exports: + self.exports["bu_init"](self.store) + + print("[WASM Encoder] Loaded:", self.wasm_path) + + # ------------------------------------------------------ + # Access raw linear memory buffer + # ------------------------------------------------------ + def _buf(self): + raw_ptr = 
self.memory.data_ptr(self.store) + size = self.memory.data_len(self.store) + addr = ctypes.addressof(raw_ptr.contents) + return (ctypes.c_ubyte * size).from_address(addr) + + # ------------------------------------------------------ + # Version + # ------------------------------------------------------ + def get_version(self): + return self.exports["bu_get_version"](self.store) + + # ------------------------------------------------------ + # Memory alloc/free + # ------------------------------------------------------ + def alloc(self, size): + return self.exports["bu_alloc"](self.store, size) + + def free(self, ptr): + self.exports["bu_free"](self.store, ptr) + + # ------------------------------------------------------ + # Params + # ------------------------------------------------------ + def new_params(self): + return self.exports["bu_new_comp_params"](self.store) + + def delete_params(self, params): + return self.exports["bu_delete_comp_params"](self.store, params) + + # ------------------------------------------------------ + # Image input + # ------------------------------------------------------ + def set_image_rgba32(self, params, index, ptr, w, h, pitch): + return self.exports["bu_comp_params_set_image_rgba32"]( + self.store, params, index, ptr, w, h, pitch + ) + + def set_image_float_rgba(self, params, index, ptr, w, h, pitch): + return self.exports["bu_comp_params_set_image_float_rgba"]( + self.store, params, index, ptr, w, h, pitch + ) + + # ------------------------------------------------------ + # Compression + # ------------------------------------------------------ + def compress(self, params, fmt, quality, effort, flags, rdo): + return bool(self.exports["bu_compress_texture"]( + self.store, params, fmt, quality, effort, flags, rdo + )) + + # ------------------------------------------------------ + # Output blob + # ------------------------------------------------------ + def get_comp_data_size(self, params): + return 
self.exports["bu_comp_params_get_comp_data_size"](self.store, params) + + def get_comp_data_ofs(self, params): + return self.exports["bu_comp_params_get_comp_data_ofs"](self.store, params) + + # ------------------------------------------------------ + # Raw memory I/O + # ------------------------------------------------------ + def write_bytes(self, ptr, data): + buf = self._buf() + buf[ptr:ptr + len(data)] = data + + def read_bytes(self, ptr, size): + buf = self._buf() + return bytes(buf[ptr:ptr + size]) + + # NEW unified names: + def write_memory(self, ptr, data): + self.write_bytes(ptr, data) + + def read_memory(self, ptr, size): + return self.read_bytes(ptr, size) diff --git a/external/basis_universal/python/basisu_py/wasm/wasm_transcoder.py b/external/basis_universal/python/basisu_py/wasm/wasm_transcoder.py new file mode 100644 index 0000000000..01e96ee604 --- /dev/null +++ b/external/basis_universal/python/basisu_py/wasm/wasm_transcoder.py @@ -0,0 +1,326 @@ +# basisu_py/wasm/wasm_transcoder.py + +import wasmtime +import ctypes + + +class BasisuWasmTranscoder: + """ + Lowest-level WASM transcoder wrapper. + Direct mapping to basisu_wasm_transcoder_api.h/.cpp + + NOTE: + - This layer does NOT interpret formats or block sizes. + - It only wraps the raw C API (bt_* and basis_* exports). + - Higher-level logic (TranscoderCore, Transcoder) will build on top. 
+ """ + + def __init__(self, wasm_path: str): + self.wasm_path = wasm_path + self.engine = None + self.store = None + self.memory = None + self.exports = None + + # ------------------------------------------------------ + # Internal: initialize WASM + WASI + # ------------------------------------------------------ + def _init_engine(self): + self.engine = wasmtime.Engine() + self.store = wasmtime.Store(self.engine) + + wasi = wasmtime.WasiConfig() + wasi.argv = ["basisu-transcoder"] + wasi.inherit_stdout() + wasi.inherit_stderr() + self.store.set_wasi(wasi) + + def load(self): + self._init_engine() + + module = wasmtime.Module.from_file(self.engine, self.wasm_path) + linker = wasmtime.Linker(self.engine) + linker.define_wasi() + + instance = linker.instantiate(self.store, module) + self.exports = instance.exports(self.store) + self.memory = self.exports["memory"] + + # Mandatory transcoder init + if "bt_init" in self.exports: + self.exports["bt_init"](self.store) + + print("[WASM Transcoder] Loaded:", self.wasm_path) + + # ------------------------------------------------------ + # Linear memory access helpers + # ------------------------------------------------------ + def _buf(self): + raw_ptr = self.memory.data_ptr(self.store) + size = self.memory.data_len(self.store) + addr = ctypes.addressof(raw_ptr.contents) + return (ctypes.c_ubyte * size).from_address(addr) + + def write_bytes(self, ptr: int, data: bytes): + buf = self._buf() + buf[ptr:ptr + len(data)] = data + + def read_bytes(self, ptr: int, num: int) -> bytes: + buf = self._buf() + return bytes(buf[ptr:ptr + num]) + + # NEW unified names: + def write_memory(self, ptr, data): + self.write_bytes(ptr, data) + + def read_memory(self, ptr, size): + return self.read_bytes(ptr, size) + + # ------------------------------------------------------ + # Memory alloc/free + # ------------------------------------------------------ + def alloc(self, size: int) -> int: + return self.exports["bt_alloc"](self.store, size) + 
+ def free(self, ptr: int): + return self.exports["bt_free"](self.store, ptr) + + # ------------------------------------------------------ + # High-level functions: version, init, debug + # ------------------------------------------------------ + def get_version(self) -> int: + return self.exports["bt_get_version"](self.store) + + def enable_debug_printf(self, flag: bool = True): + return self.exports["bt_enable_debug_printf"](self.store, 1 if flag else 0) + + # ------------------------------------------------------ + # basis_tex_format helpers + # ------------------------------------------------------ + def basis_tex_format_is_xuastc_ldr(self, basis_tex_fmt_u32: int) -> bool: + return bool(self.exports["bt_basis_tex_format_is_xuastc_ldr"](self.store, basis_tex_fmt_u32)) + + def basis_tex_format_is_astc_ldr(self, basis_tex_fmt_u32: int) -> bool: + return bool(self.exports["bt_basis_tex_format_is_astc_ldr"](self.store, basis_tex_fmt_u32)) + + def basis_tex_format_get_block_width(self, basis_tex_fmt_u32: int) -> int: + return self.exports["bt_basis_tex_format_get_block_width"](self.store, basis_tex_fmt_u32) + + def basis_tex_format_get_block_height(self, basis_tex_fmt_u32: int) -> int: + return self.exports["bt_basis_tex_format_get_block_height"](self.store, basis_tex_fmt_u32) + + def basis_tex_format_is_hdr(self, basis_tex_fmt_u32: int) -> bool: + return bool(self.exports["bt_basis_tex_format_is_hdr"](self.store, basis_tex_fmt_u32)) + + def basis_tex_format_is_ldr(self, basis_tex_fmt_u32: int) -> bool: + return bool(self.exports["bt_basis_tex_format_is_ldr"](self.store, basis_tex_fmt_u32)) + + # ------------------------------------------------------ + # transcoder_texture_format helpers + # ------------------------------------------------------ + def basis_get_bytes_per_block_or_pixel(self, tfmt_u32: int) -> int: + return self.exports["bt_basis_get_bytes_per_block_or_pixel"](self.store, tfmt_u32) + + def basis_transcoder_format_has_alpha(self, tfmt_u32: int) -> 
bool: + return bool(self.exports["bt_basis_transcoder_format_has_alpha"](self.store, tfmt_u32)) + + def basis_transcoder_format_is_hdr(self, tfmt_u32: int) -> bool: + return bool(self.exports["bt_basis_transcoder_format_is_hdr"](self.store, tfmt_u32)) + + def basis_transcoder_format_is_ldr(self, tfmt_u32: int) -> bool: + return bool(self.exports["bt_basis_transcoder_format_is_ldr"](self.store, tfmt_u32)) + + def basis_transcoder_texture_format_is_astc(self, tfmt_u32: int) -> bool: + return bool(self.exports["bt_basis_transcoder_texture_format_is_astc"](self.store, tfmt_u32)) + + def basis_transcoder_format_is_uncompressed(self, tfmt_u32: int) -> bool: + return bool(self.exports["bt_basis_transcoder_format_is_uncompressed"](self.store, tfmt_u32)) + + def basis_get_uncompressed_bytes_per_pixel(self, tfmt_u32: int) -> int: + return self.exports["bt_basis_get_uncompressed_bytes_per_pixel"](self.store, tfmt_u32) + + def basis_get_block_width(self, tfmt_u32: int) -> int: + return self.exports["bt_basis_get_block_width"](self.store, tfmt_u32) + + def basis_get_block_height(self, tfmt_u32: int) -> int: + return self.exports["bt_basis_get_block_height"](self.store, tfmt_u32) + + def basis_get_transcoder_texture_format_from_basis_tex_format(self, basis_tex_fmt_u32: int) -> int: + return self.exports["bt_basis_get_transcoder_texture_format_from_basis_tex_format"](self.store, basis_tex_fmt_u32) + + def basis_is_format_supported(self, tfmt_u32: int, basis_tex_fmt_u32: int) -> bool: + return bool(self.exports["bt_basis_is_format_supported"](self.store, tfmt_u32, basis_tex_fmt_u32)) + + def basis_compute_transcoded_image_size_in_bytes(self, tfmt_u32: int, orig_width: int, orig_height: int) -> int: + return self.exports["bt_basis_compute_transcoded_image_size_in_bytes"]( + self.store, tfmt_u32, orig_width, orig_height + ) + + # ------------------------------------------------------ + # KTX2 handle management + # ------------------------------------------------------ + def 
ktx2_open(self, data_ptr: int, data_len: int) -> int: + return self.exports["bt_ktx2_open"](self.store, data_ptr, data_len) + + def ktx2_close(self, handle: int): + return self.exports["bt_ktx2_close"](self.store, handle) + + # ------------------------------------------------------ + # Basic KTX2 metadata + # ------------------------------------------------------ + def ktx2_get_width(self, handle: int) -> int: + return self.exports["bt_ktx2_get_width"](self.store, handle) + + def ktx2_get_height(self, handle: int) -> int: + return self.exports["bt_ktx2_get_height"](self.store, handle) + + def ktx2_get_levels(self, handle: int) -> int: + return self.exports["bt_ktx2_get_levels"](self.store, handle) + + def ktx2_get_faces(self, handle: int) -> int: + return self.exports["bt_ktx2_get_faces"](self.store, handle) + + def ktx2_get_layers(self, handle: int) -> int: + return self.exports["bt_ktx2_get_layers"](self.store, handle) + + def ktx2_get_basis_tex_format(self, handle: int) -> int: + return self.exports["bt_ktx2_get_basis_tex_format"](self.store, handle) + + # KTX2 format checks + def ktx2_is_etc1s(self, handle: int) -> bool: + return bool(self.exports["bt_ktx2_is_etc1s"](self.store, handle)) + + def ktx2_is_uastc_ldr_4x4(self, handle: int) -> bool: + return bool(self.exports["bt_ktx2_is_uastc_ldr_4x4"](self.store, handle)) + + def ktx2_is_hdr(self, handle: int) -> bool: + return bool(self.exports["bt_ktx2_is_hdr"](self.store, handle)) + + def ktx2_is_hdr_4x4(self, handle: int) -> bool: + return bool(self.exports["bt_ktx2_is_hdr_4x4"](self.store, handle)) + + def ktx2_is_hdr_6x6(self, handle: int) -> bool: + return bool(self.exports["bt_ktx2_is_hdr_6x6"](self.store, handle)) + + def ktx2_is_ldr(self, handle: int) -> bool: + return bool(self.exports["bt_ktx2_is_ldr"](self.store, handle)) + + def ktx2_is_astc_ldr(self, handle: int) -> bool: + return bool(self.exports["bt_ktx2_is_astc_ldr"](self.store, handle)) + + def ktx2_is_xuastc_ldr(self, handle: int) -> bool: + 
return bool(self.exports["bt_ktx2_is_xuastc_ldr"](self.store, handle)) + + def ktx2_get_block_width(self, handle: int) -> int: + return self.exports["bt_ktx2_get_block_width"](self.store, handle) + + def ktx2_get_block_height(self, handle: int) -> int: + return self.exports["bt_ktx2_get_block_height"](self.store, handle) + + def ktx2_has_alpha(self, handle: int) -> bool: + return bool(self.exports["bt_ktx2_has_alpha"](self.store, handle)) + + def ktx2_get_dfd_color_model(self, handle: int) -> int: + return self.exports["bt_ktx2_get_dfd_color_model"](self.store, handle) + + def ktx2_get_dfd_color_primaries(self, handle: int) -> int: + return self.exports["bt_ktx2_get_dfd_color_primaries"](self.store, handle) + + def ktx2_get_dfd_transfer_func(self, handle: int) -> int: + return self.exports["bt_ktx2_get_dfd_transfer_func"](self.store, handle) + + def ktx2_is_srgb(self, handle: int) -> bool: + return bool(self.exports["bt_ktx2_is_srgb"](self.store, handle)) + + def ktx2_get_dfd_flags(self, handle: int) -> int: + return self.exports["bt_ktx2_get_dfd_flags"](self.store, handle) + + def ktx2_get_dfd_total_samples(self, handle: int) -> int: + return self.exports["bt_ktx2_get_dfd_total_samples"](self.store, handle) + + def ktx2_get_dfd_channel_id0(self, handle: int) -> int: + return self.exports["bt_ktx2_get_dfd_channel_id0"](self.store, handle) + + def ktx2_get_dfd_channel_id1(self, handle: int) -> int: + return self.exports["bt_ktx2_get_dfd_channel_id1"](self.store, handle) + + def ktx2_is_video(self, handle: int) -> bool: + return bool(self.exports["bt_ktx2_is_video"](self.store, handle)) + + def ktx2_get_ldr_hdr_upconversion_nit_multiplier(self, handle: int) -> float: + return self.exports["bt_ktx2_get_ldr_hdr_upconversion_nit_multiplier"](self.store, handle) + + # ------------------------------------------------------ + # Per-level metadata + # ------------------------------------------------------ + def ktx2_get_level_orig_width(self, h, lvl, layer, face) -> int: + 
return self.exports["bt_ktx2_get_level_orig_width"](self.store, h, lvl, layer, face) + + def ktx2_get_level_orig_height(self, h, lvl, layer, face) -> int: + return self.exports["bt_ktx2_get_level_orig_height"](self.store, h, lvl, layer, face) + + def ktx2_get_level_actual_width(self, h, lvl, layer, face) -> int: + return self.exports["bt_ktx2_get_level_actual_width"](self.store, h, lvl, layer, face) + + def ktx2_get_level_actual_height(self, h, lvl, layer, face) -> int: + return self.exports["bt_ktx2_get_level_actual_height"](self.store, h, lvl, layer, face) + + def ktx2_get_level_num_blocks_x(self, h, lvl, layer, face) -> int: + return self.exports["bt_ktx2_get_level_num_blocks_x"](self.store, h, lvl, layer, face) + + def ktx2_get_level_num_blocks_y(self, h, lvl, layer, face) -> int: + return self.exports["bt_ktx2_get_level_num_blocks_y"](self.store, h, lvl, layer, face) + + def ktx2_get_level_total_blocks(self, h, lvl, layer, face) -> int: + return self.exports["bt_ktx2_get_level_total_blocks"](self.store, h, lvl, layer, face) + + def ktx2_get_level_alpha_flag(self, h, lvl, layer, face) -> bool: + return bool(self.exports["bt_ktx2_get_level_alpha_flag"](self.store, h, lvl, layer, face)) + + def ktx2_get_level_iframe_flag(self, h, lvl, layer, face) -> bool: + return bool(self.exports["bt_ktx2_get_level_iframe_flag"](self.store, h, lvl, layer, face)) + + # ------------------------------------------------------ + # Transcoding control + # ------------------------------------------------------ + def ktx2_start_transcoding(self, handle: int) -> bool: + return bool(self.exports["bt_ktx2_start_transcoding"](self.store, handle)) + + def ktx2_create_transcode_state(self) -> int: + return self.exports["bt_ktx2_create_transcode_state"](self.store) + + def ktx2_destroy_transcode_state(self, handle: int): + return self.exports["bt_ktx2_destroy_transcode_state"](self.store, handle) + + # ------------------------------------------------------ + # Actual transcoding call + # 
------------------------------------------------------ + def ktx2_transcode_image_level( + self, + ktx2_handle: int, + level_index: int, + layer_index: int, + face_index: int, + output_block_mem_ofs: int, + output_blocks_buf_size_in_blocks_or_pixels: int, + transcoder_texture_format_u32: int, + decode_flags: int, + output_row_pitch_in_blocks_or_pixels: int, + output_rows_in_pixels: int, + channel0: int, + channel1: int, + state_handle: int, + ) -> bool: + return bool(self.exports["bt_ktx2_transcode_image_level"]( + self.store, + ktx2_handle, + level_index, layer_index, face_index, + output_block_mem_ofs, + output_blocks_buf_size_in_blocks_or_pixels, + transcoder_texture_format_u32, + decode_flags, + output_row_pitch_in_blocks_or_pixels, + output_rows_in_pixels, + channel0, channel1, + state_handle + )) diff --git a/external/basis_universal/python/basisu_transcoder_pybind11.cpp b/external/basis_universal/python/basisu_transcoder_pybind11.cpp new file mode 100644 index 0000000000..4d4aa0f1b2 --- /dev/null +++ b/external/basis_universal/python/basisu_transcoder_pybind11.cpp @@ -0,0 +1,264 @@ +// File: basisu_transcoder_pybind11.cpp +// pybind11 native bindings for the transcoder's pure C API basisu_wasm_transcoder_api.h + +#include +#include + +#include "../encoder/basisu_wasm_transcoder_api.h" + +namespace py = pybind11; + +// wasm_bool_t is uint32_t — convert to Python bool +static inline bool to_bool(wasm_bool_t v) { return v != 0; } + +PYBIND11_MODULE(basisu_transcoder_python, m) { + m.doc() = "Native Basis Universal transcoder (pybind11 binding over basisu_wasm_transcoder_api)"; + + // ------------------------------------------------------------------------ + // High-level functions + // ------------------------------------------------------------------------ + m.def("get_version", &bt_get_version, + "Get BasisU transcoder version"); + + m.def("enable_debug_printf", + [](bool flag) { bt_enable_debug_printf(flag ? 
1u : 0u); }, + "Enable or disable debug printf output"); + + m.def("init", &bt_init, + "Initialize transcoder library"); + + m.def("alloc", &bt_alloc, + "Allocate a buffer, returns uint64 offset/pointer"); + m.def("free", &bt_free, + "Free a buffer allocated by bt_alloc"); + + + // ------------------------------------------------------------------------ + // basis_tex_format helpers + // ------------------------------------------------------------------------ + m.def("basis_tex_format_is_xuastc_ldr", + [](uint32_t fmt) { return to_bool(bt_basis_tex_format_is_xuastc_ldr(fmt)); }); + + m.def("basis_tex_format_is_astc_ldr", + [](uint32_t fmt) { return to_bool(bt_basis_tex_format_is_astc_ldr(fmt)); }); + + m.def("basis_tex_format_get_block_width", + &bt_basis_tex_format_get_block_width); + + m.def("basis_tex_format_get_block_height", + &bt_basis_tex_format_get_block_height); + + m.def("basis_tex_format_is_hdr", + [](uint32_t fmt) { return to_bool(bt_basis_tex_format_is_hdr(fmt)); }); + + m.def("basis_tex_format_is_ldr", + [](uint32_t fmt) { return to_bool(bt_basis_tex_format_is_ldr(fmt)); }); + + + // ------------------------------------------------------------------------ + // transcoder_texture_format helpers + // ------------------------------------------------------------------------ + m.def("basis_get_bytes_per_block_or_pixel", + &bt_basis_get_bytes_per_block_or_pixel); + + m.def("basis_transcoder_format_has_alpha", + [](uint32_t tfmt) { return to_bool(bt_basis_transcoder_format_has_alpha(tfmt)); }); + + m.def("basis_transcoder_format_is_hdr", + [](uint32_t tfmt) { return to_bool(bt_basis_transcoder_format_is_hdr(tfmt)); }); + + m.def("basis_transcoder_format_is_ldr", + [](uint32_t tfmt) { return to_bool(bt_basis_transcoder_format_is_ldr(tfmt)); }); + + m.def("basis_transcoder_texture_format_is_astc", + [](uint32_t tfmt) { return to_bool(bt_basis_transcoder_texture_format_is_astc(tfmt)); }); + + m.def("basis_transcoder_format_is_uncompressed", + [](uint32_t tfmt) 
{ return to_bool(bt_basis_transcoder_format_is_uncompressed(tfmt)); }); + + m.def("basis_get_uncompressed_bytes_per_pixel", + &bt_basis_get_uncompressed_bytes_per_pixel); + + m.def("basis_get_block_width", + &bt_basis_get_block_width); + + m.def("basis_get_block_height", + &bt_basis_get_block_height); + + m.def("basis_get_transcoder_texture_format_from_basis_tex_format", + &bt_basis_get_transcoder_texture_format_from_basis_tex_format); + + m.def("basis_is_format_supported", + [](uint32_t tfmt, uint32_t basis_fmt) { + return to_bool(bt_basis_is_format_supported(tfmt, basis_fmt)); + }); + + m.def("basis_compute_transcoded_image_size_in_bytes", + &bt_basis_compute_transcoded_image_size_in_bytes); + + + // ------------------------------------------------------------------------ + // KTX2 open/close & basic info + // ------------------------------------------------------------------------ + m.def("ktx2_open", &bt_ktx2_open, + "Open a KTX2 image from memory; returns handle"); + + m.def("ktx2_close", &bt_ktx2_close, + "Close a previously opened KTX2 handle"); + + m.def("ktx2_get_width", &bt_ktx2_get_width); + m.def("ktx2_get_height", &bt_ktx2_get_height); + m.def("ktx2_get_levels", &bt_ktx2_get_levels); + m.def("ktx2_get_faces", &bt_ktx2_get_faces); + m.def("ktx2_get_layers", &bt_ktx2_get_layers); + + m.def("ktx2_get_basis_tex_format", &bt_ktx2_get_basis_tex_format); + + m.def("ktx2_is_etc1s", + [](uint64_t h) { return to_bool(bt_ktx2_is_etc1s(h)); }); + + m.def("ktx2_is_uastc_ldr_4x4", + [](uint64_t h) { return to_bool(bt_ktx2_is_uastc_ldr_4x4(h)); }); + + m.def("ktx2_is_hdr", + [](uint64_t h) { return to_bool(bt_ktx2_is_hdr(h)); }); + + m.def("ktx2_is_hdr_4x4", + [](uint64_t h) { return to_bool(bt_ktx2_is_hdr_4x4(h)); }); + + m.def("ktx2_is_hdr_6x6", + [](uint64_t h) { return to_bool(bt_ktx2_is_hdr_6x6(h)); }); + + m.def("ktx2_is_ldr", + [](uint64_t h) { return to_bool(bt_ktx2_is_ldr(h)); }); + + m.def("ktx2_is_astc_ldr", + [](uint64_t h) { return 
to_bool(bt_ktx2_is_astc_ldr(h)); }); + + m.def("ktx2_is_xuastc_ldr", + [](uint64_t h) { return to_bool(bt_ktx2_is_xuastc_ldr(h)); }); + + m.def("ktx2_get_block_width", &bt_ktx2_get_block_width); + + m.def("ktx2_get_block_height", &bt_ktx2_get_block_height); + + m.def("ktx2_has_alpha", + [](uint64_t h) { return to_bool(bt_ktx2_has_alpha(h)); }); + + m.def("ktx2_get_dfd_color_model", &bt_ktx2_get_dfd_color_model); + m.def("ktx2_get_dfd_color_primaries", &bt_ktx2_get_dfd_color_primaries); + m.def("ktx2_get_dfd_transfer_func", &bt_ktx2_get_dfd_transfer_func); + + m.def("ktx2_is_srgb", + [](uint64_t h) { return to_bool(bt_ktx2_is_srgb(h)); }); + + m.def("ktx2_get_dfd_flags", &bt_ktx2_get_dfd_flags); + m.def("ktx2_get_dfd_total_samples", &bt_ktx2_get_dfd_total_samples); + m.def("ktx2_get_dfd_channel_id0", &bt_ktx2_get_dfd_channel_id0); + m.def("ktx2_get_dfd_channel_id1", &bt_ktx2_get_dfd_channel_id1); + + m.def("ktx2_is_video", + [](uint64_t h) { return to_bool(bt_ktx2_is_video(h)); }); + + m.def("ktx2_get_ldr_hdr_upconversion_nit_multiplier", + &bt_ktx2_get_ldr_hdr_upconversion_nit_multiplier); + + + // ------------------------------------------------------------------------ + // KTX2 per-level info + // ------------------------------------------------------------------------ + m.def("ktx2_get_level_orig_width", + &bt_ktx2_get_level_orig_width); + + m.def("ktx2_get_level_orig_height", + &bt_ktx2_get_level_orig_height); + + m.def("ktx2_get_level_actual_width", + &bt_ktx2_get_level_actual_width); + + m.def("ktx2_get_level_actual_height", + &bt_ktx2_get_level_actual_height); + + m.def("ktx2_get_level_num_blocks_x", + &bt_ktx2_get_level_num_blocks_x); + + m.def("ktx2_get_level_num_blocks_y", + &bt_ktx2_get_level_num_blocks_y); + + m.def("ktx2_get_level_total_blocks", + &bt_ktx2_get_level_total_blocks); + + m.def("ktx2_get_level_alpha_flag", + [](uint64_t h, uint32_t level, uint32_t layer, uint32_t face) { + return to_bool(bt_ktx2_get_level_alpha_flag(h, level, layer, 
face)); + }); + + m.def("ktx2_get_level_iframe_flag", + [](uint64_t h, uint32_t level, uint32_t layer, uint32_t face) { + return to_bool(bt_ktx2_get_level_iframe_flag(h, level, layer, face)); + }); + + + // ------------------------------------------------------------------------ + // Transcoding state and operations + // ------------------------------------------------------------------------ + m.def("ktx2_start_transcoding", + [](uint64_t h) { return to_bool(bt_ktx2_start_transcoding(h)); }); + + m.def("ktx2_create_transcode_state", + &bt_ktx2_create_transcode_state); + + m.def("ktx2_destroy_transcode_state", + &bt_ktx2_destroy_transcode_state); + + m.def("ktx2_transcode_image_level", + [](uint64_t ktx2_handle, + uint32_t level_index, uint32_t layer_index, uint32_t face_index, + uint64_t out_mem_ofs, + uint32_t out_blocks_or_pixels, + uint32_t transcoder_texture_format_u32, + uint32_t decode_flags, + uint32_t row_pitch_blocks_or_pixels, + uint32_t rows_in_pixels, + int channel0, int channel1, + uint64_t state_handle) + { + return to_bool(bt_ktx2_transcode_image_level( + ktx2_handle, + level_index, layer_index, face_index, + out_mem_ofs, + out_blocks_or_pixels, + transcoder_texture_format_u32, + decode_flags, + row_pitch_blocks_or_pixels, + rows_in_pixels, + channel0, channel1, + state_handle)); + }, + py::arg("ktx2_handle"), + py::arg("level_index"), + py::arg("layer_index"), + py::arg("face_index"), + py::arg("output_block_mem_ofs"), + py::arg("output_blocks_buf_size_in_blocks_or_pixels"), + py::arg("transcoder_texture_format_u32"), + py::arg("decode_flags"), + py::arg("output_row_pitch_in_blocks_or_pixels") = 0, + py::arg("output_rows_in_pixels") = 0, + py::arg("channel0") = -1, + py::arg("channel1") = -1, + py::arg("state_handle") = 0); + + m.def("read_memory", + [](uint64_t ptr, uint32_t size) { + return py::bytes((const char*)ptr, size); + }, + "Read `size` bytes starting at native memory address `ptr`"); + + m.def("write_memory", + [](uint64_t dest_ptr, 
py::buffer src) { + py::buffer_info info = src.request(); + memcpy((void*)dest_ptr, info.ptr, info.size * info.itemsize); + }, + "Write bytes/buffer-like object into native memory at address `ptr`"); +} diff --git a/external/basis_universal/python/dds_writer.py b/external/basis_universal/python/dds_writer.py new file mode 100644 index 0000000000..f1c95433c0 --- /dev/null +++ b/external/basis_universal/python/dds_writer.py @@ -0,0 +1,332 @@ +# dds_writer.py +# +# Minimal DDS writer that mirrors the C/C++ save_dds() implementation you provided. +# It writes a DX9-style DDS header, and optionally a DX10 extension header, +# followed by the raw compressed blocks. +# +# No mipmaps, no cubes, no 3D volumes – exactly like the original C code. + +import struct +import sys +from typing import Union + + +# --------------------------------------------------------------------------- +# FourCC helper (same as PIXEL_FMT_FOURCC macro) +# --------------------------------------------------------------------------- +def make_fourcc(a: str, b: str, c: str, d: str) -> int: + return (ord(a) | + (ord(b) << 8) | + (ord(c) << 16) | + (ord(d) << 24)) + + +# --------------------------------------------------------------------------- +# DDS-related constants (only the ones we actually use) +# --------------------------------------------------------------------------- + +# DDSD flags +DDSD_CAPS = 0x00000001 +DDSD_HEIGHT = 0x00000002 +DDSD_WIDTH = 0x00000004 +DDSD_PIXELFORMAT= 0x00001000 +DDSD_LINEARSIZE = 0x00080000 + +# DDPF flags +DDPF_FOURCC = 0x00000004 + +# DDSCAPS flags +DDSCAPS_TEXTURE = 0x00001000 + +# DXGI_FORMAT subset (values must match the C enum) +class DXGI_FORMAT: + UNKNOWN = 0 + BC1_UNORM = 71 + BC3_UNORM = 77 + BC4_UNORM = 80 + BC5_UNORM = 83 + # You can add more as needed; for DX10 header we just write the integer value. 
# DX10 resource dimension
class D3D10_RESOURCE_DIMENSION:
    UNKNOWN = 0
    BUFFER = 1
    TEXTURE1D = 2
    TEXTURE2D = 3
    TEXTURE3D = 4


# ---------------------------------------------------------------------------
# DDS writer class
# ---------------------------------------------------------------------------
class DDSWriter:
    """
    Python port of the C save_dds() function.

    Usage:
        writer = DDSWriter()
        ok = writer.save_dds(
            filename="out.dds",
            width=width,
            height=height,
            blocks=bc_data,              # bytes or bytearray
            pixel_format_bpp=4,          # e.g. 4 for BC1, 8 for BC3/4/5/etc.
            dxgi_format=DXGI_FORMAT.BC1_UNORM,
            srgb=False,
            force_dx10_header=False,
        )
    """

    DDS_MAGIC = b"DDS "  # same as fwrite("DDS ", 4, 1, pFile);

    @staticmethod
    def _as_byte_view(blocks):
        """Return a flat, byte-granular memoryview over `blocks`.

        `len()` and slicing on a memoryview count *elements*, not bytes, so a
        buffer with a multi-byte item format must be re-viewed as unsigned
        bytes before it is compared against (or sliced by) a byte count.
        """
        view = memoryview(blocks)
        try:
            return view.cast("B")
        except (TypeError, ValueError):
            # cast() refuses some layouts (e.g. non-contiguous views);
            # fall back to a packed copy of the same bytes.
            return memoryview(view.tobytes())

    def save_dds(
        self,
        filename: str,
        width: int,
        height: int,
        blocks: Union[bytes, bytearray, memoryview],
        pixel_format_bpp: int,
        dxgi_format: int,
        srgb: bool = False,
        force_dx10_header: bool = False,
    ) -> bool:
        """
        Port of:
            bool save_dds(const char* pFilename,
                          uint32_t width, uint32_t height,
                          const void* pBlocks,
                          uint32_t pixel_format_bpp,
                          DXGI_FORMAT dxgi_format,
                          bool srgb,
                          bool force_dx10_header);

        Writes the "DDS " magic, a 124-byte DDSURFACEDESC2 header, an optional
        20-byte DX10 extension header, and then exactly the computed linear
        size of block data taken from the start of `blocks`.

        Args:
            filename: Destination path; created or truncated.
            width, height: Image dimensions in pixels (rounded up to a
                multiple of 4 when computing the payload size).
            blocks: Any buffer-protocol object holding the compressed blocks;
                its size is measured in bytes regardless of its item format.
            pixel_format_bpp: Bits per pixel of the compressed format.
            dxgi_format: Integer DXGI_FORMAT value.
            srgb: Accepted for signature parity with the C code; unused.
            force_dx10_header: Always emit the DX10 extension header, even
                for BC1/BC3/BC4/BC5 which otherwise use a legacy FourCC.

        Returns:
            True on success. False (after printing a message to stderr) if
            the file cannot be created, written or closed, or if `blocks`
            holds fewer bytes than the computed linear size.
        """

        # srgb is intentionally unused in the original C code (commented logic).
        _ = srgb

        # Open file like the C code
        try:
            f = open(filename, "wb")
        except OSError:
            print(f"Failed creating file {filename}!", file=sys.stderr)
            return False

        try:
            # Write the "DDS " magic
            f.write(self.DDS_MAGIC)

            dwWidth = int(width)
            dwHeight = int(height)

            # lPitch (actually LinearSize for compressed formats), same as:
            # (((dwWidth + 3) & ~3) * ((dwHeight + 3) & ~3) * pixel_format_bpp) >> 3;
            lPitch = (
                ((dwWidth + 3) & ~3)
                * ((dwHeight + 3) & ~3)
                * int(pixel_format_bpp)
            ) >> 3

            dwFlags = (
                DDSD_WIDTH
                | DDSD_HEIGHT
                | DDSD_PIXELFORMAT
                | DDSD_CAPS
                | DDSD_LINEARSIZE
            )

            # Legacy FourCC (DXT1/DXT5/ATI1/ATI2) unless a DX10 header is
            # forced or the format has no legacy code.
            legacy_fourcc = {
                DXGI_FORMAT.BC1_UNORM: "DXT1",
                DXGI_FORMAT.BC3_UNORM: "DXT5",
                DXGI_FORMAT.BC4_UNORM: "ATI1",
                DXGI_FORMAT.BC5_UNORM: "ATI2",
            }
            use_legacy = (not force_dx10_header) and dxgi_format in legacy_fourcc
            fourcc_text = legacy_fourcc[dxgi_format] if use_legacy else "DX10"
            ddpf_dwFourCC = make_fourcc(*fourcc_text)

            # DDSURFACEDESC2 packed as 31 little-endian uint32's (124 bytes).
            header_values = [
                124,            # dwSize = sizeof(DDSURFACEDESC2)
                dwFlags,
                dwHeight,
                dwWidth,
                lPitch,         # lPitch / dwLinearSize
                0,              # dwBackBufferCount
                0,              # dwMipMapCount
                0,              # dwAlphaBitDepth
                0,              # dwUnused0
                0,              # lpSurface
            ]
            header_values.extend([0, 0] * 4)   # DDCOLORKEY unused0..3
            header_values.extend([
                32,             # DDPIXELFORMAT.dwSize
                DDPF_FOURCC,    # DDPIXELFORMAT.dwFlags
                ddpf_dwFourCC,
                0,              # dwRGBBitCount
                0,              # dwRBitMask
                0,              # dwGBitMask
                0,              # dwBBitMask
                0,              # dwRGBAlphaBitMask
            ])
            header_values.extend([DDSCAPS_TEXTURE, 0, 0, 0])   # DDSCAPS2
            header_values.append(0)                            # dwUnused1

            if len(header_values) != 31:
                raise RuntimeError("Internal error: DDSURFACEDESC2 must contain 31 uint32's")

            f.write(struct.pack("<31I", *header_values))

            # If needed, write the DX10 header (DDS_HEADER_DXT10):
            # dxgiFormat, resourceDimension, miscFlag, arraySize, miscFlags2
            if not use_legacy:
                f.write(struct.pack(
                    "<5I",
                    int(dxgi_format),
                    D3D10_RESOURCE_DIMENSION.TEXTURE2D,
                    0,
                    1,
                    0,
                ))

            # C code: fwrite(pBlocks, desc.lPitch, 1, pFile);
            # i.e. write exactly lPitch bytes. Measure and slice in bytes.
            data = self._as_byte_view(blocks)
            if len(data) < lPitch:
                raise ValueError(
                    f"blocks buffer too small: need at least {lPitch} bytes, got {len(data)}"
                )
            f.write(data[:lPitch])

        except Exception as e:
            # Mimic the C-style error reporting as much as practical
            print(f"Failed writing to DDS file {filename}: {e}", file=sys.stderr)
            try:
                f.close()
            except Exception:
                pass
            return False

        # Close file
        try:
            f.close()
        except OSError:
            print(f"Failed closing DDS file {filename}!", file=sys.stderr)
            return False

        return True
#   OpenImageIO ("oiiotool") -- required for EXR output
#
# Install Python deps:
#   pip install numpy pillow imageio wasmtime
#
# On Ubuntu:
#   sudo apt install openimageio-tools
#
# On macOS (Homebrew):
#   brew install openimageio

import sys
import os
import numpy as np
import subprocess
import tempfile
import imageio.v3 as iio
from PIL import Image

from basisu_py import Transcoder
from basisu_py.constants import TranscoderTextureFormat as TF

# Writers located in same directory as this script
from astc_writer import write_astc_file
from dds_writer import DDSWriter


# ============================================================================
# File-writing helpers
# ============================================================================
def save_exr(path, rgba32f):
    """
    Save float32 RGBA as EXR if possible.
    If the oiiotool conversion fails (for example because oiiotool is not
    installed), keep the intermediate TIFF as `path + ".tiff"` instead.

    Args:
        path: Destination EXR path.
        rgba32f: Array-like image; converted with `.astype(np.float32)`
            before being written.
    """
    # Reserve a temp file name; the TIFF is written after the handle is closed.
    with tempfile.NamedTemporaryFile(suffix=".tiff", delete=False) as tmp:
        temp_path = tmp.name

    try:
        iio.imwrite(temp_path, rgba32f.astype(np.float32))
    except Exception:
        # Do not leave the reserved temp file behind when the write fails.
        if os.path.exists(temp_path):
            os.remove(temp_path)
        raise

    # Try EXR via oiiotool. Only the conversion itself is guarded, so a
    # problem while cleaning up afterwards cannot trigger the fallback.
    try:
        subprocess.run(["oiiotool", temp_path, "-o", path], check=True)
    except Exception:
        # --- FALLBACK: save TIFF ---
        fallback_path = path + ".tiff"

        # os.replace() overwrites an existing destination.
        os.replace(temp_path, fallback_path)

        print(" [Fallback] Wrote TIFF instead:", fallback_path)
        return

    os.remove(temp_path)
    print(" Wrote EXR:", path)


def save_png(path, rgba8):
    """Write the RGBA8 array `rgba8` to `path` via Pillow and report the path."""
    img = Image.fromarray(rgba8, mode="RGBA")
    img.save(path)
    print(f" PNG saved: {path}")


# ============================================================================
# Pretty header
# ============================================================================
def print_header(title):
    """Print `title` framed above and below by a 90-character rule of '='."""
    rule = "=" * 90
    print("\n" + rule, title, rule, sep="\n")


# ============================================================================
# Full top-level metadata dump (ALL API)
# ============================================================================
def dump_all_top_level(t, h):
    """Print every top-level KTX2 property the transcoder `t` reports for handle `h`.

    Values are queried and printed one at a time, in a fixed order, so a
    failing getter leaves the lines printed so far intact.
    """

    def show(rows):
        # rows: (label, name of a method on `t` that takes the handle)
        for label, method in rows:
            print(label, getattr(t, method)(h))

    print_header("TOP-LEVEL KTX2 METADATA FULL API")

    print("Backend :", t.backend_name)
    print("Version :", t.get_version())
    show([
        ("Width :", "get_width"),
        ("Height :", "get_height"),
        ("Levels :", "get_levels"),
        ("Faces :", "get_faces"),
    ])

    # A raw layer count of 0 is reported as one effective layer.
    raw_layers = t.get_layers(h)
    effective_layers = raw_layers if raw_layers > 0 else 1
    print("Layers (raw) :", raw_layers)
    print("Layers (effective) :", effective_layers)

    print("\nBasisTexFormat :", t.get_basis_tex_format(h))

    print("\nKTX2 Format Flags:")
    show([
        (" is_etc1s :", "is_etc1s"),
        (" is_uastc_ldr_4x4 :", "is_uastc_ldr_4x4"),
        (" is_xuastc_ldr :", "is_xuastc_ldr"),
        (" is_astc_ldr :", "is_astc_ldr"),
        (" is_hdr :", "is_hdr"),
        (" is_hdr_4x4 :", "is_hdr_4x4"),
        (" is_hdr_6x6 :", "is_hdr_6x6"),
        (" is_ldr :", "is_ldr"),
        (" is_srgb :", "is_srgb"),
        (" is_video :", "is_video"),
        (" has_alpha :", "has_alpha"),
    ])

    print("\nBlock Info:")
    show([
        (" block_width :", "get_block_width"),
        (" block_height :", "get_block_height"),
    ])

    print("\nDFD Info:")
    show([
        (" color_model :", "get_dfd_color_model"),
        (" color_primaries :", "get_dfd_color_primaries"),
        (" transfer_func :", "get_dfd_transfer_func"),
        (" flags :", "get_dfd_flags"),
        (" total_samples :", "get_dfd_total_samples"),
        (" channel_id0 :", "get_dfd_channel_id0"),
        (" channel_id1 :", "get_dfd_channel_id1"),
    ])

    # The nit multiplier line is only printed for HDR textures.
    if t.is_hdr(h):
        print(" hdr_nit_multiplier :", t.get_ldr_hdr_upconversion_nit_multiplier(h))


#
# ============================================================================
# BasisTexFormat helpers
# ============================================================================
def dump_basis_tex_format_helpers(t, h):
    """Print what each basis_tex_format_* helper on `t` reports for the file's format."""
    print_header("BasisTexFormat HELPERS (FULL)")

    tex_format = t.get_basis_tex_format(h)
    print("basis_tex_format:", tex_format)

    # (label, name of a helper on `t` that takes the basis texture format)
    helper_rows = (
        ("is_xuastc_ldr :", "basis_tex_format_is_xuastc_ldr"),
        ("is_astc_ldr :", "basis_tex_format_is_astc_ldr"),
        ("block width :", "basis_tex_format_get_block_width"),
        ("block height :", "basis_tex_format_get_block_height"),
        ("is_hdr :", "basis_tex_format_is_hdr"),
        ("is_ldr :", "basis_tex_format_is_ldr"),
    )
    for label, helper in helper_rows:
        print(label, getattr(t, helper)(tex_format))


# ============================================================================
# Level / Layer / Face metadata dump
# ============================================================================
def dump_per_level_info(t, h):
    """Print the per-image metadata for every (level, layer, face) of handle `h`."""
    print_header("PER-LEVEL / PER-LAYER / PER-FACE METADATA")

    level_count = t.get_levels(h)
    face_count = t.get_faces(h)
    raw_layers = t.get_layers(h)
    # A layer count of 0 is iterated as a single layer.
    layer_count = 1 if raw_layers == 0 else raw_layers

    # (label, name of a getter on `t` taking (handle, level, layer, face))
    image_rows = (
        (" orig_width :", "get_level_orig_width"),
        (" orig_height :", "get_level_orig_height"),
        (" actual_width :", "get_level_actual_width"),
        (" actual_height:", "get_level_actual_height"),
        (" blocks_x :", "get_level_num_blocks_x"),
        (" blocks_y :", "get_level_num_blocks_y"),
        (" total_blocks :", "get_level_total_blocks"),
        (" alpha_flag :", "get_level_alpha_flag"),
        (" iframe_flag :", "get_level_iframe_flag"),
    )

    for level in range(level_count):
        for layer in range(layer_count):
            for face in range(face_count):
                print(f"\nLevel={level}, Layer={layer}, Face={face}")
                for label, getter in image_rows:
                    print(label, getattr(t, getter)(h, level, layer, face))


#
# ============================================================================
# ASTC Selection
# ============================================================================
def choose_astc_format(t, h):
    """Pick the transcoder texture format matching the file's basis texture format.

    Returns:
        (transcoder_texture_format, block_width, block_height) as reported by `t`.
    """
    fmt = t.get_basis_tex_format(h)
    tfmt = t.basis_get_transcoder_texture_format_from_basis_tex_format(fmt)
    bw = t.basis_get_block_width(tfmt)
    bh = t.basis_get_block_height(tfmt)

    print_header("ASTC SELECTION")
    print("ASTC TF:", tfmt)
    print(f"Block dims: {bw}x{bh}")
    return tfmt, bw, bh


# ============================================================================
# BC Format Selection
# ============================================================================
def choose_bc_format(t, h):
    """Pick BC6H for HDR input and BC7 otherwise.

    Returns:
        (transcoder_texture_format, bits_per_pixel, dxgi_format_value).
    """
    if t.is_hdr(h):
        print_header("HDR -> BC6H")
        return TF.TF_BC6H, 8, 95      # DXGI_FORMAT_BC6H_UF16
    else:
        print_header("LDR -> BC7")
        return TF.TF_BC7_RGBA, 8, 98  # DXGI_FORMAT_BC7_UNORM


# ============================================================================
# Full explode transcoding (using handle API + per-level dims)
# ============================================================================
def explode_transcode(t, h):
    """Transcode every (level, layer, face) to an .astc file and a .dds file.

    Files are written to the current directory as
    `astc_L{level}_Y{layer}_F{face}.astc` and
    `bc6h_...dds` / `bc7_...dds`.

    Returns:
        True if every DDS write reported success, False if any
        `DDSWriter.save_dds` call returned a falsy result.
    """
    levels = t.get_levels(h)
    faces = t.get_faces(h)
    layers = t.get_layers(h)
    if layers == 0:
        layers = 1

    astc_tfmt, astc_bw, astc_bh = choose_astc_format(t, h)
    bc_tfmt, bc_bpp, bc_dxgi = choose_bc_format(t, h)

    # The DDS file-name prefix depends only on the texture, not on the image.
    dds_prefix = "bc6h" if t.is_hdr(h) else "bc7"

    ddsw = DDSWriter()
    all_dds_written = True
    print_header("BEGIN EXPLODE TRANSCODING (handle API)")

    for level in range(levels):
        for layer in range(layers):
            for face in range(faces):

                print(f"\n- Level={level} Layer={layer} Face={face}")

                ow = t.get_level_orig_width(h, level, layer, face)
                oh = t.get_level_orig_height(h, level, layer, face)
                print(f" Level orig dims: {ow}x{oh}")

                # ASTC
                astc_blocks = t.transcode_tfmt_handle(
                    h, astc_tfmt,
                    level=level, layer=layer, face=face,
                    decode_flags=0, channel0=-1, channel1=-1
                )
                astc_name = f"astc_L{level}_Y{layer}_F{face}.astc"
                write_astc_file(astc_name, astc_blocks, astc_bw, astc_bh, ow, oh)
                print(" ASTC saved:", astc_name)

                # BC6H / BC7
                bc_blocks = t.transcode_tfmt_handle(
                    h, bc_tfmt,
                    level=level, layer=layer, face=face,
                    decode_flags=0, channel0=-1, channel1=-1
                )
                dds_name = f"{dds_prefix}_L{level}_Y{layer}_F{face}.dds"

                saved = ddsw.save_dds(
                    dds_name,
                    width=ow, height=oh,
                    blocks=bc_blocks,
                    pixel_format_bpp=bc_bpp,
                    dxgi_format=bc_dxgi,
                    srgb=False,
                    force_dx10_header=True,
                )
                # Only claim success when the writer reported it.
                if saved:
                    print(" DDS saved :", dds_name)
                else:
                    all_dds_written = False
                    print(f" DDS write FAILED: {dds_name}", file=sys.stderr)

    print_header("EXPLODE TRANSCODING COMPLETE")
    return all_dds_written


# ============================================================================
# Decode each (Level, Layer, Face) to PNG or EXR
# ============================================================================
def explode_decode_images(t, h):
    """Decode every (level, layer, face) to `exr_...exr` (HDR) or `png_...png` (LDR)."""
    print_header("BEGIN EXPLODE IMAGE DECODE (PNG/EXR)")

    levels = t.get_levels(h)
    faces = t.get_faces(h)
    layers = t.get_layers(h)
    if layers == 0:
        layers = 1

    hdr = t.is_hdr(h)

    for level in range(levels):
        for layer in range(layers):
            for face in range(faces):

                print(f"\n- Decode Level={level} Layer={layer} Face={face}")

                if hdr:
                    rgba32f = t.decode_rgba_hdr_handle(h, level, layer, face)
                    outname = f"exr_L{level}_Y{layer}_F{face}.exr"
                    save_exr(outname, rgba32f)
                else:
                    rgba8 = t.decode_rgba_handle(h, level, layer, face)
                    outname = f"png_L{level}_Y{layer}_F{face}.png"
                    save_png(outname, rgba8)

    print_header("IMAGE DECODE COMPLETE")


def dump_transcoder_texture_format_helpers(t):
    """Print what each transcoder-format helper on `t` reports for a fixed list of formats."""
    print_header("TranscoderTextureFormat HELPERS (FULL)")

    test_formats = [
        # uncompressed
        TF.TF_RGBA32, TF.TF_RGB565, TF.TF_BGR565,
        TF.TF_RGBA4444, TF.TF_RGB_HALF, TF.TF_RGBA_HALF, TF.TF_RGB_9E5,

        # basic compressed
        TF.TF_ETC1_RGB, TF.TF_ETC2_RGBA,
        TF.TF_BC1_RGB, TF.TF_BC3_RGBA,
        TF.TF_BC4_R, TF.TF_BC5_RG,
        TF.TF_BC7_RGBA, TF.TF_BC6H,
        TF.TF_ETC2_EAC_R11, TF.TF_ETC2_EAC_RG11,
        TF.TF_FXT1_RGB,
        TF.TF_PVRTC1_4_RGB, TF.TF_PVRTC1_4_RGBA,
        TF.TF_PVRTC2_4_RGB, TF.TF_PVRTC2_4_RGBA,
        TF.TF_ATC_RGB, TF.TF_ATC_RGBA,

        # HDR ASTC
        TF.TF_ASTC_HDR_4X4_RGBA,
        TF.TF_ASTC_HDR_6X6_RGBA,

        # LDR ASTC
        TF.TF_ASTC_LDR_4X4_RGBA,
        TF.TF_ASTC_LDR_5X4_RGBA, TF.TF_ASTC_LDR_5X5_RGBA,
        TF.TF_ASTC_LDR_6X5_RGBA, TF.TF_ASTC_LDR_6X6_RGBA,
        TF.TF_ASTC_LDR_8X5_RGBA, TF.TF_ASTC_LDR_8X6_RGBA,
        TF.TF_ASTC_LDR_10X5_RGBA, TF.TF_ASTC_LDR_10X6_RGBA,
        TF.TF_ASTC_LDR_8X8_RGBA, TF.TF_ASTC_LDR_10X8_RGBA,
        TF.TF_ASTC_LDR_10X10_RGBA, TF.TF_ASTC_LDR_12X10_RGBA,
        TF.TF_ASTC_LDR_12X12_RGBA,
    ]

    for tfmt in test_formats:
        print(f"\nTF={tfmt}")
        print(" has_alpha :", t.basis_transcoder_format_has_alpha(tfmt))
        print(" is_hdr :", t.basis_transcoder_format_is_hdr(tfmt))
        print(" is_ldr :", t.basis_transcoder_format_is_ldr(tfmt))
        print(" is_astc :", t.basis_transcoder_texture_format_is_astc(tfmt))
        print(" is_uncompressed :", t.basis_transcoder_format_is_uncompressed(tfmt))
        print(" bytes/block :", t.basis_get_bytes_per_block_or_pixel(tfmt))
        print(" block_width :", t.basis_get_block_width(tfmt))
        print(" block_height :", t.basis_get_block_height(tfmt))


def main():
    """Command-line entry point.

    Reads the first non-`--` argument as the input .ktx2 path, dumps its
    metadata, and (unless `--info-only` is given) writes the exploded
    ASTC/DDS/PNG/EXR files. `--print-tf` / `--transcoder-formats`
    additionally dumps the transcoder-format helper table.

    Returns:
        Process exit status: 0 on success, 1 on a usage error, an unreadable
        input file, or a failed DDS write.
    """
    if len(sys.argv) < 2:
        print("Usage: python explode_ktx2_file.py input.ktx2 [--info-only] [--print-tf]")
        return 1

    args = sys.argv[1:]
    info_only = "--info-only" in args
    print_tf = "--print-tf" in args or "--transcoder-formats" in args

    # Determine input filename: the first argument that is not an option.
    input_file = next((a for a in args if not a.startswith("--")), None)

    if input_file is None:
        print("Error: No input file provided.")
        return 1

    # Read the file with a context manager so the handle is always released,
    # and report an unreadable path instead of crashing with a traceback.
    try:
        with open(input_file, "rb") as f:
            ktx_bytes = f.read()
    except OSError as e:
        print(f"Error: cannot read {input_file}: {e}")
        return 1

    t = Transcoder()
    h = t.open(ktx_bytes)
    try:
        t.start_transcoding(h)

        # Full metadata
        dump_all_top_level(t, h)
        dump_basis_tex_format_helpers(t, h)
        dump_per_level_info(t, h)

        # Optional TF helpers
        if print_tf:
            dump_transcoder_texture_format_helpers(t)

        if info_only:
            print_header("INFO-ONLY MODE NO FILES WRITTEN")
            return 0

        # Full output
        all_dds_written = explode_transcode(t, h)
        explode_decode_images(t, h)
    finally:
        # Release the transcoder handle on every path, including errors.
        t.close(h)

    if not all_dds_written:
        print("Error: one or more DDS files could not be written.")
        return 1

    print("Success")
    return 0

if __name__ == "__main__":
    sys.exit(main())
# basic_test.py
import sys
sys.path.append("basisu_py")  # make sure Python can load the .so

import basisu_python as bu
from constants import *

import ctypes
import math

def generate_swirl_rgba8(width, height):
    """
    Generate a smooth colorful swirl procedural RGBA8 test image.
    Returns: a ctypes array of type (c_ubyte * (width * height * 4))
    """
    pixel_count = width * height * 4
    img = (ctypes.c_ubyte * pixel_count)()

    for y in range(height):
        for x in range(width):
            i = (y * width + x) * 4

            # Offset from the image centre
            dx = x - width / 2
            dy = y - height / 2

            dist = math.hypot(dx, dy)
            angle = math.atan2(dy, dx)

            # Color swirl pattern
            r = int((math.sin(dist * 0.15) * 0.5 + 0.5) * 255)
            g = int((math.sin(angle * 3.0) * 0.5 + 0.5) * 255)
            b = int((math.cos(dist * 0.10 + angle * 2.0) * 0.5 + 0.5) * 255)

            img[i + 0] = r & 255
            img[i + 1] = g & 255
            img[i + 2] = b & 255
            img[i + 3] = 255

    return img

def generate_test_pattern_rgba8(width, height):
    """
    Generate a simple deterministic RGBA8 test pattern:
    R = x
    G = y
    B = x^y
    A = 255
    (each channel is masked to its low 8 bits)
    """
    pixel_count = width * height * 4
    img = (ctypes.c_ubyte * pixel_count)()

    for y in range(height):
        for x in range(width):
            i = (y * width + x) * 4

            img[i + 0] = x & 0xFF
            img[i + 1] = y & 0xFF
            img[i + 2] = (x ^ y) & 0xFF
            img[i + 3] = 255

    return img

# ------------------------------------------------------------
# BasisU compression test (NATIVE C++)
# ------------------------------------------------------------

print("Native BasisU version:", bu.get_version())
bu.init()

# Create comp params
params = bu.new_params()
print("Params handle:", params)

# Create RGBA8 swirl (W x H)
W, H = 512, 512
pixel_count = W * H * 4  # size of the image in bytes (4 bytes per pixel)

# Generate swirl image in PYTHON memory

img = generate_swirl_rgba8(W, H)
#img = generate_test_pattern_rgba8(W, H)

# Allocate memory inside NATIVE C++ heap
img_ptr = bu.alloc(pixel_count)

# Copy Python swirl image -> C++ heap buffer
ctypes.memmove(img_ptr, img, pixel_count)

# Set into BasisU
pitch = W * 4
ok = bu.set_image_rgba32(params, 0, img_ptr, W, H, pitch)
print("Set image:", ok)

# Compress (target format: BasisTexFormat.cASTC_LDR_4x4)
ok = bu.compress(
    params,
    BasisTexFormat.cASTC_LDR_4x4,  # basis_tex_format
    BasisQuality.MAX,              # quality
    BasisEffort.DEFAULT,           # effort
    BasisFlags.KTX2_OUTPUT | BasisFlags.SRGB | BasisFlags.THREADED | BasisFlags.DEBUG_OUTPUT | BasisFlags.VERBOSE,  # flags
    0.0                            # rdo
)
print("Compress:", ok)

# Retrieve compressed data
size = bu.get_comp_data_size(params)
ofs = bu.get_comp_data_ofs(params)

print("Output size =", size, "ptr =", ofs)

# Copy bytes out of native memory in a single call
blob = ctypes.string_at(ofs, size)

print("First 16 bytes:", blob[:16])

# Save to KTX2
with open("out_native.ktx2", "wb") as f:
    f.write(blob)

print("Saved out_native.ktx2")

# Cleanup
bu.delete_params(params)
bu.free(img_ptr)
constants import TranscodeDecodeFlags as DF + +TRUE = 1 +FALSE = 0 + +# ------------------------------------------------------------ +# Utility: write raw bytes to a file +# ------------------------------------------------------------ + +def write_blob_to_file(filename: str, data: bytes) -> int: + print(f"write_blob_to_file: writing {len(data)} bytes to {filename!r}") + if not filename or data is None: + print(" ERROR: invalid filename or data") + return FALSE + + try: + with open(filename, "wb") as f: + f.write(data) + print(" OK") + return TRUE + except OSError as e: + print(" ERROR:", e) + return FALSE + +# ------------------------------------------------------------ +# TGA writer (24/32bpp) - port of write_tga_image() +# ------------------------------------------------------------ + +def write_tga_image(filename: str, w: int, h: int, has_alpha: bool, pixels_rgba_ptr: int) -> int: + """ + filename: path to TGA file + w, h: image dimensions + has_alpha: True for 32bpp, False for 24bpp + pixels_rgba_ptr: C pointer (uint64) to RGBA or RGB data in native heap + """ + print(f"write_tga_image: {filename!r}, {w}x{h}, has_alpha={has_alpha}, ptr=0x{pixels_rgba_ptr:x}") + if not filename or pixels_rgba_ptr == 0 or w <= 0 or h <= 0: + print(" ERROR: invalid args") + return -1 + + bytes_per_pixel = 4 if has_alpha else 3 + row_bytes = w * bytes_per_pixel + total_bytes = row_bytes * h + + # Create a ctypes buffer that views the native memory + SrcArrayType = ctypes.c_ubyte * total_bytes + src = SrcArrayType.from_address(pixels_rgba_ptr) + + try: + with open(filename, "wb") as f: + header = bytearray(18) + header[2] = 2 # uncompressed true-color + header[12] = w & 0xFF + header[13] = (w >> 8) & 0xFF + header[14] = h & 0xFF + header[15] = (h >> 8) & 0xFF + header[16] = 32 if has_alpha else 24 + header[17] = 8 if has_alpha else 0 # bottom-left origin (with or without alpha) + + f.write(header) + + # temp row buffer for BGRA/BGR + row_buf = bytearray(row_bytes) + + # TGA expects 
rows bottom-to-top + for y in range(h): + src_y = h - 1 - y + row_start = src_y * row_bytes + src_row = src[row_start:row_start + row_bytes] + + if has_alpha: + # RGBA -> BGRA + for x in range(w): + si = x*4 + di = x*4 + row_buf[di + 0] = src_row[si + 2] # B + row_buf[di + 1] = src_row[si + 1] # G + row_buf[di + 2] = src_row[si + 0] # R + row_buf[di + 3] = src_row[si + 3] # A + else: + # RGB -> BGR + for x in range(w): + si = x*3 + di = x*3 + row_buf[di + 0] = src_row[si + 2] # B + row_buf[di + 1] = src_row[si + 1] # G + row_buf[di + 2] = src_row[si + 0] # R + + f.write(row_buf) + + print(" Wrote TGA:", filename) + return 0 + except OSError as e: + print(" ERROR writing TGA:", e) + return -2 + +# ------------------------------------------------------------ +# ASTC writer - port of write_astc_file() +# ------------------------------------------------------------ + +def write_astc_file(filename: str, + blocks_ptr: int, + block_width: int, + block_height: int, + dim_x: int, + dim_y: int) -> int: + print(f"write_astc_file: {filename!r}, block={block_width}x{block_height}, dim={dim_x}x{dim_y}, ptr=0x{blocks_ptr:x}") + if not filename or blocks_ptr == 0: + print(" ERROR: invalid filename or pointer") + return 0 + + assert dim_x > 0 and dim_y > 0 + assert 4 <= block_width <= 12 + assert 4 <= block_height <= 12 + + num_blocks_x = (dim_x + block_width - 1) // block_width + num_blocks_y = (dim_y + block_height - 1) // block_height + total_blocks = num_blocks_x * num_blocks_y + total_bytes = total_blocks * 16 # 16 bytes per ASTC block + + print(f" num_blocks_x={num_blocks_x}, num_blocks_y={num_blocks_y}, total_blocks={total_blocks}, total_bytes={total_bytes}") + + # View native memory + BlockArray = ctypes.c_ubyte * total_bytes + src = BlockArray.from_address(blocks_ptr) + + try: + with open(filename, "wb") as f: + # Magic + f.write(bytes([0x13, 0xAB, 0xA1, 0x5C])) + + # Block dimensions x,y,z (=1) + f.write(bytes([block_width & 0xFF, block_height & 0xFF, 1])) + + # dim_x 
(24-bit LE) + f.write(bytes([dim_x & 0xFF, (dim_x >> 8) & 0xFF, (dim_x >> 16) & 0xFF])) + + # dim_y (24-bit LE) + f.write(bytes([dim_y & 0xFF, (dim_y >> 8) & 0xFF, (dim_y >> 16) & 0xFF])) + + # dim_z = 1 (24-bit LE) + f.write(bytes([1, 0, 0])) + + # Block data + f.write(bytes(src)) + + print(" Wrote ASTC:", filename) + return 1 + except OSError as e: + print(" ERROR writing ASTC:", e) + return 0 + +# ------------------------------------------------------------ +# Procedural RGBA pattern (ported & fixed version) +# ------------------------------------------------------------ + +def create_pretty_rgba_pattern(w: int, h: int) -> bytes: + print(f"create_pretty_rgba_pattern: {w}x{h}") + if w <= 0 or h <= 0: + return None + + out = bytearray(w * h * 4) + for y in range(h): + for x in range(w): + fx = x / float(w) + fy = y / float(h) + + # Colorful plasma-type formula + v = math.sin(fx * 12.0 + fy * 4.0) + v += math.sin(fy * 9.0 - fx * 6.0) + v += math.sin((fx + fy) * 7.0) + v = v * 0.25 + 0.5 # scale 0..1 + + L = 1.5 + + r = int(round(255.0 * math.sin(v * 6.28) * L)) + g = int(round(255.0 * (1.0 - v) * L)) + b = int(round(255.0 * v * L)) + + if r < 0: r = 0 + elif r > 255: r = 255 + if g < 0: g = 0 + elif g > 255: g = 255 + if b < 0: b = 0 + elif b > 255: b = 255 + + i = (y * w + x) * 4 + out[i+0] = r + out[i+1] = g + out[i+2] = b + out[i+3] = 255 + + return bytes(out) + +# ------------------------------------------------------------ +# Transcode a KTX2 blob (ported from transcode_ktx2_file) +# ------------------------------------------------------------ + +def transcode_ktx2_file(ktx2_data: bytes) -> int: + if not ktx2_data: + print("transcode_ktx2_file: empty data") + return FALSE + + size = len(ktx2_data) + print(f"transcode_ktx2_file: size={size} bytes") + + if size > 0xFFFFFFFF: + print(" ERROR: size too large for 32-bit length") + return FALSE + + # Allocate memory in transcoder heap and copy KTX2 data + ktx2_data_ofs = bt.alloc(size) + if not ktx2_data_ofs: + 
print(" ERROR: bt.alloc failed") + return FALSE + + print(f" KTX2 data allocated at 0x{ktx2_data_ofs:x}") + ctypes.memmove(ktx2_data_ofs, ktx2_data, size) + + # Open KTX2 + ktx2_handle = bt.ktx2_open(ktx2_data_ofs, size) + if not ktx2_handle: + print(" ERROR: bt.ktx2_open failed") + bt.free(ktx2_data_ofs) + return FALSE + + print(f" KTX2 handle = 0x{ktx2_handle:x}") + + if not bt.ktx2_is_ldr(ktx2_handle): + print(" ERROR: This sample only handles LDR KTX2 files") + bt.ktx2_close(ktx2_handle) + bt.free(ktx2_data_ofs) + return FALSE + + if not bt.ktx2_start_transcoding(ktx2_handle): + print(" ERROR: bt.ktx2_start_transcoding failed") + bt.ktx2_close(ktx2_handle) + bt.free(ktx2_data_ofs) + return FALSE + + width = bt.ktx2_get_width(ktx2_handle) + height = bt.ktx2_get_height(ktx2_handle) + levels = bt.ktx2_get_levels(ktx2_handle) + faces = bt.ktx2_get_faces(ktx2_handle) + layers = bt.ktx2_get_layers(ktx2_handle) + + basis_tex_format = bt.ktx2_get_basis_tex_format(ktx2_handle) + block_width = bt.ktx2_get_block_width(ktx2_handle) + block_height = bt.ktx2_get_block_height(ktx2_handle) + is_srgb = bt.ktx2_is_srgb(ktx2_handle) + + print(f"KTX2 Dimensions: {width}x{height}, Levels={levels}, Faces={faces}, Layers={layers}") + print(f"basis_tex_format: {basis_tex_format}") + print(f"Block dimensions: {block_width}x{block_height}") + print(f"is sRGB: {is_srgb}") + + if layers < 1: + layers = 1 + + assert width >= 1 and height >= 1 + assert levels >= 1 + assert faces in (1, 6) + + # Optional: separate transcode state (thread-local) + trans_state = bt.ktx2_create_transcode_state() + print(f"trans_state handle = 0x{trans_state:x}") + + for level_index in range(levels): + for layer_index in range(layers): + for face_index in range(faces): + print(f"- Level {level_index}, layer {layer_index}, face {face_index}") + ow = bt.ktx2_get_level_orig_width(ktx2_handle, level_index, layer_index, face_index) + oh = bt.ktx2_get_level_orig_height(ktx2_handle, level_index, layer_index, 
face_index) + aw = bt.ktx2_get_level_actual_width(ktx2_handle, level_index, layer_index, face_index) + ah = bt.ktx2_get_level_actual_height(ktx2_handle, level_index, layer_index, face_index) + nbx = bt.ktx2_get_level_num_blocks_x(ktx2_handle, level_index, layer_index, face_index) + nby = bt.ktx2_get_level_num_blocks_y(ktx2_handle, level_index, layer_index, face_index) + tblocks = bt.ktx2_get_level_total_blocks(ktx2_handle, level_index, layer_index, face_index) + alpha_flag = bt.ktx2_get_level_alpha_flag(ktx2_handle, level_index, layer_index, face_index) + iframe_flag = bt.ktx2_get_level_iframe_flag(ktx2_handle, level_index, layer_index, face_index) + + print(f" Orig dimensions: {ow}x{oh}, actual: {aw}x{ah}") + print(f" Block dims: {nbx}x{nby}, total blocks: {tblocks}") + print(f" Alpha={alpha_flag}, I-frame={iframe_flag}") + + # 1) Transcode to RGBA32 and write TGA + tga_name = f"transcoded_{level_index}_{layer_index}_{face_index}.tga" + trans_size_rgba = bt.basis_compute_transcoded_image_size_in_bytes(TF.TF_RGBA32, ow, oh) + assert trans_size_rgba > 0 + rgba_ofs = bt.alloc(trans_size_rgba) + print(f" RGBA buf ofs=0x{rgba_ofs:x}, size={trans_size_rgba}") + + decode_flags = 0 + ok = bt.ktx2_transcode_image_level( + ktx2_handle, + level_index, layer_index, face_index, + rgba_ofs, + trans_size_rgba, + TF.TF_RGBA32, + decode_flags, + 0, 0, -1, -1, + trans_state + ) + print(" ktx2_transcode_image_level(RGBA32):", ok) + if not ok: + bt.free(rgba_ofs) + bt.ktx2_destroy_transcode_state(trans_state) + bt.ktx2_close(ktx2_handle) + bt.free(ktx2_data_ofs) + return FALSE + + write_tga_image(tga_name, ow, oh, True, rgba_ofs) + bt.free(rgba_ofs) + + # 2) Transcode to ASTC and write .astc file + astc_name = f"transcoded_{level_index}_{layer_index}_{face_index}.astc" + target_tf = bt.basis_get_transcoder_texture_format_from_basis_tex_format(basis_tex_format) + print(f" Target ASTC TF={target_tf}") + + trans_size_astc = bt.basis_compute_transcoded_image_size_in_bytes(target_tf, ow, 
oh) + assert trans_size_astc > 0 + astc_ofs = bt.alloc(trans_size_astc) + print(f" ASTC buf ofs=0x{astc_ofs:x}, size={trans_size_astc}") + + ok = bt.ktx2_transcode_image_level( + ktx2_handle, + level_index, layer_index, face_index, + astc_ofs, + trans_size_astc, + target_tf, + 0, 0, 0, -1, -1, + trans_state + ) + print(" ktx2_transcode_image_level(ASTC):", ok) + if not ok: + bt.free(astc_ofs) + bt.ktx2_destroy_transcode_state(trans_state) + bt.ktx2_close(ktx2_handle) + bt.free(ktx2_data_ofs) + return FALSE + + write_astc_file(astc_name, astc_ofs, block_width, block_height, ow, oh) + bt.free(astc_ofs) + + bt.ktx2_destroy_transcode_state(trans_state) + bt.ktx2_close(ktx2_handle) + bt.free(ktx2_data_ofs) + + print("transcode_ktx2_file: success") + return TRUE + +# ------------------------------------------------------------ +# main() equivalent +# ------------------------------------------------------------ + +def main(): + print("example_capi_python:") + + # Init encoder (which initializes transcoder) + print("Calling bu.init() ...") + bu.init() + + print("Calling bt.init() ...") + bt.init() + + # Optional debug control if bound + if hasattr(bu, "enable_debug_printf"): + print("Disabling debug printf from encoder") + bu.enable_debug_printf(False) + + # Generate test image + W, H = 512, 512 + src_image = create_pretty_rgba_pattern(W, H) + if src_image is None: + print("ERROR: create_pretty_rgba_pattern failed") + return 1 + + # Save test image for inspection + print("Writing test_image.tga ...") + # use Python-level TGA writer by allocating a temporary native buffer + tmp_ofs = bt.alloc(len(src_image)) + ctypes.memmove(tmp_ofs, src_image, len(src_image)) + write_tga_image("test_image.tga", W, H, True, tmp_ofs) + bt.free(tmp_ofs) + + # Compress to KTX2 + print("Creating comp_params ...") + comp_params = bu.new_params() + print(" comp_params handle:", comp_params) + + img_ofs = bu.alloc(W * H * 4) + print(f"Allocated encoder image buffer at 0x{img_ofs:x}") + 
ctypes.memmove(img_ofs, src_image, W * H * 4) + + print("Calling bu.comp_params_set_image_rgba32(...)") + ok = bu.set_image_rgba32(comp_params, 0, img_ofs, W, H, W * 4) + print(" set_image_rgba32:", ok) + if not ok: + print("ERROR: bu_comp_params_set_image_rgba32 failed") + return 1 + + bu.free(img_ofs) + + print("Compressing to XUASTC LDR 8x5 KTX2 ...") + basis_tex_format = BasisTexFormat.cXUASTC_LDR_8x5 + quality_level = 85 + effort_level = 2 + flags = (BasisFlags.KTX2_OUTPUT | + BasisFlags.SRGB | + BasisFlags.THREADED | + BasisFlags.GEN_MIPS_CLAMP | + BasisFlags.PRINT_STATS | + BasisFlags.PRINT_STATUS) + + ok = bu.compress(comp_params, + tex_format=basis_tex_format, + quality=quality_level, + effort=effort_level, + flags=flags, + rdo_quality=0.0) + print(" bu.compress:", ok) + if not ok: + print("ERROR: bu_compress_texture failed") + return 1 + + comp_size = bu.get_comp_data_size(comp_params) + print("Compressed size:", comp_size) + if comp_size == 0: + print("ERROR: bu_comp_params_get_comp_data_size failed") + return 1 + + comp_ofs = bu.get_comp_data_ofs(comp_params) + print(f"Compressed data ptr=0x{comp_ofs:x}") + + # Copy compressed data into Python bytes + CompArray = ctypes.c_ubyte * comp_size + comp_buf = CompArray.from_address(comp_ofs) + comp_bytes = bytes(comp_buf) + + print("Writing test.ktx2 ...") + if not write_blob_to_file("test.ktx2", comp_bytes): + print("ERROR: write_blob_to_file failed") + return 1 + + # Transcode using the native transcoder API + print("Now transcoding test.ktx2 via C API ...") + if not transcode_ktx2_file(comp_bytes): + print("ERROR: transcode_ktx2_file failed") + return 1 + + bu.delete_params(comp_params) + + print("Success") + return 0 + +if __name__ == "__main__": + sys.exit(main()) diff --git a/external/basis_universal/python/lowlevel_test_native/test_transcoder_basic.py b/external/basis_universal/python/lowlevel_test_native/test_transcoder_basic.py new file mode 100644 index 0000000000..7aa87720f5 --- /dev/null +++ 
b/external/basis_universal/python/lowlevel_test_native/test_transcoder_basic.py @@ -0,0 +1,24 @@ +# test_transcoder_basic.py +import sys +import os + +# Make sure Python can find the .so file +sys.path.append("basisu_py") # Adjust if needed + +try: + import basisu_transcoder_python as bt +except ImportError as e: + print("Failed to import basisu_transcoder_python:", e) + raise + +print("Successfully loaded basisu_transcoder_python") + +# Call bt_get_version() via the pybind11 binding +try: + version = bt.get_version() + print("Transcoder version:", version) +except Exception as e: + print("Error calling bt_get_version:", e) + raise + +print("Basic transcoder test complete.") diff --git a/external/basis_universal/python/lowlevel_test_wasm/__init__.py b/external/basis_universal/python/lowlevel_test_wasm/__init__.py new file mode 100644 index 0000000000..143f486c05 --- /dev/null +++ b/external/basis_universal/python/lowlevel_test_wasm/__init__.py @@ -0,0 +1 @@ +# __init__.py diff --git a/external/basis_universal/python/lowlevel_test_wasm/basic_test.py b/external/basis_universal/python/lowlevel_test_wasm/basic_test.py new file mode 100644 index 0000000000..5fbede022c --- /dev/null +++ b/external/basis_universal/python/lowlevel_test_wasm/basic_test.py @@ -0,0 +1,58 @@ +import wasmtime +import ctypes + +# --- Engine --- +engine = wasmtime.Engine() + +# --- Store --- +store = wasmtime.Store(engine) + +# --- WASI config --- +wasi = wasmtime.WasiConfig() +wasi.argv = ["basisu_module_st"] +wasi.inherit_stdout() # <-- tell WASI to use the host stdout +wasi.inherit_stderr() +store.set_wasi(wasi) + +# --- Load module --- +module = wasmtime.Module.from_file(engine, "basisu_py/wasm/basisu_module_st.wasm") + +# --- Linker + WASI --- +linker = wasmtime.Linker(engine) +linker.define_wasi() + +# --- Instantiate --- +instance = linker.instantiate(store, module) +print("Single-threaded WASM instantiated OK") + +# --- Exports --- +exports = instance.exports(store) + +get_version = 
exports["bu_get_version"] +alloc = exports["bu_alloc"] +free = exports["bu_free"] +memory = exports["memory"] + +# --- Version --- +version = get_version(store) +print("Version =", version) + +# --- Alloc --- +ptr = alloc(store, 64) +print("Allocated ptr =", ptr) + +# --- Access WASM memory properly --- +data_len = memory.data_len(store) +raw_ptr = memory.data_ptr(store) # ctypes pointer +addr = ctypes.addressof(raw_ptr.contents) # convert to integer pointer + +# Create a byte array view into WASM memory +buf = (ctypes.c_ubyte * data_len).from_address(addr) + +# Write TEST at allocated ptr +buf[ptr : ptr + 4] = b"TEST" +print("Wrote TEST into WASM memory.") + +# --- Free --- +free(store, ptr) +print("Memory free OK.") diff --git a/external/basis_universal/python/lowlevel_test_wasm/basisu_wasm.py b/external/basis_universal/python/lowlevel_test_wasm/basisu_wasm.py new file mode 100644 index 0000000000..08b3933555 --- /dev/null +++ b/external/basis_universal/python/lowlevel_test_wasm/basisu_wasm.py @@ -0,0 +1,148 @@ +# basisu_wasm.py +import wasmtime +import ctypes +import sys + +sys.path.append("basisu_py") # our shared .py files + +from constants import * + +class BasisuWasm: + def __init__(self, path): + self.path = path + self.engine = None + self.store = None + self.memory = None + self.exports = None + + # ----------------------------------------------- + # Internal helper: build WASI + Wasmtime engine + # ----------------------------------------------- + def _init_engine(self): + self.engine = wasmtime.Engine() + self.store = wasmtime.Store(self.engine) + + wasi = wasmtime.WasiConfig() + wasi.argv = ["basisu"] + wasi.inherit_stdout() + wasi.inherit_stderr() + self.store.set_wasi(wasi) + + return wasi + + # ----------------------------------------------- + # Create linker and instantiate WASM module + # ----------------------------------------------- + def load(self): + self._init_engine() + + module = wasmtime.Module.from_file(self.engine, self.path) + linker = 
wasmtime.Linker(self.engine) + linker.define_wasi() + + instance = linker.instantiate(self.store, module) + + self.exports = instance.exports(self.store) + self.memory = self.exports["memory"] + + if "bu_init" in self.exports: + self.exports["bu_init"](self.store) + + print("WASM loaded:", self.path) + + # ----------------------------------------------- + # Read/write WASM linear memory via ctypes + # ----------------------------------------------- + def _wasm_buf(self): + raw_ptr = self.memory.data_ptr(self.store) + length = self.memory.data_len(self.store) + addr = ctypes.addressof(raw_ptr.contents) + return (ctypes.c_ubyte * length).from_address(addr) + + # ----------------------------------------------- + # Exported API accessors + # ----------------------------------------------- + def init(self): + return self.exports["bu_init"](self.store) + + def version(self): + return self.exports["bu_get_version"](self.store) + + def alloc(self, size): + return self.exports["bu_alloc"](self.store, size) + + def free(self, ptr): + return self.exports["bu_free"](self.store, ptr) + + def new_params(self): + return self.exports["bu_new_comp_params"](self.store) + + def delete_params(self, ptr): + return self.exports["bu_delete_comp_params"](self.store, ptr) + + def set_image_rgba32(self, params, image_index, img_ptr, w, h, pitch): + return self.exports["bu_comp_params_set_image_rgba32"]( + self.store, params, image_index, img_ptr, w, h, pitch + ) + + def set_image_float_rgba(self, params, image_index, img_ptr, w, h, pitch): + return self.exports["bu_comp_params_set_image_float_rgba"]( + self.store, params, image_index, img_ptr, w, h, pitch + ) + + # Normally quality_level controls the quality. + # If quality_level==-1, then rdo_quality (a low-level parameter) directly + # controls each codec's quality setting. Normally set to 0. 
+ + def compress_texture_lowlevel(self, params, + tex_format, + quality_level, + effort_level, + flags_and_quality, + rdo_quality): + + return self.exports["bu_compress_texture"]( + self.store, + params, + tex_format, + quality_level, + effort_level, + flags_and_quality, + rdo_quality + ) + + def compress(self, params, + tex_format=BasisTexFormat.cUASTC_LDR_4x4, + quality=BasisQuality.MAX, + effort=BasisEffort.DEFAULT, + flags=BasisFlags.NONE, + rdo_quality=0.0): + + return bool(self.compress_texture_lowlevel( + params, + tex_format, + quality, + effort, + flags, + rdo_quality + )) + + def get_comp_data_ofs(self, params): + return self.exports["bu_comp_params_get_comp_data_ofs"](self.store, params) + + def get_comp_data_size(self, params): + return self.exports["bu_comp_params_get_comp_data_size"](self.store, params) + + # ----------------------------------------------- + # Copy bytes into WASM memory + # ----------------------------------------------- + def write_bytes(self, wasm_ptr, data: bytes): + buf = self._wasm_buf() + buf[wasm_ptr:wasm_ptr+len(data)] = data + + # ----------------------------------------------- + # Read bytes from WASM memory + # ----------------------------------------------- + def read_bytes(self, wasm_ptr, size): + buf = self._wasm_buf() + return bytes(buf[wasm_ptr:wasm_ptr+size]) diff --git a/external/basis_universal/python/lowlevel_test_wasm/compress_test.py b/external/basis_universal/python/lowlevel_test_wasm/compress_test.py new file mode 100644 index 0000000000..285bb105fe --- /dev/null +++ b/external/basis_universal/python/lowlevel_test_wasm/compress_test.py @@ -0,0 +1,63 @@ +# compress_test.py +from .basisu_wasm import * + +# === Load WASM === +codec = BasisuWasm("basisu_py/wasm/basisu_module_st.wasm") +codec.load() + +print("Version =", codec.version()) + +# === Build test image === +W, H = 256, 256 +BYTES_PER_PIXEL = 4 +pitch = W * BYTES_PER_PIXEL + +img = bytearray(W * H * 4) + +for y in range(H): + for x in range(W): + i = (y * 
W + x) * 4 + img[i + 0] = x & 0xFF # R + img[i + 1] = y & 0xFF # G + img[i + 2] = (x ^ y) & 0xFF # B + img[i + 3] = 255 # A + +# === Upload image to WASM memory === +img_ptr = codec.alloc(len(img)) +codec.write_bytes(img_ptr, img) + +# === Create comp_params === +params = codec.new_params() + +# === Set image into comp_params === +ok = codec.set_image_rgba32(params, 0, img_ptr, W, H, pitch) +print("Set image:", ok) + +# === Compress === +ok = codec.compress( + params, + tex_format=BasisTexFormat.cUASTC_LDR_4x4, + quality=100, + effort=BasisEffort.DEFAULT, + flags=BasisFlags.KTX2_OUTPUT | BasisFlags.SRGB, + rdo_quality=0.0 +) +print("Compress result:", ok) + +# === Retrieve compressed blob === +ofs = codec.get_comp_data_ofs(params) +size = codec.get_comp_data_size(params) +print("Output size =", size) + +comp_data = codec.read_bytes(ofs, size) +print("First 16 bytes:", comp_data[:16]) + +# === Save to KTX2 === +with open("test.ktx2", "wb") as f: + f.write(comp_data) + +print("File written: test.ktx2") + +# === Cleanup === +codec.delete_params(params) +codec.free(img_ptr) diff --git a/external/basis_universal/python/lowlevel_test_wasm/compress_test_float.py b/external/basis_universal/python/lowlevel_test_wasm/compress_test_float.py new file mode 100644 index 0000000000..3f60cb7b4d --- /dev/null +++ b/external/basis_universal/python/lowlevel_test_wasm/compress_test_float.py @@ -0,0 +1,76 @@ +# compress_test_float.py + +from .basisu_wasm import BasisuWasm, BasisTexFormat, BasisEffort, BasisFlags, BasisQuality +import struct # for packing floats + +# === Load WASM === +codec = BasisuWasm("basisu_py/wasm/basisu_module_st.wasm") +codec.load() + +print("Version =", codec.version()) + +# === Build a 256x256 FLOAT RGBA image === +W, H = 256, 256 +BYTES_PER_PIXEL = 16 # float32 * 4 +pitch = W * BYTES_PER_PIXEL + +# Float image stored as bytearray of packed floats +img = bytearray(W * H * BYTES_PER_PIXEL) + +for y in range(H): + for x in range(W): + # Create some float HDR 
gradient pattern + r = float(x) / W # 0.0 ? 1.0 + g = float(y) / H # 0.0 ? 1.0 + b = float(x ^ y) / 255.0 # quirky pattern + a = 1.0 + + i = (y * W + x) * 4 + + # pack into img bytearray + struct.pack_into("ffff", img, i*4, r, g, b, a) + +print("Created FLOAT RGBA image.") + +# === Upload to WASM memory === +img_ptr = codec.alloc(len(img)) +codec.write_bytes(img_ptr, img) +print("Copied float image into WASM heap at", img_ptr) + +# === Create params === +params = codec.new_params() + +# === Set FLOAT RGBA image === +ok = codec.set_image_float_rgba(params, 0, img_ptr, W, H, pitch) +print("Set float RGBA:", ok) + +# === Compress using HDR UASTC 4x4 === +ok = codec.compress( + params, + tex_format=BasisTexFormat.cUASTC_HDR_4x4, + quality=BasisQuality.MAX, + effort=BasisEffort.DEFAULT, + flags=BasisFlags.KTX2_OUTPUT | BasisFlags.REC2020, # optional: HDR color space + rdo_quality=0.0 +) + +print("Compression result:", ok) + +# === Retrieve compressed HDR KTX2 === +ofs = codec.get_comp_data_ofs(params) +size = codec.get_comp_data_size(params) + +print("Output size =", size) +data = codec.read_bytes(ofs, size) + +print("First 16 bytes:", data[:16]) + +# === Save to test_hdr.ktx2 === +with open("test_hdr.ktx2", "wb") as f: + f.write(data) + +print("Wrote test_hdr.ktx2") + +# === Cleanup === +codec.delete_params(params) +codec.free(img_ptr) diff --git a/external/basis_universal/python/pyproject.toml b/external/basis_universal/python/pyproject.toml new file mode 100644 index 0000000000..e893c577fc --- /dev/null +++ b/external/basis_universal/python/pyproject.toml @@ -0,0 +1,44 @@ +[build-system] +requires = ["setuptools>=65", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "basisu-py" +version = "0.2.0" +description = "Python bindings for Basis Universal encoder/transcoder v2.x with native + WASM backend" +authors = [ + { name = "Binomial LLC", email = "stephanie@binomial.info" } +] +license = { text = "Apache 2.0" } +readme = "README.md" 
+requires-python = ">=3.8" + +dependencies = [ + "numpy", + "Pillow", + "imageio>=2.22", + "wasmtime", +] + +classifiers = [ + "Programming Language :: Python :: 3", + "Programming Language :: C++", + "Operating System :: OS Independent", + "License :: OSI Approved :: Apache Software License", +] + +[tool.setuptools] +include-package-data = true + +[tool.setuptools.packages.find] +include = ["basisu_py*"] + +[tool.setuptools.package-data] +basisu_py = [ + "*.so", + "*.pyd", + "*.py", + "wasm/*.wasm", + "wasm/*.py", + "README.md", +] diff --git a/external/basis_universal/python/tests/__init__.py b/external/basis_universal/python/tests/__init__.py new file mode 100644 index 0000000000..2badd27e81 --- /dev/null +++ b/external/basis_universal/python/tests/__init__.py @@ -0,0 +1 @@ +# python/tests/__init__.py diff --git a/external/basis_universal/python/tests/test_backend_loading.py b/external/basis_universal/python/tests/test_backend_loading.py new file mode 100644 index 0000000000..598bae84f8 --- /dev/null +++ b/external/basis_universal/python/tests/test_backend_loading.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 +import numpy as np +from PIL import Image + +from basisu_py.codec import Encoder, EncoderBackend +from basisu_py.constants import BasisTexFormat + +print("========== BACKEND LOADING TEST ==========\n") + +# -------------------------------------------------------------- +# 1. Test native backend (if available) +# -------------------------------------------------------------- +print("Testing native backend...") + +try: + enc_native = Encoder(backend=EncoderBackend.NATIVE) + print(" [OK] Native backend loaded") +except Exception as e: + print(" [FAIL] Native backend failed to load:", e) + enc_native = None + +# If native loaded, test very basic functionality +if enc_native: + try: + version = enc_native._native.get_version() + print(f" Native get_version() ? 
{version}") + + ptr = enc_native._native.alloc(16) + print(f" Native alloc() returned ptr = {ptr}") + + enc_native._native.free(ptr) + print(f" Native free() OK") + + print(" [OK] Native basic operations working.\n") + except Exception as e: + print(" [FAIL] Native operations error:", e) +else: + print(" Skipping native basic operations.\n") + +# -------------------------------------------------------------- +# 2. Test WASM backend +# -------------------------------------------------------------- +print("\nTesting WASM backend...") + +try: + enc_wasm = Encoder(backend=EncoderBackend.WASM) + print(" [OK] WASM backend loaded") +except Exception as e: + print(" [FAIL] WASM backend failed to load:", e) + enc_wasm = None + +# If WASM loaded, test basic methods +if enc_wasm and enc_wasm._wasm is not None: + try: + version = enc_wasm._wasm.get_version() + print(f" WASM get_version() ? {version}") + + ptr = enc_wasm._wasm.alloc(16) + print(f" WASM alloc() returned ptr = {ptr}") + + enc_wasm._wasm.free(ptr) + print(f" WASM free() OK") + + print(" [OK] WASM basic operations working.\n") + except Exception as e: + print(" [FAIL] WASM operations error:", e) +else: + print(" Skipping WASM basic operations.\n") + +print("\n========== DONE ==========\n") diff --git a/external/basis_universal/python/tests/test_basic_backend_selection.py b/external/basis_universal/python/tests/test_basic_backend_selection.py new file mode 100644 index 0000000000..6cfb75ced6 --- /dev/null +++ b/external/basis_universal/python/tests/test_basic_backend_selection.py @@ -0,0 +1,7 @@ +from basisu_py import Encoder + +enc = Encoder() # AUTO mode +print("Encoder backend:", enc.backend) +print("Native loaded:", enc._native is not None) +print("WASM loaded:", enc._wasm is not None) +print("Version:", enc._native.get_version() if enc._native else enc._wasm.get_version()) diff --git a/external/basis_universal/python/tests/test_basic_decode.py b/external/basis_universal/python/tests/test_basic_decode.py new 
file mode 100644 index 0000000000..515c31ac87 --- /dev/null +++ b/external/basis_universal/python/tests/test_basic_decode.py @@ -0,0 +1,19 @@ +from basisu_py import Transcoder +from PIL import Image +import numpy as np + +# Load input file +with open("test.ktx2", "rb") as f: + data = f.read() + +# Decode (AUTO backend) +t = Transcoder() +rgba = t.decode_rgba(data) # returns HxWx4 uint8 NumPy array + +print("Decoded:", rgba.shape, rgba.dtype) + +# Convert to Pillow Image and save +img = Image.fromarray(rgba, mode="RGBA") +img.save("decoded.png") + +print("Wrote decoded.png") \ No newline at end of file diff --git a/external/basis_universal/python/tests/test_basic_transcode.py b/external/basis_universal/python/tests/test_basic_transcode.py new file mode 100644 index 0000000000..fc10a2305d --- /dev/null +++ b/external/basis_universal/python/tests/test_basic_transcode.py @@ -0,0 +1,10 @@ +from basisu_py import Transcoder + +with open("test.ktx2", "rb") as f: + data = f.read() + +t = Transcoder() # AUTO backend +img = t.decode_rgba(data) + +print("Decoded shape:", img.shape) +print("dtype:", img.dtype) diff --git a/external/basis_universal/python/tests/test_basic_wasm_selection.py b/external/basis_universal/python/tests/test_basic_wasm_selection.py new file mode 100644 index 0000000000..bdad65fb11 --- /dev/null +++ b/external/basis_universal/python/tests/test_basic_wasm_selection.py @@ -0,0 +1,6 @@ +from basisu_py import Transcoder +from basisu_py.transcoder import TranscoderBackend + +t = Transcoder(backend=TranscoderBackend.WASM) +print("Backend:", t.backend_name) +t.decode_rgba(open("test.ktx2","rb").read()) diff --git a/external/basis_universal/python/tests/test_compress_swirl.py b/external/basis_universal/python/tests/test_compress_swirl.py new file mode 100644 index 0000000000..a02f7d5754 --- /dev/null +++ b/external/basis_universal/python/tests/test_compress_swirl.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 +import numpy as np +from PIL import Image +from math 
import sin, cos, atan2, hypot + +from basisu_py.codec import Encoder, EncoderBackend +from basisu_py.constants import BasisTexFormat, BasisQuality, BasisEffort, BasisFlags + + +# -------------------------------------------------------------- +# Procedural swirl pattern (RGBA8) +# -------------------------------------------------------------- +def make_swirl_image(w=256, h=256): + arr = np.zeros((h, w, 4), dtype=np.uint8) + + cx = w / 2.0 + cy = h / 2.0 + + for y in range(h): + for x in range(w): + dx = x - cx + dy = y - cy + + dist = hypot(dx, dy) + angle = atan2(dy, dx) + + r = int((sin(dist * 0.15) * 0.5 + 0.5) * 255) + g = int((sin(angle * 3.0) * 0.5 + 0.5) * 255) + b = int((cos(dist * 0.10 + angle * 2.0) * 0.5 + 0.5) * 255) + + arr[y, x] = (r, g, b, 255) + + return arr + + +# -------------------------------------------------------------- +# Test encode using a given backend +# -------------------------------------------------------------- +def compress_swirl(backend, outfile): + print(f"\n========== Testing {backend} backend ==========") + + # Build procedural image + swirl = make_swirl_image(256, 256) + print("Generated swirl image:", swirl.shape) + + # Create encoder + enc = Encoder(backend=backend) + + # Compress + blob = enc.compress( + swirl, + format=BasisTexFormat.cUASTC_LDR_4x4, + quality=BasisQuality.MAX, + effort=BasisEffort.DEFAULT, + flags=BasisFlags.KTX2_OUTPUT | BasisFlags.SRGB + ) + + print(f"Compressed blob size: {len(blob)} bytes") + + # Save output + with open(outfile, "wb") as f: + f.write(blob) + + print(f"Wrote: {outfile}") + print("==============================================") + + +# -------------------------------------------------------------- +# Main +# -------------------------------------------------------------- +if __name__ == "__main__": + # Test native backend + try: + compress_swirl(EncoderBackend.NATIVE, "swirl_native.ktx2") + except Exception as e: + print("Native backend ERROR:", e) + + # Test WASM backend + try: + 
compress_swirl(EncoderBackend.WASM, "swirl_wasm.ktx2") + except Exception as e: + print("WASM backend ERROR:", e) diff --git a/external/basis_universal/python/tests/test_compress_swirl_hdr.py b/external/basis_universal/python/tests/test_compress_swirl_hdr.py new file mode 100644 index 0000000000..34e0adb9ff --- /dev/null +++ b/external/basis_universal/python/tests/test_compress_swirl_hdr.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 +import numpy as np +from math import sin, cos, atan2, hypot +from basisu_py.codec import Encoder, EncoderBackend +from basisu_py.constants import BasisTexFormat, BasisQuality, BasisEffort, BasisFlags + + +# -------------------------------------------------------------- +# Procedural HDR swirl pattern (float32 RGBA) +# -------------------------------------------------------------- +def make_hdr_swirl_image(w=256, h=256): + arr = np.zeros((h, w, 4), dtype=np.float32) + + cx = w / 2.0 + cy = h / 2.0 + + for y in range(h): + for x in range(w): + dx = x - cx + dy = y - cy + dist = hypot(dx, dy) + angle = atan2(dy, dx) + + r = (sin(dist * 0.15) * 0.5 + 0.5) + g = (sin(angle * 3.0) * 0.5 + 0.5) + b = (cos(dist * 0.10 + angle * 2.0) * 0.5 + 0.5) + + arr[y, x] = (r, g, b, 1.0) # full alpha + + return arr + + +# -------------------------------------------------------------- +# Test encode using a given backend +# -------------------------------------------------------------- +def compress_hdr_swirl(backend, outfile): + print(f"\n========== Testing HDR {backend} backend ==========") + + hdr = make_hdr_swirl_image(256, 256) + print("Generated HDR swirl image:", hdr.shape, hdr.dtype) + + enc = Encoder(backend=backend) + + blob = enc.compress( + hdr, + format=-1, # auto-select HDR (UASTC_HDR_4x4) + quality=BasisQuality.MAX, + effort=BasisEffort.DEFAULT, + flags=BasisFlags.KTX2_OUTPUT | BasisFlags.SRGB + ) + + print(f"Compressed blob size: {len(blob)} bytes") + + with open(outfile, "wb") as f: + f.write(blob) + + print(f"Wrote: {outfile}") + 
print("==============================================") + + +# -------------------------------------------------------------- +# Main +# -------------------------------------------------------------- +if __name__ == "__main__": + # Native backend + try: + compress_hdr_swirl(EncoderBackend.NATIVE, "hdr_swirl_native.ktx2") + except Exception as e: + print("Native HDR backend ERROR:", e) + + # WASM backend + try: + compress_hdr_swirl(EncoderBackend.WASM, "hdr_swirl_wasm.ktx2") + except Exception as e: + print("WASM HDR backend ERROR:", e) diff --git a/external/basis_universal/python/tests/test_transcoder_astc.py b/external/basis_universal/python/tests/test_transcoder_astc.py new file mode 100644 index 0000000000..306834bde0 --- /dev/null +++ b/external/basis_universal/python/tests/test_transcoder_astc.py @@ -0,0 +1,18 @@ +from basisu_py import Transcoder +from astc_writer import write_astc_file + +# Load a .ktx2 +data = open("input.ktx2", "rb").read() +t = Transcoder() + +# Transcode to ASTC +h = t.open(data) +bw = t.get_block_width(h) # or basis_get_block_width(astc_tfmt) +bh = t.get_block_height(h) +tfmt = t.basis_get_transcoder_texture_format_from_basis_tex_format( + t.get_basis_tex_format(h) +) + +blocks = t.transcode_tfmt(data, tfmt) +write_astc_file("output.astc", blocks, bw, bh, t.get_width(h), t.get_height(h)) +t.close(h) diff --git a/external/basis_universal/python/tests/test_transcoder_backend_loading.py b/external/basis_universal/python/tests/test_transcoder_backend_loading.py new file mode 100644 index 0000000000..e0d1ed2f4a --- /dev/null +++ b/external/basis_universal/python/tests/test_transcoder_backend_loading.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 +import sys +from basisu_py.transcoder import Transcoder, TranscoderBackend +from basisu_py.constants import BasisTexFormat + +print("========== TESTING TRANSCODER BACKENDS ==========\n") + +# Load some test data (ensure test.ktx2 exists) +try: + test_data = open("test.ktx2", "rb").read() + print("[INFO] 
Loaded test.ktx2") +except FileNotFoundError: + print("[ERROR] test.ktx2 not found. Create one first via encoder tests.") + sys.exit(1) + + +# ------------------------------------------------------------------- +# 1. Test NATIVE backend +# ------------------------------------------------------------------- +print("\n--- Testing NATIVE transcoder backend ---") + +try: + t_native = Transcoder(TranscoderBackend.NATIVE) + print(" [OK] Native backend loaded") + + version = t_native.get_version() + print(f" Native get_version() = {version}") + + # Open KTX2 + raw = t_native.open(test_data) + print(" [OK] Opened KTX2 (native)") + + # Query some basic properties + print(" Width :", t_native.get_width(raw)) + print(" Height:", t_native.get_height(raw)) + print(" Levels:", t_native.get_levels(raw)) + + # Cleanup + t_native.close(raw) + print(" [OK] Native transcoder basic operations working.") + +except Exception as e: + print(" [FAIL] Native transcoder error:", e) + + +# ------------------------------------------------------------------- +# 2. 
Test WASM backend +# ------------------------------------------------------------------- +print("\n--- Testing WASM transcoder backend ---") + +try: + t_wasm = Transcoder(TranscoderBackend.WASM) + print(" [OK] WASM backend loaded") + + version = t_wasm.get_version() + print(f" WASM get_version() = {version}") + + raw = t_wasm.open(test_data) + print(" [OK] Opened KTX2 (wasm)") + + print(" Width :", t_wasm.get_width(raw)) + print(" Height:", t_wasm.get_height(raw)) + print(" Levels:", t_wasm.get_levels(raw)) + + t_wasm.close(raw) + print(" [OK] WASM transcoder basic operations working.") + +except Exception as e: + print(" [FAIL] WASM transcoder error:", e) + + +print("\n========== DONE ==========") diff --git a/external/basis_universal/python/tests/test_transcoder_end_to_end.py b/external/basis_universal/python/tests/test_transcoder_end_to_end.py new file mode 100644 index 0000000000..5e4825aa50 --- /dev/null +++ b/external/basis_universal/python/tests/test_transcoder_end_to_end.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python3 +""" +Full end-to-end transcoder test with automatic fallback. 
+ +- Generates a swirl image +- Compresses it using native OR WASM (AUTO mode) +- Writes test.ktx2 +- Decodes it using whichever backends are available: + * AUTO (native if present, otherwise WASM) + * Native (if available) + * WASM (if available) +- Produces PNG outputs for all successful backends +""" + +import numpy as np +from math import sin, cos, atan2, hypot +from PIL import Image +import sys + +from basisu_py.codec import Encoder, EncoderBackend +from basisu_py.transcoder import Transcoder, TranscoderBackend +from basisu_py.constants import ( + BasisTexFormat, + BasisQuality, + BasisEffort, + BasisFlags, +) + + +# ------------------------------------------------------------------- +# Create an RGBA swirl test image +# ------------------------------------------------------------------- +def make_swirl(w=256, h=256): + arr = np.zeros((h, w, 4), dtype=np.uint8) + + cx, cy = w / 2.0, h / 2.0 + + for y in range(h): + for x in range(w): + dx, dy = x - cx, y - cy + dist = hypot(dx, dy) + angle = atan2(dy, dx) + + r = int((sin(dist * 0.15) * 0.5 + 0.5) * 255) + g = int((sin(angle * 3.0) * 0.5 + 0.5) * 255) + b = int((cos(dist * 0.10 + angle * 2.0) * 0.5 + 0.5) * 255) + + arr[y, x] = (r, g, b, 255) + + return arr + + +# ------------------------------------------------------------------- +# Try loading transcoder with a backend, return (success, transcoder) +# ------------------------------------------------------------------- +def try_transcoder(backend): + try: + t = Transcoder(backend) + print(f"[OK] Loaded transcoder backend '{backend}' ({t.backend_name})") + return True, t + except Exception as e: + print(f"[SKIP] Backend '{backend}' unavailable:", e) + return False, None + + +# ------------------------------------------------------------------- +# Try loading encoder with a backend, return blob or None +# ------------------------------------------------------------------- +def try_encoder(backend, img): + try: + enc = Encoder(backend) + print(f"[OK] Loaded 
encoder backend '{backend}' ({enc.backend_name})") + except Exception as e: + print(f"[SKIP] Encoder backend '{backend}' unavailable:", e) + return None + + try: + print(f"[Test] Compressing swirl -> KTX2 using {enc.backend_name}...") + blob = enc.compress( + img, + format=-1, + quality=BasisQuality.MAX, + effort=BasisEffort.DEFAULT, + flags=BasisFlags.KTX2_OUTPUT | BasisFlags.SRGB + ) + return blob + except Exception as e: + print(f"[FAIL] Compression failed on backend '{backend}':", e) + return None + + +# ------------------------------------------------------------------- +# Decode blob with a given transcoder +# ------------------------------------------------------------------- +def decode_with_backend(name, t, blob): + try: + rgba = t.decode_rgba(blob) + outname = f"decoded_{name}.png" + Image.fromarray(rgba, mode="RGBA").save(outname) + print(f" --> {name}: decoded successfully, wrote {outname}") + except Exception as e: + print(f" [FAIL] decode_rgba on backend '{name}':", e) + + +# ------------------------------------------------------------------- +# Main test +# ------------------------------------------------------------------- +if __name__ == "__main__": + print("========== BasisU End-to-End Compression & Transcoding Test ==========") + + # ------------------------------------------------------- + # Generate swirl test + # ------------------------------------------------------- + img = make_swirl(256, 256) + print("[Test] Generated swirl:", img.shape) + + # ------------------------------------------------------- + # Try AUTO encoder (native if available, else WASM) + # ------------------------------------------------------- + blob = try_encoder(EncoderBackend.AUTO, img) + if blob is None: + print("[FAIL] Could not encode using AUTO backend; aborting.") + sys.exit(1) + + # Save test.ktx2 + with open("test.ktx2", "wb") as f: + f.write(blob) + print("[Test] Wrote: test.ktx2") + + # ------------------------------------------------------- + # Test 
transcoding using AUTO + # ------------------------------------------------------- + print("\n[Test] Decoding via AUTO backend...") + ok_auto, t_auto = try_transcoder(TranscoderBackend.AUTO) + if ok_auto: + decode_with_backend("auto", t_auto, blob) + + # ------------------------------------------------------- + # Test NATIVE explicitly (if available) + # ------------------------------------------------------- + print("\n[Test] Decoding via NATIVE backend...") + ok_native, t_native = try_transcoder(TranscoderBackend.NATIVE) + if ok_native: + decode_with_backend("native", t_native, blob) + + # ------------------------------------------------------- + # Test WASM explicitly (if available) + # ------------------------------------------------------- + print("\n[Test] Decoding via WASM backend...") + ok_wasm, t_wasm = try_transcoder(TranscoderBackend.WASM) + if ok_wasm: + decode_with_backend("wasm", t_wasm, blob) + + print("\n========== DONE ==========") diff --git a/external/basis_universal/python/tests/test_transcoder_end_to_end_hdr.py b/external/basis_universal/python/tests/test_transcoder_end_to_end_hdr.py new file mode 100644 index 0000000000..d1fceb1011 --- /dev/null +++ b/external/basis_universal/python/tests/test_transcoder_end_to_end_hdr.py @@ -0,0 +1,175 @@ +#!/usr/bin/env python3 +""" +HDR End-to-End Compression & Transcoding Test +Works on all platforms: + - native if available + - WASM fallback otherwise +""" + +import numpy as np +from math import sin, cos, atan2, hypot +from PIL import Image +import subprocess +import tempfile +import os +import imageio.v3 as iio + +from basisu_py.codec import Encoder, EncoderBackend +from basisu_py.transcoder import Transcoder, TranscoderBackend +from basisu_py.constants import ( + BasisTexFormat, + BasisQuality, + BasisEffort, + BasisFlags +) + + +# ------------------------------------------------------------------- +# Save EXR using TIFF temp + oiiotool (as required) +# 
------------------------------------------------------------------- +def save_exr(path, rgba32f): + """ + Save float32 RGBA as EXR if possible. + If oiiotool is not available, save TIFF instead (Windows-safe). + """ + import numpy as np + import imageio.v3 as iio + import subprocess, tempfile, os + + # Write temp TIFF + with tempfile.NamedTemporaryFile(suffix=".tiff", delete=False) as tmp: + temp_path = tmp.name + + iio.imwrite(temp_path, rgba32f.astype(np.float32)) + + # Try EXR via oiiotool + try: + subprocess.run(["oiiotool", temp_path, "-o", path], check=True) + os.remove(temp_path) + print(" Wrote EXR:", path) + return + + except Exception: + # --- FALLBACK: save TIFF --- + fallback_path = path + ".tiff" + + # Windows cannot overwrite files via rename(), so remove first + if os.path.exists(fallback_path): + os.remove(fallback_path) + + # os.replace() always overwrites + os.replace(temp_path, fallback_path) + + print(" [Fallback] Wrote TIFF instead:", fallback_path) + +# ------------------------------------------------------------------- +# Generate HDR swirl image (float32) +# ------------------------------------------------------------------- +def make_swirl_hdr(w=256, h=256): + arr = np.zeros((h, w, 4), dtype=np.float32) + cx, cy = w / 2.0, h / 2.0 + + for y in range(h): + for x in range(w): + dx, dy = x - cx, y - cy + dist = hypot(dx, dy) + angle = atan2(dy, dx) + + # HDR values range up to about 4.0 + r = (sin(dist * 0.08) * 0.5 + 0.5) * 4.0 + g = (sin(angle * 2.0) * 0.5 + 0.5) * 4.0 + b = (cos(dist * 0.06 + angle * 1.5) * 0.5 + 0.5) * 4.0 + + arr[y, x] = (r, g, b, 1.0) + + return arr + + +# ------------------------------------------------------------------- +# Try loading a transcoder backend +# ------------------------------------------------------------------- +def try_transcoder(name, backend): + try: + t = Transcoder(backend) + print(f"[OK] Loaded transcoder backend '{name}' ({t.backend_name})") + return t + except Exception as e: + print(f"[SKIP] 
Backend '{name}' unavailable:", e) + return None + + +# ------------------------------------------------------------------- +# MAIN +# ------------------------------------------------------------------- +if __name__ == "__main__": + print("========== HDR End-to-End Compression & Transcoding Test ==========") + + # ------------------------------------------------------- + # Create HDR test image + # ------------------------------------------------------- + img_hdr = make_swirl_hdr(256, 256) + print("[HDR] swirl:", img_hdr.shape, img_hdr.dtype) + + # ------------------------------------------------------- + # ENCODE using AUTO backend (native ? or WASM) + # ------------------------------------------------------- + try: + enc = Encoder(EncoderBackend.AUTO) + print(f"[HDR] Encoder backend = {enc.backend_name}") + except Exception as e: + print("[FATAL] Could not create encoder:", e) + exit(1) + + try: + print("[HDR] Compressing HDR swirl -> test_hdr.ktx2...") + ktx2_blob = enc.compress( + img_hdr, + format=-1, # auto-select HDR format + quality=BasisQuality.MAX, + effort=BasisEffort.DEFAULT, + flags=BasisFlags.KTX2_OUTPUT + ) + print(" KTX2 size:", len(ktx2_blob)) + open("test_hdr.ktx2", "wb").write(ktx2_blob) + print(" Wrote test_hdr.ktx2") + except Exception as e: + print("[FATAL] Encoding failed:", e) + exit(1) + + # ------------------------------------------------------- + # DECODE using AUTO (native ? 
or WASM) + # ------------------------------------------------------- + t_auto = try_transcoder("AUTO", TranscoderBackend.AUTO) + if t_auto: + try: + hdr = t_auto.decode_rgba_hdr(ktx2_blob) + print(" AUTO decoded:", hdr.shape, hdr.dtype) + save_exr("decoded_auto_hdr.exr", hdr) + except Exception as e: + print(" [FAIL] AUTO decode failed:", e) + + # ------------------------------------------------------- + # DECODE using NATIVE if available + # ------------------------------------------------------- + t_native = try_transcoder("NATIVE", TranscoderBackend.NATIVE) + if t_native: + try: + hdr_n = t_native.decode_rgba_hdr(ktx2_blob) + print(" Native decoded:", hdr_n.shape, hdr_n.dtype) + save_exr("decoded_native_hdr.exr", hdr_n) + except Exception as e: + print(" [FAIL] Native decode failed:", e) + + # ------------------------------------------------------- + # DECODE using WASM if available + # ------------------------------------------------------- + t_wasm = try_transcoder("WASM", TranscoderBackend.WASM) + if t_wasm: + try: + hdr_w = t_wasm.decode_rgba_hdr(ktx2_blob) + print(" WASM decoded:", hdr_w.shape, hdr_w.dtype) + save_exr("decoded_wasm_hdr.exr", hdr_w) + except Exception as e: + print(" [FAIL] WASM decode failed:", e) + + print("\n========== DONE ==========") diff --git a/external/basis_universal/python/tests/test_transcoder_helpers.py b/external/basis_universal/python/tests/test_transcoder_helpers.py new file mode 100644 index 0000000000..31245c0000 --- /dev/null +++ b/external/basis_universal/python/tests/test_transcoder_helpers.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python3 +import sys +import numpy as np + +from basisu_py.transcoder import Transcoder, TranscoderBackend +from basisu_py.constants import BasisTexFormat, TranscoderTextureFormat + +print("========== TESTING TRANSCODER HELPERS & METADATA ==========\n") + +# ---------------------------------------------------------------------------- +# Load test KTX2 file +# 
---------------------------------------------------------------------------- +try: + ktx2_bytes = open("test.ktx2", "rb").read() + print("[INFO] Loaded test.ktx2") +except FileNotFoundError: + print("[ERROR] test.ktx2 not found. Run encoder tests first.") + sys.exit(1) + + +# ---------------------------------------------------------------------------- +# Utility: run helper tests on a given backend +# ---------------------------------------------------------------------------- +def test_backend(name, backend): + print(f"\n=== Testing {name} backend ===") + + try: + t = Transcoder(backend) + except Exception as e: + print(f"[FAIL] Could not initialize {name} backend:", e) + return + + print(f"[OK] {name} backend loaded") + + # Version + try: + ver = t.get_version() + print(f" version = {ver}") + except Exception as e: + print(" [FAIL] get_version() error:", e) + return + + # enable_debug_printf + try: + t.enable_debug_printf(True) + except Exception as e: + print(" [FAIL] enable_debug_printf() failed") + return + + # Open KTX2 + try: + raw = t.open(ktx2_bytes) + print(" [OK] open() success") + except Exception as e: + print(" [FAIL] open() failed:", e) + return + + # ---------------------------------------------------------------------- + # KTX2 top-level metadata + # ---------------------------------------------------------------------- + try: + w = t.get_width(raw) + h = t.get_height(raw) + lv = t.get_levels(raw) + fc = t.get_faces(raw) + la = t.get_layers(raw) + fmt = t.get_basis_tex_format(raw) + + print(f" Width = {w}") + print(f" Height = {h}") + print(f" Levels = {lv}") + print(f" Faces = {fc}") + print(f" Layers = {la}") + print(f" basis_tex_format = {fmt}") + print(f" has_alpha = {t.has_alpha(raw)}") + print(f" is_hdr = {t.is_hdr(raw)}") + print(f" is_ldr = {t.is_ldr(raw)}") + print(f" is_srgb = {t.is_srgb(raw)}") + print(f" is_etc1s = {t.is_etc1s(raw)}") + print(f" is_uastc_ldr_4x4 = {t.is_uastc_ldr_4x4(raw)}") + print(f" is_xuastc_ldr = 
{t.is_xuastc_ldr(raw)}") + print(f" is_astc_ldr = {t.is_astc_ldr(raw)}") + print(f" block dims = {t.get_block_width(raw)} x {t.get_block_height(raw)}") + + except Exception as e: + print(" [FAIL] get_* metadata error:", e) + t.close(raw) + return + + # ---------------------------------------------------------------------- + # Per-level metadata for each mipmap + # ---------------------------------------------------------------------- + print("\n -- Level Metadata --") + for level in range(lv): + try: + ow = t.get_level_orig_width(raw, level) + oh = t.get_level_orig_height(raw, level) + nbx = t.get_level_num_blocks_x(raw, level) + nby = t.get_level_num_blocks_y(raw, level) + tb = t.get_level_total_blocks(raw, level) + af = t.get_level_alpha_flag(raw, level) + ff = t.get_level_iframe_flag(raw, level) + + print(f" Level {level}: orig={ow}x{oh}, blocks={nbx}x{nby}, total={tb}, alpha={af}, iframe={ff}") + except Exception as e: + print(f" [FAIL] Level {level} metadata error:", e) + + # ---------------------------------------------------------------------- + # Test ALL basis_tex_format helpers on the file's format + # ---------------------------------------------------------------------- + print("\n -- basis_tex_format helpers --") + + try: + print(f" is_xuastc_ldr = {t.basis_tex_format_is_xuastc_ldr(fmt)}") + print(f" is_astc_ldr = {t.basis_tex_format_is_astc_ldr(fmt)}") + print(f" block W/H = {t.basis_tex_format_get_block_width(fmt)} x " + f"{t.basis_tex_format_get_block_height(fmt)}") + print(f" is_hdr = {t.basis_tex_format_is_hdr(fmt)}") + print(f" is_ldr = {t.basis_tex_format_is_ldr(fmt)}") + except Exception as e: + print(" [FAIL] basis_tex_format_* error:", e) + + # ---------------------------------------------------------------------- + # Test transcoder_texture_format helpers using a few common formats + # ---------------------------------------------------------------------- + print("\n -- transcoder_texture_format helpers --") + + test_formats = [ + 
TranscoderTextureFormat.TF_RGBA32, + TranscoderTextureFormat.TF_RGBA_HALF, + TranscoderTextureFormat.TF_BC7_RGBA, + TranscoderTextureFormat.TF_ETC1_RGB, + ] + + for tfmt in test_formats: + try: + print(f" Format {tfmt}: hdr={t.basis_transcoder_format_is_hdr(tfmt)}, " + f"ldr={t.basis_transcoder_format_is_ldr(tfmt)}, " + f"has_alpha={t.basis_transcoder_format_has_alpha(tfmt)}, " + f"uncompressed={t.basis_transcoder_format_is_uncompressed(tfmt)}, " + f"bytes/pixel or block={t.basis_get_bytes_per_block_or_pixel(tfmt)}") + except Exception as e: + print(" [FAIL] transcoder_texture_format_* error:", e) + + # ---------------------------------------------------------------------- + # Compute transcode buffer sizes + # ---------------------------------------------------------------------- + print("\n -- compute_transcoded_image_size_in_bytes --") + try: + for tfmt in test_formats: + sz = t.basis_compute_transcoded_image_size_in_bytes(tfmt, w, h) + print(f" Format {tfmt}: size = {sz}") + except Exception as e: + print(" [FAIL] size computation error:", e) + + # ---------------------------------------------------------------------- + # Decode RGBA (LDR) + # ---------------------------------------------------------------------- + print("\n -- decode_rgba --") + try: + img_rgba = t.decode_rgba(ktx2_bytes) + print(f" decode_rgba: shape={img_rgba.shape}, dtype={img_rgba.dtype}") + except Exception as e: + print(" [FAIL] decode_rgba error:", e) + + # ---------------------------------------------------------------------- + # Decode HDR if applicable + # ---------------------------------------------------------------------- + if t.is_hdr(raw): + print("\n -- decode_rgba_hdr --") + try: + img_hdr = t.decode_rgba_hdr(ktx2_bytes) + print(f" decode_rgba_hdr: shape={img_hdr.shape}, dtype={img_hdr.dtype}") + except Exception as e: + print(" [FAIL] decode_rgba_hdr error:", e) + else: + print(" Texture is LDR; skipping decode_rgba_hdr().") + + # Cleanup + t.close(raw) + print(f"\n=== 
{name} backend OK ===\n") + + +# ---------------------------------------------------------------------------- +# Run tests for both backends +# ---------------------------------------------------------------------------- +test_backend("NATIVE", TranscoderBackend.NATIVE) +test_backend("WASM", TranscoderBackend.WASM) + +print("\n========== DONE ==========\n") diff --git a/external/basis_universal/readme_wasi.md b/external/basis_universal/readme_wasi.md new file mode 100644 index 0000000000..270dc81768 --- /dev/null +++ b/external/basis_universal/readme_wasi.md @@ -0,0 +1,167 @@ +# README_WASI.md + +## Building and running Basis Universal under WASI / Wasmtime + +This document describes how to build the `basisu` command-line tool as a WASI (WebAssembly System Interface) executable, and how to run it using Wasmtime. +WASI builds run the encoder inside a secure, portable WebAssembly sandbox with no native dependencies. + +--- + +## 1. Install Wasmtime + +Install Wasmtime using the official installer: + +``` +curl https://wasmtime.dev/install.sh | bash +``` + +Verify: + +``` +wasmtime --version +``` + +--- + +## 2. Install WASI-SDK (WASI toolchain) + +Download the latest WASI SDK from: + +https://github.com/WebAssembly/wasi-sdk/releases/latest +https://github.com/WebAssembly/wasi-sdk/releases + +Example (adjust version if needed): + +``` +wget https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-29/wasi-sdk-29.0-x86_64-linux.tar.gz +tar xf wasi-sdk-29.0-x86_64-linux.tar.gz +``` + +--- + +## 3. Set the WASI_SDK_PATH environment variable + +You must set the path so CMake can find the WASI compiler: + +``` +export WASI_SDK_PATH=/path/to/wasi-sdk-29.0-x86_64-linux +``` + +Example: + +``` +export WASI_SDK_PATH=$HOME/wasi-sdk-29.0-x86_64-linux +``` + +Verify: + +``` +$WASI_SDK_PATH/bin/clang --version +``` + +--- + +## 4. 
Configure the WASI build using CMake + +WASI builds come in two modes: +- Single-threaded (default) +- Multi-threaded (requires wasi-sdk-pthread.cmake and Wasmtime threading flags) + +Create a fresh build directory and configure using the WASI toolchain file: + +``` +mkdir build +cd build +cmake .. -DCMAKE_TOOLCHAIN_FILE=$WASI_SDK_PATH/share/cmake/wasi-sdk-pthread.cmake -DCMAKE_BUILD_TYPE=Release -DBASISU_WASM_THREADING=ON +``` + +Or for a single threaded build (will run much slower): + +``` +cmake .. -DCMAKE_TOOLCHAIN_FILE=$WASI_SDK_PATH/share/cmake/wasi-sdk.cmake -DCMAKE_BUILD_TYPE=Release -DBASISU_WASM_THREADING=OFF +``` + +--- + +## 5. Build the WASI `.wasm` executable + +Build using: + +``` +make +``` + +This produces: + +``` +bin/basisu.wasm +bin/examples.wasm (if EXAMPLES=ON) +``` + +--- + +## 6. Running `basisu.wasm` with Wasmtime + +WASI programs are sandboxed and cannot access your filesystem unless you explicitly grant permission. + +Use one or more `--dir=` arguments to allow input/output files. + +### Run internal test suite for ETC1S + +``` +bin$ wasmtime run --wasm threads=yes --wasi threads=yes --dir=. --dir=.. --dir=../test_files basisu.wasm -test +``` + +Use backslashes under Windows: "wasmtime run --wasm threads=yes --wasi threads=yes --dir=. --dir=.. --dir=..\test_files basisu.wasm -test" + +For the single threaded wasm executables, "--wasm threads=yes --wasi threads=yes" isn't needed. + +A Windows .cmd batch script example: + +``` +wasmtime --dir=. --dir=.. --dir=..\test_files --dir=d:/dev/test_images::/test_images --dir=d:/dev/test_images/bik::/bik basisu.wasm %* +``` + +A shell script example: + +``` +#!/usr/bin/env bash +wasmtime run --dir=. --dir=../test_files --dir=/mnt/d/dev/test_images::/test_images --dir=/mnt/d/dev/test_images/bik::/test_images/bik --wasm threads=yes --wasi threads=yes ./basisu.wasm "$@" +``` + +### Example: run compression on a PNG to ETC1S + +``` +wasmtime run --wasm threads=yes --wasi threads=yes --dir=. 
basisu.wasm xmen.png -stats +``` + +### Example: transcode a KTX2 file to .ktx/.png/etc. + +``` +wasmtime run --wasm threads=yes --wasi threads=yes --dir=. basisu.wasm xmen.ktx2 + +``` + +--- + +## Notes + +- WASI builds run inside a secure sandbox with no filesystem access unless explicitly granted via `--dir=`. +- The CMake configuration sets a larger stack size to support ASTC/UASTC compression. +- WASI SDK and Wasmtime can be installed anywhere; just update `WASI_SDK_PATH`. + +--- + +## Summary + +To build and run BasisU under WASI: + +1. Install **Wasmtime** +2. Install **WASI SDK** +3. Set **WASI_SDK_PATH** +4. Run **cmake** using the WASI toolchain in "build" directory +5. Build with **make** +6. Run using **wasmtime** with `--dir=` permissions on .wasm executables in "bin" directory + +This produces a safe, portable, sandboxed version of the Basis Universal encoder that runs anywhere. + diff --git a/external/basis_universal/shader_deblocking/README.md b/external/basis_universal/shader_deblocking/README.md new file mode 100644 index 0000000000..bcd0004a1b --- /dev/null +++ b/external/basis_universal/shader_deblocking/README.md @@ -0,0 +1,135 @@ +# Python+GLSL Shader Deblocking Sample + +*Block boundaries are predictable.* + +This sample demonstrates how to use a simple pixel shader to greatly reduce +ASTC texture block artifacts, which can be quite noticeable when the block size goes +beyond roughly 6x6. The basic idea: instead of always sampling the texture using +a single tap, you instead sample the texture either one time or X times with a simple low pass filter, +depending on whether or not the sample location is near a block edge. The multiple filter taps around +the center sample blur across block boundaries of ASTC compressed textures. There are two independent filters, for horizontal and vertical block boundaries. + +The example shader is compatible with mipmapping, bilinear filtering, trilinear filtering etc. and is temporally stable. 
The +shader smoothly lerps between no filtering and edge filtering, and is mipmap-aware by using the pixel shader derivative instructions. Crucially, the block lattice is evaluated in the *effective mip space*, not in base texture space, which is why it's mipmap-aware. The Python sample shows either a textured quad or a cube, with various controls to move the object, rotate the cube, toggle the deblocking shader on/off, trilinear off/on, etc. + +It was written to be as simple as possible. The shader's filter coefficients were picked for more blurring vs. our CPU deblocker to demonstrate the effect, but they are easily tuned. The shader could easily be more optimized (most inner block texels don't need any filtering, but we don't exploit this in the shader with a conditional yet). The idea is compatible with other texture formats with noticeable block artifacts, such as BC1 or ETC1, but 4x4 blocks are so tiny it may be a wash. + +It's also possible to add adaptivity to this shader, so it doesn't blindly blur across sharp edges - like we do while +CPU deblocking before transcoding to BC7 or other LDR texture formats. It's also possible +to add deblocking filter awareness to our ASTC/XUASTC/etc. encoders. + +The shader computes the mipmap LOD index assuming an entire mip chain is present (i.e. down to 1x1 texels). If the mipmap chain is incomplete, the developer should clamp the computed mipmap LOD index in the shader. We'll show this in the next update of the sample. + +Note: We're amazed the GPU hardware vendors haven't implemented this feature directly in silicon yet. It's extremely useful, even necessary at the largest ASTC block sizes. +This is a form of "GPU texture compression-aware shading" or "GPU format-informed reconstruction". 
+ +## Running the Sample + +You'll need these Python dependencies to run it: +``` +pip install numpy Pillow glfw PyOpenGL +``` + +You may also want "PyOpenGL_accelerate", and under Linux you may need to also install the system package "libglfw3". We developed this sample under Windows 11. + +Run: + +``` +python testbed.py shader.glsl 12 12 flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_0_face_0_layer_0000.png flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_1_face_0_layer_0000.png flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_2_face_0_layer_0000.png flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_3_face_0_layer_0000.png flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_4_face_0_layer_0000.png +``` + +Depending on your setup you may need to use `python3`, or `py -3.12` etc. + +Keys: `1` toggles deblocking shader on/off (it defaults to disabled), `2` toggles edges vis (only shows when deblocking is enabled), arrow keys move the object, `W`/`S` keys: forward/backward, `A`/`D`/`Q`/`E`: yaw/pitch, `C`: toggle cube vs. quad. (See the source code remarks for more.) + +The shader can be easily simplified to sample the texture less by using fewer taps. The current shader uses a total of 9 taps, but 5 are possible. + +Many variations and optimizations of this basic idea are possible. *Now shader engineers can directly impact memory consumption.* The better your deblocking shaders are tuned for your specific content, the bigger the ASTC block size you can ship. ASTC texture deblocking pixel shader engineering is now a memory optimization skill. + +A simpler, [faster 5 tap filter variant is here](https://pastebin.com/jsF6nUZg). (This shader isn't quite compatible with the currently checked in Python sample, as it uses a new `maxLod` uniform to limit the max fetched mipmap level.) In practice, this simple shader usually looks just as good on most textures/images. `textureQueryLod()` and `textureSize()` could also be used (with some APIs) to simplify the shader further. 
We're continuing to refine and tune this shader. + +--- + +## Performance + +The deblocking shader itself can be put inside a dynamic `if` conditional, so the extra ALU/texture ops only kick in near block edges (which are the minority of samples at ASTC 12x12). The extra sample taps are spatially always near the center sample, so they'll hit the texture cache most of the time. In texture bandwidth bound rendering scenarios (quite common on mobile platforms), the extra ALU ops for deblocking likely come for "free". + +*Another perspective: The alternative to deblocking is ~2x-8x more GPU memory bandwidth (and increased download size) to use smaller ASTC block sizes which have less noticeable block artifacts.* + +--- + +## Screenshots - ASTC 12x12 Block Size + +**Disabled:** +![Screenshot 1: Off](screenshots/1_off.png) + +**Enabled:** +![Screenshot 1: On](screenshots/1_on.png) + +--- + +**Disabled:** +![Screenshot 2: Off](screenshots/2_off.png) + +**Enabled:** +![Screenshot 2: On](screenshots/2_on.png) + +--- + +**Disabled:** +![Screenshot 3: Off](screenshots/3_off.png) + +**Enabled:** +![Screenshot 3: On](screenshots/3_on.png) + +--- + +**Block Edge Computation Visualization:** +![Edge Vis](screenshots/edge_vis.png) +![Edge Vis 2](screenshots/edge_vis2.png) + +These screenshots show how the pixel shader computes texture block boundaries in effective mipmap space. To see this visualization, press '1' to enable deblocking, then '2' to enable block edge visualization. Only white areas in this visualization are modified by this shader, leaving the inner block texels unmodified. + +**Obviously, it's crucial that the block size passed into the shader via constants exactly matches the ASTC texture's block size, or the filtering applied won't align with the actual ASTC block artifacts.** + +--- + +## Usage and Controls + +The sample either renders a single textured quad or a cube. Press 'C' to toggle between the quad and the cube. 
The '1' key toggles shader deblocking (off by default). The '2' key enables edge visualization, which only works when deblocking is enabled. + +Other keys can be used to move around the quad, rotate the cube etc.: + +``` +Usage: + python testbed.py shader.glsl block_w block_h mip0.png mip1.png [mip2.png ...] + block_w, block_h: Block size in texels (e.g. 8 8 for 8x8 DCT blocks) + +Controls: + Arrows Move quad left/right/up/down + W / S Move closer / farther + A / D Rotate yaw (cube mode) + Q / E Rotate pitch (cube mode) + C Toggle cube / quad mode + B Bilinear filtering + T Trilinear filtering + P Point filtering + R Reload shader + 1 Toggle deblocking shader off/on + 2 Toggle edge visualization (only when deblocking active) + 3-4 Toggle shader const0.x/y/z/w (0 <-> 1) + 5-8 Toggle shader const1.x/y/z/w (0 <-> 1) + Space Reset to initial state + Esc Quit +``` + +--- + +## Credits + +The included sunflower image is in the CC0/Public Domain, and was downloaded from here: + +https://www.publicdomainpictures.net/en/view-image.php?image=756601&picture=large-yellow-sunflower + +"License: CC0 Public Domain - Lynn Greyling has released this “Large Yellow Sunflower” image under Public Domain license." 
+ diff --git a/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_0_face_0_layer_0000.png b/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_0_face_0_layer_0000.png new file mode 100644 index 0000000000..5551f9bbad Binary files /dev/null and b/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_0_face_0_layer_0000.png differ diff --git a/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_10_face_0_layer_0000.png b/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_10_face_0_layer_0000.png new file mode 100644 index 0000000000..a3c9bd2ac0 Binary files /dev/null and b/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_10_face_0_layer_0000.png differ diff --git a/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_1_face_0_layer_0000.png b/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_1_face_0_layer_0000.png new file mode 100644 index 0000000000..f9ff90a6cd Binary files /dev/null and b/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_1_face_0_layer_0000.png differ diff --git a/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_2_face_0_layer_0000.png b/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_2_face_0_layer_0000.png new file mode 100644 index 0000000000..fd2812726f Binary files /dev/null and b/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_2_face_0_layer_0000.png differ diff --git a/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_3_face_0_layer_0000.png 
b/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_3_face_0_layer_0000.png new file mode 100644 index 0000000000..51287c87d3 Binary files /dev/null and b/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_3_face_0_layer_0000.png differ diff --git a/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_4_face_0_layer_0000.png b/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_4_face_0_layer_0000.png new file mode 100644 index 0000000000..1db85df557 Binary files /dev/null and b/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_4_face_0_layer_0000.png differ diff --git a/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_5_face_0_layer_0000.png b/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_5_face_0_layer_0000.png new file mode 100644 index 0000000000..00e5a9743d Binary files /dev/null and b/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_5_face_0_layer_0000.png differ diff --git a/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_6_face_0_layer_0000.png b/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_6_face_0_layer_0000.png new file mode 100644 index 0000000000..e9c9b16150 Binary files /dev/null and b/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_6_face_0_layer_0000.png differ diff --git a/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_7_face_0_layer_0000.png b/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_7_face_0_layer_0000.png new file mode 100644 index 0000000000..fc9eccd3f5 Binary files /dev/null and 
b/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_7_face_0_layer_0000.png differ diff --git a/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_8_face_0_layer_0000.png b/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_8_face_0_layer_0000.png new file mode 100644 index 0000000000..c9efb6c5cb Binary files /dev/null and b/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_8_face_0_layer_0000.png differ diff --git a/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_9_face_0_layer_0000.png b/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_9_face_0_layer_0000.png new file mode 100644 index 0000000000..d328ff1d54 Binary files /dev/null and b/external/basis_universal/shader_deblocking/flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_9_face_0_layer_0000.png differ diff --git a/external/basis_universal/shader_deblocking/run.bat b/external/basis_universal/shader_deblocking/run.bat new file mode 100644 index 0000000000..342dc2f22a --- /dev/null +++ b/external/basis_universal/shader_deblocking/run.bat @@ -0,0 +1 @@ +py -3.12 testbed.py shader.glsl 12 12 flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_0_face_0_layer_0000.png flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_1_face_0_layer_0000.png flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_2_face_0_layer_0000.png flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_3_face_0_layer_0000.png flower_unpacked_rgb_ASTC_LDR_12X12_RGBA_level_4_face_0_layer_0000.png diff --git a/external/basis_universal/shader_deblocking/screenshots/1_off.png b/external/basis_universal/shader_deblocking/screenshots/1_off.png new file mode 100644 index 0000000000..1f04f2c98a Binary files /dev/null and b/external/basis_universal/shader_deblocking/screenshots/1_off.png differ diff --git 
a/external/basis_universal/shader_deblocking/screenshots/1_on.png b/external/basis_universal/shader_deblocking/screenshots/1_on.png new file mode 100644 index 0000000000..e2d8e249be Binary files /dev/null and b/external/basis_universal/shader_deblocking/screenshots/1_on.png differ diff --git a/external/basis_universal/shader_deblocking/screenshots/2_off.png b/external/basis_universal/shader_deblocking/screenshots/2_off.png new file mode 100644 index 0000000000..bc88fbd2b7 Binary files /dev/null and b/external/basis_universal/shader_deblocking/screenshots/2_off.png differ diff --git a/external/basis_universal/shader_deblocking/screenshots/2_on.png b/external/basis_universal/shader_deblocking/screenshots/2_on.png new file mode 100644 index 0000000000..61315f7230 Binary files /dev/null and b/external/basis_universal/shader_deblocking/screenshots/2_on.png differ diff --git a/external/basis_universal/shader_deblocking/screenshots/3_off.png b/external/basis_universal/shader_deblocking/screenshots/3_off.png new file mode 100644 index 0000000000..2d5a4f3a57 Binary files /dev/null and b/external/basis_universal/shader_deblocking/screenshots/3_off.png differ diff --git a/external/basis_universal/shader_deblocking/screenshots/3_on.png b/external/basis_universal/shader_deblocking/screenshots/3_on.png new file mode 100644 index 0000000000..5ba5559c76 Binary files /dev/null and b/external/basis_universal/shader_deblocking/screenshots/3_on.png differ diff --git a/external/basis_universal/shader_deblocking/screenshots/edge_vis.png b/external/basis_universal/shader_deblocking/screenshots/edge_vis.png new file mode 100644 index 0000000000..7553330f2e Binary files /dev/null and b/external/basis_universal/shader_deblocking/screenshots/edge_vis.png differ diff --git a/external/basis_universal/shader_deblocking/screenshots/edge_vis2.png b/external/basis_universal/shader_deblocking/screenshots/edge_vis2.png new file mode 100644 index 0000000000..0e18c83c34 Binary files /dev/null and 
b/external/basis_universal/shader_deblocking/screenshots/edge_vis2.png differ diff --git a/external/basis_universal/shader_deblocking/shader.glsl b/external/basis_universal/shader_deblocking/shader.glsl new file mode 100644 index 0000000000..837c686462 --- /dev/null +++ b/external/basis_universal/shader_deblocking/shader.glsl @@ -0,0 +1,97 @@ +#vertex +#version 330 core + +layout(location = 0) in vec3 aPos; +layout(location = 1) in vec2 aUV; + +uniform mat4 mvp; + +out vec2 vUV; + +void main() { + vUV = aUV; + gl_Position = mvp * vec4(aPos, 1.0); +} + +#fragment +#version 330 core + +uniform sampler2D tex; +uniform vec4 texSize; // Base mip dimensions (mip 0) +uniform vec4 const0; // User constant 0 (keys 1-4 toggle x,y,z,w) +uniform vec4 const1; // User constant 1 (keys 5-8 toggle x,y,z,w) + +in vec2 vUV; +out vec4 fragColor; + +void main() +{ + vec2 blockSize = vec2(texSize.z, texSize.w); + + vec2 du = dFdx(vUV); + vec2 dv = dFdy(vUV); + float rho = max(length(du * texSize.xy), length(dv * texSize.xy)); + float lod = max(0.0, log2(max(rho, 1e-8))); // lod index + float mipScale = exp2(floor(lod + .5)); // 2^lod mipmap scale, snaps to dominant mipmap + + vec2 texDim = vec2(texSize.x, texSize.y); + vec2 texelStep = 1.0 / texDim; + vec2 texelPos = (vUV * texDim) / mipScale; + vec2 blockPos = mod(texelPos, blockSize); + + vec3 color; + color = texture(tex, vUV).rgb; + + if (const0.x > 0.5) + { + float falloff = 2.0; + + float leftProx = 1.0 - clamp(blockPos.x / falloff, 0.0, 1.0); + float rightProx = 1.0 - clamp((blockSize.x - 1.0 - blockPos.x) / falloff, 0.0, 1.0); + float topProx = 1.0 - clamp(blockPos.y / falloff, 0.0, 1.0); + float bottomProx = 1.0 - clamp((blockSize.y - 1.0 - blockPos.y) / falloff, 0.0, 1.0); + + float horizWeight = max(leftProx, rightProx); + float vertWeight = max(topProx, bottomProx); + float edgeWeight = max(horizWeight, vertWeight); // overall proximity + + vec3 c0 = color; //texture2D(tex, vUV).rgb; + + vec3 l2 = texture2D(tex, vUV - 
vec2(2 * texelStep.x * mipScale, 0.0)).rgb; + vec3 l1 = texture2D(tex, vUV - vec2(texelStep.x * mipScale, 0.0)).rgb; + vec3 r1 = texture2D(tex, vUV + vec2(texelStep.x * mipScale, 0.0)).rgb; + vec3 r2 = texture2D(tex, vUV + vec2(2 * texelStep.x * mipScale, 0.0)).rgb; + + vec3 u2 = texture2D(tex, vUV - vec2(0.0, 2 * texelStep.y * mipScale)).rgb; + vec3 u1 = texture2D(tex, vUV - vec2(0.0, texelStep.y * mipScale)).rgb; + vec3 d1 = texture2D(tex, vUV + vec2(0.0, texelStep.y * mipScale)).rgb; + vec3 d2 = texture2D(tex, vUV + vec2(0.0, 2 * texelStep.y * mipScale)).rgb; + + //vec3 filteredH = (l2 + 2 * l1 + 3 * c0 + 2 * r1 + r2) / 9.0; + //vec3 filteredV = (u2 + 2 * u1 + 3 * c0 + 2 * d1 + d2) / 9.0; + + vec3 filteredH = (l2 + 2 * l1 + 2 * c0 + 2 * r1 + r2) / 8.0; + vec3 filteredV = (u2 + 2 * u1 + 2 * c0 + 2 * d1 + d2) / 8.0; + + float smoothH = 1.0; + float smoothV = 1.0; + + if (edgeWeight > 0.0) + { + vec3 horizColor = mix(c0, filteredH, smoothH * horizWeight); + vec3 vertColor = mix(c0, filteredV, smoothV * vertWeight); + + float totalW = horizWeight + vertWeight; + if (totalW > 0.0) + color = (horizColor * horizWeight + vertColor * vertWeight) / totalW; + } + + // block edge vis + if (const0.y > 0.5) + { + color = vec3(edgeWeight, edgeWeight, edgeWeight); + } + } + + fragColor = vec4(color, 1.0); +} diff --git a/external/basis_universal/shader_deblocking/testbed.py b/external/basis_universal/shader_deblocking/testbed.py new file mode 100644 index 0000000000..a29d2e64ba --- /dev/null +++ b/external/basis_universal/shader_deblocking/testbed.py @@ -0,0 +1,861 @@ +#!/usr/bin/env python3 +""" +Mipmap Compatible Texture Sampling Deblocking Shader Testbed +Copyright (C) 2026 Binomial LLC. +LICENSE: Apache 2.0 + +Usage: + python testbed.py shader.glsl block_w block_h mip0.png mip1.png [mip2.png ...] + block_w, block_h: Block size in texels (e.g. 
8 8 for 8x8 DCT blocks) + +Controls: + Arrows Move quad left/right/up/down + W / S Move closer / farther + A / D Rotate yaw (cube mode) + Q / E Rotate pitch (cube mode) + C Toggle cube / quad mode + B Bilinear filtering + T Trilinear filtering + P Point filtering + R Reload shader + 1 Toggle deblocking shader off/on + 2 Toggle edge visualization (only when deblocking active) + 3-4 Toggle shader const0.x/y/z/w (0 <-> 1) + 5-8 Toggle shader const1.x/y/z/w (0 <-> 1) + Space Reset to initial state + Esc Quit +""" + +import sys, os, importlib.util +print("=== DIAG ===") +print("exe:", sys.executable) +print("ver:", sys.version) +print("cwd:", os.getcwd()) +print("glfw spec:", importlib.util.find_spec("glfw")) +print("OpenGL spec:", importlib.util.find_spec("OpenGL")) +print("============") + +import sys +import ctypes +import numpy as np +from PIL import Image, ImageDraw, ImageFont +from pathlib import Path + +import glfw +from OpenGL.GL import * + + +# ----------------------------------------------------------------------------- +# Globals +# ----------------------------------------------------------------------------- +WINDOW_WIDTH = 1280 +WINDOW_HEIGHT = 720 +FOV_DEGREES = 90.0 +Z_MIN = .40 +Z_MAX = -50.0 +Z_SPEED = 2.0 +XY_SPEED = 1.5 +ROT_SPEED = 90.0 # degrees per second +# Block size (set from command line) +BLOCK_WIDTH = 12 +BLOCK_HEIGHT = 12 + +g_state = { + 'x': 0.0, + 'y': 0.0, + 'z': -3.0, + 'yaw': 0.0, + 'pitch': 0.0, + 'mode': 'QUAD', # 'QUAD' or 'CUBE' + 'filter_mode': 'BILINEAR', + 'shader_path': None, + 'program': None, + 'texture': None, + 'tex_size': (0, 0), + 'quad_vao': None, + 'cube_vao': None, + 'cube_index_count': 0, + 'debug_vao': None, + 'debug_texture': None, + 'debug_dirty': True, + 'last_time': 0.0, + 'const0': [0.0, 0.0, 0.0, 0.0], + 'const1': [0.0, 0.0, 0.0, 0.0], +} +INIT_X = 0.0 +INIT_Y = 0.0 +INIT_Z = -3.0 +INIT_YAW = 0.0 +INIT_PITCH = 0.0 +INIT_CONST0 = [0.0, 0.0, 0.0, 0.0] +INIT_CONST1 = [0.0, 0.0, 0.0, 0.0] + + +# 
----------------------------------------------------------------------------- +# Shader Loading +# ----------------------------------------------------------------------------- +def parse_shader_file(path): + """Parse shader file with #vertex and #fragment markers.""" + text = Path(path).read_text() + + vertex_src = None + fragment_src = None + + parts = text.split('#vertex') + if len(parts) < 2: + print(f"ERROR: No #vertex marker found in {path}") + return None, None + + rest = parts[1] + frag_parts = rest.split('#fragment') + if len(frag_parts) < 2: + print(f"ERROR: No #fragment marker found in {path}") + return None, None + + vertex_src = frag_parts[0].strip() + fragment_src = frag_parts[1].strip() + + return vertex_src, fragment_src + + +def compile_shader(source, shader_type): + """Compile a shader, return handle or None on error.""" + shader = glCreateShader(shader_type) + glShaderSource(shader, source) + glCompileShader(shader) + + if glGetShaderiv(shader, GL_COMPILE_STATUS) != GL_TRUE: + error = glGetShaderInfoLog(shader) + if isinstance(error, bytes): + error = error.decode('utf-8') + type_name = "VERTEX" if shader_type == GL_VERTEX_SHADER else "FRAGMENT" + print(f"{type_name} SHADER ERROR:\n{error}") + glDeleteShader(shader) + return None + + return shader + + +def link_program(vertex_shader, fragment_shader): + """Link shaders into program, return handle or None on error.""" + program = glCreateProgram() + glAttachShader(program, vertex_shader) + glAttachShader(program, fragment_shader) + glLinkProgram(program) + + if glGetProgramiv(program, GL_LINK_STATUS) != GL_TRUE: + error = glGetProgramInfoLog(program) + if isinstance(error, bytes): + error = error.decode('utf-8') + print(f"LINK ERROR:\n{error}") + glDeleteProgram(program) + return None + + return program + + +def load_shader(path): + """Load, compile, and link shader from file. 
Returns program or None.""" + print(f"Loading shader: {path}") + + vertex_src, fragment_src = parse_shader_file(path) + if vertex_src is None or fragment_src is None: + return None + + vertex_shader = compile_shader(vertex_src, GL_VERTEX_SHADER) + if vertex_shader is None: + return None + + fragment_shader = compile_shader(fragment_src, GL_FRAGMENT_SHADER) + if fragment_shader is None: + glDeleteShader(vertex_shader) + return None + + program = link_program(vertex_shader, fragment_shader) + + glDeleteShader(vertex_shader) + glDeleteShader(fragment_shader) + + if program: + print("Shader compiled successfully.") + + return program + + +def reload_shader(): + """Attempt to reload shader. Keep old one if failed.""" + new_program = load_shader(g_state['shader_path']) + if new_program is not None: + if g_state['program'] is not None: + glDeleteProgram(g_state['program']) + g_state['program'] = new_program + else: + print("Shader reload failed, keeping previous shader.") + + +# ----------------------------------------------------------------------------- +# Texture Loading +# ----------------------------------------------------------------------------- +def load_mipmap_texture(paths): + """Load PNG files as mipmap levels. 
Returns texture handle and base size.""" + images = [] + + for i, path in enumerate(paths): + img = Image.open(path).convert('RGBA') + images.append(img) + print(f"Loaded mip {i}: {path} ({img.width}x{img.height})") + + # Validate dimensions + for i in range(1, len(images)): + expected_w = images[i - 1].width // 2 + expected_h = images[i - 1].height // 2 + actual_w = images[i].width + actual_h = images[i].height + + if actual_w != expected_w or actual_h != expected_h: + print(f"ERROR: Mip {i} should be {expected_w}x{expected_h}, got {actual_w}x{actual_h}") + sys.exit(1) + + # Create texture + texture = glGenTextures(1) + glBindTexture(GL_TEXTURE_2D, texture) + + # Upload each mip level + for level, img in enumerate(images): + data = np.array(img, dtype=np.uint8) + glTexImage2D( + GL_TEXTURE_2D, level, GL_RGBA8, + img.width, img.height, 0, + GL_RGBA, GL_UNSIGNED_BYTE, data + ) + + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, len(images) - 1) + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE) + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE) + + base_size = (images[0].width, images[0].height) + return texture, base_size + + +def set_filter_mode(mode): + """Set texture filtering mode.""" + glBindTexture(GL_TEXTURE_2D, g_state['texture']) + + if mode == 'BILINEAR': + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_NEAREST) + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR) + elif mode == 'TRILINEAR': + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR) + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR) + else: # POINT + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST_MIPMAP_NEAREST) + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST) + + g_state['filter_mode'] = mode + g_state['debug_dirty'] = True + + +# ----------------------------------------------------------------------------- +# Geometry +# 
----------------------------------------------------------------------------- +def create_quad(aspect_ratio): + """Create a quad VAO centered at origin with given aspect ratio.""" + # Normalize so longest dimension is 1.0 + if aspect_ratio >= 1.0: + half_w = 1.0 + half_h = 1.0 / aspect_ratio + else: + half_w = aspect_ratio + half_h = 1.0 + + # Position (x, y, z) + UV (u, v) + vertices = np.array([ + -half_w, -half_h, 0.0, 0.0, 1.0, + half_w, -half_h, 0.0, 1.0, 1.0, + half_w, half_h, 0.0, 1.0, 0.0, + -half_w, half_h, 0.0, 0.0, 0.0, + ], dtype=np.float32) + + indices = np.array([0, 1, 2, 0, 2, 3], dtype=np.uint32) + + vao = glGenVertexArrays(1) + vbo = glGenBuffers(1) + ebo = glGenBuffers(1) + + glBindVertexArray(vao) + + glBindBuffer(GL_ARRAY_BUFFER, vbo) + glBufferData(GL_ARRAY_BUFFER, vertices.nbytes, vertices, GL_STATIC_DRAW) + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo) + glBufferData(GL_ELEMENT_ARRAY_BUFFER, indices.nbytes, indices, GL_STATIC_DRAW) + + # Position attribute (location 0) + glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 20, ctypes.c_void_p(0)) + glEnableVertexAttribArray(0) + + # UV attribute (location 1) + glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 20, ctypes.c_void_p(12)) + glEnableVertexAttribArray(1) + + glBindVertexArray(0) + + return vao + + +def create_cube(size=1.0): + """Create a textured cube VAO centered at origin.""" + h = size / 2.0 + + # Each face: 4 vertices with position (x,y,z) + UV (u,v) + # Front face (z = +h) + front = [ + -h, -h, h, 0.0, 1.0, + h, -h, h, 1.0, 1.0, + h, h, h, 1.0, 0.0, + -h, h, h, 0.0, 0.0, + ] + # Back face (z = -h) + back = [ + h, -h, -h, 0.0, 1.0, + -h, -h, -h, 1.0, 1.0, + -h, h, -h, 1.0, 0.0, + h, h, -h, 0.0, 0.0, + ] + # Right face (x = +h) + right = [ + h, -h, h, 0.0, 1.0, + h, -h, -h, 1.0, 1.0, + h, h, -h, 1.0, 0.0, + h, h, h, 0.0, 0.0, + ] + # Left face (x = -h) + left = [ + -h, -h, -h, 0.0, 1.0, + -h, -h, h, 1.0, 1.0, + -h, h, h, 1.0, 0.0, + -h, h, -h, 0.0, 0.0, + ] + # Top face (y = +h) + top = 
[ + -h, h, h, 0.0, 1.0, + h, h, h, 1.0, 1.0, + h, h, -h, 1.0, 0.0, + -h, h, -h, 0.0, 0.0, + ] + # Bottom face (y = -h) + bottom = [ + -h, -h, -h, 0.0, 1.0, + h, -h, -h, 1.0, 1.0, + h, -h, h, 1.0, 0.0, + -h, -h, h, 0.0, 0.0, + ] + + vertices = np.array(front + back + right + left + top + bottom, dtype=np.float32) + + # 6 faces, each with 2 triangles (6 indices per face) + indices = [] + for i in range(6): + base = i * 4 + indices.extend([base, base+1, base+2, base, base+2, base+3]) + indices = np.array(indices, dtype=np.uint32) + + vao = glGenVertexArrays(1) + vbo = glGenBuffers(1) + ebo = glGenBuffers(1) + + glBindVertexArray(vao) + + glBindBuffer(GL_ARRAY_BUFFER, vbo) + glBufferData(GL_ARRAY_BUFFER, vertices.nbytes, vertices, GL_STATIC_DRAW) + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo) + glBufferData(GL_ELEMENT_ARRAY_BUFFER, indices.nbytes, indices, GL_STATIC_DRAW) + + # Position attribute (location 0) + glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 20, ctypes.c_void_p(0)) + glEnableVertexAttribArray(0) + + # UV attribute (location 1) + glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 20, ctypes.c_void_p(12)) + glEnableVertexAttribArray(1) + + glBindVertexArray(0) + + return vao, len(indices) + + +def create_debug_quad(): + """Create a screen-space quad for debug text overlay.""" + # Screen-space quad at top-left + # NDC: x=-1 is left, y=1 is top + w = 680.0 / WINDOW_WIDTH * 2.0 + h = 60.0 / WINDOW_HEIGHT * 2.0 + + vertices = np.array([ + -1.0, 1.0, 0.0, 0.0, + -1.0 + w, 1.0, 1.0, 0.0, + -1.0 + w, 1.0 - h, 1.0, 1.0, + -1.0, 1.0 - h, 0.0, 1.0, + ], dtype=np.float32) + + indices = np.array([0, 1, 2, 0, 2, 3], dtype=np.uint32) + + vao = glGenVertexArrays(1) + vbo = glGenBuffers(1) + ebo = glGenBuffers(1) + + glBindVertexArray(vao) + + glBindBuffer(GL_ARRAY_BUFFER, vbo) + glBufferData(GL_ARRAY_BUFFER, vertices.nbytes, vertices, GL_STATIC_DRAW) + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo) + glBufferData(GL_ELEMENT_ARRAY_BUFFER, indices.nbytes, indices, 
GL_STATIC_DRAW) + + # Position attribute (location 0) - xy only + glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 16, ctypes.c_void_p(0)) + glEnableVertexAttribArray(0) + + # UV attribute (location 1) + glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 16, ctypes.c_void_p(8)) + glEnableVertexAttribArray(1) + + glBindVertexArray(0) + + return vao + + +# ----------------------------------------------------------------------------- +# Debug Text +# ----------------------------------------------------------------------------- +DEBUG_VERTEX = """ +#version 330 core +layout(location = 0) in vec2 aPos; +layout(location = 1) in vec2 aUV; +out vec2 vUV; +void main() { + vUV = aUV; + gl_Position = vec4(aPos, 0.0, 1.0); +} +""" + +DEBUG_FRAGMENT = """ +#version 330 core +uniform sampler2D tex; +in vec2 vUV; +out vec4 fragColor; +void main() { + fragColor = texture(tex, vUV); +} +""" + +g_debug_program = None + + +def init_debug_rendering(): + """Initialize debug text rendering resources.""" + global g_debug_program + + vs = compile_shader(DEBUG_VERTEX, GL_VERTEX_SHADER) + fs = compile_shader(DEBUG_FRAGMENT, GL_FRAGMENT_SHADER) + + if vs is None or fs is None: + print("ERROR: Failed to compile debug shaders") + if vs: + glDeleteShader(vs) + if fs: + glDeleteShader(fs) + return + + g_debug_program = link_program(vs, fs) + glDeleteShader(vs) + glDeleteShader(fs) + + if g_debug_program is None: + print("ERROR: Failed to link debug program") + return + + g_state['debug_vao'] = create_debug_quad() + g_state['debug_texture'] = glGenTextures(1) + + +def update_debug_text(): + """Render debug text to texture.""" + if not g_state['debug_dirty']: + return + + c0 = g_state['const0'] + c1 = g_state['const1'] + + # Build status lines + lines = [ + f"Mode:{g_state['mode']:4s} Filter:{g_state['filter_mode']:9s} Block:{BLOCK_WIDTH}x{BLOCK_HEIGHT} Deblock: [{int(c0[0])}{int(c0[1])}{int(c0[2])}{int(c0[3])}][{int(c1[0])}{int(c1[1])}{int(c1[2])}{int(c1[3])}]", + f"X:{g_state['x']:+5.1f} 
Y:{g_state['y']:+5.1f} Z:{g_state['z']:5.1f} Yaw:{g_state['yaw']:+6.1f} Pitch:{g_state['pitch']:+6.1f}", + "Arrows:move, W/S:zoom, A/D:yaw, Q/E:pitch, C:cube, B/T/P:filter, 1=deblocking toggle, 2=edge vis, R:reload, Space:reset", + ] + + img = Image.new('RGBA', (680, 60), (0, 0, 0, 180)) + draw = ImageDraw.Draw(img) + + try: + font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf", 14) + except: + font = ImageFont.load_default() + + y = 4 + for line in lines: + draw.text((6, y), line, fill=(255, 255, 255, 255), font=font) + y += 18 + + data = np.array(img, dtype=np.uint8) + + glBindTexture(GL_TEXTURE_2D, g_state['debug_texture']) + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, img.width, img.height, 0, + GL_RGBA, GL_UNSIGNED_BYTE, data) + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR) + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR) + + g_state['debug_dirty'] = False + + +def draw_debug_text(): + """Draw debug text overlay.""" + if g_debug_program is None: + return + + update_debug_text() + + glEnable(GL_BLEND) + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA) + glDisable(GL_DEPTH_TEST) + + glUseProgram(g_debug_program) + glBindTexture(GL_TEXTURE_2D, g_state['debug_texture']) + glBindVertexArray(g_state['debug_vao']) + glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_INT, None) + + glEnable(GL_DEPTH_TEST) + glDisable(GL_BLEND) + + +# ----------------------------------------------------------------------------- +# Math +# ----------------------------------------------------------------------------- +def perspective_matrix(fov_deg, aspect, near, far): + """Create perspective projection matrix.""" + fov_rad = np.radians(fov_deg) + f = 1.0 / np.tan(fov_rad / 2.0) + + m = np.zeros((4, 4), dtype=np.float32) + m[0, 0] = f / aspect + m[1, 1] = f + m[2, 2] = (far + near) / (near - far) + m[2, 3] = (2 * far * near) / (near - far) + m[3, 2] = -1.0 + + return m + + +def translation_matrix(x, y, z): + """Create translation 
matrix.""" + m = np.eye(4, dtype=np.float32) + m[0, 3] = x + m[1, 3] = y + m[2, 3] = z + return m + + +def rotation_matrix_y(deg): + """Create rotation matrix around Y axis (yaw).""" + rad = np.radians(deg) + c, s = np.cos(rad), np.sin(rad) + m = np.eye(4, dtype=np.float32) + m[0, 0] = c + m[0, 2] = s + m[2, 0] = -s + m[2, 2] = c + return m + + +def rotation_matrix_x(deg): + """Create rotation matrix around X axis (pitch).""" + rad = np.radians(deg) + c, s = np.cos(rad), np.sin(rad) + m = np.eye(4, dtype=np.float32) + m[1, 1] = c + m[1, 2] = -s + m[2, 1] = s + m[2, 2] = c + return m + + +# ----------------------------------------------------------------------------- +# Input +# ----------------------------------------------------------------------------- +def framebuffer_size_callback(window, width, height): + """Handle window resize.""" + global WINDOW_WIDTH, WINDOW_HEIGHT + WINDOW_WIDTH = width + WINDOW_HEIGHT = height + glViewport(0, 0, width, height) + g_state['debug_dirty'] = True + + +def key_callback(window, key, scancode, action, mods): + if action == glfw.PRESS: + if key == glfw.KEY_ESCAPE: + glfw.set_window_should_close(window, True) + elif key == glfw.KEY_R: + reload_shader() + elif key == glfw.KEY_B: + set_filter_mode('BILINEAR') + print("Filter: BILINEAR") + elif key == glfw.KEY_P: + set_filter_mode('POINT') + print("Filter: POINT") + elif key == glfw.KEY_T: + set_filter_mode('TRILINEAR') + print("Filter: TRILINEAR") + # Toggle const0 components (keys 1-4) + elif key == glfw.KEY_1: + g_state['const0'][0] = 1.0 - g_state['const0'][0] + print(f"const0: {g_state['const0']}") + g_state['debug_dirty'] = True + elif key == glfw.KEY_2: + g_state['const0'][1] = 1.0 - g_state['const0'][1] + print(f"const0: {g_state['const0']}") + g_state['debug_dirty'] = True + elif key == glfw.KEY_3: + g_state['const0'][2] = 1.0 - g_state['const0'][2] + print(f"const0: {g_state['const0']}") + g_state['debug_dirty'] = True + elif key == glfw.KEY_4: + g_state['const0'][3] = 1.0 
- g_state['const0'][3] + print(f"const0: {g_state['const0']}") + g_state['debug_dirty'] = True + # Toggle const1 components (keys 5-8) + elif key == glfw.KEY_5: + g_state['const1'][0] = 1.0 - g_state['const1'][0] + print(f"const1: {g_state['const1']}") + g_state['debug_dirty'] = True + elif key == glfw.KEY_6: + g_state['const1'][1] = 1.0 - g_state['const1'][1] + print(f"const1: {g_state['const1']}") + g_state['debug_dirty'] = True + elif key == glfw.KEY_7: + g_state['const1'][2] = 1.0 - g_state['const1'][2] + print(f"const1: {g_state['const1']}") + g_state['debug_dirty'] = True + elif key == glfw.KEY_8: + g_state['const1'][3] = 1.0 - g_state['const1'][3] + print(f"const1: {g_state['const1']}") + g_state['debug_dirty'] = True + elif key == glfw.KEY_C: + g_state['mode'] = 'CUBE' if g_state['mode'] == 'QUAD' else 'QUAD' + print(f"Mode: {g_state['mode']}") + g_state['debug_dirty'] = True + elif key == glfw.KEY_SPACE: + g_state['x'] = INIT_X + g_state['y'] = INIT_Y + g_state['z'] = INIT_Z + g_state['yaw'] = INIT_YAW + g_state['pitch'] = INIT_PITCH + g_state['const0'] = INIT_CONST0.copy() + g_state['const1'] = INIT_CONST1.copy() + g_state['debug_dirty'] = True + print("Reset to initial state") + + +def process_held_keys(window, dt): + """Process continuously held keys.""" + moved = False + + if glfw.get_key(window, glfw.KEY_W) == glfw.PRESS: + g_state['z'] += Z_SPEED * dt + moved = True + + if glfw.get_key(window, glfw.KEY_S) == glfw.PRESS: + g_state['z'] -= Z_SPEED * dt + moved = True + if glfw.get_key(window, glfw.KEY_LEFT) == glfw.PRESS: + g_state['x'] += XY_SPEED * dt + moved = True + if glfw.get_key(window, glfw.KEY_RIGHT) == glfw.PRESS: + g_state['x'] -= XY_SPEED * dt + moved = True + if glfw.get_key(window, glfw.KEY_UP) == glfw.PRESS: + g_state['y'] += XY_SPEED * dt + moved = True + if glfw.get_key(window, glfw.KEY_DOWN) == glfw.PRESS: + g_state['y'] -= XY_SPEED * dt + moved = True + + # Rotation (A/D for yaw, Q/E for pitch) + if glfw.get_key(window, glfw.KEY_A) 
== glfw.PRESS: + g_state['yaw'] += ROT_SPEED * dt + moved = True + if glfw.get_key(window, glfw.KEY_D) == glfw.PRESS: + g_state['yaw'] -= ROT_SPEED * dt + moved = True + if glfw.get_key(window, glfw.KEY_Q) == glfw.PRESS: + g_state['pitch'] += ROT_SPEED * dt + moved = True + if glfw.get_key(window, glfw.KEY_E) == glfw.PRESS: + g_state['pitch'] -= ROT_SPEED * dt + moved = True + + # Clamp Z + g_state['z'] = max(Z_MAX, min(Z_MIN, g_state['z'])) + + if moved: + g_state['debug_dirty'] = True + + +# ----------------------------------------------------------------------------- +# Main +# ----------------------------------------------------------------------------- +def main(): + global BLOCK_WIDTH, BLOCK_HEIGHT + if len(sys.argv) < 5: + print(__doc__) + print("ERROR: Need shader, block_w, block_h, and at least one mipmap PNG") + print("Example: python testbed.py shader.glsl 8 8 mip0.png mip1.png") + sys.exit(1) + + shader_path = sys.argv[1] + try: + BLOCK_WIDTH = int(sys.argv[2]) + BLOCK_HEIGHT = int(sys.argv[3]) + except ValueError: + print(f"ERROR: block_w and block_h must be integers, got '{sys.argv[2]}' '{sys.argv[3]}'") + sys.exit(1) + if BLOCK_WIDTH < 1 or BLOCK_HEIGHT < 1: + print(f"ERROR: block size must be positive, got {BLOCK_WIDTH}x{BLOCK_HEIGHT}") + sys.exit(1) + mip_paths = sys.argv[4:] + print(f"Block size: {BLOCK_WIDTH}x{BLOCK_HEIGHT}") + + g_state['shader_path'] = shader_path + + # Init GLFW + if not glfw.init(): + print("ERROR: Failed to initialize GLFW") + sys.exit(1) + + glfw.window_hint(glfw.CONTEXT_VERSION_MAJOR, 3) + glfw.window_hint(glfw.CONTEXT_VERSION_MINOR, 3) + glfw.window_hint(glfw.OPENGL_PROFILE, glfw.OPENGL_CORE_PROFILE) + glfw.window_hint(glfw.RESIZABLE, glfw.TRUE) + glfw.window_hint(glfw.FOCUSED, glfw.TRUE) + glfw.window_hint(glfw.FOCUS_ON_SHOW, glfw.TRUE) + + window = glfw.create_window(WINDOW_WIDTH, WINDOW_HEIGHT, "Deblock Shader Testbed", None, None) + if not window: + glfw.terminate() + print("ERROR: Failed to create window") + 
sys.exit(1) + + glfw.make_context_current(window) + glfw.set_key_callback(window, key_callback) + glfw.set_framebuffer_size_callback(window, framebuffer_size_callback) + glfw.swap_interval(1) # VSync + glfw.focus_window(window) + + print(f"OpenGL: {glGetString(GL_VERSION).decode()}") + + # Load shader (exit on failure at startup) + g_state['program'] = load_shader(shader_path) + if g_state['program'] is None: + glfw.terminate() + sys.exit(1) + + # Load texture + g_state['texture'], g_state['tex_size'] = load_mipmap_texture(mip_paths) + set_filter_mode('BILINEAR') + + # Create quad + aspect = g_state['tex_size'][0] / g_state['tex_size'][1] + g_state['quad_vao'] = create_quad(aspect) + + # Create cube + g_state['cube_vao'], g_state['cube_index_count'] = create_cube(1.0) + + # Init debug rendering + init_debug_rendering() + + glEnable(GL_DEPTH_TEST) + glClearColor(0.2, 0.2, 0.2, 1.0) + + g_state['last_time'] = glfw.get_time() + + # Main loop + while not glfw.window_should_close(window): + # Delta time + now = glfw.get_time() + dt = now - g_state['last_time'] + g_state['last_time'] = now + + # Input + glfw.poll_events() + process_held_keys(window, dt) + + # Projection matrix (recalculate for resize) + proj = perspective_matrix(FOV_DEGREES, WINDOW_WIDTH / WINDOW_HEIGHT, 0.001, 100.0) + + # Clear + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT) + + # Draw quad or cube + glUseProgram(g_state['program']) + + # MVP (include rotation for cube mode) + trans = translation_matrix(g_state['x'], g_state['y'], g_state['z']) + rot_y = rotation_matrix_y(g_state['yaw']) + rot_x = rotation_matrix_x(g_state['pitch']) + model = trans @ rot_y @ rot_x + mvp = proj @ model + + loc = glGetUniformLocation(g_state['program'], "mvp") + if loc >= 0: + glUniformMatrix4fv(loc, 1, GL_TRUE, mvp) + + loc = glGetUniformLocation(g_state['program'], "tex") + if loc >= 0: + glUniform1i(loc, 0) + + loc = glGetUniformLocation(g_state['program'], "texSize") + if loc >= 0: + glUniform4f(loc, 
float(g_state['tex_size'][0]), float(g_state['tex_size'][1]), BLOCK_WIDTH, BLOCK_HEIGHT); + + loc = glGetUniformLocation(g_state['program'], "const0") + if loc >= 0: + c = g_state['const0'] + glUniform4f(loc, c[0], c[1], c[2], c[3]) + + loc = glGetUniformLocation(g_state['program'], "const1") + if loc >= 0: + c = g_state['const1'] + glUniform4f(loc, c[0], c[1], c[2], c[3]) + + glActiveTexture(GL_TEXTURE0) + glBindTexture(GL_TEXTURE_2D, g_state['texture']) + + if g_state['mode'] == 'CUBE': + glBindVertexArray(g_state['cube_vao']) + glDrawElements(GL_TRIANGLES, g_state['cube_index_count'], GL_UNSIGNED_INT, None) + else: + glBindVertexArray(g_state['quad_vao']) + glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_INT, None) + + # Draw debug overlay + draw_debug_text() + + glfw.swap_buffers(window) + + glfw.terminate() + print("Done.") + + +if __name__ == "__main__": + main() diff --git a/external/basis_universal/test_files/base_xuastc_arith.ktx2 b/external/basis_universal/test_files/base_xuastc_arith.ktx2 new file mode 100644 index 0000000000..1b7aaa92ff Binary files /dev/null and b/external/basis_universal/test_files/base_xuastc_arith.ktx2 differ diff --git a/external/basis_universal/test_files/base_xuastc_zstd.ktx2 b/external/basis_universal/test_files/base_xuastc_zstd.ktx2 new file mode 100644 index 0000000000..50cf9584df Binary files /dev/null and b/external/basis_universal/test_files/base_xuastc_zstd.ktx2 differ diff --git a/external/basis_universal/test_files/kodim23.ktx2 b/external/basis_universal/test_files/kodim23.ktx2 new file mode 100644 index 0000000000..0672a0228b Binary files /dev/null and b/external/basis_universal/test_files/kodim23.ktx2 differ diff --git a/external/basis_universal/transcoder/basisu.h b/external/basis_universal/transcoder/basisu.h index 6e2efedd86..d4e339e917 100644 --- a/external/basis_universal/transcoder/basisu.h +++ b/external/basis_universal/transcoder/basisu.h @@ -1,5 +1,5 @@ // basisu.h -// Copyright (C) 2019-2024 Binomial LLC. 
All Rights Reserved. +// Copyright (C) 2019-2026 Binomial LLC. All Rights Reserved. // Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,12 +15,16 @@ // limitations under the License. #pragma once +#ifndef BASISD_SUPPORT_XUASTC +#define BASISD_SUPPORT_XUASTC 1 +#endif + #ifdef _MSC_VER #pragma warning (disable : 4201) #pragma warning (disable : 4127) // warning C4127: conditional expression is constant #pragma warning (disable : 4530) // C++ exception handler used, but unwind semantics are not enabled. - + #endif // _MSC_VER #include @@ -40,9 +44,11 @@ #include #include #include +#include #include "basisu_containers.h" +// We never use min/max macros, slam them to off. #ifdef max #undef max #endif @@ -57,6 +63,7 @@ // Set to one to enable debug printf()'s when any errors occur, for development/debugging. Especially useful for WebGL development. #ifndef BASISU_FORCE_DEVEL_MESSAGES +// Do not check in as 1! 
#define BASISU_FORCE_DEVEL_MESSAGES 0 #endif @@ -93,6 +100,7 @@ namespace basisu typedef basisu::vector int_vec; typedef basisu::vector bool_vec; typedef basisu::vector float_vec; + typedef basisu::vector double_vec; void enable_debug_printf(bool enabled); void debug_printf(const char *pFmt, ...); @@ -109,14 +117,14 @@ namespace basisu #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wclass-memaccess" +#pragma GCC diagnostic ignored "-Wclass-memaccess" #endif - + template inline void clear_obj(T& obj) { memset((void *)&obj, 0, sizeof(obj)); } #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic pop -#endif +#endif constexpr double cPiD = 3.14159265358979323846264338327950288; constexpr float REALLY_SMALL_FLOAT_VAL = .000000125f; @@ -124,7 +132,7 @@ namespace basisu constexpr float BIG_FLOAT_VAL = 1e+30f; template inline T0 lerp(T0 a, T0 b, T1 c) { return a + (b - a) * c; } - + inline float clampf(float value, float low, float high) { if (value < low) value = low; else if (value > high) value = high; return value; } inline float saturate(float value) { return clampf(value, 0, 1.0f); } inline uint8_t minimumub(uint8_t a, uint8_t b) { return (a < b) ? a : b; } @@ -141,6 +149,41 @@ namespace basisu template inline T square(T a) { return a * a; } template inline T sign(T a) { return (a < 0) ? (T)-1 : ((a == 0) ? (T)0 : (T)1); } + inline int imod(int i, int d) + { + assert(i != INT_MIN); + + if (i >= 0) + return i % d; + + int r = (-i) % d; + return (r == 0) ? 
0 : d - r; + } + + inline uint8_t safe_cast_uint8(uint32_t x) + { + assert(x <= UINT8_MAX); + return (uint8_t)x; + } + + inline int8_t safe_cast_int8(int32_t x) + { + assert((x >= INT8_MIN) && (x <= INT8_MAX)); + return (int8_t)x; + } + + inline uint16_t safe_cast_uint16(uint32_t x) + { + assert(x <= UINT16_MAX); + return (uint16_t)x; + } + + inline int16_t safe_cast_int16(int32_t x) + { + assert((x >= INT16_MIN) && (x <= INT16_MAX)); + return (int16_t)x; + } + inline bool equal_tol(float a, float b, float t) { return fabsf(a - b) <= ((maximum(fabsf(a), fabsf(b)) + 1.0f) * t); } inline bool equal_tol(double a, double b, double t) { return fabs(a - b) <= ((maximum(fabs(a), fabs(b)) + 1.0f) * t); } @@ -161,27 +204,35 @@ namespace basisu temp = 0; return temp; } - + inline uint32_t iabs(int32_t i) { return (i < 0) ? static_cast(-i) : static_cast(i); } inline uint64_t iabs64(int64_t i) { return (i < 0) ? static_cast(-i) : static_cast(i); } - template inline void clear_vector(T &vec) { vec.erase(vec.begin(), vec.end()); } + template inline void clear_vector(T &vec) { vec.erase(vec.begin(), vec.end()); } template inline typename T::value_type *enlarge_vector(T &vec, size_t n) { size_t cs = vec.size(); vec.resize(cs + n); return &vec[cs]; } inline bool is_pow2(uint32_t x) { return x && ((x & (x - 1U)) == 0U); } inline bool is_pow2(uint64_t x) { return x && ((x & (x - 1U)) == 0U); } + template inline T range_check(T v, T minv, T maxv) { assert(v >= minv && v <= maxv); BASISU_NOTE_UNUSED(minv); BASISU_NOTE_UNUSED(maxv); return v; } + template inline T range_check(T v, T maxv) { assert(v <= maxv); BASISU_NOTE_UNUSED(maxv); return v; } + template inline T open_range_check(T v, T minv, T maxv) { assert(v >= minv && v < maxv); BASISU_NOTE_UNUSED(minv); BASISU_NOTE_UNUSED(maxv); return v; } template inline T open_range_check(T v, T maxv) { assert(v < maxv); BASISU_NOTE_UNUSED(maxv); return v; } // Open interval - inline bool in_bounds(int v, int l, int h) + inline bool 
is_in_bounds(int v, int l, int h) { return (v >= l) && (v < h); } // Closed interval - inline bool in_range(int v, int l, int h) + inline bool is_in_range(int v, int l, int h) + { + return (v >= l) && (v <= h); + } + + inline bool is_in_range(float v, float l, float h) { return (v >= l) && (v <= h); } @@ -192,7 +243,7 @@ namespace basisu inline uint32_t get_bit(uint32_t src, int ndx) { - assert(in_bounds(ndx, 0, 32)); + assert(is_in_bounds(ndx, 0, 32)); return (src >> ndx) & 1; } @@ -204,7 +255,7 @@ namespace basisu inline uint32_t get_bits(uint32_t val, int low, int high) { const int num_bits = (high - low) + 1; - assert(in_range(num_bits, 1, 32)); + assert(is_in_range(num_bits, 1, 32)); val >>= low; if (num_bits != 32) @@ -213,8 +264,8 @@ namespace basisu return val; } - template inline void append_vector(T &vec, const R *pObjs, size_t n) - { + template inline void append_vector(T &vec, const R *pObjs, size_t n) + { if (n) { if (vec.size()) @@ -265,7 +316,7 @@ namespace basisu for (size_t i = 0; i < vec.size(); i++) vec[i] = obj; } - + inline uint64_t read_be64(const void *p) { uint64_t val = 0; @@ -307,6 +358,14 @@ namespace basisu return (m != 0) ? (y - m) : m; } + inline float posmodf(float x, float y) + { + float m = fmodf(x, y); + if (m < 0.0f) + m += y; + return m; + } + inline bool do_excl_ranges_overlap(int la, int ha, int lb, int hb) { assert(la < ha && lb < hb); @@ -331,7 +390,7 @@ namespace basisu pBytes[2] = (uint8_t)(val >> 16U); pBytes[3] = (uint8_t)(val >> 24U); } - + // Always little endian 1-8 byte unsigned int template struct packed_uint @@ -341,21 +400,21 @@ namespace basisu inline packed_uint() { static_assert(NumBytes <= sizeof(uint64_t), "Invalid NumBytes"); } inline packed_uint(uint64_t v) { *this = v; } inline packed_uint(const packed_uint& other) { *this = other; } - - inline packed_uint& operator= (uint64_t v) - { + + inline packed_uint& operator= (uint64_t v) + { // TODO: Add assert on truncation? 
- for (uint32_t i = 0; i < NumBytes; i++) - m_bytes[i] = static_cast(v >> (i * 8)); - return *this; + for (uint32_t i = 0; i < NumBytes; i++) + m_bytes[i] = static_cast(v >> (i * 8)); + return *this; } - inline packed_uint& operator= (const packed_uint& rhs) - { - memcpy(m_bytes, rhs.m_bytes, sizeof(m_bytes)); + inline packed_uint& operator= (const packed_uint& rhs) + { + memcpy(m_bytes, rhs.m_bytes, sizeof(m_bytes)); return *this; } - + inline uint64_t get_uint64() const { // Some compilers may warn about this code. It clearly cannot access beyond the end of the m_bytes struct here. @@ -411,7 +470,7 @@ namespace basisu static_assert(NumBytes <= sizeof(uint32_t), "packed_uint too large to use get_uint32"); return static_cast(get_uint64()); } - + inline operator uint32_t() const { static_assert(NumBytes <= sizeof(uint32_t), "packed_uint too large to use operator uint32_t"); @@ -421,14 +480,14 @@ namespace basisu enum eZero { cZero }; enum eNoClamp { cNoClamp }; - + // Rice/Huffman entropy coding - + // This is basically Deflate-style canonical Huffman, except we allow for a lot more symbols. 
enum { - cHuffmanMaxSupportedCodeSize = 16, cHuffmanMaxSupportedInternalCodeSize = 31, - cHuffmanFastLookupBits = 10, + cHuffmanMaxSupportedCodeSize = 16, cHuffmanMaxSupportedInternalCodeSize = 31, + cHuffmanFastLookupBits = 10, cHuffmanMaxSymsLog2 = 14, cHuffmanMaxSyms = 1 << cHuffmanMaxSymsLog2, // Small zero runs @@ -454,13 +513,13 @@ namespace basisu enum class texture_format { cInvalidTextureFormat = -1, - + // Block-based formats cETC1, // ETC1 cETC1S, // ETC1 (subset: diff colors only, no subblocks) cETC2_RGB, // ETC2 color block (basisu doesn't support ETC2 planar/T/H modes - just basic ETC1) cETC2_RGBA, // ETC2 EAC alpha block followed by ETC2 color block - cETC2_ALPHA, // ETC2 EAC alpha block + cETC2_ALPHA, // ETC2 EAC alpha block cBC1, // DXT1 cBC3, // DXT5 (BC4/DXT5A block followed by a BC1/DXT1 block) cBC4, // DXT5A @@ -479,11 +538,11 @@ namespace basisu cPVRTC2_4_RGBA, cETC2_R11_EAC, cETC2_RG11_EAC, - cUASTC4x4, + cUASTC4x4, cUASTC_HDR_4x4, cBC1_NV, cBC1_AMD, - + // Uncompressed/raw pixels cRGBA32, cRGB565, @@ -492,9 +551,89 @@ namespace basisu cABGR4444, cRGBA_HALF, cRGB_HALF, - cRGB_9E5 + cRGB_9E5, + + // All remaining ASTC LDR block size variants (other than 4x4 which is above). There are 14 total ASTC block sizes, including 4x4. 
+ cASTC_LDR_5x4, + cASTC_LDR_5x5, + cASTC_LDR_6x5, + cASTC_LDR_6x6, + cASTC_LDR_8x5, + cASTC_LDR_8x6, + cASTC_LDR_10x5, + cASTC_LDR_10x6, + cASTC_LDR_8x8, + cASTC_LDR_10x8, + cASTC_LDR_10x10, + cASTC_LDR_12x10, + cASTC_LDR_12x12 }; + inline bool is_astc(texture_format fmt) + { + switch (fmt) + { + case texture_format::cASTC_HDR_4x4: + case texture_format::cASTC_HDR_6x6: + case texture_format::cASTC_LDR_4x4: + case texture_format::cASTC_LDR_5x4: + case texture_format::cASTC_LDR_5x5: + case texture_format::cASTC_LDR_6x5: + case texture_format::cASTC_LDR_6x6: + case texture_format::cASTC_LDR_8x5: + case texture_format::cASTC_LDR_8x6: + case texture_format::cASTC_LDR_10x5: + case texture_format::cASTC_LDR_10x6: + case texture_format::cASTC_LDR_8x8: + case texture_format::cASTC_LDR_10x8: + case texture_format::cASTC_LDR_10x10: + case texture_format::cASTC_LDR_12x10: + case texture_format::cASTC_LDR_12x12: + return true; + default: + break; + } + return false; + } + + inline bool is_hdr_astc(texture_format fmt) + { + switch (fmt) + { + case texture_format::cASTC_HDR_4x4: + case texture_format::cASTC_HDR_6x6: + return true; + default: + break; + } + return false; + } + + inline bool is_ldr_astc(texture_format fmt) + { + switch (fmt) + { + case texture_format::cASTC_LDR_4x4: + case texture_format::cASTC_LDR_5x4: + case texture_format::cASTC_LDR_5x5: + case texture_format::cASTC_LDR_6x5: + case texture_format::cASTC_LDR_6x6: + case texture_format::cASTC_LDR_8x5: + case texture_format::cASTC_LDR_8x6: + case texture_format::cASTC_LDR_10x5: + case texture_format::cASTC_LDR_10x6: + case texture_format::cASTC_LDR_8x8: + case texture_format::cASTC_LDR_10x8: + case texture_format::cASTC_LDR_10x10: + case texture_format::cASTC_LDR_12x10: + case texture_format::cASTC_LDR_12x12: + return true; + default: + break; + } + return false; + } + inline bool is_uncompressed_texture_format(texture_format fmt) { switch (fmt) @@ -555,7 +694,7 @@ namespace basisu default: break; } - + // 
Everything else is 16 bytes/block. return 16; } @@ -575,10 +714,21 @@ namespace basisu switch (fmt) { - case texture_format::cFXT1_RGB: - return 8; - case texture_format::cASTC_HDR_6x6: - return 6; + case texture_format::cFXT1_RGB: return 8; + case texture_format::cASTC_HDR_6x6: return 6; + case texture_format::cASTC_LDR_5x4: return 5; + case texture_format::cASTC_LDR_5x5: return 5; + case texture_format::cASTC_LDR_6x5: return 6; + case texture_format::cASTC_LDR_6x6: return 6; + case texture_format::cASTC_LDR_8x5: return 8; + case texture_format::cASTC_LDR_8x6: return 8; + case texture_format::cASTC_LDR_10x5: return 10; + case texture_format::cASTC_LDR_10x6: return 10; + case texture_format::cASTC_LDR_8x8: return 8; + case texture_format::cASTC_LDR_10x8: return 10; + case texture_format::cASTC_LDR_10x10: return 10; + case texture_format::cASTC_LDR_12x10: return 12; + case texture_format::cASTC_LDR_12x12: return 12; default: break; } @@ -591,8 +741,19 @@ namespace basisu switch (fmt) { - case texture_format::cASTC_HDR_6x6: - return 6; + case texture_format::cASTC_HDR_6x6: return 6; + case texture_format::cASTC_LDR_5x5: return 5; + case texture_format::cASTC_LDR_6x5: return 5; + case texture_format::cASTC_LDR_6x6: return 6; + case texture_format::cASTC_LDR_8x5: return 5; + case texture_format::cASTC_LDR_8x6: return 6; + case texture_format::cASTC_LDR_10x5: return 5; + case texture_format::cASTC_LDR_10x6: return 6; + case texture_format::cASTC_LDR_8x8: return 8; + case texture_format::cASTC_LDR_10x8: return 8; + case texture_format::cASTC_LDR_10x10: return 10; + case texture_format::cASTC_LDR_12x10: return 10; + case texture_format::cASTC_LDR_12x12: return 12; default: break; } @@ -623,5 +784,38 @@ namespace basisu { return !is_hdr_texture_format(fmt); } + + inline texture_format get_astc_ldr_texture_format(uint32_t width, uint32_t height) + { +#define BU_ASTC_LDR_MATCH_BLOCK_DIM(x, y, f) if ((width == (x)) && (height == (y))) return (f); + 
BU_ASTC_LDR_MATCH_BLOCK_DIM(4, 4, texture_format::cASTC_LDR_4x4); + BU_ASTC_LDR_MATCH_BLOCK_DIM(5, 4, texture_format::cASTC_LDR_5x4); + + BU_ASTC_LDR_MATCH_BLOCK_DIM(5, 5, texture_format::cASTC_LDR_5x5); + + BU_ASTC_LDR_MATCH_BLOCK_DIM(6, 5, texture_format::cASTC_LDR_6x5); + BU_ASTC_LDR_MATCH_BLOCK_DIM(6, 6, texture_format::cASTC_LDR_6x6); + + BU_ASTC_LDR_MATCH_BLOCK_DIM(8, 5, texture_format::cASTC_LDR_8x5); + BU_ASTC_LDR_MATCH_BLOCK_DIM(8, 6, texture_format::cASTC_LDR_8x6); + BU_ASTC_LDR_MATCH_BLOCK_DIM(10, 5, texture_format::cASTC_LDR_10x5); + BU_ASTC_LDR_MATCH_BLOCK_DIM(10, 6, texture_format::cASTC_LDR_10x6); + + BU_ASTC_LDR_MATCH_BLOCK_DIM(8, 8, texture_format::cASTC_LDR_8x8); + BU_ASTC_LDR_MATCH_BLOCK_DIM(10, 8, texture_format::cASTC_LDR_10x8); + BU_ASTC_LDR_MATCH_BLOCK_DIM(10, 10, texture_format::cASTC_LDR_10x10); + + BU_ASTC_LDR_MATCH_BLOCK_DIM(12, 10, texture_format::cASTC_LDR_12x10); + BU_ASTC_LDR_MATCH_BLOCK_DIM(12, 12, texture_format::cASTC_LDR_12x12); +#undef BU_ASTC_LDR_MATCH_BLOCK_DIM + + return texture_format::cInvalidTextureFormat; + } + inline bool is_valid_astc_block_size(uint32_t width, uint32_t height) + { + return get_astc_ldr_texture_format(width, height) != texture_format::cInvalidTextureFormat; + } + } // namespace basisu + diff --git a/external/basis_universal/transcoder/basisu_astc_cfgs.inl b/external/basis_universal/transcoder/basisu_astc_cfgs.inl new file mode 100644 index 0000000000..b9f138f3f5 --- /dev/null +++ b/external/basis_universal/transcoder/basisu_astc_cfgs.inl @@ -0,0 +1,648 @@ +const uint32_t BU_TOTAL_ASTC_CFGS = 10311; +const uint8_t s_astc_cfg_table[BU_TOTAL_ASTC_CFGS*3] = { +176,72,0,208,72,0,240,72,0,16,73,0,48,73,0,80,73,0,112,73,0,176,130,0,208,130,0,240,130,0,16,131,0,48,131,0,80,131,0,112,131,0,176,132,0,208,132,0, +240,132,0,16,133,0,48,133,0,80,133,0,112,133,0,176,134,0,208,134,0,240,134,0,16,135,0,48,135,0,80,135,0,112,135,0,176,194,0,208,194,0,240,194,0,16,195,0, 
+48,195,0,80,195,0,112,195,0,176,196,0,208,196,0,240,196,0,16,197,0,48,197,0,80,197,0,112,197,0,176,198,0,208,198,0,240,198,0,16,199,0,48,199,0,80,199,0, +112,199,0,176,2,1,208,2,1,240,2,1,16,3,1,48,3,1,80,3,1,112,3,1,176,4,1,208,4,1,240,4,1,16,5,1,48,5,1,80,5,1,112,5,1,176,6,1, +208,6,1,240,6,1,16,7,1,48,7,1,80,7,1,112,7,1,176,8,1,208,8,1,240,8,1,16,9,1,48,9,1,80,9,1,112,9,1,176,66,1,208,66,1,240,66,1, +16,67,1,48,67,1,80,67,1,112,67,1,176,68,1,208,68,1,240,68,1,16,69,1,48,69,1,80,69,1,112,69,1,176,70,1,208,70,1,240,70,1,16,71,1,48,71,1, +80,71,1,112,71,1,176,72,1,208,72,1,240,72,1,16,73,1,48,73,1,80,73,1,112,73,1,16,1,2,48,1,2,80,1,2,112,1,2,16,17,2,48,17,2,80,17,2, +112,17,2,16,33,2,48,33,2,80,33,2,112,33,2,16,65,2,48,65,2,80,65,2,112,65,2,80,72,2,112,72,2,144,72,2,176,72,2,208,72,2,240,72,2,16,73,2, +48,73,2,80,73,2,112,73,2,16,81,2,48,81,2,80,81,2,112,81,2,10,97,2,42,97,2,73,97,2,105,97,2,16,129,2,48,129,2,80,129,2,112,129,2,80,130,2, +112,130,2,144,130,2,176,130,2,208,130,2,240,130,2,16,131,2,48,131,2,80,131,2,112,131,2,80,132,2,112,132,2,144,132,2,176,132,2,208,132,2,240,132,2,16,133,2, +48,133,2,80,133,2,112,133,2,80,134,2,112,134,2,144,134,2,176,134,2,208,134,2,240,134,2,16,135,2,48,135,2,80,135,2,112,135,2,16,145,2,48,145,2,80,145,2, +112,145,2,10,161,2,42,161,2,73,161,2,105,161,2,16,193,2,48,193,2,80,193,2,112,193,2,80,194,2,112,194,2,144,194,2,176,194,2,208,194,2,240,194,2,16,195,2, +48,195,2,80,195,2,112,195,2,80,196,2,112,196,2,144,196,2,176,196,2,208,196,2,240,196,2,16,197,2,48,197,2,80,197,2,112,197,2,80,198,2,112,198,2,144,198,2, +176,198,2,208,198,2,240,198,2,16,199,2,48,199,2,80,199,2,112,199,2,10,209,2,42,209,2,73,209,2,105,209,2,4,225,2,36,225,2,67,225,2,99,225,2,16,1,3, +48,1,3,80,1,3,112,1,3,80,2,3,112,2,3,144,2,3,176,2,3,208,2,3,240,2,3,16,3,3,48,3,3,80,3,3,112,3,3,80,4,3,112,4,3,144,4,3, +176,4,3,208,4,3,240,4,3,16,5,3,48,5,3,80,5,3,112,5,3,80,6,3,112,6,3,144,6,3,176,6,3,208,6,3,240,6,3,16,7,3,48,7,3,80,7,3, 
+112,7,3,80,8,3,112,8,3,144,8,3,176,8,3,208,8,3,240,8,3,16,9,3,48,9,3,80,9,3,112,9,3,10,17,3,42,17,3,73,17,3,105,17,3,4,33,3, +36,33,3,67,33,3,99,33,3,16,65,3,48,65,3,80,65,3,112,65,3,80,66,3,112,66,3,144,66,3,176,66,3,208,66,3,240,66,3,15,67,3,45,67,3,76,67,3, +106,67,3,80,68,3,112,68,3,144,68,3,176,68,3,208,68,3,240,68,3,15,69,3,45,69,3,76,69,3,106,69,3,80,70,3,112,70,3,144,70,3,176,70,3,208,70,3, +240,70,3,15,71,3,45,71,3,76,71,3,106,71,3,80,72,3,112,72,3,144,72,3,176,72,3,208,72,3,240,72,3,15,73,3,45,73,3,76,73,3,106,73,3,6,81,3, +37,81,3,69,81,3,100,81,3,176,0,4,208,0,4,240,0,4,16,1,4,48,1,4,80,1,4,112,1,4,176,16,4,208,16,4,240,16,4,16,17,4,48,17,4,80,17,4, +112,17,4,176,32,4,208,32,4,240,32,4,16,33,4,48,33,4,80,33,4,112,33,4,176,64,4,208,64,4,240,64,4,16,65,4,48,65,4,80,65,4,112,65,4,48,72,4, +80,72,4,112,72,4,144,72,4,176,72,4,208,72,4,240,72,4,16,73,4,48,73,4,80,73,4,109,73,4,176,80,4,208,80,4,240,80,4,16,81,4,48,81,4,79,81,4, +110,81,4,170,96,4,202,96,4,233,96,4,8,97,4,40,97,4,71,97,4,102,97,4,176,128,4,208,128,4,240,128,4,16,129,4,48,129,4,80,129,4,112,129,4,48,130,4, +80,130,4,112,130,4,144,130,4,176,130,4,208,130,4,240,130,4,16,131,4,48,131,4,80,131,4,109,131,4,48,132,4,80,132,4,112,132,4,144,132,4,176,132,4,208,132,4, +240,132,4,16,133,4,48,133,4,80,133,4,109,133,4,48,134,4,80,134,4,112,134,4,144,134,4,176,134,4,208,134,4,240,134,4,16,135,4,48,135,4,80,135,4,109,135,4, +176,144,4,208,144,4,240,144,4,16,145,4,48,145,4,79,145,4,110,145,4,170,160,4,202,160,4,233,160,4,8,161,4,40,161,4,71,161,4,102,161,4,176,192,4,208,192,4, +240,192,4,16,193,4,48,193,4,80,193,4,112,193,4,48,194,4,80,194,4,112,194,4,144,194,4,176,194,4,208,194,4,240,194,4,14,195,4,43,195,4,73,195,4,102,195,4, +48,196,4,80,196,4,112,196,4,144,196,4,176,196,4,208,196,4,240,196,4,14,197,4,43,197,4,73,197,4,102,197,4,48,198,4,80,198,4,112,198,4,144,198,4,176,198,4, 
+208,198,4,240,198,4,14,199,4,43,199,4,73,199,4,102,199,4,170,208,4,202,208,4,233,208,4,8,209,4,40,209,4,71,209,4,102,209,4,164,224,4,196,224,4,227,224,4, +3,225,4,34,225,4,66,225,4,97,225,4,176,0,5,208,0,5,240,0,5,16,1,5,48,1,5,80,1,5,112,1,5,48,2,5,80,2,5,112,2,5,144,2,5,176,2,5, +208,2,5,240,2,5,14,3,5,43,3,5,73,3,5,102,3,5,48,4,5,80,4,5,112,4,5,144,4,5,176,4,5,208,4,5,240,4,5,14,5,5,43,5,5,73,5,5, +102,5,5,48,6,5,80,6,5,112,6,5,144,6,5,176,6,5,208,6,5,240,6,5,14,7,5,43,7,5,73,7,5,102,7,5,48,8,5,80,8,5,112,8,5,144,8,5, +176,8,5,208,8,5,240,8,5,14,9,5,43,9,5,73,9,5,102,9,5,170,16,5,202,16,5,233,16,5,8,17,5,40,17,5,71,17,5,102,17,5,164,32,5,196,32,5, +227,32,5,3,33,5,34,33,5,66,33,5,97,33,5,176,64,5,208,64,5,240,64,5,16,65,5,48,65,5,80,65,5,112,65,5,48,66,5,80,66,5,112,66,5,144,66,5, +175,66,5,204,66,5,235,66,5,9,67,5,38,67,5,69,67,5,99,67,5,48,68,5,80,68,5,112,68,5,144,68,5,175,68,5,204,68,5,235,68,5,9,69,5,38,69,5, +69,69,5,99,69,5,48,70,5,80,70,5,112,70,5,144,70,5,175,70,5,204,70,5,235,70,5,9,71,5,38,71,5,69,71,5,99,71,5,48,72,5,80,72,5,112,72,5, +144,72,5,175,72,5,204,72,5,235,72,5,9,73,5,38,73,5,69,73,5,99,73,5,166,80,5,197,80,5,229,80,5,4,81,5,36,81,5,67,81,5,99,81,5,112,0,6, +144,0,6,176,0,6,208,0,6,240,0,6,16,1,6,48,1,6,80,1,6,112,1,6,112,16,6,144,16,6,176,16,6,208,16,6,240,16,6,16,17,6,48,17,6,80,17,6, +112,17,6,112,32,6,144,32,6,176,32,6,208,32,6,240,32,6,16,33,6,48,33,6,80,33,6,112,33,6,112,64,6,144,64,6,176,64,6,208,64,6,240,64,6,16,65,6, +48,65,6,80,65,6,112,65,6,48,72,6,80,72,6,112,72,6,144,72,6,176,72,6,208,72,6,240,72,6,13,73,6,40,73,6,68,73,6,112,80,6,144,80,6,176,80,6, +208,80,6,239,80,6,14,81,6,44,81,6,76,81,6,106,81,6,106,96,6,138,96,6,169,96,6,200,96,6,231,96,6,6,97,6,37,97,6,69,97,6,100,97,6,112,128,6, +144,128,6,176,128,6,208,128,6,240,128,6,16,129,6,48,129,6,80,129,6,112,129,6,48,130,6,80,130,6,112,130,6,144,130,6,176,130,6,208,130,6,240,130,6,13,131,6, 
+40,131,6,68,131,6,48,132,6,80,132,6,112,132,6,144,132,6,176,132,6,208,132,6,240,132,6,13,133,6,40,133,6,68,133,6,48,134,6,80,134,6,112,134,6,144,134,6, +176,134,6,208,134,6,240,134,6,13,135,6,40,135,6,68,135,6,112,144,6,144,144,6,176,144,6,208,144,6,239,144,6,14,145,6,44,145,6,76,145,6,106,145,6,106,160,6, +138,160,6,169,160,6,200,160,6,231,160,6,6,161,6,37,161,6,69,161,6,100,161,6,112,192,6,144,192,6,176,192,6,208,192,6,240,192,6,16,193,6,48,193,6,80,193,6, +112,193,6,48,194,6,80,194,6,112,194,6,144,194,6,176,194,6,205,194,6,234,194,6,6,195,6,35,195,6,64,195,6,48,196,6,80,196,6,112,196,6,144,196,6,176,196,6, +205,196,6,234,196,6,6,197,6,35,197,6,64,197,6,48,198,6,80,198,6,112,198,6,144,198,6,176,198,6,205,198,6,234,198,6,6,199,6,35,199,6,64,199,6,106,208,6, +138,208,6,169,208,6,200,208,6,231,208,6,6,209,6,37,209,6,69,209,6,100,209,6,100,224,6,132,224,6,163,224,6,195,224,6,226,224,6,1,225,6,33,225,6,64,225,6, +96,225,6,112,0,7,144,0,7,176,0,7,208,0,7,240,0,7,16,1,7,48,1,7,80,1,7,112,1,7,48,2,7,80,2,7,112,2,7,144,2,7,176,2,7,205,2,7, +234,2,7,6,3,7,35,3,7,64,3,7,48,4,7,80,4,7,112,4,7,144,4,7,176,4,7,205,4,7,234,4,7,6,5,7,35,5,7,64,5,7,48,6,7,80,6,7, +112,6,7,144,6,7,176,6,7,205,6,7,234,6,7,6,7,7,35,7,7,64,7,7,48,8,7,80,8,7,112,8,7,144,8,7,176,8,7,205,8,7,234,8,7,6,9,7, +35,9,7,64,9,7,106,16,7,138,16,7,169,16,7,200,16,7,231,16,7,6,17,7,37,17,7,69,17,7,100,17,7,100,32,7,132,32,7,163,32,7,195,32,7,226,32,7, +1,33,7,33,33,7,64,33,7,96,33,7,112,64,7,144,64,7,176,64,7,208,64,7,240,64,7,16,65,7,48,65,7,80,65,7,111,65,7,48,66,7,80,66,7,111,66,7, +141,66,7,170,66,7,199,66,7,230,66,7,3,67,7,32,67,7,48,68,7,80,68,7,111,68,7,141,68,7,170,68,7,199,68,7,230,68,7,3,69,7,32,69,7,48,70,7, +80,70,7,111,70,7,141,70,7,170,70,7,199,70,7,230,70,7,3,71,7,32,71,7,48,72,7,80,72,7,111,72,7,141,72,7,170,72,7,199,72,7,230,72,7,3,73,7, +32,73,7,102,80,7,133,80,7,164,80,7,196,80,7,227,80,7,3,81,7,34,81,7,65,81,7,97,81,7,80,0,8,112,0,8,144,0,8,176,0,8,208,0,8,240,0,8, 
+16,1,8,48,1,8,80,1,8,112,1,8,80,16,8,112,16,8,144,16,8,176,16,8,208,16,8,240,16,8,16,17,8,48,17,8,80,17,8,112,17,8,80,32,8,112,32,8, +144,32,8,176,32,8,208,32,8,240,32,8,16,33,8,47,33,8,77,33,8,107,33,8,80,64,8,112,64,8,144,64,8,176,64,8,208,64,8,240,64,8,16,65,8,48,65,8, +80,65,8,112,65,8,16,72,8,48,72,8,80,72,8,112,72,8,144,72,8,176,72,8,205,72,8,232,72,8,1,73,8,80,80,8,112,80,8,144,80,8,175,80,8,206,80,8, +236,80,8,11,81,8,41,81,8,72,81,8,102,81,8,74,96,8,105,96,8,136,96,8,167,96,8,198,96,8,229,96,8,4,97,8,35,97,8,66,97,8,97,97,8,80,128,8, +112,128,8,144,128,8,176,128,8,208,128,8,240,128,8,16,129,8,48,129,8,80,129,8,112,129,8,16,130,8,48,130,8,80,130,8,112,130,8,144,130,8,176,130,8,205,130,8, +232,130,8,1,131,8,16,132,8,48,132,8,80,132,8,112,132,8,144,132,8,176,132,8,205,132,8,232,132,8,1,133,8,16,134,8,48,134,8,80,134,8,112,134,8,144,134,8, +176,134,8,205,134,8,232,134,8,1,135,8,80,144,8,112,144,8,144,144,8,175,144,8,206,144,8,236,144,8,11,145,8,41,145,8,72,145,8,102,145,8,74,160,8,105,160,8, +136,160,8,167,160,8,198,160,8,229,160,8,4,161,8,35,161,8,66,161,8,97,161,8,80,192,8,112,192,8,144,192,8,176,192,8,208,192,8,240,192,8,16,193,8,48,193,8, +80,193,8,112,193,8,16,194,8,48,194,8,80,194,8,112,194,8,143,194,8,170,194,8,198,194,8,227,194,8,16,196,8,48,196,8,80,196,8,112,196,8,143,196,8,170,196,8, +198,196,8,227,196,8,16,198,8,48,198,8,80,198,8,112,198,8,143,198,8,170,198,8,198,198,8,227,198,8,74,208,8,105,208,8,136,208,8,167,208,8,198,208,8,229,208,8, +4,209,8,35,209,8,66,209,8,97,209,8,68,224,8,99,224,8,131,224,8,162,224,8,193,224,8,225,224,8,0,225,8,32,225,8,80,0,9,112,0,9,144,0,9,176,0,9, +208,0,9,240,0,9,16,1,9,48,1,9,80,1,9,112,1,9,16,2,9,48,2,9,80,2,9,112,2,9,143,2,9,170,2,9,198,2,9,227,2,9,16,4,9,48,4,9, +80,4,9,112,4,9,143,4,9,170,4,9,198,4,9,227,4,9,16,6,9,48,6,9,80,6,9,112,6,9,143,6,9,170,6,9,198,6,9,227,6,9,16,8,9,48,8,9, 
+80,8,9,112,8,9,143,8,9,170,8,9,198,8,9,227,8,9,74,16,9,105,16,9,136,16,9,167,16,9,198,16,9,229,16,9,4,17,9,35,17,9,66,17,9,97,17,9, +68,32,9,99,32,9,131,32,9,162,32,9,193,32,9,225,32,9,0,33,9,32,33,9,80,64,9,112,64,9,144,64,9,176,64,9,208,64,9,240,64,9,15,65,9,46,65,9, +76,65,9,107,65,9,16,66,9,48,66,9,79,66,9,108,66,9,137,66,9,166,66,9,195,66,9,224,66,9,16,68,9,48,68,9,79,68,9,108,68,9,137,68,9,166,68,9, +195,68,9,224,68,9,16,70,9,48,70,9,79,70,9,108,70,9,137,70,9,166,70,9,195,70,9,224,70,9,16,72,9,48,72,9,79,72,9,108,72,9,137,72,9,166,72,9, +195,72,9,224,72,9,70,80,9,101,80,9,132,80,9,163,80,9,195,80,9,226,80,9,1,81,9,32,81,9,64,81,9,80,0,10,112,0,10,144,0,10,176,0,10,208,0,10, +240,0,10,16,1,10,48,1,10,80,1,10,112,1,10,80,16,10,112,16,10,144,16,10,176,16,10,208,16,10,240,16,10,16,17,10,48,17,10,80,17,10,109,17,10,80,32,10, +112,32,10,144,32,10,176,32,10,208,32,10,240,32,10,13,33,10,43,33,10,73,33,10,102,33,10,80,64,10,112,64,10,144,64,10,176,64,10,208,64,10,240,64,10,16,65,10, +48,65,10,80,65,10,112,65,10,16,72,10,48,72,10,80,72,10,112,72,10,144,72,10,170,72,10,195,72,10,80,80,10,112,80,10,143,80,10,173,80,10,203,80,10,234,80,10, +8,81,10,38,81,10,68,81,10,99,81,10,73,96,10,104,96,10,135,96,10,166,96,10,197,96,10,228,96,10,2,97,10,33,97,10,64,97,10,80,128,10,112,128,10,144,128,10, +176,128,10,208,128,10,240,128,10,16,129,10,48,129,10,80,129,10,112,129,10,16,130,10,48,130,10,80,130,10,112,130,10,144,130,10,170,130,10,195,130,10,16,132,10,48,132,10, +80,132,10,112,132,10,144,132,10,170,132,10,195,132,10,16,134,10,48,134,10,80,134,10,112,134,10,144,134,10,170,134,10,195,134,10,80,144,10,112,144,10,143,144,10,173,144,10, +203,144,10,234,144,10,8,145,10,38,145,10,68,145,10,99,145,10,73,160,10,104,160,10,135,160,10,166,160,10,197,160,10,228,160,10,2,161,10,33,161,10,64,161,10,80,192,10, +112,192,10,144,192,10,176,192,10,208,192,10,240,192,10,16,193,10,48,193,10,79,193,10,108,193,10,16,194,10,48,194,10,80,194,10,109,194,10,138,194,10,164,194,10,16,196,10, 
+48,196,10,80,196,10,109,196,10,138,196,10,164,196,10,16,198,10,48,198,10,80,198,10,109,198,10,138,198,10,164,198,10,73,208,10,104,208,10,135,208,10,166,208,10,197,208,10, +228,208,10,2,209,10,33,209,10,64,209,10,67,224,10,99,224,10,130,224,10,161,224,10,192,224,10,224,224,10,80,0,11,112,0,11,144,0,11,176,0,11,208,0,11,240,0,11, +16,1,11,48,1,11,79,1,11,108,1,11,16,2,11,48,2,11,80,2,11,109,2,11,138,2,11,164,2,11,16,4,11,48,4,11,80,4,11,109,4,11,138,4,11,164,4,11, +16,6,11,48,6,11,80,6,11,109,6,11,138,6,11,164,6,11,16,8,11,48,8,11,80,8,11,109,8,11,138,8,11,164,8,11,73,16,11,104,16,11,135,16,11,166,16,11, +197,16,11,228,16,11,2,17,11,33,17,11,64,17,11,67,32,11,99,32,11,130,32,11,161,32,11,192,32,11,224,32,11,80,64,11,112,64,11,144,64,11,176,64,11,208,64,11, +238,64,11,12,65,11,42,65,11,73,65,11,103,65,11,16,66,11,48,66,11,76,66,11,104,66,11,133,66,11,161,66,11,16,68,11,48,68,11,76,68,11,104,68,11,133,68,11, +161,68,11,16,70,11,48,70,11,76,70,11,104,70,11,133,70,11,161,70,11,16,72,11,48,72,11,76,72,11,104,72,11,133,72,11,161,72,11,69,80,11,100,80,11,131,80,11, +162,80,11,193,80,11,225,80,11,0,81,11,48,0,12,80,0,12,112,0,12,144,0,12,176,0,12,208,0,12,240,0,12,16,1,12,48,1,12,80,1,12,112,1,12,48,16,12, +80,16,12,112,16,12,144,16,12,176,16,12,208,16,12,240,16,12,16,17,12,45,17,12,74,17,12,102,17,12,48,32,12,80,32,12,112,32,12,144,32,12,176,32,12,206,32,12, +236,32,12,9,33,12,38,33,12,68,33,12,97,33,12,48,64,12,80,64,12,112,64,12,144,64,12,176,64,12,208,64,12,240,64,12,16,65,12,48,65,12,80,65,12,111,65,12, +16,72,12,48,72,12,80,72,12,112,72,12,138,72,12,161,72,12,48,80,12,80,80,12,111,80,12,141,80,12,171,80,12,201,80,12,231,80,12,5,81,12,35,81,12,65,81,12, +42,96,12,72,96,12,103,96,12,134,96,12,164,96,12,195,96,12,226,96,12,0,97,12,48,128,12,80,128,12,112,128,12,144,128,12,176,128,12,208,128,12,240,128,12,16,129,12, 
+48,129,12,80,129,12,111,129,12,16,130,12,48,130,12,80,130,12,112,130,12,138,130,12,161,130,12,16,132,12,48,132,12,80,132,12,112,132,12,138,132,12,161,132,12,16,134,12, +48,134,12,80,134,12,112,134,12,138,134,12,161,134,12,48,144,12,80,144,12,111,144,12,141,144,12,171,144,12,201,144,12,231,144,12,5,145,12,35,145,12,65,145,12,42,160,12, +72,160,12,103,160,12,134,160,12,164,160,12,195,160,12,226,160,12,0,161,12,48,192,12,80,192,12,112,192,12,144,192,12,176,192,12,208,192,12,240,192,12,15,193,12,44,193,12, +74,193,12,103,193,12,16,194,12,48,194,12,78,194,12,105,194,12,132,194,12,16,196,12,48,196,12,78,196,12,105,196,12,132,196,12,16,198,12,48,198,12,78,198,12,105,198,12, +132,198,12,42,208,12,72,208,12,103,208,12,134,208,12,164,208,12,195,208,12,226,208,12,0,209,12,36,224,12,67,224,12,98,224,12,129,224,12,160,224,12,48,0,13,80,0,13, +112,0,13,144,0,13,176,0,13,208,0,13,240,0,13,15,1,13,44,1,13,74,1,13,103,1,13,16,2,13,48,2,13,78,2,13,105,2,13,132,2,13,16,4,13,48,4,13, +78,4,13,105,4,13,132,4,13,16,6,13,48,6,13,78,6,13,105,6,13,132,6,13,16,8,13,48,8,13,78,8,13,105,8,13,132,8,13,42,16,13,72,16,13,103,16,13, +134,16,13,164,16,13,195,16,13,226,16,13,0,17,13,36,32,13,67,32,13,98,32,13,129,32,13,160,32,13,48,64,13,80,64,13,112,64,13,144,64,13,175,64,13,205,64,13, +236,64,13,9,65,13,39,65,13,70,65,13,99,65,13,16,66,13,45,66,13,73,66,13,100,66,13,129,66,13,16,68,13,45,68,13,73,68,13,100,68,13,129,68,13,16,70,13, +45,70,13,73,70,13,100,70,13,129,70,13,16,72,13,45,72,13,73,72,13,100,72,13,129,72,13,37,80,13,68,80,13,99,80,13,130,80,13,161,80,13,192,80,13,48,0,14, +80,0,14,112,0,14,144,0,14,176,0,14,208,0,14,240,0,14,16,1,14,48,1,14,80,1,14,112,1,14,48,16,14,80,16,14,112,16,14,144,16,14,176,16,14,208,16,14, +240,16,14,12,17,14,39,17,14,68,17,14,48,32,14,80,32,14,112,32,14,144,32,14,174,32,14,203,32,14,233,32,14,5,33,14,34,33,14,64,33,14,48,64,14,80,64,14, 
+112,64,14,144,64,14,176,64,14,208,64,14,240,64,14,16,65,14,48,65,14,77,65,14,103,65,14,16,72,14,48,72,14,80,72,14,106,72,14,131,72,14,48,80,14,79,80,14, +109,80,14,139,80,14,169,80,14,198,80,14,228,80,14,2,81,14,32,81,14,41,96,14,71,96,14,102,96,14,133,96,14,163,96,14,193,96,14,224,96,14,48,128,14,80,128,14, +112,128,14,144,128,14,176,128,14,208,128,14,240,128,14,16,129,14,48,129,14,77,129,14,103,129,14,16,130,14,48,130,14,80,130,14,106,130,14,131,130,14,16,132,14,48,132,14, +80,132,14,106,132,14,131,132,14,16,134,14,48,134,14,80,134,14,106,134,14,131,134,14,48,144,14,79,144,14,109,144,14,139,144,14,169,144,14,198,144,14,228,144,14,2,145,14, +32,145,14,41,160,14,71,160,14,102,160,14,133,160,14,163,160,14,193,160,14,224,160,14,48,192,14,80,192,14,112,192,14,144,192,14,176,192,14,208,192,14,239,192,14,11,193,14, +40,193,14,70,193,14,98,193,14,16,194,14,48,194,14,74,194,14,100,194,14,16,196,14,48,196,14,74,196,14,100,196,14,16,198,14,48,198,14,74,198,14,100,198,14,41,208,14, +71,208,14,102,208,14,133,208,14,163,208,14,193,208,14,224,208,14,35,224,14,66,224,14,97,224,14,128,224,14,48,0,15,80,0,15,112,0,15,144,0,15,176,0,15,208,0,15, +239,0,15,11,1,15,40,1,15,70,1,15,98,1,15,16,2,15,48,2,15,74,2,15,100,2,15,16,4,15,48,4,15,74,4,15,100,4,15,16,6,15,48,6,15,74,6,15, +100,6,15,16,8,15,48,8,15,74,8,15,100,8,15,41,16,15,71,16,15,102,16,15,133,16,15,163,16,15,193,16,15,224,16,15,35,32,15,66,32,15,97,32,15,128,32,15, +48,64,15,80,64,15,112,64,15,144,64,15,173,64,15,203,64,15,233,64,15,6,65,15,36,65,15,66,65,15,96,65,15,16,66,15,43,66,15,70,66,15,97,66,15,16,68,15, +43,68,15,70,68,15,97,68,15,16,70,15,43,70,15,70,70,15,97,70,15,16,72,15,43,72,15,70,72,15,97,72,15,37,80,15,67,80,15,98,80,15,129,80,15,160,80,15, +48,0,16,80,0,16,112,0,16,144,0,16,176,0,16,208,0,16,240,0,16,16,1,16,48,1,16,80,1,16,48,16,16,80,16,16,112,16,16,144,16,16,176,16,16,208,16,16, 
+236,16,16,6,17,16,33,17,16,48,32,16,80,32,16,112,32,16,143,32,16,171,32,16,200,32,16,229,32,16,1,33,16,48,64,16,80,64,16,112,64,16,144,64,16,176,64,16, +208,64,16,240,64,16,15,65,16,42,65,16,70,65,16,16,72,16,48,72,16,77,72,16,99,72,16,48,80,16,78,80,16,107,80,16,137,80,16,166,80,16,196,80,16,226,80,16, +40,96,16,70,96,16,101,96,16,131,96,16,161,96,16,192,96,16,48,128,16,80,128,16,112,128,16,144,128,16,176,128,16,208,128,16,240,128,16,15,129,16,42,129,16,70,129,16, +16,130,16,48,130,16,77,130,16,99,130,16,16,132,16,48,132,16,77,132,16,99,132,16,16,134,16,48,134,16,77,134,16,99,134,16,48,144,16,78,144,16,107,144,16,137,144,16, +166,144,16,196,144,16,226,144,16,40,160,16,70,160,16,101,160,16,131,160,16,161,160,16,192,160,16,48,192,16,80,192,16,112,192,16,144,192,16,176,192,16,206,192,16,235,192,16, +7,193,16,36,193,16,65,193,16,16,194,16,46,194,16,70,194,16,16,196,16,46,196,16,70,196,16,16,198,16,46,198,16,70,198,16,40,208,16,70,208,16,101,208,16,131,208,16, +161,208,16,192,208,16,35,224,16,65,224,16,96,224,16,128,224,16,48,0,17,80,0,17,112,0,17,144,0,17,176,0,17,206,0,17,235,0,17,7,1,17,36,1,17,65,1,17, +16,2,17,46,2,17,70,2,17,16,4,17,46,4,17,70,4,17,16,6,17,46,6,17,70,6,17,16,8,17,46,8,17,70,8,17,40,16,17,70,16,17,101,16,17,131,16,17, +161,16,17,192,16,17,35,32,17,65,32,17,96,32,17,128,32,17,48,64,17,80,64,17,112,64,17,142,64,17,171,64,17,200,64,17,230,64,17,3,65,17,33,65,17,16,66,17, +41,66,17,67,66,17,16,68,17,41,68,17,67,68,17,16,70,17,41,70,17,67,70,17,16,72,17,41,72,17,67,72,17,36,80,17,67,80,17,97,80,17,128,80,17,48,0,18, +80,0,18,112,0,18,144,0,18,176,0,18,208,0,18,240,0,18,16,1,18,46,1,18,48,16,18,80,16,18,112,16,18,144,16,18,176,16,18,202,16,18,230,16,18,0,17,18, +48,32,18,80,32,18,111,32,18,140,32,18,168,32,18,196,32,18,225,32,18,48,64,18,80,64,18,112,64,18,144,64,18,176,64,18,208,64,18,239,64,18,9,65,18,35,65,18, 
+16,72,18,48,72,18,71,72,18,47,80,18,76,80,18,105,80,18,135,80,18,164,80,18,193,80,18,39,96,18,69,96,18,99,96,18,130,96,18,160,96,18,48,128,18,80,128,18, +112,128,18,144,128,18,176,128,18,208,128,18,239,128,18,9,129,18,35,129,18,16,130,18,48,130,18,71,130,18,16,132,18,48,132,18,71,132,18,16,134,18,48,134,18,71,134,18, +47,144,18,76,144,18,105,144,18,135,144,18,164,144,18,193,144,18,39,160,18,69,160,18,99,160,18,130,160,18,160,160,18,48,192,18,80,192,18,112,192,18,144,192,18,174,192,18, +202,192,18,231,192,18,3,193,18,16,194,18,43,194,18,66,194,18,16,196,18,43,196,18,66,196,18,16,198,18,43,198,18,66,198,18,39,208,18,69,208,18,99,208,18,130,208,18, +160,208,18,34,224,18,65,224,18,96,224,18,48,0,19,80,0,19,112,0,19,144,0,19,174,0,19,202,0,19,231,0,19,3,1,19,16,2,19,43,2,19,66,2,19,16,4,19, +43,4,19,66,4,19,16,6,19,43,6,19,66,6,19,16,8,19,43,8,19,66,8,19,39,16,19,69,16,19,99,16,19,130,16,19,160,16,19,34,32,19,65,32,19,96,32,19, +48,64,19,80,64,19,110,64,19,140,64,19,169,64,19,198,64,19,227,64,19,0,65,19,16,66,19,38,66,19,64,66,19,16,68,19,38,68,19,64,68,19,16,70,19,38,70,19, +64,70,19,16,72,19,38,72,19,64,72,19,35,80,19,66,80,19,96,80,19,16,0,20,48,0,20,80,0,20,112,0,20,144,0,20,176,0,20,208,0,20,240,0,20,14,1,20, +16,16,20,48,16,20,80,16,20,112,16,20,144,16,20,172,16,20,198,16,20,225,16,20,16,32,20,48,32,20,80,32,20,109,32,20,138,32,20,165,32,20,193,32,20,16,64,20, +48,64,20,80,64,20,112,64,20,144,64,20,176,64,20,207,64,20,234,64,20,3,65,20,16,72,20,48,72,20,65,72,20,16,80,20,46,80,20,75,80,20,104,80,20,133,80,20, +162,80,20,10,96,20,39,96,20,68,96,20,98,96,20,129,96,20,16,128,20,48,128,20,80,128,20,112,128,20,144,128,20,176,128,20,207,128,20,234,128,20,3,129,20,16,130,20, +48,130,20,65,130,20,16,132,20,48,132,20,65,132,20,16,134,20,48,134,20,65,134,20,16,144,20,46,144,20,75,144,20,104,144,20,133,144,20,162,144,20,10,160,20,39,160,20, 
+68,160,20,98,160,20,129,160,20,16,192,20,48,192,20,80,192,20,112,192,20,144,192,20,171,192,20,199,192,20,228,192,20,16,194,20,40,194,20,16,196,20,40,196,20,16,198,20, +40,198,20,10,208,20,39,208,20,68,208,20,98,208,20,129,208,20,4,224,20,34,224,20,64,224,20,16,0,21,48,0,21,80,0,21,112,0,21,144,0,21,171,0,21,199,0,21, +228,0,21,16,2,21,40,2,21,16,4,21,40,4,21,16,6,21,40,6,21,16,8,21,40,8,21,10,16,21,39,16,21,68,16,21,98,16,21,129,16,21,4,32,21,34,32,21, +64,32,21,16,64,21,48,64,21,79,64,21,108,64,21,138,64,21,166,64,21,195,64,21,225,64,21,15,66,21,36,66,21,15,68,21,36,68,21,15,70,21,36,70,21,15,72,21, +36,72,21,6,80,21,35,80,21,65,80,21,96,80,21,16,1,22,48,1,22,80,1,22,112,1,22,16,17,22,48,17,22,80,17,22,112,17,22,16,33,22,48,33,22,80,33,22, +112,33,22,16,65,22,48,65,22,80,65,22,112,65,22,80,72,22,112,72,22,144,72,22,176,72,22,208,72,22,240,72,22,16,73,22,48,73,22,80,73,22,112,73,22,16,81,22, +48,81,22,80,81,22,112,81,22,10,97,22,42,97,22,73,97,22,105,97,22,16,129,22,48,129,22,80,129,22,112,129,22,80,130,22,112,130,22,144,130,22,176,130,22,208,130,22, +240,130,22,16,131,22,48,131,22,80,131,22,112,131,22,80,132,22,112,132,22,144,132,22,176,132,22,208,132,22,240,132,22,16,133,22,48,133,22,80,133,22,112,133,22,80,134,22, +112,134,22,144,134,22,176,134,22,208,134,22,240,134,22,16,135,22,48,135,22,80,135,22,112,135,22,16,145,22,48,145,22,80,145,22,112,145,22,10,161,22,42,161,22,73,161,22, +105,161,22,16,193,22,48,193,22,80,193,22,112,193,22,80,194,22,112,194,22,144,194,22,176,194,22,208,194,22,240,194,22,16,195,22,48,195,22,80,195,22,112,195,22,80,196,22, +112,196,22,144,196,22,176,196,22,208,196,22,240,196,22,16,197,22,48,197,22,80,197,22,112,197,22,80,198,22,112,198,22,144,198,22,176,198,22,208,198,22,240,198,22,16,199,22, +48,199,22,80,199,22,112,199,22,10,209,22,42,209,22,73,209,22,105,209,22,4,225,22,36,225,22,67,225,22,99,225,22,16,1,23,48,1,23,80,1,23,112,1,23,80,2,23, 
+112,2,23,144,2,23,176,2,23,208,2,23,240,2,23,16,3,23,48,3,23,80,3,23,112,3,23,80,4,23,112,4,23,144,4,23,176,4,23,208,4,23,240,4,23,16,5,23, +48,5,23,80,5,23,112,5,23,80,6,23,112,6,23,144,6,23,176,6,23,208,6,23,240,6,23,16,7,23,48,7,23,80,7,23,112,7,23,80,8,23,112,8,23,144,8,23, +176,8,23,208,8,23,240,8,23,16,9,23,48,9,23,80,9,23,112,9,23,10,17,23,42,17,23,73,17,23,105,17,23,4,33,23,36,33,23,67,33,23,99,33,23,16,65,23, +48,65,23,80,65,23,112,65,23,80,66,23,112,66,23,144,66,23,176,66,23,208,66,23,240,66,23,15,67,23,45,67,23,76,67,23,106,67,23,80,68,23,112,68,23,144,68,23, +176,68,23,208,68,23,240,68,23,15,69,23,45,69,23,76,69,23,106,69,23,80,70,23,112,70,23,144,70,23,176,70,23,208,70,23,240,70,23,15,71,23,45,71,23,76,71,23, +106,71,23,80,72,23,112,72,23,144,72,23,176,72,23,208,72,23,240,72,23,15,73,23,45,73,23,76,73,23,106,73,23,6,81,23,37,81,23,69,81,23,100,81,23,144,0,24, +176,0,24,208,0,24,240,0,24,16,1,24,48,1,24,80,1,24,112,1,24,144,16,24,176,16,24,208,16,24,240,16,24,16,17,24,48,17,24,80,17,24,112,17,24,144,32,24, +176,32,24,208,32,24,240,32,24,16,33,24,48,33,24,80,33,24,112,33,24,144,64,24,176,64,24,208,64,24,240,64,24,16,65,24,48,65,24,80,65,24,112,65,24,48,72,24, +80,72,24,112,72,24,144,72,24,176,72,24,208,72,24,240,72,24,16,73,24,47,73,24,75,73,24,102,73,24,144,80,24,176,80,24,208,80,24,240,80,24,15,81,24,46,81,24, +77,81,24,108,81,24,138,96,24,170,96,24,201,96,24,232,96,24,7,97,24,39,97,24,70,97,24,101,97,24,144,128,24,176,128,24,208,128,24,240,128,24,16,129,24,48,129,24, +80,129,24,112,129,24,48,130,24,80,130,24,112,130,24,144,130,24,176,130,24,208,130,24,240,130,24,16,131,24,47,131,24,75,131,24,102,131,24,48,132,24,80,132,24,112,132,24, +144,132,24,176,132,24,208,132,24,240,132,24,16,133,24,47,133,24,75,133,24,102,133,24,48,134,24,80,134,24,112,134,24,144,134,24,176,134,24,208,134,24,240,134,24,16,135,24, 
+47,135,24,75,135,24,102,135,24,144,144,24,176,144,24,208,144,24,240,144,24,15,145,24,46,145,24,77,145,24,108,145,24,138,160,24,170,160,24,201,160,24,232,160,24,7,161,24, +39,161,24,70,161,24,101,161,24,144,192,24,176,192,24,208,192,24,240,192,24,16,193,24,48,193,24,80,193,24,112,193,24,48,194,24,80,194,24,112,194,24,144,194,24,176,194,24, +208,194,24,238,194,24,10,195,24,39,195,24,69,195,24,97,195,24,48,196,24,80,196,24,112,196,24,144,196,24,176,196,24,208,196,24,238,196,24,10,197,24,39,197,24,69,197,24, +97,197,24,48,198,24,80,198,24,112,198,24,144,198,24,176,198,24,208,198,24,238,198,24,10,199,24,39,199,24,69,199,24,97,199,24,138,208,24,170,208,24,201,208,24,232,208,24, +7,209,24,39,209,24,70,209,24,101,209,24,132,224,24,164,224,24,195,224,24,227,224,24,2,225,24,34,225,24,65,225,24,97,225,24,144,0,25,176,0,25,208,0,25,240,0,25, +16,1,25,48,1,25,80,1,25,112,1,25,48,2,25,80,2,25,112,2,25,144,2,25,176,2,25,208,2,25,238,2,25,10,3,25,39,3,25,69,3,25,97,3,25,48,4,25, +80,4,25,112,4,25,144,4,25,176,4,25,208,4,25,238,4,25,10,5,25,39,5,25,69,5,25,97,5,25,48,6,25,80,6,25,112,6,25,144,6,25,176,6,25,208,6,25, +238,6,25,10,7,25,39,7,25,69,7,25,97,7,25,48,8,25,80,8,25,112,8,25,144,8,25,176,8,25,208,8,25,238,8,25,10,9,25,39,9,25,69,9,25,97,9,25, +138,16,25,170,16,25,201,16,25,232,16,25,7,17,25,39,17,25,70,17,25,101,17,25,132,32,25,164,32,25,195,32,25,227,32,25,2,33,25,34,33,25,65,33,25,97,33,25, +144,64,25,176,64,25,208,64,25,240,64,25,16,65,25,48,65,25,80,65,25,112,65,25,48,66,25,80,66,25,112,66,25,143,66,25,172,66,25,202,66,25,232,66,25,6,67,25, +35,67,25,65,67,25,48,68,25,80,68,25,112,68,25,143,68,25,172,68,25,202,68,25,232,68,25,6,69,25,35,69,25,65,69,25,48,70,25,80,70,25,112,70,25,143,70,25, +172,70,25,202,70,25,232,70,25,6,71,25,35,71,25,65,71,25,48,72,25,80,72,25,112,72,25,143,72,25,172,72,25,202,72,25,232,72,25,6,73,25,35,73,25,65,73,25, 
+134,80,25,165,80,25,196,80,25,228,80,25,3,81,25,35,81,25,66,81,25,98,81,25,80,0,26,112,0,26,144,0,26,176,0,26,208,0,26,240,0,26,16,1,26,48,1,26, +80,1,26,112,1,26,80,16,26,112,16,26,144,16,26,176,16,26,208,16,26,240,16,26,16,17,26,48,17,26,80,17,26,112,17,26,80,32,26,112,32,26,144,32,26,176,32,26, +208,32,26,240,32,26,16,33,26,47,33,26,77,33,26,107,33,26,80,64,26,112,64,26,144,64,26,176,64,26,208,64,26,240,64,26,16,65,26,48,65,26,80,65,26,112,65,26, +16,72,26,48,72,26,80,72,26,112,72,26,144,72,26,176,72,26,205,72,26,232,72,26,1,73,26,80,80,26,112,80,26,144,80,26,175,80,26,206,80,26,236,80,26,11,81,26, +41,81,26,72,81,26,102,81,26,74,96,26,105,96,26,136,96,26,167,96,26,198,96,26,229,96,26,4,97,26,35,97,26,66,97,26,97,97,26,80,128,26,112,128,26,144,128,26, +176,128,26,208,128,26,240,128,26,16,129,26,48,129,26,80,129,26,112,129,26,16,130,26,48,130,26,80,130,26,112,130,26,144,130,26,176,130,26,205,130,26,232,130,26,1,131,26, +16,132,26,48,132,26,80,132,26,112,132,26,144,132,26,176,132,26,205,132,26,232,132,26,1,133,26,16,134,26,48,134,26,80,134,26,112,134,26,144,134,26,176,134,26,205,134,26, +232,134,26,1,135,26,80,144,26,112,144,26,144,144,26,175,144,26,206,144,26,236,144,26,11,145,26,41,145,26,72,145,26,102,145,26,74,160,26,105,160,26,136,160,26,167,160,26, +198,160,26,229,160,26,4,161,26,35,161,26,66,161,26,97,161,26,80,192,26,112,192,26,144,192,26,176,192,26,208,192,26,240,192,26,16,193,26,48,193,26,80,193,26,112,193,26, +16,194,26,48,194,26,80,194,26,112,194,26,143,194,26,170,194,26,198,194,26,227,194,26,16,196,26,48,196,26,80,196,26,112,196,26,143,196,26,170,196,26,198,196,26,227,196,26, +16,198,26,48,198,26,80,198,26,112,198,26,143,198,26,170,198,26,198,198,26,227,198,26,74,208,26,105,208,26,136,208,26,167,208,26,198,208,26,229,208,26,4,209,26,35,209,26, +66,209,26,97,209,26,68,224,26,99,224,26,131,224,26,162,224,26,193,224,26,225,224,26,0,225,26,32,225,26,80,0,27,112,0,27,144,0,27,176,0,27,208,0,27,240,0,27, 
+16,1,27,48,1,27,80,1,27,112,1,27,16,2,27,48,2,27,80,2,27,112,2,27,143,2,27,170,2,27,198,2,27,227,2,27,16,4,27,48,4,27,80,4,27,112,4,27, +143,4,27,170,4,27,198,4,27,227,4,27,16,6,27,48,6,27,80,6,27,112,6,27,143,6,27,170,6,27,198,6,27,227,6,27,16,8,27,48,8,27,80,8,27,112,8,27, +143,8,27,170,8,27,198,8,27,227,8,27,74,16,27,105,16,27,136,16,27,167,16,27,198,16,27,229,16,27,4,17,27,35,17,27,66,17,27,97,17,27,68,32,27,99,32,27, +131,32,27,162,32,27,193,32,27,225,32,27,0,33,27,32,33,27,80,64,27,112,64,27,144,64,27,176,64,27,208,64,27,240,64,27,15,65,27,46,65,27,76,65,27,107,65,27, +16,66,27,48,66,27,79,66,27,108,66,27,137,66,27,166,66,27,195,66,27,224,66,27,16,68,27,48,68,27,79,68,27,108,68,27,137,68,27,166,68,27,195,68,27,224,68,27, +16,70,27,48,70,27,79,70,27,108,70,27,137,70,27,166,70,27,195,70,27,224,70,27,16,72,27,48,72,27,79,72,27,108,72,27,137,72,27,166,72,27,195,72,27,224,72,27, +70,80,27,101,80,27,132,80,27,163,80,27,195,80,27,226,80,27,1,81,27,32,81,27,64,81,27,48,0,28,80,0,28,112,0,28,144,0,28,176,0,28,208,0,28,240,0,28, +16,1,28,48,1,28,80,1,28,112,1,28,48,16,28,80,16,28,112,16,28,144,16,28,176,16,28,208,16,28,240,16,28,16,17,28,48,17,28,78,17,28,106,17,28,48,32,28, +80,32,28,112,32,28,144,32,28,176,32,28,208,32,28,238,32,28,11,33,28,41,33,28,71,33,28,100,33,28,48,64,28,80,64,28,112,64,28,144,64,28,176,64,28,208,64,28, +240,64,28,16,65,28,48,65,28,80,65,28,112,65,28,16,72,28,48,72,28,80,72,28,112,72,28,143,72,28,166,72,28,48,80,28,80,80,28,112,80,28,142,80,28,172,80,28, +202,80,28,233,80,28,6,81,28,36,81,28,67,81,28,97,81,28,42,96,28,73,96,28,104,96,28,135,96,28,165,96,28,196,96,28,227,96,28,1,97,28,32,97,28,48,128,28, +80,128,28,112,128,28,144,128,28,176,128,28,208,128,28,240,128,28,16,129,28,48,129,28,80,129,28,112,129,28,16,130,28,48,130,28,80,130,28,112,130,28,143,130,28,166,130,28, +16,132,28,48,132,28,80,132,28,112,132,28,143,132,28,166,132,28,16,134,28,48,134,28,80,134,28,112,134,28,143,134,28,166,134,28,48,144,28,80,144,28,112,144,28,142,144,28, 
+172,144,28,202,144,28,233,144,28,6,145,28,36,145,28,67,145,28,97,145,28,42,160,28,73,160,28,104,160,28,135,160,28,165,160,28,196,160,28,227,160,28,1,161,28,32,161,28, +48,192,28,80,192,28,112,192,28,144,192,28,176,192,28,208,192,28,240,192,28,16,193,28,47,193,28,77,193,28,106,193,28,16,194,28,48,194,28,80,194,28,107,194,28,135,194,28, +161,194,28,16,196,28,48,196,28,80,196,28,107,196,28,135,196,28,161,196,28,16,198,28,48,198,28,80,198,28,107,198,28,135,198,28,161,198,28,42,208,28,73,208,28,104,208,28, +135,208,28,165,208,28,196,208,28,227,208,28,1,209,28,32,209,28,36,224,28,67,224,28,98,224,28,130,224,28,161,224,28,192,224,28,48,0,29,80,0,29,112,0,29,144,0,29, +176,0,29,208,0,29,240,0,29,16,1,29,47,1,29,77,1,29,106,1,29,16,2,29,48,2,29,80,2,29,107,2,29,135,2,29,161,2,29,16,4,29,48,4,29,80,4,29, +107,4,29,135,4,29,161,4,29,16,6,29,48,6,29,80,6,29,107,6,29,135,6,29,161,6,29,16,8,29,48,8,29,80,8,29,107,8,29,135,8,29,161,8,29,42,16,29, +73,16,29,104,16,29,135,16,29,165,16,29,196,16,29,227,16,29,1,17,29,32,17,29,36,32,29,67,32,29,98,32,29,130,32,29,161,32,29,192,32,29,48,64,29,80,64,29, +112,64,29,144,64,29,176,64,29,207,64,29,237,64,29,11,65,29,41,65,29,71,65,29,101,65,29,16,66,29,47,66,29,74,66,29,102,66,29,131,66,29,16,68,29,47,68,29, +74,68,29,102,68,29,131,68,29,16,70,29,47,70,29,74,70,29,102,70,29,131,70,29,16,72,29,47,72,29,74,72,29,102,72,29,131,72,29,38,80,29,68,80,29,100,80,29, +131,80,29,162,80,29,193,80,29,224,80,29,48,0,30,80,0,30,112,0,30,144,0,30,176,0,30,208,0,30,240,0,30,16,1,30,48,1,30,80,1,30,112,1,30,48,16,30, +80,16,30,112,16,30,144,16,30,176,16,30,208,16,30,240,16,30,12,17,30,39,17,30,68,17,30,48,32,30,80,32,30,112,32,30,144,32,30,174,32,30,203,32,30,233,32,30, +5,33,30,34,33,30,64,33,30,48,64,30,80,64,30,112,64,30,144,64,30,176,64,30,208,64,30,240,64,30,16,65,30,48,65,30,77,65,30,103,65,30,16,72,30,48,72,30, 
+80,72,30,106,72,30,131,72,30,48,80,30,79,80,30,109,80,30,139,80,30,169,80,30,198,80,30,228,80,30,2,81,30,32,81,30,41,96,30,71,96,30,102,96,30,133,96,30, +163,96,30,193,96,30,224,96,30,48,128,30,80,128,30,112,128,30,144,128,30,176,128,30,208,128,30,240,128,30,16,129,30,48,129,30,77,129,30,103,129,30,16,130,30,48,130,30, +80,130,30,106,130,30,131,130,30,16,132,30,48,132,30,80,132,30,106,132,30,131,132,30,16,134,30,48,134,30,80,134,30,106,134,30,131,134,30,48,144,30,79,144,30,109,144,30, +139,144,30,169,144,30,198,144,30,228,144,30,2,145,30,32,145,30,41,160,30,71,160,30,102,160,30,133,160,30,163,160,30,193,160,30,224,160,30,48,192,30,80,192,30,112,192,30, +144,192,30,176,192,30,208,192,30,239,192,30,11,193,30,40,193,30,70,193,30,98,193,30,16,194,30,48,194,30,74,194,30,100,194,30,16,196,30,48,196,30,74,196,30,100,196,30, +16,198,30,48,198,30,74,198,30,100,198,30,41,208,30,71,208,30,102,208,30,133,208,30,163,208,30,193,208,30,224,208,30,35,224,30,66,224,30,97,224,30,128,224,30,48,0,31, +80,0,31,112,0,31,144,0,31,176,0,31,208,0,31,239,0,31,11,1,31,40,1,31,70,1,31,98,1,31,16,2,31,48,2,31,74,2,31,100,2,31,16,4,31,48,4,31, +74,4,31,100,4,31,16,6,31,48,6,31,74,6,31,100,6,31,16,8,31,48,8,31,74,8,31,100,8,31,41,16,31,71,16,31,102,16,31,133,16,31,163,16,31,193,16,31, +224,16,31,35,32,31,66,32,31,97,32,31,128,32,31,48,64,31,80,64,31,112,64,31,144,64,31,173,64,31,203,64,31,233,64,31,6,65,31,36,65,31,66,65,31,96,65,31, +16,66,31,43,66,31,70,66,31,97,66,31,16,68,31,43,68,31,70,68,31,97,68,31,16,70,31,43,70,31,70,70,31,97,70,31,16,72,31,43,72,31,70,72,31,97,72,31, +37,80,31,67,80,31,98,80,31,129,80,31,160,80,31,48,0,32,80,0,32,112,0,32,144,0,32,176,0,32,208,0,32,240,0,32,16,1,32,48,1,32,48,16,32,80,16,32, +112,16,32,144,16,32,176,16,32,205,16,32,233,16,32,3,17,32,48,32,32,80,32,32,112,32,32,142,32,32,170,32,32,198,32,32,227,32,32,48,64,32,80,64,32,112,64,32, 
+144,64,32,176,64,32,208,64,32,240,64,32,12,65,32,39,65,32,16,72,32,48,72,32,74,72,32,48,80,32,77,80,32,106,80,32,136,80,32,165,80,32,195,80,32,224,80,32, +40,96,32,70,96,32,100,96,32,131,96,32,161,96,32,48,128,32,80,128,32,112,128,32,144,128,32,176,128,32,208,128,32,240,128,32,12,129,32,39,129,32,16,130,32,48,130,32, +74,130,32,16,132,32,48,132,32,74,132,32,16,134,32,48,134,32,74,134,32,48,144,32,77,144,32,106,144,32,136,144,32,165,144,32,195,144,32,224,144,32,40,160,32,70,160,32, +100,160,32,131,160,32,161,160,32,48,192,32,80,192,32,112,192,32,144,192,32,176,192,32,204,192,32,233,192,32,5,193,32,34,193,32,16,194,32,44,194,32,68,194,32,16,196,32, +44,196,32,68,196,32,16,198,32,44,198,32,68,198,32,40,208,32,70,208,32,100,208,32,131,208,32,161,208,32,35,224,32,65,224,32,96,224,32,48,0,33,80,0,33,112,0,33, +144,0,33,176,0,33,204,0,33,233,0,33,5,1,33,34,1,33,16,2,33,44,2,33,68,2,33,16,4,33,44,4,33,68,4,33,16,6,33,44,6,33,68,6,33,16,8,33, +44,8,33,68,8,33,40,16,33,70,16,33,100,16,33,131,16,33,161,16,33,35,32,33,65,32,33,96,32,33,48,64,33,80,64,33,111,64,33,141,64,33,170,64,33,199,64,33, +229,64,33,2,65,33,16,66,33,39,66,33,65,66,33,16,68,33,39,68,33,65,68,33,16,70,33,39,70,33,65,70,33,16,72,33,39,72,33,65,72,33,36,80,33,66,80,33, +97,80,33,128,80,33,16,0,34,48,0,34,80,0,34,112,0,34,144,0,34,176,0,34,208,0,34,240,0,34,14,1,34,16,16,34,48,16,34,80,16,34,112,16,34,144,16,34, +172,16,34,198,16,34,225,16,34,16,32,34,48,32,34,80,32,34,109,32,34,138,32,34,165,32,34,193,32,34,16,64,34,48,64,34,80,64,34,112,64,34,144,64,34,176,64,34, +207,64,34,234,64,34,3,65,34,16,72,34,48,72,34,65,72,34,16,80,34,46,80,34,75,80,34,104,80,34,133,80,34,162,80,34,10,96,34,39,96,34,68,96,34,98,96,34, +129,96,34,16,128,34,48,128,34,80,128,34,112,128,34,144,128,34,176,128,34,207,128,34,234,128,34,3,129,34,16,130,34,48,130,34,65,130,34,16,132,34,48,132,34,65,132,34, 
+16,134,34,48,134,34,65,134,34,16,144,34,46,144,34,75,144,34,104,144,34,133,144,34,162,144,34,10,160,34,39,160,34,68,160,34,98,160,34,129,160,34,16,192,34,48,192,34, +80,192,34,112,192,34,144,192,34,171,192,34,199,192,34,228,192,34,16,194,34,40,194,34,16,196,34,40,196,34,16,198,34,40,198,34,10,208,34,39,208,34,68,208,34,98,208,34, +129,208,34,4,224,34,34,224,34,64,224,34,16,0,35,48,0,35,80,0,35,112,0,35,144,0,35,171,0,35,199,0,35,228,0,35,16,2,35,40,2,35,16,4,35,40,4,35, +16,6,35,40,6,35,16,8,35,40,8,35,10,16,35,39,16,35,68,16,35,98,16,35,129,16,35,4,32,35,34,32,35,64,32,35,16,64,35,48,64,35,79,64,35,108,64,35, +138,64,35,166,64,35,195,64,35,225,64,35,15,66,35,36,66,35,15,68,35,36,68,35,15,70,35,36,70,35,15,72,35,36,72,35,6,80,35,35,80,35,65,80,35,96,80,35, +16,0,36,48,0,36,80,0,36,112,0,36,144,0,36,176,0,36,208,0,36,16,16,36,48,16,36,80,16,36,112,16,36,141,16,36,165,16,36,16,32,36,48,32,36,78,32,36, +106,32,36,134,32,36,161,32,36,16,64,36,48,64,36,80,64,36,112,64,36,144,64,36,174,64,36,199,64,36,16,72,36,40,72,36,16,80,36,44,80,36,73,80,36,101,80,36, +130,80,36,10,96,36,37,96,36,67,96,36,97,96,36,16,128,36,48,128,36,80,128,36,112,128,36,144,128,36,174,128,36,199,128,36,16,130,36,40,130,36,16,132,36,40,132,36, +16,134,36,40,134,36,16,144,36,44,144,36,73,144,36,101,144,36,130,144,36,10,160,36,37,160,36,67,160,36,97,160,36,16,192,36,48,192,36,80,192,36,112,192,36,140,192,36, +167,192,36,194,192,36,16,194,36,35,194,36,16,196,36,35,196,36,16,198,36,35,198,36,10,208,36,37,208,36,67,208,36,97,208,36,4,224,36,33,224,36,16,0,37,48,0,37, +80,0,37,112,0,37,140,0,37,167,0,37,194,0,37,16,2,37,35,2,37,16,4,37,35,4,37,16,6,37,35,6,37,16,8,37,35,8,37,10,16,37,37,16,37,67,16,37, +97,16,37,4,32,37,33,32,37,16,64,37,48,64,37,77,64,37,106,64,37,135,64,37,163,64,37,192,64,37,12,66,37,32,66,37,12,68,37,32,68,37,12,70,37,32,70,37, +12,72,37,32,72,37,5,80,37,34,80,37,64,80,37,16,0,38,48,0,38,80,0,38,112,0,38,144,0,38,176,0,38,16,16,38,48,16,38,80,16,38,109,16,38,135,16,38, 
+16,32,38,48,32,38,75,32,38,102,32,38,130,32,38,16,64,38,48,64,38,80,64,38,112,64,38,144,64,38,167,64,38,16,72,38,33,72,38,16,80,38,43,80,38,70,80,38, +99,80,38,128,80,38,9,96,38,36,96,38,65,96,38,16,128,38,48,128,38,80,128,38,112,128,38,144,128,38,167,128,38,16,130,38,33,130,38,16,132,38,33,132,38,16,134,38, +33,134,38,16,144,38,43,144,38,70,144,38,99,144,38,128,144,38,9,160,38,36,160,38,65,160,38,16,192,38,48,192,38,80,192,38,108,192,38,136,192,38,162,192,38,16,194,38, +16,196,38,16,198,38,9,208,38,36,208,38,65,208,38,3,224,38,32,224,38,16,0,39,48,0,39,80,0,39,108,0,39,136,0,39,162,0,39,16,2,39,16,4,39,16,6,39, +16,8,39,9,16,39,36,16,39,65,16,39,3,32,39,32,32,39,16,64,39,47,64,39,75,64,39,103,64,39,132,64,39,160,64,39,10,66,39,10,68,39,10,70,39,10,72,39, +4,80,39,33,80,39,16,0,40,48,0,40,80,0,40,112,0,40,144,0,40,16,16,40,48,16,40,80,16,40,104,16,40,129,16,40,16,32,40,47,32,40,72,32,40,99,32,40, +16,64,40,48,64,40,80,64,40,112,64,40,138,64,40,16,80,40,41,80,40,68,80,40,96,80,40,8,96,40,35,96,40,64,96,40,16,128,40,48,128,40,80,128,40,112,128,40, +138,128,40,16,144,40,41,144,40,68,144,40,96,144,40,8,160,40,35,160,40,64,160,40,16,192,40,48,192,40,78,192,40,105,192,40,132,192,40,8,208,40,35,208,40,64,208,40, +3,224,40,16,0,41,48,0,41,78,0,41,105,0,41,132,0,41,8,16,41,35,16,41,64,16,41,3,32,41,16,64,41,45,64,41,73,64,41,100,64,41,129,64,41,4,80,41, +32,80,41,16,0,42,48,0,42,80,0,42,112,0,42,144,0,42,16,16,42,48,16,42,76,16,42,99,16,42,16,32,42,44,32,42,69,32,42,16,64,42,48,64,42,80,64,42, +108,64,42,132,64,42,15,80,42,39,80,42,66,80,42,7,96,42,34,96,42,16,128,42,48,128,42,80,128,42,108,128,42,132,128,42,15,144,42,39,144,42,66,144,42,7,160,42, +34,160,42,16,192,42,48,192,42,75,192,42,101,192,42,128,192,42,7,208,42,34,208,42,2,224,42,16,0,43,48,0,43,75,0,43,101,0,43,128,0,43,7,16,43,34,16,43, +2,32,43,16,64,43,44,64,43,70,64,43,98,64,43,3,80,43,176,0,44,208,0,44,240,0,44,16,1,44,48,1,44,80,1,44,112,1,44,176,16,44,208,16,44,240,16,44, 
+16,17,44,48,17,44,80,17,44,112,17,44,176,32,44,208,32,44,240,32,44,16,33,44,48,33,44,80,33,44,112,33,44,176,64,44,208,64,44,240,64,44,16,65,44,48,65,44, +80,65,44,112,65,44,48,72,44,80,72,44,112,72,44,144,72,44,176,72,44,208,72,44,240,72,44,16,73,44,48,73,44,80,73,44,109,73,44,176,80,44,208,80,44,240,80,44, +16,81,44,48,81,44,79,81,44,110,81,44,170,96,44,202,96,44,233,96,44,8,97,44,40,97,44,71,97,44,102,97,44,176,128,44,208,128,44,240,128,44,16,129,44,48,129,44, +80,129,44,112,129,44,48,130,44,80,130,44,112,130,44,144,130,44,176,130,44,208,130,44,240,130,44,16,131,44,48,131,44,80,131,44,109,131,44,48,132,44,80,132,44,112,132,44, +144,132,44,176,132,44,208,132,44,240,132,44,16,133,44,48,133,44,80,133,44,109,133,44,48,134,44,80,134,44,112,134,44,144,134,44,176,134,44,208,134,44,240,134,44,16,135,44, +48,135,44,80,135,44,109,135,44,176,144,44,208,144,44,240,144,44,16,145,44,48,145,44,79,145,44,110,145,44,170,160,44,202,160,44,233,160,44,8,161,44,40,161,44,71,161,44, +102,161,44,176,192,44,208,192,44,240,192,44,16,193,44,48,193,44,80,193,44,112,193,44,48,194,44,80,194,44,112,194,44,144,194,44,176,194,44,208,194,44,240,194,44,14,195,44, +43,195,44,73,195,44,102,195,44,48,196,44,80,196,44,112,196,44,144,196,44,176,196,44,208,196,44,240,196,44,14,197,44,43,197,44,73,197,44,102,197,44,48,198,44,80,198,44, +112,198,44,144,198,44,176,198,44,208,198,44,240,198,44,14,199,44,43,199,44,73,199,44,102,199,44,170,208,44,202,208,44,233,208,44,8,209,44,40,209,44,71,209,44,102,209,44, +164,224,44,196,224,44,227,224,44,3,225,44,34,225,44,66,225,44,97,225,44,176,0,45,208,0,45,240,0,45,16,1,45,48,1,45,80,1,45,112,1,45,48,2,45,80,2,45, +112,2,45,144,2,45,176,2,45,208,2,45,240,2,45,14,3,45,43,3,45,73,3,45,102,3,45,48,4,45,80,4,45,112,4,45,144,4,45,176,4,45,208,4,45,240,4,45, +14,5,45,43,5,45,73,5,45,102,5,45,48,6,45,80,6,45,112,6,45,144,6,45,176,6,45,208,6,45,240,6,45,14,7,45,43,7,45,73,7,45,102,7,45,48,8,45, 
+80,8,45,112,8,45,144,8,45,176,8,45,208,8,45,240,8,45,14,9,45,43,9,45,73,9,45,102,9,45,170,16,45,202,16,45,233,16,45,8,17,45,40,17,45,71,17,45, +102,17,45,164,32,45,196,32,45,227,32,45,3,33,45,34,33,45,66,33,45,97,33,45,176,64,45,208,64,45,240,64,45,16,65,45,48,65,45,80,65,45,112,65,45,48,66,45, +80,66,45,112,66,45,144,66,45,175,66,45,204,66,45,235,66,45,9,67,45,38,67,45,69,67,45,99,67,45,48,68,45,80,68,45,112,68,45,144,68,45,175,68,45,204,68,45, +235,68,45,9,69,45,38,69,45,69,69,45,99,69,45,48,70,45,80,70,45,112,70,45,144,70,45,175,70,45,204,70,45,235,70,45,9,71,45,38,71,45,69,71,45,99,71,45, +48,72,45,80,72,45,112,72,45,144,72,45,175,72,45,204,72,45,235,72,45,9,73,45,38,73,45,69,73,45,99,73,45,166,80,45,197,80,45,229,80,45,4,81,45,36,81,45, +67,81,45,99,81,45,80,0,46,112,0,46,144,0,46,176,0,46,208,0,46,240,0,46,16,1,46,48,1,46,80,1,46,112,1,46,80,16,46,112,16,46,144,16,46,176,16,46, +208,16,46,240,16,46,16,17,46,48,17,46,80,17,46,112,17,46,80,32,46,112,32,46,144,32,46,176,32,46,208,32,46,240,32,46,16,33,46,47,33,46,77,33,46,107,33,46, +80,64,46,112,64,46,144,64,46,176,64,46,208,64,46,240,64,46,16,65,46,48,65,46,80,65,46,112,65,46,16,72,46,48,72,46,80,72,46,112,72,46,144,72,46,176,72,46, +205,72,46,232,72,46,1,73,46,80,80,46,112,80,46,144,80,46,175,80,46,206,80,46,236,80,46,11,81,46,41,81,46,72,81,46,102,81,46,74,96,46,105,96,46,136,96,46, +167,96,46,198,96,46,229,96,46,4,97,46,35,97,46,66,97,46,97,97,46,80,128,46,112,128,46,144,128,46,176,128,46,208,128,46,240,128,46,16,129,46,48,129,46,80,129,46, +112,129,46,16,130,46,48,130,46,80,130,46,112,130,46,144,130,46,176,130,46,205,130,46,232,130,46,1,131,46,16,132,46,48,132,46,80,132,46,112,132,46,144,132,46,176,132,46, +205,132,46,232,132,46,1,133,46,16,134,46,48,134,46,80,134,46,112,134,46,144,134,46,176,134,46,205,134,46,232,134,46,1,135,46,80,144,46,112,144,46,144,144,46,175,144,46, 
+206,144,46,236,144,46,11,145,46,41,145,46,72,145,46,102,145,46,74,160,46,105,160,46,136,160,46,167,160,46,198,160,46,229,160,46,4,161,46,35,161,46,66,161,46,97,161,46, +80,192,46,112,192,46,144,192,46,176,192,46,208,192,46,240,192,46,16,193,46,48,193,46,80,193,46,112,193,46,16,194,46,48,194,46,80,194,46,112,194,46,143,194,46,170,194,46, +198,194,46,227,194,46,16,196,46,48,196,46,80,196,46,112,196,46,143,196,46,170,196,46,198,196,46,227,196,46,16,198,46,48,198,46,80,198,46,112,198,46,143,198,46,170,198,46, +198,198,46,227,198,46,74,208,46,105,208,46,136,208,46,167,208,46,198,208,46,229,208,46,4,209,46,35,209,46,66,209,46,97,209,46,68,224,46,99,224,46,131,224,46,162,224,46, +193,224,46,225,224,46,0,225,46,32,225,46,80,0,47,112,0,47,144,0,47,176,0,47,208,0,47,240,0,47,16,1,47,48,1,47,80,1,47,112,1,47,16,2,47,48,2,47, +80,2,47,112,2,47,143,2,47,170,2,47,198,2,47,227,2,47,16,4,47,48,4,47,80,4,47,112,4,47,143,4,47,170,4,47,198,4,47,227,4,47,16,6,47,48,6,47, +80,6,47,112,6,47,143,6,47,170,6,47,198,6,47,227,6,47,16,8,47,48,8,47,80,8,47,112,8,47,143,8,47,170,8,47,198,8,47,227,8,47,74,16,47,105,16,47, +136,16,47,167,16,47,198,16,47,229,16,47,4,17,47,35,17,47,66,17,47,97,17,47,68,32,47,99,32,47,131,32,47,162,32,47,193,32,47,225,32,47,0,33,47,32,33,47, +80,64,47,112,64,47,144,64,47,176,64,47,208,64,47,240,64,47,15,65,47,46,65,47,76,65,47,107,65,47,16,66,47,48,66,47,79,66,47,108,66,47,137,66,47,166,66,47, +195,66,47,224,66,47,16,68,47,48,68,47,79,68,47,108,68,47,137,68,47,166,68,47,195,68,47,224,68,47,16,70,47,48,70,47,79,70,47,108,70,47,137,70,47,166,70,47, +195,70,47,224,70,47,16,72,47,48,72,47,79,72,47,108,72,47,137,72,47,166,72,47,195,72,47,224,72,47,70,80,47,101,80,47,132,80,47,163,80,47,195,80,47,226,80,47, +1,81,47,32,81,47,64,81,47,48,0,48,80,0,48,112,0,48,144,0,48,176,0,48,208,0,48,240,0,48,16,1,48,48,1,48,80,1,48,112,1,48,48,16,48,80,16,48, 
+112,16,48,144,16,48,176,16,48,208,16,48,240,16,48,16,17,48,45,17,48,74,17,48,102,17,48,48,32,48,80,32,48,112,32,48,144,32,48,176,32,48,206,32,48,236,32,48, +9,33,48,38,33,48,68,33,48,97,33,48,48,64,48,80,64,48,112,64,48,144,64,48,176,64,48,208,64,48,240,64,48,16,65,48,48,65,48,80,65,48,111,65,48,16,72,48, +48,72,48,80,72,48,112,72,48,138,72,48,161,72,48,48,80,48,80,80,48,111,80,48,141,80,48,171,80,48,201,80,48,231,80,48,5,81,48,35,81,48,65,81,48,42,96,48, +72,96,48,103,96,48,134,96,48,164,96,48,195,96,48,226,96,48,0,97,48,48,128,48,80,128,48,112,128,48,144,128,48,176,128,48,208,128,48,240,128,48,16,129,48,48,129,48, +80,129,48,111,129,48,16,130,48,48,130,48,80,130,48,112,130,48,138,130,48,161,130,48,16,132,48,48,132,48,80,132,48,112,132,48,138,132,48,161,132,48,16,134,48,48,134,48, +80,134,48,112,134,48,138,134,48,161,134,48,48,144,48,80,144,48,111,144,48,141,144,48,171,144,48,201,144,48,231,144,48,5,145,48,35,145,48,65,145,48,42,160,48,72,160,48, +103,160,48,134,160,48,164,160,48,195,160,48,226,160,48,0,161,48,48,192,48,80,192,48,112,192,48,144,192,48,176,192,48,208,192,48,240,192,48,15,193,48,44,193,48,74,193,48, +103,193,48,16,194,48,48,194,48,78,194,48,105,194,48,132,194,48,16,196,48,48,196,48,78,196,48,105,196,48,132,196,48,16,198,48,48,198,48,78,198,48,105,198,48,132,198,48, +42,208,48,72,208,48,103,208,48,134,208,48,164,208,48,195,208,48,226,208,48,0,209,48,36,224,48,67,224,48,98,224,48,129,224,48,160,224,48,48,0,49,80,0,49,112,0,49, +144,0,49,176,0,49,208,0,49,240,0,49,15,1,49,44,1,49,74,1,49,103,1,49,16,2,49,48,2,49,78,2,49,105,2,49,132,2,49,16,4,49,48,4,49,78,4,49, +105,4,49,132,4,49,16,6,49,48,6,49,78,6,49,105,6,49,132,6,49,16,8,49,48,8,49,78,8,49,105,8,49,132,8,49,42,16,49,72,16,49,103,16,49,134,16,49, +164,16,49,195,16,49,226,16,49,0,17,49,36,32,49,67,32,49,98,32,49,129,32,49,160,32,49,48,64,49,80,64,49,112,64,49,144,64,49,175,64,49,205,64,49,236,64,49, 
+9,65,49,39,65,49,70,65,49,99,65,49,16,66,49,45,66,49,73,66,49,100,66,49,129,66,49,16,68,49,45,68,49,73,68,49,100,68,49,129,68,49,16,70,49,45,70,49, +73,70,49,100,70,49,129,70,49,16,72,49,45,72,49,73,72,49,100,72,49,129,72,49,37,80,49,68,80,49,99,80,49,130,80,49,161,80,49,192,80,49,48,0,50,80,0,50, +112,0,50,144,0,50,176,0,50,208,0,50,240,0,50,16,1,50,48,1,50,80,1,50,48,16,50,80,16,50,112,16,50,144,16,50,176,16,50,208,16,50,236,16,50,6,17,50, +33,17,50,48,32,50,80,32,50,112,32,50,143,32,50,171,32,50,200,32,50,229,32,50,1,33,50,48,64,50,80,64,50,112,64,50,144,64,50,176,64,50,208,64,50,240,64,50, +15,65,50,42,65,50,70,65,50,16,72,50,48,72,50,77,72,50,99,72,50,48,80,50,78,80,50,107,80,50,137,80,50,166,80,50,196,80,50,226,80,50,40,96,50,70,96,50, +101,96,50,131,96,50,161,96,50,192,96,50,48,128,50,80,128,50,112,128,50,144,128,50,176,128,50,208,128,50,240,128,50,15,129,50,42,129,50,70,129,50,16,130,50,48,130,50, +77,130,50,99,130,50,16,132,50,48,132,50,77,132,50,99,132,50,16,134,50,48,134,50,77,134,50,99,134,50,48,144,50,78,144,50,107,144,50,137,144,50,166,144,50,196,144,50, +226,144,50,40,160,50,70,160,50,101,160,50,131,160,50,161,160,50,192,160,50,48,192,50,80,192,50,112,192,50,144,192,50,176,192,50,206,192,50,235,192,50,7,193,50,36,193,50, +65,193,50,16,194,50,46,194,50,70,194,50,16,196,50,46,196,50,70,196,50,16,198,50,46,198,50,70,198,50,40,208,50,70,208,50,101,208,50,131,208,50,161,208,50,192,208,50, +35,224,50,65,224,50,96,224,50,128,224,50,48,0,51,80,0,51,112,0,51,144,0,51,176,0,51,206,0,51,235,0,51,7,1,51,36,1,51,65,1,51,16,2,51,46,2,51, +70,2,51,16,4,51,46,4,51,70,4,51,16,6,51,46,6,51,70,6,51,16,8,51,46,8,51,70,8,51,40,16,51,70,16,51,101,16,51,131,16,51,161,16,51,192,16,51, +35,32,51,65,32,51,96,32,51,128,32,51,48,64,51,80,64,51,112,64,51,142,64,51,171,64,51,200,64,51,230,64,51,3,65,51,33,65,51,16,66,51,41,66,51,67,66,51, 
+16,68,51,41,68,51,67,68,51,16,70,51,41,70,51,67,70,51,16,72,51,41,72,51,67,72,51,36,80,51,67,80,51,97,80,51,128,80,51,16,0,52,48,0,52,80,0,52, +112,0,52,144,0,52,176,0,52,208,0,52,240,0,52,14,1,52,16,16,52,48,16,52,80,16,52,112,16,52,144,16,52,172,16,52,198,16,52,225,16,52,16,32,52,48,32,52, +80,32,52,109,32,52,138,32,52,165,32,52,193,32,52,16,64,52,48,64,52,80,64,52,112,64,52,144,64,52,176,64,52,207,64,52,234,64,52,3,65,52,16,72,52,48,72,52, +65,72,52,16,80,52,46,80,52,75,80,52,104,80,52,133,80,52,162,80,52,10,96,52,39,96,52,68,96,52,98,96,52,129,96,52,16,128,52,48,128,52,80,128,52,112,128,52, +144,128,52,176,128,52,207,128,52,234,128,52,3,129,52,16,130,52,48,130,52,65,130,52,16,132,52,48,132,52,65,132,52,16,134,52,48,134,52,65,134,52,16,144,52,46,144,52, +75,144,52,104,144,52,133,144,52,162,144,52,10,160,52,39,160,52,68,160,52,98,160,52,129,160,52,16,192,52,48,192,52,80,192,52,112,192,52,144,192,52,171,192,52,199,192,52, +228,192,52,16,194,52,40,194,52,16,196,52,40,196,52,16,198,52,40,198,52,10,208,52,39,208,52,68,208,52,98,208,52,129,208,52,4,224,52,34,224,52,64,224,52,16,0,53, +48,0,53,80,0,53,112,0,53,144,0,53,171,0,53,199,0,53,228,0,53,16,2,53,40,2,53,16,4,53,40,4,53,16,6,53,40,6,53,16,8,53,40,8,53,10,16,53, +39,16,53,68,16,53,98,16,53,129,16,53,4,32,53,34,32,53,64,32,53,16,64,53,48,64,53,79,64,53,108,64,53,138,64,53,166,64,53,195,64,53,225,64,53,15,66,53, +36,66,53,15,68,53,36,68,53,15,70,53,36,70,53,15,72,53,36,72,53,6,80,53,35,80,53,65,80,53,96,80,53,16,0,54,48,0,54,80,0,54,112,0,54,144,0,54, +176,0,54,208,0,54,16,16,54,48,16,54,80,16,54,112,16,54,139,16,54,163,16,54,16,32,54,48,32,54,77,32,54,104,32,54,133,32,54,16,64,54,48,64,54,80,64,54, +112,64,54,144,64,54,172,64,54,196,64,54,16,72,54,38,72,54,16,80,54,44,80,54,72,80,54,100,80,54,129,80,54,9,96,54,37,96,54,66,96,54,96,96,54,16,128,54, +48,128,54,80,128,54,112,128,54,144,128,54,172,128,54,196,128,54,16,130,54,38,130,54,16,132,54,38,132,54,16,134,54,38,134,54,16,144,54,44,144,54,72,144,54,100,144,54, 
+129,144,54,9,160,54,37,160,54,66,160,54,96,160,54,16,192,54,48,192,54,80,192,54,110,192,54,139,192,54,165,192,54,192,192,54,16,194,54,33,194,54,16,196,54,33,196,54, +16,198,54,33,198,54,9,208,54,37,208,54,66,208,54,96,208,54,3,224,54,33,224,54,16,0,55,48,0,55,80,0,55,110,0,55,139,0,55,165,0,55,192,0,55,16,2,55, +33,2,55,16,4,55,33,4,55,16,6,55,33,6,55,16,8,55,33,8,55,9,16,55,37,16,55,66,16,55,96,16,55,3,32,55,33,32,55,16,64,55,48,64,55,76,64,55, +105,64,55,134,64,55,162,64,55,12,66,55,12,68,55,12,70,55,12,72,55,5,80,55,34,80,55,64,80,55,16,0,56,48,0,56,80,0,56,112,0,56,144,0,56,174,0,56, +16,16,56,48,16,56,80,16,56,106,16,56,131,16,56,16,32,56,47,32,56,73,32,56,100,32,56,16,64,56,48,64,56,80,64,56,112,64,56,140,64,56,163,64,56,16,72,56, +16,80,56,41,80,56,69,80,56,97,80,56,8,96,56,35,96,56,64,96,56,16,128,56,48,128,56,80,128,56,112,128,56,140,128,56,163,128,56,16,130,56,16,132,56,16,134,56, +16,144,56,41,144,56,69,144,56,97,144,56,8,160,56,35,160,56,64,160,56,16,192,56,48,192,56,79,192,56,106,192,56,133,192,56,14,194,56,14,196,56,14,198,56,8,208,56, +35,208,56,64,208,56,3,224,56,32,224,56,16,0,57,48,0,57,79,0,57,106,0,57,133,0,57,14,2,57,14,4,57,14,6,57,14,8,57,8,16,57,35,16,57,64,16,57, +3,32,57,32,32,57,16,64,57,46,64,57,73,64,57,101,64,57,130,64,57,9,66,57,9,68,57,9,70,57,9,72,57,4,80,57,32,80,57,16,0,58,48,0,58,80,0,58, +112,0,58,144,0,58,16,16,58,48,16,58,76,16,58,99,16,58,16,32,58,44,32,58,69,32,58,16,64,58,48,64,58,80,64,58,108,64,58,132,64,58,15,80,58,39,80,58, +66,80,58,7,96,58,34,96,58,16,128,58,48,128,58,80,128,58,108,128,58,132,128,58,15,144,58,39,144,58,66,144,58,7,160,58,34,160,58,16,192,58,48,192,58,75,192,58, +101,192,58,128,192,58,7,208,58,34,208,58,2,224,58,16,0,59,48,0,59,75,0,59,101,0,59,128,0,59,7,16,59,34,16,59,2,32,59,16,64,59,44,64,59,70,64,59, +98,64,59,3,80,59,16,0,60,48,0,60,80,0,60,112,0,60,16,16,60,48,16,60,70,16,60,16,32,60,41,32,60,65,32,60,16,64,60,48,64,60,79,64,60,100,64,60, 
+14,80,60,37,80,60,6,96,60,32,96,60,16,128,60,48,128,60,79,128,60,100,128,60,14,144,60,37,144,60,6,160,60,32,160,60,16,192,60,47,192,60,71,192,60,96,192,60, +6,208,60,32,208,60,1,224,60,16,0,61,47,0,61,71,0,61,96,0,61,6,16,61,32,16,61,1,32,61,16,64,61,41,64,61,67,64,61,3,80,61,16,0,62,48,0,62, +80,0,62,16,16,62,45,16,62,64,16,62,16,32,62,38,32,62,16,64,62,48,64,62,73,64,62,12,80,62,34,80,62,5,96,62,16,128,62,48,128,62,73,128,62,12,144,62, +34,144,62,5,160,62,16,192,62,44,192,62,67,192,62,5,208,62,1,224,62,16,0,63,44,0,63,67,0,63,5,16,63,1,32,63,16,64,63,39,64,63,64,64,63,2,80,63, +16,0,64,48,0,64,78,0,64,16,16,64,40,16,64,16,32,64,35,32,64,16,64,64,48,64,64,67,64,64,11,80,64,32,80,64,4,96,64,16,128,64,48,128,64,67,128,64, +11,144,64,32,144,64,4,160,64,16,192,64,41,192,64,4,208,64,0,224,64,16,0,65,41,0,65,4,16,65,0,32,65,15,64,65,36,64,65,1,80,65,112,0,66,144,0,66, +176,0,66,208,0,66,240,0,66,16,1,66,48,1,66,80,1,66,112,1,66,112,16,66,144,16,66,176,16,66,208,16,66,240,16,66,16,17,66,48,17,66,80,17,66,112,17,66, +112,32,66,144,32,66,176,32,66,208,32,66,240,32,66,16,33,66,48,33,66,80,33,66,112,33,66,112,64,66,144,64,66,176,64,66,208,64,66,240,64,66,16,65,66,48,65,66, +80,65,66,112,65,66,48,72,66,80,72,66,112,72,66,144,72,66,176,72,66,208,72,66,240,72,66,13,73,66,40,73,66,68,73,66,112,80,66,144,80,66,176,80,66,208,80,66, +239,80,66,14,81,66,44,81,66,76,81,66,106,81,66,106,96,66,138,96,66,169,96,66,200,96,66,231,96,66,6,97,66,37,97,66,69,97,66,100,97,66,112,128,66,144,128,66, +176,128,66,208,128,66,240,128,66,16,129,66,48,129,66,80,129,66,112,129,66,48,130,66,80,130,66,112,130,66,144,130,66,176,130,66,208,130,66,240,130,66,13,131,66,40,131,66, +68,131,66,48,132,66,80,132,66,112,132,66,144,132,66,176,132,66,208,132,66,240,132,66,13,133,66,40,133,66,68,133,66,48,134,66,80,134,66,112,134,66,144,134,66,176,134,66, 
+208,134,66,240,134,66,13,135,66,40,135,66,68,135,66,112,144,66,144,144,66,176,144,66,208,144,66,239,144,66,14,145,66,44,145,66,76,145,66,106,145,66,106,160,66,138,160,66, +169,160,66,200,160,66,231,160,66,6,161,66,37,161,66,69,161,66,100,161,66,112,192,66,144,192,66,176,192,66,208,192,66,240,192,66,16,193,66,48,193,66,80,193,66,112,193,66, +48,194,66,80,194,66,112,194,66,144,194,66,176,194,66,205,194,66,234,194,66,6,195,66,35,195,66,64,195,66,48,196,66,80,196,66,112,196,66,144,196,66,176,196,66,205,196,66, +234,196,66,6,197,66,35,197,66,64,197,66,48,198,66,80,198,66,112,198,66,144,198,66,176,198,66,205,198,66,234,198,66,6,199,66,35,199,66,64,199,66,106,208,66,138,208,66, +169,208,66,200,208,66,231,208,66,6,209,66,37,209,66,69,209,66,100,209,66,100,224,66,132,224,66,163,224,66,195,224,66,226,224,66,1,225,66,33,225,66,64,225,66,96,225,66, +112,0,67,144,0,67,176,0,67,208,0,67,240,0,67,16,1,67,48,1,67,80,1,67,112,1,67,48,2,67,80,2,67,112,2,67,144,2,67,176,2,67,205,2,67,234,2,67, +6,3,67,35,3,67,64,3,67,48,4,67,80,4,67,112,4,67,144,4,67,176,4,67,205,4,67,234,4,67,6,5,67,35,5,67,64,5,67,48,6,67,80,6,67,112,6,67, +144,6,67,176,6,67,205,6,67,234,6,67,6,7,67,35,7,67,64,7,67,48,8,67,80,8,67,112,8,67,144,8,67,176,8,67,205,8,67,234,8,67,6,9,67,35,9,67, +64,9,67,106,16,67,138,16,67,169,16,67,200,16,67,231,16,67,6,17,67,37,17,67,69,17,67,100,17,67,100,32,67,132,32,67,163,32,67,195,32,67,226,32,67,1,33,67, +33,33,67,64,33,67,96,33,67,112,64,67,144,64,67,176,64,67,208,64,67,240,64,67,16,65,67,48,65,67,80,65,67,111,65,67,48,66,67,80,66,67,111,66,67,141,66,67, +170,66,67,199,66,67,230,66,67,3,67,67,32,67,67,48,68,67,80,68,67,111,68,67,141,68,67,170,68,67,199,68,67,230,68,67,3,69,67,32,69,67,48,70,67,80,70,67, +111,70,67,141,70,67,170,70,67,199,70,67,230,70,67,3,71,67,32,71,67,48,72,67,80,72,67,111,72,67,141,72,67,170,72,67,199,72,67,230,72,67,3,73,67,32,73,67, 
+102,80,67,133,80,67,164,80,67,196,80,67,227,80,67,3,81,67,34,81,67,65,81,67,97,81,67,48,0,68,80,0,68,112,0,68,144,0,68,176,0,68,208,0,68,240,0,68, +16,1,68,48,1,68,80,1,68,112,1,68,48,16,68,80,16,68,112,16,68,144,16,68,176,16,68,208,16,68,240,16,68,16,17,68,48,17,68,78,17,68,106,17,68,48,32,68, +80,32,68,112,32,68,144,32,68,176,32,68,208,32,68,238,32,68,11,33,68,41,33,68,71,33,68,100,33,68,48,64,68,80,64,68,112,64,68,144,64,68,176,64,68,208,64,68, +240,64,68,16,65,68,48,65,68,80,65,68,112,65,68,16,72,68,48,72,68,80,72,68,112,72,68,143,72,68,166,72,68,48,80,68,80,80,68,112,80,68,142,80,68,172,80,68, +202,80,68,233,80,68,6,81,68,36,81,68,67,81,68,97,81,68,42,96,68,73,96,68,104,96,68,135,96,68,165,96,68,196,96,68,227,96,68,1,97,68,32,97,68,48,128,68, +80,128,68,112,128,68,144,128,68,176,128,68,208,128,68,240,128,68,16,129,68,48,129,68,80,129,68,112,129,68,16,130,68,48,130,68,80,130,68,112,130,68,143,130,68,166,130,68, +16,132,68,48,132,68,80,132,68,112,132,68,143,132,68,166,132,68,16,134,68,48,134,68,80,134,68,112,134,68,143,134,68,166,134,68,48,144,68,80,144,68,112,144,68,142,144,68, +172,144,68,202,144,68,233,144,68,6,145,68,36,145,68,67,145,68,97,145,68,42,160,68,73,160,68,104,160,68,135,160,68,165,160,68,196,160,68,227,160,68,1,161,68,32,161,68, +48,192,68,80,192,68,112,192,68,144,192,68,176,192,68,208,192,68,240,192,68,16,193,68,47,193,68,77,193,68,106,193,68,16,194,68,48,194,68,80,194,68,107,194,68,135,194,68, +161,194,68,16,196,68,48,196,68,80,196,68,107,196,68,135,196,68,161,196,68,16,198,68,48,198,68,80,198,68,107,198,68,135,198,68,161,198,68,42,208,68,73,208,68,104,208,68, +135,208,68,165,208,68,196,208,68,227,208,68,1,209,68,32,209,68,36,224,68,67,224,68,98,224,68,130,224,68,161,224,68,192,224,68,48,0,69,80,0,69,112,0,69,144,0,69, +176,0,69,208,0,69,240,0,69,16,1,69,47,1,69,77,1,69,106,1,69,16,2,69,48,2,69,80,2,69,107,2,69,135,2,69,161,2,69,16,4,69,48,4,69,80,4,69, 
+107,4,69,135,4,69,161,4,69,16,6,69,48,6,69,80,6,69,107,6,69,135,6,69,161,6,69,16,8,69,48,8,69,80,8,69,107,8,69,135,8,69,161,8,69,42,16,69, +73,16,69,104,16,69,135,16,69,165,16,69,196,16,69,227,16,69,1,17,69,32,17,69,36,32,69,67,32,69,98,32,69,130,32,69,161,32,69,192,32,69,48,64,69,80,64,69, +112,64,69,144,64,69,176,64,69,207,64,69,237,64,69,11,65,69,41,65,69,71,65,69,101,65,69,16,66,69,47,66,69,74,66,69,102,66,69,131,66,69,16,68,69,47,68,69, +74,68,69,102,68,69,131,68,69,16,70,69,47,70,69,74,70,69,102,70,69,131,70,69,16,72,69,47,72,69,74,72,69,102,72,69,131,72,69,38,80,69,68,80,69,100,80,69, +131,80,69,162,80,69,193,80,69,224,80,69,48,0,70,80,0,70,112,0,70,144,0,70,176,0,70,208,0,70,240,0,70,16,1,70,48,1,70,80,1,70,48,16,70,80,16,70, +112,16,70,144,16,70,176,16,70,208,16,70,236,16,70,6,17,70,33,17,70,48,32,70,80,32,70,112,32,70,143,32,70,171,32,70,200,32,70,229,32,70,1,33,70,48,64,70, +80,64,70,112,64,70,144,64,70,176,64,70,208,64,70,240,64,70,15,65,70,42,65,70,70,65,70,16,72,70,48,72,70,77,72,70,99,72,70,48,80,70,78,80,70,107,80,70, +137,80,70,166,80,70,196,80,70,226,80,70,40,96,70,70,96,70,101,96,70,131,96,70,161,96,70,192,96,70,48,128,70,80,128,70,112,128,70,144,128,70,176,128,70,208,128,70, +240,128,70,15,129,70,42,129,70,70,129,70,16,130,70,48,130,70,77,130,70,99,130,70,16,132,70,48,132,70,77,132,70,99,132,70,16,134,70,48,134,70,77,134,70,99,134,70, +48,144,70,78,144,70,107,144,70,137,144,70,166,144,70,196,144,70,226,144,70,40,160,70,70,160,70,101,160,70,131,160,70,161,160,70,192,160,70,48,192,70,80,192,70,112,192,70, +144,192,70,176,192,70,206,192,70,235,192,70,7,193,70,36,193,70,65,193,70,16,194,70,46,194,70,70,194,70,16,196,70,46,196,70,70,196,70,16,198,70,46,198,70,70,198,70, +40,208,70,70,208,70,101,208,70,131,208,70,161,208,70,192,208,70,35,224,70,65,224,70,96,224,70,128,224,70,48,0,71,80,0,71,112,0,71,144,0,71,176,0,71,206,0,71, 
+235,0,71,7,1,71,36,1,71,65,1,71,16,2,71,46,2,71,70,2,71,16,4,71,46,4,71,70,4,71,16,6,71,46,6,71,70,6,71,16,8,71,46,8,71,70,8,71, +40,16,71,70,16,71,101,16,71,131,16,71,161,16,71,192,16,71,35,32,71,65,32,71,96,32,71,128,32,71,48,64,71,80,64,71,112,64,71,142,64,71,171,64,71,200,64,71, +230,64,71,3,65,71,33,65,71,16,66,71,41,66,71,67,66,71,16,68,71,41,68,71,67,68,71,16,70,71,41,70,71,67,70,71,16,72,71,41,72,71,67,72,71,36,80,71, +67,80,71,97,80,71,128,80,71,16,0,72,48,0,72,80,0,72,112,0,72,144,0,72,176,0,72,208,0,72,240,0,72,16,16,72,48,16,72,80,16,72,112,16,72,144,16,72, +170,16,72,195,16,72,16,32,72,48,32,72,80,32,72,108,32,72,137,32,72,164,32,72,16,64,72,48,64,72,80,64,72,112,64,72,144,64,72,176,64,72,204,64,72,231,64,72, +16,72,72,45,72,72,16,80,72,46,80,72,74,80,72,103,80,72,132,80,72,161,80,72,10,96,72,38,96,72,68,96,72,98,96,72,128,96,72,16,128,72,48,128,72,80,128,72, +112,128,72,144,128,72,176,128,72,204,128,72,231,128,72,16,130,72,45,130,72,16,132,72,45,132,72,16,134,72,45,134,72,16,144,72,46,144,72,74,144,72,103,144,72,132,144,72, +161,144,72,10,160,72,38,160,72,68,160,72,98,160,72,128,160,72,16,192,72,48,192,72,80,192,72,112,192,72,143,192,72,170,192,72,197,192,72,226,192,72,16,194,72,38,194,72, +16,196,72,38,196,72,16,198,72,38,198,72,10,208,72,38,208,72,68,208,72,98,208,72,128,208,72,4,224,72,33,224,72,64,224,72,16,0,73,48,0,73,80,0,73,112,0,73, +143,0,73,170,0,73,197,0,73,226,0,73,16,2,73,38,2,73,16,4,73,38,4,73,16,6,73,38,6,73,16,8,73,38,8,73,10,16,73,38,16,73,68,16,73,98,16,73, +128,16,73,4,32,73,33,32,73,64,32,73,16,64,73,48,64,73,79,64,73,107,64,73,137,64,73,165,64,73,194,64,73,224,64,73,14,66,73,35,66,73,14,68,73,35,68,73, +14,70,73,35,70,73,14,72,73,35,72,73,6,80,73,35,80,73,65,80,73,16,0,74,48,0,74,80,0,74,112,0,74,144,0,74,176,0,74,16,16,74,48,16,74,80,16,74, +109,16,74,135,16,74,16,32,74,48,32,74,75,32,74,102,32,74,130,32,74,16,64,74,48,64,74,80,64,74,112,64,74,144,64,74,167,64,74,16,72,74,33,72,74,16,80,74, 
+43,80,74,70,80,74,99,80,74,128,80,74,9,96,74,36,96,74,65,96,74,16,128,74,48,128,74,80,128,74,112,128,74,144,128,74,167,128,74,16,130,74,33,130,74,16,132,74, +33,132,74,16,134,74,33,134,74,16,144,74,43,144,74,70,144,74,99,144,74,128,144,74,9,160,74,36,160,74,65,160,74,16,192,74,48,192,74,80,192,74,108,192,74,136,192,74, +162,192,74,16,194,74,16,196,74,16,198,74,9,208,74,36,208,74,65,208,74,3,224,74,32,224,74,16,0,75,48,0,75,80,0,75,108,0,75,136,0,75,162,0,75,16,2,75, +16,4,75,16,6,75,16,8,75,9,16,75,36,16,75,65,16,75,3,32,75,32,32,75,16,64,75,47,64,75,75,64,75,103,64,75,132,64,75,160,64,75,10,66,75,10,68,75, +10,70,75,10,72,75,4,80,75,33,80,75,16,0,76,48,0,76,80,0,76,112,0,76,144,0,76,16,16,76,48,16,76,77,16,76,100,16,76,16,32,76,45,32,76,70,32,76, +96,32,76,16,64,76,48,64,76,80,64,76,109,64,76,135,64,76,16,80,76,40,80,76,67,80,76,8,96,76,34,96,76,16,128,76,48,128,76,80,128,76,109,128,76,135,128,76, +16,144,76,40,144,76,67,144,76,8,160,76,34,160,76,16,192,76,48,192,76,76,192,76,102,192,76,130,192,76,8,208,76,34,208,76,2,224,76,16,0,77,48,0,77,76,0,77, +102,0,77,130,0,77,8,16,77,34,16,77,2,32,77,16,64,77,44,64,77,71,64,77,99,64,77,4,80,77,32,80,77,16,0,78,48,0,78,80,0,78,112,0,78,16,16,78, +48,16,78,70,16,78,16,32,78,41,32,78,65,32,78,16,64,78,48,64,78,79,64,78,100,64,78,14,80,78,37,80,78,6,96,78,32,96,78,16,128,78,48,128,78,79,128,78, +100,128,78,14,144,78,37,144,78,6,160,78,32,160,78,16,192,78,47,192,78,71,192,78,96,192,78,6,208,78,32,208,78,1,224,78,16,0,79,47,0,79,71,0,79,96,0,79, +6,16,79,32,16,79,1,32,79,16,64,79,41,64,79,67,64,79,3,80,79,16,0,80,48,0,80,80,0,80,16,16,80,44,16,80,16,32,80,37,32,80,16,64,80,48,64,80, +71,64,80,12,80,80,34,80,80,5,96,80,16,128,80,48,128,80,71,128,80,12,144,80,34,144,80,5,160,80,16,192,80,43,192,80,66,192,80,5,208,80,1,224,80,16,0,81, +43,0,81,66,0,81,5,16,81,1,32,81,16,64,81,38,64,81,64,64,81,2,80,81,16,0,82,48,0,82,16,16,82,38,16,82,16,32,82,33,32,82,16,64,82,47,64,82, 
+10,80,82,4,96,82,16,128,82,47,128,82,10,144,82,4,160,82,16,192,82,39,192,82,4,208,82,0,224,82,16,0,83,39,0,83,4,16,83,0,32,83,15,64,83,35,64,83, +1,80,83,16,0,84,48,0,84,16,16,84,32,16,84,14,32,84,16,64,84,41,64,84,8,80,84,3,96,84,16,128,84,41,128,84,8,144,84,3,160,84,16,192,84,35,192,84, +3,208,84,16,0,85,35,0,85,3,16,85,13,64,85,32,64,85,0,80,85,16,0,86,46,0,86,16,16,86,11,32,86,16,64,86,35,64,86,6,80,86,1,96,86,16,128,86, +35,128,86,6,144,86,1,160,86,16,192,86,1,208,86,16,0,87,1,16,87,11,64,87,80,0,88,112,0,88,144,0,88,176,0,88,208,0,88,240,0,88,16,1,88,48,1,88, +80,1,88,112,1,88,80,16,88,112,16,88,144,16,88,176,16,88,208,16,88,240,16,88,16,17,88,48,17,88,80,17,88,112,17,88,80,32,88,112,32,88,144,32,88,176,32,88, +208,32,88,240,32,88,16,33,88,47,33,88,77,33,88,107,33,88,80,64,88,112,64,88,144,64,88,176,64,88,208,64,88,240,64,88,16,65,88,48,65,88,80,65,88,112,65,88, +16,72,88,48,72,88,80,72,88,112,72,88,144,72,88,176,72,88,205,72,88,232,72,88,1,73,88,80,80,88,112,80,88,144,80,88,175,80,88,206,80,88,236,80,88,11,81,88, +41,81,88,72,81,88,102,81,88,74,96,88,105,96,88,136,96,88,167,96,88,198,96,88,229,96,88,4,97,88,35,97,88,66,97,88,97,97,88,80,128,88,112,128,88,144,128,88, +176,128,88,208,128,88,240,128,88,16,129,88,48,129,88,80,129,88,112,129,88,16,130,88,48,130,88,80,130,88,112,130,88,144,130,88,176,130,88,205,130,88,232,130,88,1,131,88, +16,132,88,48,132,88,80,132,88,112,132,88,144,132,88,176,132,88,205,132,88,232,132,88,1,133,88,16,134,88,48,134,88,80,134,88,112,134,88,144,134,88,176,134,88,205,134,88, +232,134,88,1,135,88,80,144,88,112,144,88,144,144,88,175,144,88,206,144,88,236,144,88,11,145,88,41,145,88,72,145,88,102,145,88,74,160,88,105,160,88,136,160,88,167,160,88, +198,160,88,229,160,88,4,161,88,35,161,88,66,161,88,97,161,88,80,192,88,112,192,88,144,192,88,176,192,88,208,192,88,240,192,88,16,193,88,48,193,88,80,193,88,112,193,88, 
+16,194,88,48,194,88,80,194,88,112,194,88,143,194,88,170,194,88,198,194,88,227,194,88,16,196,88,48,196,88,80,196,88,112,196,88,143,196,88,170,196,88,198,196,88,227,196,88, +16,198,88,48,198,88,80,198,88,112,198,88,143,198,88,170,198,88,198,198,88,227,198,88,74,208,88,105,208,88,136,208,88,167,208,88,198,208,88,229,208,88,4,209,88,35,209,88, +66,209,88,97,209,88,68,224,88,99,224,88,131,224,88,162,224,88,193,224,88,225,224,88,0,225,88,32,225,88,80,0,89,112,0,89,144,0,89,176,0,89,208,0,89,240,0,89, +16,1,89,48,1,89,80,1,89,112,1,89,16,2,89,48,2,89,80,2,89,112,2,89,143,2,89,170,2,89,198,2,89,227,2,89,16,4,89,48,4,89,80,4,89,112,4,89, +143,4,89,170,4,89,198,4,89,227,4,89,16,6,89,48,6,89,80,6,89,112,6,89,143,6,89,170,6,89,198,6,89,227,6,89,16,8,89,48,8,89,80,8,89,112,8,89, +143,8,89,170,8,89,198,8,89,227,8,89,74,16,89,105,16,89,136,16,89,167,16,89,198,16,89,229,16,89,4,17,89,35,17,89,66,17,89,97,17,89,68,32,89,99,32,89, +131,32,89,162,32,89,193,32,89,225,32,89,0,33,89,32,33,89,80,64,89,112,64,89,144,64,89,176,64,89,208,64,89,240,64,89,15,65,89,46,65,89,76,65,89,107,65,89, +16,66,89,48,66,89,79,66,89,108,66,89,137,66,89,166,66,89,195,66,89,224,66,89,16,68,89,48,68,89,79,68,89,108,68,89,137,68,89,166,68,89,195,68,89,224,68,89, +16,70,89,48,70,89,79,70,89,108,70,89,137,70,89,166,70,89,195,70,89,224,70,89,16,72,89,48,72,89,79,72,89,108,72,89,137,72,89,166,72,89,195,72,89,224,72,89, +70,80,89,101,80,89,132,80,89,163,80,89,195,80,89,226,80,89,1,81,89,32,81,89,64,81,89,48,0,90,80,0,90,112,0,90,144,0,90,176,0,90,208,0,90,240,0,90, +16,1,90,48,1,90,80,1,90,112,1,90,48,16,90,80,16,90,112,16,90,144,16,90,176,16,90,208,16,90,240,16,90,12,17,90,39,17,90,68,17,90,48,32,90,80,32,90, +112,32,90,144,32,90,174,32,90,203,32,90,233,32,90,5,33,90,34,33,90,64,33,90,48,64,90,80,64,90,112,64,90,144,64,90,176,64,90,208,64,90,240,64,90,16,65,90, +48,65,90,77,65,90,103,65,90,16,72,90,48,72,90,80,72,90,106,72,90,131,72,90,48,80,90,79,80,90,109,80,90,139,80,90,169,80,90,198,80,90,228,80,90,2,81,90, 
+32,81,90,41,96,90,71,96,90,102,96,90,133,96,90,163,96,90,193,96,90,224,96,90,48,128,90,80,128,90,112,128,90,144,128,90,176,128,90,208,128,90,240,128,90,16,129,90, +48,129,90,77,129,90,103,129,90,16,130,90,48,130,90,80,130,90,106,130,90,131,130,90,16,132,90,48,132,90,80,132,90,106,132,90,131,132,90,16,134,90,48,134,90,80,134,90, +106,134,90,131,134,90,48,144,90,79,144,90,109,144,90,139,144,90,169,144,90,198,144,90,228,144,90,2,145,90,32,145,90,41,160,90,71,160,90,102,160,90,133,160,90,163,160,90, +193,160,90,224,160,90,48,192,90,80,192,90,112,192,90,144,192,90,176,192,90,208,192,90,239,192,90,11,193,90,40,193,90,70,193,90,98,193,90,16,194,90,48,194,90,74,194,90, +100,194,90,16,196,90,48,196,90,74,196,90,100,196,90,16,198,90,48,198,90,74,198,90,100,198,90,41,208,90,71,208,90,102,208,90,133,208,90,163,208,90,193,208,90,224,208,90, +35,224,90,66,224,90,97,224,90,128,224,90,48,0,91,80,0,91,112,0,91,144,0,91,176,0,91,208,0,91,239,0,91,11,1,91,40,1,91,70,1,91,98,1,91,16,2,91, +48,2,91,74,2,91,100,2,91,16,4,91,48,4,91,74,4,91,100,4,91,16,6,91,48,6,91,74,6,91,100,6,91,16,8,91,48,8,91,74,8,91,100,8,91,41,16,91, +71,16,91,102,16,91,133,16,91,163,16,91,193,16,91,224,16,91,35,32,91,66,32,91,97,32,91,128,32,91,48,64,91,80,64,91,112,64,91,144,64,91,173,64,91,203,64,91, +233,64,91,6,65,91,36,65,91,66,65,91,96,65,91,16,66,91,43,66,91,70,66,91,97,66,91,16,68,91,43,68,91,70,68,91,97,68,91,16,70,91,43,70,91,70,70,91, +97,70,91,16,72,91,43,72,91,70,72,91,97,72,91,37,80,91,67,80,91,98,80,91,129,80,91,160,80,91,16,0,92,48,0,92,80,0,92,112,0,92,144,0,92,176,0,92, +208,0,92,240,0,92,14,1,92,16,16,92,48,16,92,80,16,92,112,16,92,144,16,92,172,16,92,198,16,92,225,16,92,16,32,92,48,32,92,80,32,92,109,32,92,138,32,92, +165,32,92,193,32,92,16,64,92,48,64,92,80,64,92,112,64,92,144,64,92,176,64,92,207,64,92,234,64,92,3,65,92,16,72,92,48,72,92,65,72,92,16,80,92,46,80,92, 
+75,80,92,104,80,92,133,80,92,162,80,92,10,96,92,39,96,92,68,96,92,98,96,92,129,96,92,16,128,92,48,128,92,80,128,92,112,128,92,144,128,92,176,128,92,207,128,92, +234,128,92,3,129,92,16,130,92,48,130,92,65,130,92,16,132,92,48,132,92,65,132,92,16,134,92,48,134,92,65,134,92,16,144,92,46,144,92,75,144,92,104,144,92,133,144,92, +162,144,92,10,160,92,39,160,92,68,160,92,98,160,92,129,160,92,16,192,92,48,192,92,80,192,92,112,192,92,144,192,92,171,192,92,199,192,92,228,192,92,16,194,92,40,194,92, +16,196,92,40,196,92,16,198,92,40,198,92,10,208,92,39,208,92,68,208,92,98,208,92,129,208,92,4,224,92,34,224,92,64,224,92,16,0,93,48,0,93,80,0,93,112,0,93, +144,0,93,171,0,93,199,0,93,228,0,93,16,2,93,40,2,93,16,4,93,40,4,93,16,6,93,40,6,93,16,8,93,40,8,93,10,16,93,39,16,93,68,16,93,98,16,93, +129,16,93,4,32,93,34,32,93,64,32,93,16,64,93,48,64,93,79,64,93,108,64,93,138,64,93,166,64,93,195,64,93,225,64,93,15,66,93,36,66,93,15,68,93,36,68,93, +15,70,93,36,70,93,15,72,93,36,72,93,6,80,93,35,80,93,65,80,93,96,80,93,16,0,94,48,0,94,80,0,94,112,0,94,144,0,94,176,0,94,16,16,94,48,16,94, +80,16,94,109,16,94,135,16,94,16,32,94,48,32,94,75,32,94,102,32,94,130,32,94,16,64,94,48,64,94,80,64,94,112,64,94,144,64,94,167,64,94,16,72,94,33,72,94, +16,80,94,43,80,94,70,80,94,99,80,94,128,80,94,9,96,94,36,96,94,65,96,94,16,128,94,48,128,94,80,128,94,112,128,94,144,128,94,167,128,94,16,130,94,33,130,94, +16,132,94,33,132,94,16,134,94,33,134,94,16,144,94,43,144,94,70,144,94,99,144,94,128,144,94,9,160,94,36,160,94,65,160,94,16,192,94,48,192,94,80,192,94,108,192,94, +136,192,94,162,192,94,16,194,94,16,196,94,16,198,94,9,208,94,36,208,94,65,208,94,3,224,94,32,224,94,16,0,95,48,0,95,80,0,95,108,0,95,136,0,95,162,0,95, +16,2,95,16,4,95,16,6,95,16,8,95,9,16,95,36,16,95,65,16,95,3,32,95,32,32,95,16,64,95,47,64,95,75,64,95,103,64,95,132,64,95,160,64,95,10,66,95, +10,68,95,10,70,95,10,72,95,4,80,95,33,80,95,16,0,96,48,0,96,80,0,96,112,0,96,144,0,96,16,16,96,48,16,96,76,16,96,99,16,96,16,32,96,44,32,96, 
+69,32,96,16,64,96,48,64,96,80,64,96,108,64,96,132,64,96,15,80,96,39,80,96,66,80,96,7,96,96,34,96,96,16,128,96,48,128,96,80,128,96,108,128,96,132,128,96, +15,144,96,39,144,96,66,144,96,7,160,96,34,160,96,16,192,96,48,192,96,75,192,96,101,192,96,128,192,96,7,208,96,34,208,96,2,224,96,16,0,97,48,0,97,75,0,97, +101,0,97,128,0,97,7,16,97,34,16,97,2,32,97,16,64,97,44,64,97,70,64,97,98,64,97,3,80,97,16,0,98,48,0,98,80,0,98,16,16,98,47,16,98,67,16,98, +16,32,98,39,32,98,16,64,98,48,64,98,76,64,98,13,80,98,35,80,98,6,96,98,16,128,98,48,128,98,76,128,98,13,144,98,35,144,98,6,160,98,16,192,98,45,192,98, +69,192,98,6,208,98,1,224,98,16,0,99,45,0,99,69,0,99,6,16,99,1,32,99,16,64,99,40,64,99,66,64,99,2,80,99,16,0,100,48,0,100,78,0,100,16,16,100, +40,16,100,16,32,100,35,32,100,16,64,100,48,64,100,67,64,100,11,80,100,32,80,100,4,96,100,16,128,100,48,128,100,67,128,100,11,144,100,32,144,100,4,160,100,16,192,100, +41,192,100,4,208,100,0,224,100,16,0,101,41,0,101,4,16,101,0,32,101,15,64,101,36,64,101,1,80,101,16,0,102,48,0,102,16,16,102,33,16,102,14,32,102,16,64,102, +42,64,102,9,80,102,3,96,102,16,128,102,42,128,102,9,144,102,3,160,102,16,192,102,36,192,102,3,208,102,16,0,103,36,0,103,3,16,103,13,64,103,33,64,103,0,80,103, +16,0,104,46,0,104,16,16,104,11,32,104,16,64,104,35,64,104,6,80,104,1,96,104,16,128,104,35,128,104,6,144,104,1,160,104,16,192,104,1,208,104,16,0,105,1,16,105, +11,64,105,80,0,110,112,0,110,144,0,110,176,0,110,208,0,110,240,0,110,16,1,110,48,1,110,80,1,110,112,1,110,80,16,110,112,16,110,144,16,110,176,16,110,208,16,110, +240,16,110,16,17,110,48,17,110,80,17,110,109,17,110,80,32,110,112,32,110,144,32,110,176,32,110,208,32,110,240,32,110,13,33,110,43,33,110,73,33,110,102,33,110,80,64,110, +112,64,110,144,64,110,176,64,110,208,64,110,240,64,110,16,65,110,48,65,110,80,65,110,112,65,110,16,72,110,48,72,110,80,72,110,112,72,110,144,72,110,170,72,110,195,72,110, 
+80,80,110,112,80,110,143,80,110,173,80,110,203,80,110,234,80,110,8,81,110,38,81,110,68,81,110,99,81,110,73,96,110,104,96,110,135,96,110,166,96,110,197,96,110,228,96,110, +2,97,110,33,97,110,64,97,110,80,128,110,112,128,110,144,128,110,176,128,110,208,128,110,240,128,110,16,129,110,48,129,110,80,129,110,112,129,110,16,130,110,48,130,110,80,130,110, +112,130,110,144,130,110,170,130,110,195,130,110,16,132,110,48,132,110,80,132,110,112,132,110,144,132,110,170,132,110,195,132,110,16,134,110,48,134,110,80,134,110,112,134,110,144,134,110, +170,134,110,195,134,110,80,144,110,112,144,110,143,144,110,173,144,110,203,144,110,234,144,110,8,145,110,38,145,110,68,145,110,99,145,110,73,160,110,104,160,110,135,160,110,166,160,110, +197,160,110,228,160,110,2,161,110,33,161,110,64,161,110,80,192,110,112,192,110,144,192,110,176,192,110,208,192,110,240,192,110,16,193,110,48,193,110,79,193,110,108,193,110,16,194,110, +48,194,110,80,194,110,109,194,110,138,194,110,164,194,110,16,196,110,48,196,110,80,196,110,109,196,110,138,196,110,164,196,110,16,198,110,48,198,110,80,198,110,109,198,110,138,198,110, +164,198,110,73,208,110,104,208,110,135,208,110,166,208,110,197,208,110,228,208,110,2,209,110,33,209,110,64,209,110,67,224,110,99,224,110,130,224,110,161,224,110,192,224,110,224,224,110, +80,0,111,112,0,111,144,0,111,176,0,111,208,0,111,240,0,111,16,1,111,48,1,111,79,1,111,108,1,111,16,2,111,48,2,111,80,2,111,109,2,111,138,2,111,164,2,111, +16,4,111,48,4,111,80,4,111,109,4,111,138,4,111,164,4,111,16,6,111,48,6,111,80,6,111,109,6,111,138,6,111,164,6,111,16,8,111,48,8,111,80,8,111,109,8,111, +138,8,111,164,8,111,73,16,111,104,16,111,135,16,111,166,16,111,197,16,111,228,16,111,2,17,111,33,17,111,64,17,111,67,32,111,99,32,111,130,32,111,161,32,111,192,32,111, +224,32,111,80,64,111,112,64,111,144,64,111,176,64,111,208,64,111,238,64,111,12,65,111,42,65,111,73,65,111,103,65,111,16,66,111,48,66,111,76,66,111,104,66,111,133,66,111, 
+161,66,111,16,68,111,48,68,111,76,68,111,104,68,111,133,68,111,161,68,111,16,70,111,48,70,111,76,70,111,104,70,111,133,70,111,161,70,111,16,72,111,48,72,111,76,72,111, +104,72,111,133,72,111,161,72,111,69,80,111,100,80,111,131,80,111,162,80,111,193,80,111,225,80,111,0,81,111,48,0,112,80,0,112,112,0,112,144,0,112,176,0,112,208,0,112, +240,0,112,16,1,112,48,1,112,48,16,112,80,16,112,112,16,112,144,16,112,176,16,112,205,16,112,233,16,112,3,17,112,48,32,112,80,32,112,112,32,112,142,32,112,170,32,112, +198,32,112,227,32,112,48,64,112,80,64,112,112,64,112,144,64,112,176,64,112,208,64,112,240,64,112,12,65,112,39,65,112,16,72,112,48,72,112,74,72,112,48,80,112,77,80,112, +106,80,112,136,80,112,165,80,112,195,80,112,224,80,112,40,96,112,70,96,112,100,96,112,131,96,112,161,96,112,48,128,112,80,128,112,112,128,112,144,128,112,176,128,112,208,128,112, +240,128,112,12,129,112,39,129,112,16,130,112,48,130,112,74,130,112,16,132,112,48,132,112,74,132,112,16,134,112,48,134,112,74,134,112,48,144,112,77,144,112,106,144,112,136,144,112, +165,144,112,195,144,112,224,144,112,40,160,112,70,160,112,100,160,112,131,160,112,161,160,112,48,192,112,80,192,112,112,192,112,144,192,112,176,192,112,204,192,112,233,192,112,5,193,112, +34,193,112,16,194,112,44,194,112,68,194,112,16,196,112,44,196,112,68,196,112,16,198,112,44,198,112,68,198,112,40,208,112,70,208,112,100,208,112,131,208,112,161,208,112,35,224,112, +65,224,112,96,224,112,48,0,113,80,0,113,112,0,113,144,0,113,176,0,113,204,0,113,233,0,113,5,1,113,34,1,113,16,2,113,44,2,113,68,2,113,16,4,113,44,4,113, +68,4,113,16,6,113,44,6,113,68,6,113,16,8,113,44,8,113,68,8,113,40,16,113,70,16,113,100,16,113,131,16,113,161,16,113,35,32,113,65,32,113,96,32,113,48,64,113, +80,64,113,111,64,113,141,64,113,170,64,113,199,64,113,229,64,113,2,65,113,16,66,113,39,66,113,65,66,113,16,68,113,39,68,113,65,68,113,16,70,113,39,70,113,65,70,113, 
+16,72,113,39,72,113,65,72,113,36,80,113,66,80,113,97,80,113,128,80,113,16,0,114,48,0,114,80,0,114,112,0,114,144,0,114,176,0,114,208,0,114,16,16,114,48,16,114, +80,16,114,112,16,114,139,16,114,163,16,114,16,32,114,48,32,114,77,32,114,104,32,114,133,32,114,16,64,114,48,64,114,80,64,114,112,64,114,144,64,114,172,64,114,196,64,114, +16,72,114,38,72,114,16,80,114,44,80,114,72,80,114,100,80,114,129,80,114,9,96,114,37,96,114,66,96,114,96,96,114,16,128,114,48,128,114,80,128,114,112,128,114,144,128,114, +172,128,114,196,128,114,16,130,114,38,130,114,16,132,114,38,132,114,16,134,114,38,134,114,16,144,114,44,144,114,72,144,114,100,144,114,129,144,114,9,160,114,37,160,114,66,160,114, +96,160,114,16,192,114,48,192,114,80,192,114,110,192,114,139,192,114,165,192,114,192,192,114,16,194,114,33,194,114,16,196,114,33,196,114,16,198,114,33,198,114,9,208,114,37,208,114, +66,208,114,96,208,114,3,224,114,33,224,114,16,0,115,48,0,115,80,0,115,110,0,115,139,0,115,165,0,115,192,0,115,16,2,115,33,2,115,16,4,115,33,4,115,16,6,115, +33,6,115,16,8,115,33,8,115,9,16,115,37,16,115,66,16,115,96,16,115,3,32,115,33,32,115,16,64,115,48,64,115,76,64,115,105,64,115,134,64,115,162,64,115,12,66,115, +12,68,115,12,70,115,12,72,115,5,80,115,34,80,115,64,80,115,16,0,116,48,0,116,80,0,116,112,0,116,144,0,116,16,16,116,48,16,116,77,16,116,100,16,116,16,32,116, +45,32,116,70,32,116,96,32,116,16,64,116,48,64,116,80,64,116,109,64,116,135,64,116,16,80,116,40,80,116,67,80,116,8,96,116,34,96,116,16,128,116,48,128,116,80,128,116, +109,128,116,135,128,116,16,144,116,40,144,116,67,144,116,8,160,116,34,160,116,16,192,116,48,192,116,76,192,116,102,192,116,130,192,116,8,208,116,34,208,116,2,224,116,16,0,117, +48,0,117,76,0,117,102,0,117,130,0,117,8,16,117,34,16,117,2,32,117,16,64,117,44,64,117,71,64,117,99,64,117,4,80,117,32,80,117,16,0,118,48,0,118,80,0,118, 
+16,16,118,47,16,118,67,16,118,16,32,118,39,32,118,16,64,118,48,64,118,76,64,118,13,80,118,35,80,118,6,96,118,16,128,118,48,128,118,76,128,118,13,144,118,35,144,118, +6,160,118,16,192,118,45,192,118,69,192,118,6,208,118,1,224,118,16,0,119,45,0,119,69,0,119,6,16,119,1,32,119,16,64,119,40,64,119,66,64,119,2,80,119,16,0,120, +48,0,120,16,16,120,39,16,120,16,32,120,34,32,120,16,64,120,48,64,120,10,80,120,4,96,120,16,128,120,48,128,120,10,144,120,4,160,120,16,192,120,40,192,120,4,208,120, +0,224,120,16,0,121,40,0,121,4,16,121,0,32,121,15,64,121,36,64,121,1,80,121,16,0,122,48,0,122,16,16,122,13,32,122,16,64,122,39,64,122,8,80,122,2,96,122, +16,128,122,39,128,122,8,144,122,2,160,122,16,192,122,34,192,122,2,208,122,16,0,123,34,0,123,2,16,123,12,64,123,32,64,123,0,80,123,16,0,124,16,16,124,10,32,124, +16,64,124,5,80,124,1,96,124,16,128,124,5,144,124,1,160,124,16,192,124,1,208,124,16,0,125,1,16,125,10,64,125,48,0,132,80,0,132,112,0,132,144,0,132,176,0,132, +208,0,132,240,0,132,16,1,132,48,1,132,80,1,132,112,1,132,48,16,132,80,16,132,112,16,132,144,16,132,176,16,132,208,16,132,240,16,132,16,17,132,45,17,132,74,17,132, +102,17,132,48,32,132,80,32,132,112,32,132,144,32,132,176,32,132,206,32,132,236,32,132,9,33,132,38,33,132,68,33,132,97,33,132,48,64,132,80,64,132,112,64,132,144,64,132, +176,64,132,208,64,132,240,64,132,16,65,132,48,65,132,80,65,132,111,65,132,16,72,132,48,72,132,80,72,132,112,72,132,138,72,132,161,72,132,48,80,132,80,80,132,111,80,132, +141,80,132,171,80,132,201,80,132,231,80,132,5,81,132,35,81,132,65,81,132,42,96,132,72,96,132,103,96,132,134,96,132,164,96,132,195,96,132,226,96,132,0,97,132,48,128,132, +80,128,132,112,128,132,144,128,132,176,128,132,208,128,132,240,128,132,16,129,132,48,129,132,80,129,132,111,129,132,16,130,132,48,130,132,80,130,132,112,130,132,138,130,132,161,130,132, 
+16,132,132,48,132,132,80,132,132,112,132,132,138,132,132,161,132,132,16,134,132,48,134,132,80,134,132,112,134,132,138,134,132,161,134,132,48,144,132,80,144,132,111,144,132,141,144,132, +171,144,132,201,144,132,231,144,132,5,145,132,35,145,132,65,145,132,42,160,132,72,160,132,103,160,132,134,160,132,164,160,132,195,160,132,226,160,132,0,161,132,48,192,132,80,192,132, +112,192,132,144,192,132,176,192,132,208,192,132,240,192,132,15,193,132,44,193,132,74,193,132,103,193,132,16,194,132,48,194,132,78,194,132,105,194,132,132,194,132,16,196,132,48,196,132, +78,196,132,105,196,132,132,196,132,16,198,132,48,198,132,78,198,132,105,198,132,132,198,132,42,208,132,72,208,132,103,208,132,134,208,132,164,208,132,195,208,132,226,208,132,0,209,132, +36,224,132,67,224,132,98,224,132,129,224,132,160,224,132,48,0,133,80,0,133,112,0,133,144,0,133,176,0,133,208,0,133,240,0,133,15,1,133,44,1,133,74,1,133,103,1,133, +16,2,133,48,2,133,78,2,133,105,2,133,132,2,133,16,4,133,48,4,133,78,4,133,105,4,133,132,4,133,16,6,133,48,6,133,78,6,133,105,6,133,132,6,133,16,8,133, +48,8,133,78,8,133,105,8,133,132,8,133,42,16,133,72,16,133,103,16,133,134,16,133,164,16,133,195,16,133,226,16,133,0,17,133,36,32,133,67,32,133,98,32,133,129,32,133, +160,32,133,48,64,133,80,64,133,112,64,133,144,64,133,175,64,133,205,64,133,236,64,133,9,65,133,39,65,133,70,65,133,99,65,133,16,66,133,45,66,133,73,66,133,100,66,133, +129,66,133,16,68,133,45,68,133,73,68,133,100,68,133,129,68,133,16,70,133,45,70,133,73,70,133,100,70,133,129,70,133,16,72,133,45,72,133,73,72,133,100,72,133,129,72,133, +37,80,133,68,80,133,99,80,133,130,80,133,161,80,133,192,80,133,16,0,134,48,0,134,80,0,134,112,0,134,144,0,134,176,0,134,208,0,134,240,0,134,14,1,134,16,16,134, +48,16,134,80,16,134,112,16,134,144,16,134,172,16,134,198,16,134,225,16,134,16,32,134,48,32,134,80,32,134,109,32,134,138,32,134,165,32,134,193,32,134,16,64,134,48,64,134, 
+80,64,134,112,64,134,144,64,134,176,64,134,207,64,134,234,64,134,3,65,134,16,72,134,48,72,134,65,72,134,16,80,134,46,80,134,75,80,134,104,80,134,133,80,134,162,80,134, +10,96,134,39,96,134,68,96,134,98,96,134,129,96,134,16,128,134,48,128,134,80,128,134,112,128,134,144,128,134,176,128,134,207,128,134,234,128,134,3,129,134,16,130,134,48,130,134, +65,130,134,16,132,134,48,132,134,65,132,134,16,134,134,48,134,134,65,134,134,16,144,134,46,144,134,75,144,134,104,144,134,133,144,134,162,144,134,10,160,134,39,160,134,68,160,134, +98,160,134,129,160,134,16,192,134,48,192,134,80,192,134,112,192,134,144,192,134,171,192,134,199,192,134,228,192,134,16,194,134,40,194,134,16,196,134,40,196,134,16,198,134,40,198,134, +10,208,134,39,208,134,68,208,134,98,208,134,129,208,134,4,224,134,34,224,134,64,224,134,16,0,135,48,0,135,80,0,135,112,0,135,144,0,135,171,0,135,199,0,135,228,0,135, +16,2,135,40,2,135,16,4,135,40,4,135,16,6,135,40,6,135,16,8,135,40,8,135,10,16,135,39,16,135,68,16,135,98,16,135,129,16,135,4,32,135,34,32,135,64,32,135, +16,64,135,48,64,135,79,64,135,108,64,135,138,64,135,166,64,135,195,64,135,225,64,135,15,66,135,36,66,135,15,68,135,36,68,135,15,70,135,36,70,135,15,72,135,36,72,135, +6,80,135,35,80,135,65,80,135,96,80,135,16,0,136,48,0,136,80,0,136,112,0,136,144,0,136,174,0,136,16,16,136,48,16,136,80,16,136,106,16,136,131,16,136,16,32,136, +47,32,136,73,32,136,100,32,136,16,64,136,48,64,136,80,64,136,112,64,136,140,64,136,163,64,136,16,72,136,16,80,136,41,80,136,69,80,136,97,80,136,8,96,136,35,96,136, +64,96,136,16,128,136,48,128,136,80,128,136,112,128,136,140,128,136,163,128,136,16,130,136,16,132,136,16,134,136,16,144,136,41,144,136,69,144,136,97,144,136,8,160,136,35,160,136, +64,160,136,16,192,136,48,192,136,79,192,136,106,192,136,133,192,136,14,194,136,14,196,136,14,198,136,8,208,136,35,208,136,64,208,136,3,224,136,32,224,136,16,0,137,48,0,137, 
+79,0,137,106,0,137,133,0,137,14,2,137,14,4,137,14,6,137,14,8,137,8,16,137,35,16,137,64,16,137,3,32,137,32,32,137,16,64,137,46,64,137,73,64,137,101,64,137, +130,64,137,9,66,137,9,68,137,9,70,137,9,72,137,4,80,137,32,80,137,16,0,138,48,0,138,80,0,138,112,0,138,16,16,138,48,16,138,70,16,138,16,32,138,41,32,138, +65,32,138,16,64,138,48,64,138,79,64,138,100,64,138,14,80,138,37,80,138,6,96,138,32,96,138,16,128,138,48,128,138,79,128,138,100,128,138,14,144,138,37,144,138,6,160,138, +32,160,138,16,192,138,47,192,138,71,192,138,96,192,138,6,208,138,32,208,138,1,224,138,16,0,139,47,0,139,71,0,139,96,0,139,6,16,139,32,16,139,1,32,139,16,64,139, +41,64,139,67,64,139,3,80,139,16,0,140,48,0,140,78,0,140,16,16,140,40,16,140,16,32,140,35,32,140,16,64,140,48,64,140,67,64,140,11,80,140,32,80,140,4,96,140, +16,128,140,48,128,140,67,128,140,11,144,140,32,144,140,4,160,140,16,192,140,41,192,140,4,208,140,0,224,140,16,0,141,41,0,141,4,16,141,0,32,141,15,64,141,36,64,141, +1,80,141,16,0,142,48,0,142,16,16,142,13,32,142,16,64,142,39,64,142,8,80,142,2,96,142,16,128,142,39,128,142,8,144,142,2,160,142,16,192,142,34,192,142,2,208,142, +16,0,143,34,0,143,2,16,143,12,64,143,32,64,143,0,80,143,16,0,144,16,16,144,9,32,144,16,64,144,5,80,144,0,96,144,16,128,144,5,144,144,0,160,144,15,192,144, +0,208,144,15,0,145,0,16,145,9,64,145,48,0,154,80,0,154,112,0,154,144,0,154,176,0,154,208,0,154,240,0,154,16,1,154,48,1,154,80,1,154,112,1,154,48,16,154, +80,16,154,112,16,154,144,16,154,176,16,154,208,16,154,240,16,154,12,17,154,39,17,154,68,17,154,48,32,154,80,32,154,112,32,154,144,32,154,174,32,154,203,32,154,233,32,154, +5,33,154,34,33,154,64,33,154,48,64,154,80,64,154,112,64,154,144,64,154,176,64,154,208,64,154,240,64,154,16,65,154,48,65,154,77,65,154,103,65,154,16,72,154,48,72,154, +80,72,154,106,72,154,131,72,154,48,80,154,79,80,154,109,80,154,139,80,154,169,80,154,198,80,154,228,80,154,2,81,154,32,81,154,41,96,154,71,96,154,102,96,154,133,96,154, 
+163,96,154,193,96,154,224,96,154,48,128,154,80,128,154,112,128,154,144,128,154,176,128,154,208,128,154,240,128,154,16,129,154,48,129,154,77,129,154,103,129,154,16,130,154,48,130,154, +80,130,154,106,130,154,131,130,154,16,132,154,48,132,154,80,132,154,106,132,154,131,132,154,16,134,154,48,134,154,80,134,154,106,134,154,131,134,154,48,144,154,79,144,154,109,144,154, +139,144,154,169,144,154,198,144,154,228,144,154,2,145,154,32,145,154,41,160,154,71,160,154,102,160,154,133,160,154,163,160,154,193,160,154,224,160,154,48,192,154,80,192,154,112,192,154, +144,192,154,176,192,154,208,192,154,239,192,154,11,193,154,40,193,154,70,193,154,98,193,154,16,194,154,48,194,154,74,194,154,100,194,154,16,196,154,48,196,154,74,196,154,100,196,154, +16,198,154,48,198,154,74,198,154,100,198,154,41,208,154,71,208,154,102,208,154,133,208,154,163,208,154,193,208,154,224,208,154,35,224,154,66,224,154,97,224,154,128,224,154,48,0,155, +80,0,155,112,0,155,144,0,155,176,0,155,208,0,155,239,0,155,11,1,155,40,1,155,70,1,155,98,1,155,16,2,155,48,2,155,74,2,155,100,2,155,16,4,155,48,4,155, +74,4,155,100,4,155,16,6,155,48,6,155,74,6,155,100,6,155,16,8,155,48,8,155,74,8,155,100,8,155,41,16,155,71,16,155,102,16,155,133,16,155,163,16,155,193,16,155, +224,16,155,35,32,155,66,32,155,97,32,155,128,32,155,48,64,155,80,64,155,112,64,155,144,64,155,173,64,155,203,64,155,233,64,155,6,65,155,36,65,155,66,65,155,96,65,155, +16,66,155,43,66,155,70,66,155,97,66,155,16,68,155,43,68,155,70,68,155,97,68,155,16,70,155,43,70,155,70,70,155,97,70,155,16,72,155,43,72,155,70,72,155,97,72,155, +37,80,155,67,80,155,98,80,155,129,80,155,160,80,155,16,0,156,48,0,156,80,0,156,112,0,156,144,0,156,176,0,156,208,0,156,16,16,156,48,16,156,80,16,156,112,16,156, +141,16,156,165,16,156,16,32,156,48,32,156,78,32,156,106,32,156,134,32,156,161,32,156,16,64,156,48,64,156,80,64,156,112,64,156,144,64,156,174,64,156,199,64,156,16,72,156, 
+40,72,156,16,80,156,44,80,156,73,80,156,101,80,156,130,80,156,10,96,156,37,96,156,67,96,156,97,96,156,16,128,156,48,128,156,80,128,156,112,128,156,144,128,156,174,128,156, +199,128,156,16,130,156,40,130,156,16,132,156,40,132,156,16,134,156,40,134,156,16,144,156,44,144,156,73,144,156,101,144,156,130,144,156,10,160,156,37,160,156,67,160,156,97,160,156, +16,192,156,48,192,156,80,192,156,112,192,156,140,192,156,167,192,156,194,192,156,16,194,156,35,194,156,16,196,156,35,196,156,16,198,156,35,198,156,10,208,156,37,208,156,67,208,156, +97,208,156,4,224,156,33,224,156,16,0,157,48,0,157,80,0,157,112,0,157,140,0,157,167,0,157,194,0,157,16,2,157,35,2,157,16,4,157,35,4,157,16,6,157,35,6,157, +16,8,157,35,8,157,10,16,157,37,16,157,67,16,157,97,16,157,4,32,157,33,32,157,16,64,157,48,64,157,77,64,157,106,64,157,135,64,157,163,64,157,192,64,157,12,66,157, +32,66,157,12,68,157,32,68,157,12,70,157,32,70,157,12,72,157,32,72,157,5,80,157,34,80,157,64,80,157,16,0,158,48,0,158,80,0,158,112,0,158,144,0,158,16,16,158, +48,16,158,76,16,158,99,16,158,16,32,158,44,32,158,69,32,158,16,64,158,48,64,158,80,64,158,108,64,158,132,64,158,15,80,158,39,80,158,66,80,158,7,96,158,34,96,158, +16,128,158,48,128,158,80,128,158,108,128,158,132,128,158,15,144,158,39,144,158,66,144,158,7,160,158,34,160,158,16,192,158,48,192,158,75,192,158,101,192,158,128,192,158,7,208,158, +34,208,158,2,224,158,16,0,159,48,0,159,75,0,159,101,0,159,128,0,159,7,16,159,34,16,159,2,32,159,16,64,159,44,64,159,70,64,159,98,64,159,3,80,159,16,0,160, +48,0,160,80,0,160,16,16,160,44,16,160,16,32,160,37,32,160,16,64,160,48,64,160,71,64,160,12,80,160,34,80,160,5,96,160,16,128,160,48,128,160,71,128,160,12,144,160, +34,144,160,5,160,160,16,192,160,43,192,160,66,192,160,5,208,160,1,224,160,16,0,161,43,0,161,66,0,161,5,16,161,1,32,161,16,64,161,38,64,161,64,64,161,2,80,161, +16,0,162,48,0,162,16,16,162,33,16,162,14,32,162,16,64,162,42,64,162,9,80,162,3,96,162,16,128,162,42,128,162,9,144,162,3,160,162,16,192,162,36,192,162,3,208,162, 
+16,0,163,36,0,163,3,16,163,13,64,163,33,64,163,0,80,163,16,0,164,16,16,164,10,32,164,16,64,164,5,80,164,1,96,164,16,128,164,5,144,164,1,160,164,16,192,164, +1,208,164,16,0,165,1,16,165,10,64,165,48,0,176,80,0,176,112,0,176,144,0,176,176,0,176,208,0,176,240,0,176,16,1,176,48,1,176,80,1,176,48,16,176,80,16,176, +112,16,176,144,16,176,176,16,176,208,16,176,236,16,176,6,17,176,33,17,176,48,32,176,80,32,176,112,32,176,143,32,176,171,32,176,200,32,176,229,32,176,1,33,176,48,64,176, +80,64,176,112,64,176,144,64,176,176,64,176,208,64,176,240,64,176,15,65,176,42,65,176,70,65,176,16,72,176,48,72,176,77,72,176,99,72,176,48,80,176,78,80,176,107,80,176, +137,80,176,166,80,176,196,80,176,226,80,176,40,96,176,70,96,176,101,96,176,131,96,176,161,96,176,192,96,176,48,128,176,80,128,176,112,128,176,144,128,176,176,128,176,208,128,176, +240,128,176,15,129,176,42,129,176,70,129,176,16,130,176,48,130,176,77,130,176,99,130,176,16,132,176,48,132,176,77,132,176,99,132,176,16,134,176,48,134,176,77,134,176,99,134,176, +48,144,176,78,144,176,107,144,176,137,144,176,166,144,176,196,144,176,226,144,176,40,160,176,70,160,176,101,160,176,131,160,176,161,160,176,192,160,176,48,192,176,80,192,176,112,192,176, +144,192,176,176,192,176,206,192,176,235,192,176,7,193,176,36,193,176,65,193,176,16,194,176,46,194,176,70,194,176,16,196,176,46,196,176,70,196,176,16,198,176,46,198,176,70,198,176, +40,208,176,70,208,176,101,208,176,131,208,176,161,208,176,192,208,176,35,224,176,65,224,176,96,224,176,128,224,176,48,0,177,80,0,177,112,0,177,144,0,177,176,0,177,206,0,177, +235,0,177,7,1,177,36,1,177,65,1,177,16,2,177,46,2,177,70,2,177,16,4,177,46,4,177,70,4,177,16,6,177,46,6,177,70,6,177,16,8,177,46,8,177,70,8,177, +40,16,177,70,16,177,101,16,177,131,16,177,161,16,177,192,16,177,35,32,177,65,32,177,96,32,177,128,32,177,48,64,177,80,64,177,112,64,177,142,64,177,171,64,177,200,64,177, 
+230,64,177,3,65,177,33,65,177,16,66,177,41,66,177,67,66,177,16,68,177,41,68,177,67,68,177,16,70,177,41,70,177,67,70,177,16,72,177,41,72,177,67,72,177,36,80,177, +67,80,177,97,80,177,128,80,177,16,0,178,48,0,178,80,0,178,112,0,178,144,0,178,176,0,178,16,16,178,48,16,178,80,16,178,109,16,178,135,16,178,16,32,178,48,32,178, +75,32,178,102,32,178,130,32,178,16,64,178,48,64,178,80,64,178,112,64,178,144,64,178,167,64,178,16,72,178,33,72,178,16,80,178,43,80,178,70,80,178,99,80,178,128,80,178, +9,96,178,36,96,178,65,96,178,16,128,178,48,128,178,80,128,178,112,128,178,144,128,178,167,128,178,16,130,178,33,130,178,16,132,178,33,132,178,16,134,178,33,134,178,16,144,178, +43,144,178,70,144,178,99,144,178,128,144,178,9,160,178,36,160,178,65,160,178,16,192,178,48,192,178,80,192,178,108,192,178,136,192,178,162,192,178,16,194,178,16,196,178,16,198,178, +9,208,178,36,208,178,65,208,178,3,224,178,32,224,178,16,0,179,48,0,179,80,0,179,108,0,179,136,0,179,162,0,179,16,2,179,16,4,179,16,6,179,16,8,179,9,16,179, +36,16,179,65,16,179,3,32,179,32,32,179,16,64,179,47,64,179,75,64,179,103,64,179,132,64,179,160,64,179,10,66,179,10,68,179,10,70,179,10,72,179,4,80,179,33,80,179, +16,0,180,48,0,180,80,0,180,112,0,180,16,16,180,48,16,180,70,16,180,16,32,180,41,32,180,65,32,180,16,64,180,48,64,180,79,64,180,100,64,180,14,80,180,37,80,180, +6,96,180,32,96,180,16,128,180,48,128,180,79,128,180,100,128,180,14,144,180,37,144,180,6,160,180,32,160,180,16,192,180,47,192,180,71,192,180,96,192,180,6,208,180,32,208,180, +1,224,180,16,0,181,47,0,181,71,0,181,96,0,181,6,16,181,32,16,181,1,32,181,16,64,181,41,64,181,67,64,181,3,80,181,16,0,182,48,0,182,16,16,182,38,16,182, +16,32,182,33,32,182,16,64,182,47,64,182,10,80,182,4,96,182,16,128,182,47,128,182,10,144,182,4,160,182,16,192,182,39,192,182,4,208,182,0,224,182,16,0,183,39,0,183, +4,16,183,0,32,183,15,64,183,35,64,183,1,80,183,16,0,184,46,0,184,16,16,184,11,32,184,16,64,184,35,64,184,6,80,184,1,96,184,16,128,184,35,128,184,6,144,184, 
+1,160,184,16,192,184,1,208,184,16,0,185,1,16,185,11,64,185,48,0,198,80,0,198,112,0,198,144,0,198,176,0,198,208,0,198,240,0,198,16,1,198,46,1,198,48,16,198, +80,16,198,112,16,198,144,16,198,176,16,198,202,16,198,230,16,198,0,17,198,48,32,198,80,32,198,111,32,198,140,32,198,168,32,198,196,32,198,225,32,198,48,64,198,80,64,198, +112,64,198,144,64,198,176,64,198,208,64,198,239,64,198,9,65,198,35,65,198,16,72,198,48,72,198,71,72,198,47,80,198,76,80,198,105,80,198,135,80,198,164,80,198,193,80,198, +39,96,198,69,96,198,99,96,198,130,96,198,160,96,198,48,128,198,80,128,198,112,128,198,144,128,198,176,128,198,208,128,198,239,128,198,9,129,198,35,129,198,16,130,198,48,130,198, +71,130,198,16,132,198,48,132,198,71,132,198,16,134,198,48,134,198,71,134,198,47,144,198,76,144,198,105,144,198,135,144,198,164,144,198,193,144,198,39,160,198,69,160,198,99,160,198, +130,160,198,160,160,198,48,192,198,80,192,198,112,192,198,144,192,198,174,192,198,202,192,198,231,192,198,3,193,198,16,194,198,43,194,198,66,194,198,16,196,198,43,196,198,66,196,198, +16,198,198,43,198,198,66,198,198,39,208,198,69,208,198,99,208,198,130,208,198,160,208,198,34,224,198,65,224,198,96,224,198,48,0,199,80,0,199,112,0,199,144,0,199,174,0,199, +202,0,199,231,0,199,3,1,199,16,2,199,43,2,199,66,2,199,16,4,199,43,4,199,66,4,199,16,6,199,43,6,199,66,6,199,16,8,199,43,8,199,66,8,199,39,16,199, +69,16,199,99,16,199,130,16,199,160,16,199,34,32,199,65,32,199,96,32,199,48,64,199,80,64,199,110,64,199,140,64,199,169,64,199,198,64,199,227,64,199,0,65,199,16,66,199, +38,66,199,64,66,199,16,68,199,38,68,199,64,68,199,16,70,199,38,70,199,64,70,199,16,72,199,38,72,199,64,72,199,35,80,199,66,80,199,96,80,199,16,0,200,48,0,200, +80,0,200,112,0,200,144,0,200,16,16,200,48,16,200,80,16,200,104,16,200,129,16,200,16,32,200,47,32,200,72,32,200,99,32,200,16,64,200,48,64,200,80,64,200,112,64,200, 
+138,64,200,16,80,200,41,80,200,68,80,200,96,80,200,8,96,200,35,96,200,64,96,200,16,128,200,48,128,200,80,128,200,112,128,200,138,128,200,16,144,200,41,144,200,68,144,200, +96,144,200,8,160,200,35,160,200,64,160,200,16,192,200,48,192,200,78,192,200,105,192,200,132,192,200,8,208,200,35,208,200,64,208,200,3,224,200,16,0,201,48,0,201,78,0,201, +105,0,201,132,0,201,8,16,201,35,16,201,64,16,201,3,32,201,16,64,201,45,64,201,73,64,201,100,64,201,129,64,201,4,80,201,32,80,201,16,0,202,48,0,202,80,0,202, +16,16,202,45,16,202,64,16,202,16,32,202,38,32,202,16,64,202,48,64,202,73,64,202,12,80,202,34,80,202,5,96,202,16,128,202,48,128,202,73,128,202,12,144,202,34,144,202, +5,160,202,16,192,202,44,192,202,67,192,202,5,208,202,1,224,202,16,0,203,44,0,203,67,0,203,5,16,203,1,32,203,16,64,203,39,64,203,64,64,203,2,80,203,16,0,204, +48,0,204,16,16,204,32,16,204,14,32,204,16,64,204,41,64,204,8,80,204,3,96,204,16,128,204,41,128,204,8,144,204,3,160,204,16,192,204,35,192,204,3,208,204,16,0,205, +35,0,205,3,16,205,13,64,205,32,64,205,0,80,205,16,0,220,48,0,220,80,0,220,112,0,220,144,0,220,176,0,220,208,0,220,240,0,220,14,1,220,16,16,220,48,16,220, +80,16,220,112,16,220,144,16,220,172,16,220,198,16,220,225,16,220,16,32,220,48,32,220,80,32,220,109,32,220,138,32,220,165,32,220,193,32,220,16,64,220,48,64,220,80,64,220, +112,64,220,144,64,220,176,64,220,207,64,220,234,64,220,3,65,220,16,72,220,48,72,220,65,72,220,16,80,220,46,80,220,75,80,220,104,80,220,133,80,220,162,80,220,10,96,220, +39,96,220,68,96,220,98,96,220,129,96,220,16,128,220,48,128,220,80,128,220,112,128,220,144,128,220,176,128,220,207,128,220,234,128,220,3,129,220,16,130,220,48,130,220,65,130,220, +16,132,220,48,132,220,65,132,220,16,134,220,48,134,220,65,134,220,16,144,220,46,144,220,75,144,220,104,144,220,133,144,220,162,144,220,10,160,220,39,160,220,68,160,220,98,160,220, 
+129,160,220,16,192,220,48,192,220,80,192,220,112,192,220,144,192,220,171,192,220,199,192,220,228,192,220,16,194,220,40,194,220,16,196,220,40,196,220,16,198,220,40,198,220,10,208,220, +39,208,220,68,208,220,98,208,220,129,208,220,4,224,220,34,224,220,64,224,220,16,0,221,48,0,221,80,0,221,112,0,221,144,0,221,171,0,221,199,0,221,228,0,221,16,2,221, +40,2,221,16,4,221,40,4,221,16,6,221,40,6,221,16,8,221,40,8,221,10,16,221,39,16,221,68,16,221,98,16,221,129,16,221,4,32,221,34,32,221,64,32,221,16,64,221, +48,64,221,79,64,221,108,64,221,138,64,221,166,64,221,195,64,221,225,64,221,15,66,221,36,66,221,15,68,221,36,68,221,15,70,221,36,70,221,15,72,221,36,72,221,6,80,221, +35,80,221,65,80,221,96,80,221,16,0,222,48,0,222,80,0,222,112,0,222,144,0,222,16,16,222,48,16,222,76,16,222,99,16,222,16,32,222,44,32,222,69,32,222,16,64,222, +48,64,222,80,64,222,108,64,222,132,64,222,15,80,222,39,80,222,66,80,222,7,96,222,34,96,222,16,128,222,48,128,222,80,128,222,108,128,222,132,128,222,15,144,222,39,144,222, +66,144,222,7,160,222,34,160,222,16,192,222,48,192,222,75,192,222,101,192,222,128,192,222,7,208,222,34,208,222,2,224,222,16,0,223,48,0,223,75,0,223,101,0,223,128,0,223, +7,16,223,34,16,223,2,32,223,16,64,223,44,64,223,70,64,223,98,64,223,3,80,223,16,0,224,48,0,224,78,0,224,16,16,224,40,16,224,16,32,224,35,32,224,16,64,224, +48,64,224,67,64,224,11,80,224,32,80,224,4,96,224,16,128,224,48,128,224,67,128,224,11,144,224,32,144,224,4,160,224,16,192,224,41,192,224,4,208,224,0,224,224,16,0,225, +41,0,225,4,16,225,0,32,225,15,64,225,36,64,225,1,80,225,16,0,226,46,0,226,16,16,226,11,32,226,16,64,226,35,64,226,6,80,226,1,96,226,16,128,226,35,128,226, +6,144,226,1,160,226,16,192,226,1,208,226,16,0,227,1,16,227,11,64,227 +}; diff --git a/external/basis_universal/transcoder/basisu_astc_hdr_core.h b/external/basis_universal/transcoder/basisu_astc_hdr_core.h index 8e63f72718..9f6b69e669 100644 --- a/external/basis_universal/transcoder/basisu_astc_hdr_core.h +++ 
b/external/basis_universal/transcoder/basisu_astc_hdr_core.h @@ -1,6 +1,5 @@ // File: basisu_astc_hdr_core.h #pragma once -#include "basisu_astc_helpers.h" namespace basist { @@ -109,7 +108,10 @@ namespace basist const uint32_t TOTAL_BLOCK_MODE_DECS = 75; extern const block_mode_desc g_block_mode_descs[TOTAL_BLOCK_MODE_DECS]; - void copy_weight_grid(bool dual_plane, uint32_t grid_x, uint32_t grid_y, const uint8_t* transcode_weights, astc_helpers::log_astc_block& decomp_blk); + const uint32_t UASTC_6x6_HDR_SIG0 = 0xABCD; // original release (v1.6) + const uint32_t UASTC_6x6_HDR_SIG1 = 0xABCE; // 2x2->4x4 weight grid upsampling change, not backwards compatible with older decoders + + void copy_weight_grid(bool dual_plane, uint32_t grid_x, uint32_t grid_y, const uint8_t* transcode_weights, astc_helpers::log_astc_block& decomp_blk, bool orig_behavior); enum class encoding_type { @@ -181,7 +183,7 @@ namespace basist bool m_hq_ls; bool m_brute_force_weight4_assignment; - + fast_bc6h_params() { init(); @@ -196,6 +198,9 @@ namespace basist } }; + // Encodes to BC6H (unsigned variant). + // pPixels: pointer to 16 RGB half-float/FP16 values (48 total half-floats), in raster order. + // Max encodable value is (in float) basist::MAX_HALF_FLOAT. 
void fast_encode_bc6h(const basist::half_float* pPixels, basist::bc6h_block* pBlock, const fast_bc6h_params ¶ms); bool decode_6x6_hdr(const uint8_t* pComp_data, uint32_t comp_data_size, basisu::vector2D& decoded_blocks, uint32_t& width, uint32_t& height); @@ -203,3 +208,4 @@ namespace basist } // namespace astc_6x6_hdr } // namespace basist + diff --git a/external/basis_universal/transcoder/basisu_astc_helpers.h b/external/basis_universal/transcoder/basisu_astc_helpers.h index 37918054d3..ef47285210 100644 --- a/external/basis_universal/transcoder/basisu_astc_helpers.h +++ b/external/basis_universal/transcoder/basisu_astc_helpers.h @@ -10,12 +10,22 @@ namespace astc_helpers { - const uint32_t MAX_WEIGHT_VALUE = 64; // grid texel weights must range from [0,64] const uint32_t MIN_GRID_DIM = 2; // the minimum dimension of a block's weight grid const uint32_t MIN_BLOCK_DIM = 4, MAX_BLOCK_DIM = 12; // the valid block dimensions in texels + const uint32_t MAX_BLOCK_PIXELS = MAX_BLOCK_DIM * MAX_BLOCK_DIM; const uint32_t MAX_GRID_WEIGHTS = 64; // a block may have a maximum of 64 weight grid values - const uint32_t NUM_MODE11_ENDPOINTS = 6, NUM_MODE7_ENDPOINTS = 4; - + const uint32_t MAX_CEM_ENDPOINT_VALS = 8; // see Table 94. ASTC LDR/HDR color endpoint modes (max 8 values to encode any CEM, minimum 2) + + // The number of BISE values needed to encode endpoints for each CEM. + const uint32_t NUM_MODE0_ENDPOINTS = 2, NUM_MODE4_ENDPOINTS = 4; + const uint32_t NUM_MODE6_ENDPOINTS = 4, NUM_MODE8_ENDPOINTS = 6, NUM_MODE9_ENDPOINTS = 6; // LDR RGB + const uint32_t NUM_MODE10_ENDPOINTS = 6, NUM_MODE12_ENDPOINTS = 8, NUM_MODE13_ENDPOINTS = 8; // LDR RGBA + const uint32_t NUM_MODE11_ENDPOINTS = 6, NUM_MODE7_ENDPOINTS = 4; // hdr + + const uint32_t MAX_WEIGHTS = 32; // max supported # of weights (or "selectors") in any mode, i.e. the max # of colors per endpoint pair + const uint32_t MAX_WEIGHT_INTERPOLANT_VALUE = 64; // grid texel weights must range from [0,64], i.e. 
the weight interpolant range is [0,64] + + // 14 unique block dimensions supported by ASTC static const uint32_t NUM_ASTC_BLOCK_SIZES = 14; extern const uint8_t g_astc_block_sizes[NUM_ASTC_BLOCK_SIZES][2]; @@ -70,6 +80,29 @@ namespace astc_helpers const uint32_t TOTAL_ISE_RANGES = 21; + enum + { + cBLOCK_SIZE_4x4 = 0, // 16 samples + cBLOCK_SIZE_5x4 = 1, // 20 samples + cBLOCK_SIZE_5x5 = 2, // 25 samples + cBLOCK_SIZE_6x5 = 3, // 30 samples + + cBLOCK_SIZE_6x6 = 4, // 36 samples + cBLOCK_SIZE_8x5 = 5, // 40 samples + cBLOCK_SIZE_8x6 = 6, // 48 samples + cBLOCK_SIZE_10x5 = 7, // 50 samples + + cBLOCK_SIZE_10x6 = 8, // 60 samples + cBLOCK_SIZE_8x8 = 9, // 64 samples + cBLOCK_SIZE_10x8 = 10, // 80 samples + cBLOCK_SIZE_10x10 = 11, // 100 samples + + cBLOCK_SIZE_12x10 = 12, // 120 samples + cBLOCK_SIZE_12x12 = 13, // 144 samples + + cTOTAL_BLOCK_SIZES = 14 + }; + // Valid endpoint ISE ranges const uint32_t FIRST_VALID_ENDPOINT_ISE_RANGE = BISE_6_LEVELS; // 4 const uint32_t LAST_VALID_ENDPOINT_ISE_RANGE = BISE_256_LEVELS; // 20 @@ -79,11 +112,11 @@ namespace astc_helpers const uint32_t FIRST_VALID_WEIGHT_ISE_RANGE = BISE_2_LEVELS; // 0 const uint32_t LAST_VALID_WEIGHT_ISE_RANGE = BISE_32_LEVELS; // 11 const uint32_t TOTAL_WEIGHT_ISE_RANGES = LAST_VALID_WEIGHT_ISE_RANGE - FIRST_VALID_WEIGHT_ISE_RANGE + 1; - + // The ISE range table. extern const int8_t g_ise_range_table[TOTAL_ISE_RANGES][3]; // 0=bits (0 to 8), 1=trits (0 or 1), 2=quints (0 or 1) - // Possible Color Component Select values, used in dual plane mode. + // Possible Color Component Select values, used in dual plane mode. // The CCS component will be interpolated using the 2nd weight plane. 
enum ccs { @@ -92,7 +125,7 @@ namespace astc_helpers CCS_RGA_B = 2, CCS_RGB_A = 3 }; - + struct astc_block { uint32_t m_vals[4]; @@ -106,14 +139,14 @@ namespace astc_helpers struct log_astc_block { bool m_error_flag; - + bool m_solid_color_flag_ldr, m_solid_color_flag_hdr; uint8_t m_user_mode; // user defined value, not used in this module - + // Rest is only valid if !m_solid_color_flag_ldr && !m_solid_color_flag_hdr uint8_t m_grid_width, m_grid_height; // weight grid dimensions, not the dimension of the block - + bool m_dual_plane; uint8_t m_weight_ise_range; // 0-11 @@ -123,16 +156,16 @@ namespace astc_helpers uint8_t m_num_partitions; // or the # of subsets, 1-4 (1-3 if dual plane mode) uint16_t m_partition_id; // 10-bits, must be 0 if m_num_partitions==1 - + uint8_t m_color_endpoint_modes[MAX_PARTITIONS]; // each subset's CEM's - + union { // ISE weight grid values. In dual plane mode, the order is p0,p1, p0,p1, etc. uint8_t m_weights[MAX_GRID_WEIGHTS]; uint16_t m_solid_color[4]; }; - + // ISE endpoint values // Endpoint order examples: // 1 subset LA : LL0 LH0 AL0 AH0 @@ -142,13 +175,13 @@ namespace astc_helpers // 2 subset RGB : RL0 RH0 GL0 GH0 BL0 BH0 RL1 RH1 GL1 GH1 BL1 BH1 // 2 subset RGBA : RL0 RH0 GL0 GH0 BL0 BH0 AL0 AH0 RL1 RH1 GL1 GH1 BL1 BH1 AL1 AH1 uint8_t m_endpoints[MAX_ENDPOINTS]; - + void clear() { memset(this, 0, sizeof(*this)); } }; - + // Open interval inline int bounds_check(int v, int l, int h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; } inline uint32_t bounds_check(uint32_t v, uint32_t l, uint32_t h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; } @@ -166,8 +199,8 @@ namespace astc_helpers } // Returns the number of levels in the given ISE range. 
- inline uint32_t get_ise_levels(uint32_t ise_range) - { + inline uint32_t get_ise_levels(uint32_t ise_range) + { assert(ise_range < TOTAL_ISE_RANGES); return (1 + 2 * g_ise_range_table[ise_range][1] + 4 * g_ise_range_table[ise_range][2]) << g_ise_range_table[ise_range][0]; } @@ -180,10 +213,10 @@ namespace astc_helpers total_bits += (g_ise_range_table[range][2] * 7 * count + 2) / 3; return total_bits; } - + inline uint32_t weight_interpolate(uint32_t l, uint32_t h, uint32_t w) { - assert(w <= MAX_WEIGHT_VALUE); + assert(w <= MAX_WEIGHT_INTERPOLANT_VALUE); return (l * (64 - w) + h * w + 32) >> 6; } @@ -198,9 +231,15 @@ namespace astc_helpers inline pack_stats() { clear(); } inline void clear() { memset(this, 0, sizeof(*this)); } }; - + + enum + { + cValidateEarlyOutAtEndpointISEChecks = 1, + cValidateSkipFinalEndpointWeightPacking = 2, + }; + // Packs a logical to physical ASTC block. Note this does not validate the block's dimensions (use is_valid_block_size()), just the grid dimensions. - bool pack_astc_block(astc_block &phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range = nullptr, pack_stats *pStats = nullptr); + bool pack_astc_block(astc_block &phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range = nullptr, pack_stats *pStats = nullptr, uint32_t validate_flags = 0); // Pack LDR void extent (really solid color) blocks. For LDR, pass in (val | (val << 8)) for each component. void pack_void_extent_ldr(astc_block& blk, uint16_t r, uint16_t g, uint16_t b, uint16_t a, pack_stats *pStats = nullptr); @@ -209,10 +248,10 @@ namespace astc_helpers void pack_void_extent_hdr(astc_block& blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah, pack_stats* pStats = nullptr); // These helpers are all quite slow, but are useful for table preparation. 
- + // Dequantizes ISE encoded endpoint val to [0,255] uint32_t dequant_bise_endpoint(uint32_t val, uint32_t ise_range); // ISE ranges 4-11 - + // Dequantizes ISE encoded weight val to [0,64] uint32_t dequant_bise_weight(uint32_t val, uint32_t ise_range); // ISE ranges 0-10 @@ -231,12 +270,21 @@ namespace astc_helpers bool is_cem_ldr(uint32_t mode); inline bool is_cem_hdr(uint32_t mode) { return !is_cem_ldr(mode); } + bool does_cem_have_alpha(uint32_t mode); + // True if the passed in dimensions are a valid ASTC block size. There are 14 supported configs, from 4x4 (8bpp) to 12x12 (.89bpp). bool is_valid_block_size(uint32_t w, uint32_t h); + + // w/h must be a valid ASTC block size, or it returns cBLOCK_SIZE_4x4 + uint32_t get_block_size_index(uint32_t w, uint32_t h); + + float get_bitrate_from_block_size(uint32_t w, uint32_t h); + + uint32_t get_texel_partition_from_table(uint32_t block_width, uint32_t block_height, uint32_t seed, uint32_t subsets, uint32_t x, uint32_t y); bool block_has_any_hdr_cems(const log_astc_block& log_blk); bool block_has_any_ldr_cems(const log_astc_block& log_blk); - + // Returns the # of endpoint values for the given CEM. inline uint32_t get_num_cem_values(uint32_t cem) { assert(cem <= 15); return 2 + 2 * (cem >> 2); } @@ -245,17 +293,28 @@ namespace astc_helpers basisu::vector m_val_to_ise; // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65] basisu::vector m_ISE_to_val; // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels] basisu::vector m_ISE_to_rank; // returns the level rank index given an ISE symbol, [levels] - basisu::vector m_rank_to_ISE; // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels] + basisu::vector m_rank_to_ISE; // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels] - void init(bool weight_flag, uint32_t num_levels, bool init_rank_tabs) + void init(bool weight_flag, uint32_t num_levels) { - m_val_to_ise.resize(weight_flag ? 
(MAX_WEIGHT_VALUE + 1) : 256); + m_val_to_ise.resize(weight_flag ? (MAX_WEIGHT_INTERPOLANT_VALUE + 1) : 256); m_ISE_to_val.resize(num_levels); - if (init_rank_tabs) - { - m_ISE_to_rank.resize(num_levels); - m_rank_to_ISE.resize(num_levels); - } + m_ISE_to_rank.resize(num_levels); + m_rank_to_ISE.resize(num_levels); + } + + uint32_t get_rank_to_val(uint32_t rank) const + { + const uint32_t ise = m_rank_to_ISE[rank]; + const uint32_t val = m_ISE_to_val[ise]; + return val; + } + + uint32_t get_val_to_rank(uint32_t val) + { + const uint32_t ise = m_val_to_ise[val]; + const uint32_t rank = m_ISE_to_rank[ise]; + return rank; } }; @@ -263,6 +322,7 @@ namespace astc_helpers { dequant_table m_weights[TOTAL_WEIGHT_ISE_RANGES]; dequant_table m_endpoints[TOTAL_ENDPOINT_ISE_RANGES]; + bool m_initialized_flag = false; const dequant_table& get_weight_tab(uint32_t range) const { @@ -288,16 +348,19 @@ namespace astc_helpers return m_endpoints[range - FIRST_VALID_ENDPOINT_ISE_RANGE]; } - void init(bool init_rank_tabs) + void init() { + if (m_initialized_flag) + return; + for (uint32_t range = FIRST_VALID_WEIGHT_ISE_RANGE; range <= LAST_VALID_WEIGHT_ISE_RANGE; range++) { const uint32_t num_levels = get_ise_levels(range); dequant_table& tab = get_weight_tab(range); - tab.init(true, num_levels, init_rank_tabs); + tab.init(true, num_levels); - create_quant_tables(tab.m_val_to_ise.data(), tab.m_ISE_to_val.data(), init_rank_tabs ? tab.m_ISE_to_rank.data() : nullptr, init_rank_tabs ? 
tab.m_rank_to_ISE.data() : nullptr, range, true); + create_quant_tables(tab.m_val_to_ise.data(), tab.m_ISE_to_val.data(), tab.m_ISE_to_rank.data(), tab.m_rank_to_ISE.data(), range, true); } for (uint32_t range = FIRST_VALID_ENDPOINT_ISE_RANGE; range <= LAST_VALID_ENDPOINT_ISE_RANGE; range++) @@ -305,15 +368,17 @@ namespace astc_helpers const uint32_t num_levels = get_ise_levels(range); dequant_table& tab = get_endpoint_tab(range); - tab.init(false, num_levels, init_rank_tabs); + tab.init(false, num_levels); - create_quant_tables(tab.m_val_to_ise.data(), tab.m_ISE_to_val.data(), init_rank_tabs ? tab.m_ISE_to_rank.data() : nullptr, init_rank_tabs ? tab.m_rank_to_ISE.data() : nullptr, range, false); + create_quant_tables(tab.m_val_to_ise.data(), tab.m_ISE_to_val.data(), tab.m_ISE_to_rank.data(), tab.m_rank_to_ISE.data(), range, false); } + + m_initialized_flag = true; } }; extern dequant_tables g_dequant_tables; - void init_tables(bool init_rank_tabs); + void init_tables(); struct weighted_sample { @@ -333,9 +398,22 @@ namespace astc_helpers const uint8_t* pSrc_weights, // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx] uint8_t* pDst_weights); // [by][bx] - // Procedurally returns the texel partition/subset index given the block coordinate and config. + void upsample_weight_grid_xuastc_ldr( + uint32_t bx, uint32_t by, // destination/to dimension + uint32_t wx, uint32_t wy, // source/from dimension + const uint8_t* pSrc_weights0, // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx] + uint8_t* pDst_weights0, // [by][bx] + const uint8_t* pSrc_weights1, // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx] + uint8_t* pDst_weights1); // [by][bx] + + bool is_small_block(uint32_t block_width, uint32_t block_height); + + // Procedurally returns the texel partition/subset index given the block coordinate and config (very slow). 
int compute_texel_partition(uint32_t seedIn, uint32_t xIn, uint32_t yIn, uint32_t zIn, int num_partitions, bool small_block); + // Returns the texel partition/subset index given the block coordinate and config - table lookup, but currently ONLY 2-3 SUBSETS to save RAM. + int get_precomputed_texel_partition(uint32_t block_width, uint32_t block_height, uint32_t seed, uint32_t x, uint32_t y, uint32_t num_partitions); + void blue_contract( int r, int g, int b, int a, int& dr, int& dg, int& db, int& da); @@ -372,7 +450,7 @@ namespace astc_helpers const int MAX_RGB9E5 = 0xff80; void unpack_rgb9e5(uint32_t packed, float& r, float& g, float& b); uint32_t pack_rgb9e5(float r, float g, float b); - + enum decode_mode { cDecodeModeSRGB8 = 0, // returns uint8_t's, not valid on HDR blocks @@ -385,11 +463,71 @@ namespace astc_helpers // pPixels must point to either 32-bit pixel values (SRGB8/LDR8/9E5) or 64-bit pixel values (HDR16) bool decode_block(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode); + // Assuming the ASTC logical block is valid, this checks for the extra XUASTC LDR constraints. + bool is_block_xuastc_ldr(const log_astc_block& log_blk); + + // XUASTC LDR only - primary assumption is the logical block comes directly from our supercompressor. DO NOT call on general ASTC blocks. + bool decode_block_xuastc_ldr(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode, const uint8_t* pUpsampled_weights_to_use = nullptr, uint32_t start_x = 0, uint32_t start_y = 0, uint32_t end_x = 0, uint32_t end_y = 0); + void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint8_t *pBits128, uint32_t bit_ofs); // Unpack a physical ASTC encoded GPU texture block to a logical block description. 
bool unpack_block(const void* pASTC_block, log_astc_block& log_blk, uint32_t blk_width, uint32_t blk_height); + uint8_t& get_weight(log_astc_block& log_block, uint32_t plane_index, uint32_t idx); + uint8_t get_weight(const log_astc_block& log_block, uint32_t plane_index, uint32_t idx); + void extract_weights(const log_astc_block& log_block, uint8_t* pWeights, uint32_t plane_index); + void set_weights(log_astc_block& log_block, const uint8_t* pWeights, uint32_t plane_index); + uint32_t get_total_weights(const log_astc_block& log_block); + + uint8_t* get_endpoints(log_astc_block& log_block, uint32_t partition_index); + const uint8_t* get_endpoints(const log_astc_block& log_block, uint32_t partition_index); + + const char* get_cem_name(uint32_t cem_index); + bool cem_is_ldr_direct(uint32_t cem_index); + bool cem_is_ldr_base_scale(uint32_t cem_index); + bool cem_is_ldr_base_plus_ofs(uint32_t cem_index); + + bool cem_supports_bc(uint32_t cem); + + void bit_transfer_signed_dec(int& a, int& b); + void bit_transfer_signed_enc(int& a, int& b); + + bool cem8_or_12_used_blue_contraction(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index); + bool cem9_or_13_used_blue_contraction(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index); + bool used_blue_contraction(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index); + + uint32_t get_base_cem_without_alpha(uint32_t cem); + + int apply_delta_to_bise_endpoint_val(uint32_t endpoint_ise_range, int ise_val, int delta); + + // index range: [0,NUM_ASTC_BLOCK_SIZES-1] + void get_astc_block_size_by_index(uint32_t index, uint32_t& width, uint32_t& height); + + // -1 if invalid + int find_astc_block_size_index(uint32_t width, uint32_t height); + + // 8-bit linear8 or sRGB8, le/he are [0,255], w is [0,64] + inline int channel_interpolate(int le, int he, int w, bool astc_srgb_decode) + { + assert((w >= 0) && (w <= 64)); + assert((le >= 0) && (le <= 255)); + 
assert((he >= 0) && (he <= 255)); + + if (astc_srgb_decode) + { + le = (le << 8) | 0x80; + he = (he << 8) | 0x80; + } + else + { + le = (le << 8) | le; + he = (he << 8) | he; + } + + return astc_helpers::weight_interpolate(le, he, w) >> 8; + } + } // namespace astc_helpers #endif // BASISU_ASTC_HELPERS_HEADER @@ -403,11 +541,11 @@ namespace astc_helpers template inline T my_min(T a, T b) { return (a < b) ? a : b; } template inline T my_max(T a, T b) { return (a > b) ? a : b; } - const uint8_t g_astc_block_sizes[NUM_ASTC_BLOCK_SIZES][2] = { - { 4, 4 }, { 5, 4 }, { 5, 5 }, { 6, 5 }, - { 6, 6 }, { 8, 5 }, { 8, 6 }, { 10, 5 }, - { 10, 6 }, { 8, 8 }, { 10, 8 }, { 10, 10 }, - { 12, 10 }, { 12, 12 } + const uint8_t g_astc_block_sizes[NUM_ASTC_BLOCK_SIZES][2] = { + { 4, 4 }, { 5, 4 }, { 5, 5 }, { 6, 5 }, + { 6, 6 }, { 8, 5 }, { 8, 6 }, { 10, 5 }, + { 10, 6 }, { 8, 8 }, { 10, 8 }, { 10, 10 }, + { 12, 10 }, { 12, 12 } }; const int8_t g_ise_range_table[TOTAL_ISE_RANGES][3] = @@ -436,7 +574,7 @@ namespace astc_helpers { 6, 1, 0 }, // 0..191 19 { 8, 0, 0 }, // 0..255 20 }; - + static inline void astc_set_bits_1_to_9(uint32_t* pDst, uint32_t& bit_offset, uint32_t code, uint32_t codesize) { uint8_t* pBuf = reinterpret_cast(pDst); @@ -551,10 +689,10 @@ namespace astc_helpers // Now interleave the 8 encoded trit bits with the bits to form the encoded output. See table 94. 
astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 1) << n) | (bits[1] << (2 + n)), n * 2 + 2); - + astc_set_bits(pOutput, bit_pos, astc_extract_bits(T, 2, 3) | (bits[2] << 2) | (astc_extract_bits(T, 4, 4) << (2 + n)) | (bits[3] << (3 + n)) | (astc_extract_bits(T, 5, 6) << (3 + n * 2)) | (bits[4] << (5 + n * 2)) | (astc_extract_bits(T, 7, 7) << (5 + n * 3)), n * 3 + 6); - + if (pStats) *pStats += n * 5 + 8; } @@ -582,7 +720,7 @@ namespace astc_helpers if (group_size) { - // Range has trits or quints - pack each group of 5 or 3 values + // Range has trits or quints - pack each group of 5 or 3 values const int total_groups = (group_size == 5) ? ((num_vals + 4) / 5) : ((num_vals + 2) / 3); for (int group_index = 0; group_index < total_groups; group_index++) @@ -593,7 +731,7 @@ namespace astc_helpers for (int i = 0; i < limit; i++) vals[i] = pSrc_vals[group_index * group_size + i]; - // Note this always writes a group of 3 or 5 bits values, even for incomplete groups. So it can write more than needed. + // Note this always writes a group of 3 or 5 bits values, even for incomplete groups. So it can write more than needed. // get_ise_sequence_bits() returns the # of bits that must be written for proper decoding. if (group_size == 5) astc_encode_trits(temp, vals, bit_pos, num_bits, pStats); @@ -632,14 +770,14 @@ namespace astc_helpers const uint32_t P = log_block.m_weight_ise_range >= 6; // high precision const uint32_t Dp_P = (log_block.m_dual_plane << 1) | P; // pack dual plane+high precision bits - + // See Tables 81-82 // Compute p from weight range uint32_t p = 2 + log_block.m_weight_ise_range - (P ? 6 : 0); - + // Rearrange p's bits to p0 p2 p1 p = (p >> 1) + ((p & 1) << 2); - + // Try encoding each row of table 82. 
// W+4 H+2 @@ -676,7 +814,7 @@ namespace astc_helpers config_bits = (Dp_P << 9) | ((W) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | 12 | (p & 3); return true; } - + // 12 H+2 if ((W == 12) && is_packable(H - 2, 2)) { @@ -704,7 +842,7 @@ namespace astc_helpers config_bits = (Dp_P << 9) | (0b1101 << 5) | (p << 2); return true; } - + // W+6 H+6 (no dual plane or high prec) if ((!Dp_P) && is_packable(W - 6, 2) && is_packable(H - 6, 2)) { @@ -715,9 +853,19 @@ namespace astc_helpers // Failed: unsupported weight grid dimensions or config. return false; } - - bool pack_astc_block(astc_block& phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range, pack_stats *pStats) + + bool pack_astc_block(astc_block& phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range, pack_stats *pStats, uint32_t validate_flags) { + // Basic sanity checking + if (!log_block.m_dual_plane) + { + assert(log_block.m_color_component_selector == 0); + } + else + { + assert(log_block.m_color_component_selector <= 3); + } + memset(&phys_block, 0, sizeof(phys_block)); if (pExpected_endpoint_range) @@ -726,7 +874,7 @@ namespace astc_helpers assert(!log_block.m_error_flag); if (log_block.m_error_flag) return false; - + if (log_block.m_solid_color_flag_ldr) { pack_void_extent_ldr(phys_block, log_block.m_solid_color[0], log_block.m_solid_color[1], log_block.m_solid_color[2], log_block.m_solid_color[3], pStats); @@ -737,7 +885,7 @@ namespace astc_helpers pack_void_extent_hdr(phys_block, log_block.m_solid_color[0], log_block.m_solid_color[1], log_block.m_solid_color[2], log_block.m_solid_color[3], pStats); return true; } - + if ((log_block.m_num_partitions < 1) || (log_block.m_num_partitions > MAX_PARTITIONS)) return false; @@ -753,7 +901,7 @@ namespace astc_helpers return false; // TODO: sanity check grid width/height vs. 
block's physical width/height - + uint32_t config_bits = 0; if (!get_config_bits(log_block, config_bits)) return false; @@ -794,7 +942,7 @@ namespace astc_helpers if (highest_cem > 15) return false; - + // Ensure CEM range is contiguous if (((highest_cem >> 2) > (1 + (lowest_cem >> 2)))) return false; @@ -809,7 +957,7 @@ namespace astc_helpers for (uint32_t j = 0; j < log_block.m_num_partitions; j++) { const int M = log_block.m_color_endpoint_modes[j] & 3; - + const int C = (log_block.m_color_endpoint_modes[j] >> 2) - ((encoded_cem & 3) - 1); if ((C & 1) != C) return false; @@ -850,7 +998,7 @@ namespace astc_helpers return false; total_extra_bits += 2; - + uint32_t ccs_bit_pos = 128 - (int)total_weight_bits - (int)total_extra_bits; astc_set_bits(&phys_block.m_vals[0], ccs_bit_pos, log_block.m_color_component_selector, 2); if (pStats) @@ -868,6 +1016,9 @@ namespace astc_helpers if (total_cem_vals > MAX_ENDPOINTS) return false; + + if (validate_flags & cValidateEarlyOutAtEndpointISEChecks) + return true; int endpoint_ise_range = -1; for (int k = 20; k > 0; k--) @@ -898,6 +1049,9 @@ namespace astc_helpers pStats->m_weight_bits += get_ise_sequence_bits(total_grid_weights, log_block.m_weight_ise_range); } + if (validate_flags & cValidateSkipFinalEndpointWeightPacking) + return true; + // Pack endpoints forwards encode_bise(&phys_block.m_vals[0], log_block.m_endpoints, bit_pos, total_cem_vals, endpoint_ise_range); @@ -1094,12 +1248,12 @@ namespace astc_helpers uint32_t u = 0; switch (ise_range) { - case 0: + case 0: { u = val ? 
63 : 0; break; } - case 1: // 0-2 + case 1: // 0-2 { const uint8_t s_tab_0_2[3] = { 0, 32, 63 }; u = s_tab_0_2[val]; @@ -1140,7 +1294,7 @@ namespace astc_helpers const uint32_t num_bits = g_ise_range_table[ise_range][0]; const uint32_t num_trits = g_ise_range_table[ise_range][1]; BASISU_NOTE_UNUSED(num_trits); const uint32_t num_quints = g_ise_range_table[ise_range][2]; BASISU_NOTE_UNUSED(num_quints); - + // compute Table 103 row index const int range_index = num_bits * 2 + (num_quints ? 1 : 0); @@ -1153,11 +1307,11 @@ namespace astc_helpers // Now dequantize // See Table 103. ASTC weight unquantization parameters static const uint32_t C_table[5] = { 50, 28, 23, 13, 11 }; - + const uint32_t a = bits & 1, b = (bits >> 1) & 1, c = (bits >> 2) & 1; const uint32_t A = (a == 0) ? 0 : 0x7F; - + uint32_t B = 0; if (range_index == 4) B = ((b << 6) | (b << 2) | (b << 0)); @@ -1210,7 +1364,7 @@ namespace astc_helpers uint32_t find_nearest_bise_weight(int v, uint32_t ise_range) { assert(ise_range >= FIRST_VALID_WEIGHT_ISE_RANGE && ise_range <= LAST_VALID_WEIGHT_ISE_RANGE); - assert(v <= (int)MAX_WEIGHT_VALUE); + assert(v <= (int)MAX_WEIGHT_INTERPOLANT_VALUE); const uint32_t total_levels = get_ise_levels(ise_range); int best_e = INT_MAX, best_index = 0; @@ -1237,7 +1391,7 @@ namespace astc_helpers uint32_t ise_range, // ise range, [4,20] for endpoints, [0,11] for weights bool weight_flag) // false if block endpoints, true if weights { - const uint32_t num_dequant_vals = weight_flag ? (MAX_WEIGHT_VALUE + 1) : 256; + const uint32_t num_dequant_vals = weight_flag ? (MAX_WEIGHT_INTERPOLANT_VALUE + 1) : 256; for (uint32_t i = 0; i < num_dequant_vals; i++) { @@ -1273,22 +1427,22 @@ namespace astc_helpers for (uint32_t i = 0; i < num_levels; i++) { uint32_t v = weight_flag ? 
astc_helpers::dequant_bise_weight(i, ise_range) : astc_helpers::dequant_bise_endpoint(i, ise_range); - + // Low=ISE value // High=dequantized value vals[i] = (v << 16) | i; } - + // Sorts by dequantized value std::sort(vals, vals + num_levels); - + for (uint32_t rank = 0; rank < num_levels; rank++) { uint32_t ise_val = (uint8_t)vals[rank]; if (pISE_to_rank) pISE_to_rank[ise_val] = (uint8_t)rank; - + if (pRank_to_ISE) pRank_to_ISE[rank] = (uint8_t)ise_val; } @@ -1314,17 +1468,17 @@ namespace astc_helpers pDst[15] = (uint8_t)(ah >> 8); if (pStats) - pStats->m_header_bits += 128; + pStats->m_header_bits += 16 + 64; } // rh-ah are half-floats - void pack_void_extent_hdr(astc_block& blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah, pack_stats *pStats) + void pack_void_extent_hdr(astc_block& blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah, pack_stats *pStats) { uint8_t* pDst = (uint8_t*)&blk.m_vals[0]; memset(pDst, 0xFF, 16); pDst[0] = 0b11111100; - + pDst[8] = (uint8_t)rh; pDst[9] = (uint8_t)(rh >> 8); pDst[10] = (uint8_t)gh; @@ -1335,9 +1489,9 @@ namespace astc_helpers pDst[15] = (uint8_t)(ah >> 8); if (pStats) - pStats->m_header_bits += 128; + pStats->m_header_bits += 8 + 64; } - + bool is_cem_ldr(uint32_t mode) { switch (mode) @@ -1356,39 +1510,132 @@ namespace astc_helpers default: break; } + + return false; + } + + bool does_cem_have_alpha(uint32_t mode) + { + switch (mode) + { + case CEM_LDR_LUM_ALPHA_DIRECT: + case CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS: + case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A: + case CEM_LDR_RGBA_DIRECT: + case CEM_LDR_RGBA_BASE_PLUS_OFFSET: + case CEM_HDR_RGB_LDR_ALPHA: + case CEM_HDR_RGB_HDR_ALPHA: + return true; + default: + break; + } return false; } bool is_valid_block_size(uint32_t w, uint32_t h) { - assert((w >= MIN_BLOCK_DIM) && (w <= MAX_BLOCK_DIM)); - assert((h >= MIN_BLOCK_DIM) && (h <= MAX_BLOCK_DIM)); - -#define SIZECHK(x, y) if ((w == (x)) && (h == (y))) return true; - SIZECHK(4, 4); - SIZECHK(5, 4); +#define 
BU_ASTC_HELPERS_SIZECHK(x, y) if ((w == (x)) && (h == (y))) return true; + BU_ASTC_HELPERS_SIZECHK(4, 4); // 0 + BU_ASTC_HELPERS_SIZECHK(5, 4); // 1 - SIZECHK(5, 5); + BU_ASTC_HELPERS_SIZECHK(5, 5); // 2 - SIZECHK(6, 5); - SIZECHK(6, 6); + BU_ASTC_HELPERS_SIZECHK(6, 5); // 3 + BU_ASTC_HELPERS_SIZECHK(6, 6); // 4 - SIZECHK(8, 5); - SIZECHK(8, 6); - SIZECHK(10, 5); - SIZECHK(10, 6); + BU_ASTC_HELPERS_SIZECHK(8, 5); // 5 + BU_ASTC_HELPERS_SIZECHK(8, 6); // 6 + BU_ASTC_HELPERS_SIZECHK(10, 5); // 7 + BU_ASTC_HELPERS_SIZECHK(10, 6); // 8 - SIZECHK(8, 8); - SIZECHK(10, 8); - SIZECHK(10, 10); + BU_ASTC_HELPERS_SIZECHK(8, 8); // 9 + BU_ASTC_HELPERS_SIZECHK(10, 8); // 10 + BU_ASTC_HELPERS_SIZECHK(10, 10); // 11 - SIZECHK(12, 10); - SIZECHK(12, 12); -#undef SIZECHK + BU_ASTC_HELPERS_SIZECHK(12, 10); // 12 + BU_ASTC_HELPERS_SIZECHK(12, 12); // 13 +#undef BU_ASTC_HELPERS_SIZECHK return false; } + + uint32_t get_block_size_index(uint32_t w, uint32_t h) + { + assert(is_valid_block_size(w, h)); + + const uint32_t t = w * h; + + if (t <= 36) + { + if (t == 36) + return cBLOCK_SIZE_6x6; + else if (t == 16) + return cBLOCK_SIZE_4x4; + else if (t == 25) + return cBLOCK_SIZE_5x5; + else if (t == 20) + return cBLOCK_SIZE_5x4; + else if (t == 30) + return cBLOCK_SIZE_6x5; + } + else if (t <= 64) + { + if (t == 64) + return cBLOCK_SIZE_8x8; + else if (t == 60) + return cBLOCK_SIZE_10x6; + else if (t == 50) + return cBLOCK_SIZE_10x5; + else if (t == 48) + return cBLOCK_SIZE_8x6; + else if (t == 40) + return cBLOCK_SIZE_8x5; + } + else + { + if (t == 80) + return cBLOCK_SIZE_10x8; + else if (t == 100) + return cBLOCK_SIZE_10x10; + else if (t == 120) + return cBLOCK_SIZE_12x10; + else if (t == 144) + return cBLOCK_SIZE_12x12; + } + + assert(0); + return cBLOCK_SIZE_4x4; + } + + // returns the standard ASTC bitrates given a valid block size from the ASTC spec. 
+ // 0=invalid block size + float get_bitrate_from_block_size(uint32_t w, uint32_t h) + { +#define BU_ASTC_HELPERS_BLOCK_BITRATE(x, y, b) if ((w == (x)) && (h == (y))) return (b); + BU_ASTC_HELPERS_BLOCK_BITRATE(4, 4, 8.0f); + BU_ASTC_HELPERS_BLOCK_BITRATE(5, 4, 6.4f); + + BU_ASTC_HELPERS_BLOCK_BITRATE(5, 5, 5.12f); + + BU_ASTC_HELPERS_BLOCK_BITRATE(6, 5, 4.27f); + BU_ASTC_HELPERS_BLOCK_BITRATE(6, 6, 3.56f); + + BU_ASTC_HELPERS_BLOCK_BITRATE(8, 5, 3.20f); + BU_ASTC_HELPERS_BLOCK_BITRATE(8, 6, 2.67f); + BU_ASTC_HELPERS_BLOCK_BITRATE(10, 5, 2.56f); + BU_ASTC_HELPERS_BLOCK_BITRATE(10, 6, 2.13f); + + BU_ASTC_HELPERS_BLOCK_BITRATE(8, 8, 2.00f); + BU_ASTC_HELPERS_BLOCK_BITRATE(10, 8, 1.60f); + BU_ASTC_HELPERS_BLOCK_BITRATE(10, 10, 1.28f); + + BU_ASTC_HELPERS_BLOCK_BITRATE(12, 10, 1.07f); + BU_ASTC_HELPERS_BLOCK_BITRATE(12, 12, .89f); +#undef BU_ASTC_HELPERS_BLOCK_BITRATE + + return 0.0f; + } bool block_has_any_hdr_cems(const log_astc_block& log_blk) { @@ -1411,20 +1658,19 @@ namespace astc_helpers return false; } - + dequant_tables g_dequant_tables; - - void precompute_texel_partitions_4x4(); - void precompute_texel_partitions_6x6(); - - void init_tables(bool init_rank_tabs) + + void precompute_texel_partitions(); + + // TODO: this is called twice when using the encoder, first init_rank_tabs=false then init_rank_tabs=true. 
+ void init_tables() { - g_dequant_tables.init(init_rank_tabs); - - precompute_texel_partitions_4x4(); - precompute_texel_partitions_6x6(); + g_dequant_tables.init(); + + precompute_texel_partitions(); } - + void compute_upsample_weights( int block_width, int block_height, int weight_grid_width, int weight_grid_height, @@ -1474,6 +1720,8 @@ namespace astc_helpers if (total_src_weights == total_dst_weights) { + assert((bx == wx) && (by == wy)); + memcpy(pDst_weights, pSrc_weights, total_src_weights); return; } @@ -1507,6 +1755,78 @@ namespace astc_helpers } } + void upsample_weight_grid_xuastc_ldr( + uint32_t bx, uint32_t by, // destination/to dimension + uint32_t wx, uint32_t wy, // source/from dimension + const uint8_t* pSrc_weights0, // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx] + uint8_t* pDst_weights0, // [by][bx] + const uint8_t* pSrc_weights1, // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx] + uint8_t* pDst_weights1) // [by][bx] + { + assert((bx >= 2) && (by >= 2) && (bx <= 12) && (by <= 12)); + assert((wx >= 2) && (wy >= 2) && (wx <= bx) && (wy <= by)); + + assert((bx != wx) || (by != wy)); + + const uint32_t scaleX = (1024 + bx / 2) / (bx - 1); + const uint32_t scaleY = (1024 + by / 2) / (by - 1); + + const uint32_t gYUInc = scaleY * (wy - 1); + const uint32_t gXUInc = scaleX * (wx - 1); + + uint32_t gYU = 32; + for (uint32_t texel_y = 0; texel_y < by; texel_y++) + { + const uint32_t gY = gYU >> 6; + gYU += gYUInc; + + const uint32_t jY = gY >> 4; + const uint32_t fY = gY & 0xf; + + uint32_t gXU = 32; + for (uint32_t texel_x = 0; texel_x < bx; texel_x++) + { + const uint32_t gX = gXU >> 6; + gXU += gXUInc; + + const uint32_t jX = gX >> 4; + const uint32_t fX = gX & 0xf; + + const uint32_t w11 = (fX * fY + 8) >> 4; + const uint32_t w10 = fY - w11; + const uint32_t w01 = fX - w11; + const uint32_t w00 = 16 - fX - fY + w11; + + assert(w00 || w01 || w10 || w11); + + const uint32_t sx = jX, sy = jY; + + { + uint32_t total0 
= 8; + + if (w00) total0 += pSrc_weights0[sx + sy * wx] * w00; + if (w01) total0 += pSrc_weights0[sx + 1 + sy * wx] * w01; + if (w10) total0 += pSrc_weights0[sx + (sy + 1) * wx] * w10; + if (w11) total0 += pSrc_weights0[sx + 1 + (sy + 1) * wx] * w11; + + pDst_weights0[texel_x + texel_y * bx] = (uint8_t)(total0 >> 4); + } + + if (pDst_weights1) + { + uint32_t total1 = 8; + + if (w00) total1 += pSrc_weights1[sx + sy * wx] * w00; + if (w01) total1 += pSrc_weights1[sx + 1 + sy * wx] * w01; + if (w10) total1 += pSrc_weights1[sx + (sy + 1) * wx] * w10; + if (w11) total1 += pSrc_weights1[sx + 1 + (sy + 1) * wx] * w11; + + pDst_weights1[texel_x + texel_y * bx] = (uint8_t)(total1 >> 4); + } + } // texel_x + } // texel_y + } + inline uint32_t hash52(uint32_t v) { uint32_t p = v; @@ -1516,6 +1836,16 @@ namespace astc_helpers return p; } + bool is_small_block(uint32_t block_width, uint32_t block_height) + { + assert((block_width >= MIN_BLOCK_DIM) && (block_width <= MAX_BLOCK_DIM)); + assert((block_height >= MIN_BLOCK_DIM) && (block_height <= MAX_BLOCK_DIM)); + + const uint32_t num_blk_pixels = block_width * block_height; + + return num_blk_pixels < 31; + } + // small_block = num_blk_pixels < 31 int compute_texel_partition(uint32_t seedIn, uint32_t xIn, uint32_t yIn, uint32_t zIn, int num_partitions, bool small_block) { @@ -1582,76 +1912,108 @@ namespace astc_helpers : (c >= d) ? 2 : 3; } + + // Precomputed partition patterns for each 10-bit seed and small/large block sizes for 2-3 subsets. + // This costs 144KB of RAM and some init, but considering the sheer complexity of compute_texel_partition() and how hotly it's called in the compressors and transcoders that's worth it. 
+ // Byte packing: + // low 4 bits=small blocks (on valid up to 6x5) + // high 4 bits=large blocks (6x6 or larger) - // 4x4, 2 and 3 subsets - static uint32_t g_texel_partitions_4x4[1024][2]; - - // 6x6, 2 and 3 subsets (2 subsets low 4 bits, 3 subsets high 4 bits) - static uint8_t g_texel_partitions_6x6[1024][6 * 6]; + static uint8_t g_texel_partitions[NUM_PARTITION_PATTERNS][12][12]; // [seed][y][x] - void precompute_texel_partitions_4x4() + void sanity_check_texel_partition_tables() { - for (uint32_t p = 0; p < 1024; p++) +#if 0 +#if defined(_DEBUG) || defined(DEBUG) + // sanity checking + for (uint32_t i = 0; i < cTOTAL_BLOCK_SIZES; i++) { - uint32_t v2 = 0, v3 = 0; + const uint32_t bw = g_astc_block_sizes[i][0], bh = g_astc_block_sizes[i][1]; + const bool is_small_block_flag = is_small_block(bw, bh); - for (uint32_t y = 0; y < 4; y++) + assert(get_block_size_index(bw, bh) == i); + + for (uint32_t s = 0; s < NUM_PARTITION_PATTERNS; s++) { - for (uint32_t x = 0; x < 4; x++) + for (uint32_t y = 0; y < bh; y++) { - const uint32_t shift = x * 2 + y * 8; - v2 |= (compute_texel_partition(p, x, y, 0, 2, true) << shift); - v3 |= (compute_texel_partition(p, x, y, 0, 3, true) << shift); - } - } + for (uint32_t x = 0; x < bw; x++) + { + const uint32_t k2 = compute_texel_partition(s, x, y, 0, 2, is_small_block_flag); + const uint32_t k3 = compute_texel_partition(s, x, y, 0, 3, is_small_block_flag); - g_texel_partitions_4x4[p][0] = v2; - g_texel_partitions_4x4[p][1] = v3; + assert(get_precomputed_texel_partition(bw, bh, s, x, y, 2) == (int)k2); + assert(get_precomputed_texel_partition(bw, bh, s, x, y, 3) == (int)k3); + } // x + } // y + } // s } + printf("precompute_texel_partitions: Sanity check OK\n"); +#endif +#endif } - - void precompute_texel_partitions_6x6() + + void precompute_texel_partition() { - for (uint32_t p = 0; p < 1024; p++) + for (uint32_t seed = 0; seed < NUM_PARTITION_PATTERNS; seed++) { - for (uint32_t y = 0; y < 6; y++) + for (uint32_t y = 0; y < 
MAX_BLOCK_DIM; y++) { - for (uint32_t x = 0; x < 6; x++) + for (uint32_t x = 0; x < MAX_BLOCK_DIM; x++) { - const uint32_t p2 = compute_texel_partition(p, x, y, 0, 2, false); - const uint32_t p3 = compute_texel_partition(p, x, y, 0, 3, false); + uint32_t k = 0; - assert((p2 <= 1) && (p3 <= 2)); - g_texel_partitions_6x6[p][x + y * 6] = (uint8_t)((p3 << 4) | p2); - } - } - } - } + // small block (width*height<31) + if ((x <= 6) && (y <= 5)) + { + uint32_t v2 = compute_texel_partition(seed, x, y, 0, 2, true); assert(v2 <= 1); + uint32_t v3 = compute_texel_partition(seed, x, y, 0, 3, true); assert(v3 <= 2); + k |= v2 | (v3 << 2); + } + + // not small block + { + uint32_t v2 = compute_texel_partition(seed, x, y, 0, 2, false); assert(v2 <= 1); + uint32_t v3 = compute_texel_partition(seed, x, y, 0, 3, false); assert(v3 <= 2); + k |= ((v2 | (v3 << 2)) << 4); + } - static inline int get_precompute_texel_partitions_4x4(uint32_t seed, uint32_t x, uint32_t y, uint32_t num_partitions) - { - assert(g_texel_partitions_4x4[1][0]); - assert(seed < 1024); - assert((x <= 3) && (y <= 3)); - assert((num_partitions >= 2) && (num_partitions <= 3)); + assert(k <= 255); - const uint32_t shift = x * 2 + y * 8; - return (g_texel_partitions_4x4[seed][num_partitions - 2] >> shift) & 3; + g_texel_partitions[seed][y][x] = (uint8_t)k; + } // x + } // y + } // seed } + + int get_precomputed_texel_partition(uint32_t block_width, uint32_t block_height, uint32_t seed, uint32_t x, uint32_t y, uint32_t subsets) + { + assert(seed < NUM_PARTITION_PATTERNS); + assert((subsets >= 2) && (subsets <= 3)); + assert((x < block_width) && (y < block_height)); + + const uint32_t v = g_texel_partitions[seed][y][x]; + + uint32_t shift = (subsets == 3) ? 
2 : 0; + shift += ((block_width * block_height) >= 31) * 4; + uint32_t res = (v >> shift) & 3; - static inline int get_precompute_texel_partitions_6x6(uint32_t seed, uint32_t x, uint32_t y, uint32_t num_partitions) + // sanity checking + assert(res == (uint32_t)compute_texel_partition(seed, x, y, 0, subsets, is_small_block(block_width, block_height))); + + return res; + } + + void precompute_texel_partitions() { - assert(g_texel_partitions_6x6[0][0]); - assert(seed < 1024); - assert((x <= 5) && (y <= 5)); - assert((num_partitions >= 2) && (num_partitions <= 3)); + if (!g_texel_partitions[0][0][0]) + precompute_texel_partition(); - const uint32_t shift = (num_partitions == 3) ? 4 : 0; - return (g_texel_partitions_6x6[seed][x + y * 6] >> shift) & 3; + sanity_check_texel_partition_tables(); } void blue_contract( - int r, int g, int b, int a, + int r, int g, int b, int a, int &dr, int &dg, int &db, int &da) { dr = (r + b) >> 1; @@ -1666,7 +2028,7 @@ namespace astc_helpers b |= (a & 0x80); a >>= 1; a &= 0x3F; - if ((a & 0x20) != 0) + if ((a & 0x20) != 0) a -= 0x40; } @@ -1900,7 +2262,7 @@ namespace astc_helpers e0_g = y0; e1_g = y1; e0_b = y0; e1_b = y1; e0_a = 0x780; e1_a = 0x780; - + break; } case CEM_HDR_LUM_SMALL_RANGE: @@ -1917,11 +2279,11 @@ namespace astc_helpers y0 = ((v1 & 0xF0) << 4) | ((v0 & 0x7F) << 1); d = (v1 & 0x0F) << 1; } - + y1 = y0 + d; - if (y1 > 0xFFF) + if (y1 > 0xFFF) y1 = 0xFFF; - + e0_r = y0; e1_r = y1; e0_g = y0; e1_g = y1; e0_b = y0; e1_b = y1; @@ -1932,36 +2294,36 @@ namespace astc_helpers case CEM_HDR_RGB_BASE_SCALE: { int v2 = pE[2], v3 = pE[3]; - + int modeval = ((v0 & 0xC0) >> 6) | ((v1 & 0x80) >> 5) | ((v2 & 0x80) >> 4); - + int majcomp, mode; - if ((modeval & 0xC) != 0xC) + if ((modeval & 0xC) != 0xC) { - majcomp = modeval >> 2; + majcomp = modeval >> 2; mode = modeval & 3; } - else if (modeval != 0xF) + else if (modeval != 0xF) { - majcomp = modeval & 3; + majcomp = modeval & 3; mode = 4; } - else + else { - majcomp = 0; + majcomp = 0; 
mode = 5; } - int red = v0 & 0x3f; + int red = v0 & 0x3f; int green = v1 & 0x1f; - int blue = v2 & 0x1f; + int blue = v2 & 0x1f; int scale = v3 & 0x1f; - int x0 = (v1 >> 6) & 1; - int x1 = (v1 >> 5) & 1; + int x0 = (v1 >> 6) & 1; + int x1 = (v1 >> 5) & 1; int x2 = (v2 >> 6) & 1; - int x3 = (v2 >> 5) & 1; - int x4 = (v3 >> 7) & 1; + int x3 = (v2 >> 5) & 1; + int x4 = (v3 >> 7) & 1; int x5 = (v3 >> 6) & 1; int x6 = (v3 >> 5) & 1; @@ -1985,25 +2347,25 @@ namespace astc_helpers if (ohm & 0x02) red |= x5 << 10; static const int s_shamts[6] = { 1,1,2,3,4,5 }; - + const int shamt = s_shamts[mode]; - red <<= shamt; - green <<= shamt; - blue <<= shamt; + red <<= shamt; + green <<= shamt; + blue <<= shamt; scale <<= shamt; - if (mode != 5) - { - green = red - green; - blue = red - blue; + if (mode != 5) + { + green = red - green; + blue = red - blue; } - if (majcomp == 1) + if (majcomp == 1) std::swap(red, green); - if (majcomp == 2) + if (majcomp == 2) std::swap(red, blue); - + e1_r = clamp(red, 0, 0xFFF); e1_g = clamp(green, 0, 0xFFF); e1_b = clamp(blue, 0, 0xFFF); @@ -2027,7 +2389,7 @@ namespace astc_helpers e0_a = 0x780; e1_a = 0x780; - if (majcomp == 3) + if (majcomp == 3) { e0_r = v0 << 4; e0_g = v2 << 4; @@ -2128,12 +2490,16 @@ namespace astc_helpers v7 &= (0x3F >> mode); v7 ^= (0x20 >> mode); v7 -= (0x20 >> mode); - v6 <<= (4 - mode); - v7 <<= (4 - mode); + + //v6 <<= (4 - mode); // undefined behavior if neg + v6 = ((uint32_t)v6) << (4 - mode); + + //v7 <<= (4 - mode); // undefined behavior if neg + v7 = ((uint32_t)v7) << (4 - mode); v7 += v6; v7 = clamp(v7, 0, 0xFFF); - e0_a = v6; + e0_a = v6; e1_a = v7; } } @@ -2152,7 +2518,7 @@ namespace astc_helpers } } } - + static inline bool is_half_inf_or_nan(half_float v) { return get_bits(v, 10, 14) == 31; @@ -2265,7 +2631,7 @@ namespace astc_helpers x.u = m | (e << 23) | (s << 31); return x.f; } - + // See https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt const int RGB9E5_EXPONENT_BITS = 5, 
RGB9E5_MANTISSA_BITS = 9, RGB9E5_EXP_BIAS = 15, RGB9E5_MAX_VALID_BIASED_EXP = 31; const int MAX_RGB9E5_EXP = (RGB9E5_MAX_VALID_BIASED_EXP - RGB9E5_EXP_BIAS); @@ -2273,7 +2639,7 @@ namespace astc_helpers const int MAX_RGB9E5_MANTISSA = (RGB9E5_MANTISSA_VALUES - 1); //const int MAX_RGB9E5 = (int)(((float)MAX_RGB9E5_MANTISSA) / RGB9E5_MANTISSA_VALUES * (1 << MAX_RGB9E5_EXP)); const int EPSILON_RGB9E5 = (int)((1.0f / (float)RGB9E5_MANTISSA_VALUES) / (float)(1 << RGB9E5_EXP_BIAS)); - + void unpack_rgb9e5(uint32_t packed, float& r, float& g, float& b) { int x = packed & 511; @@ -2287,9 +2653,9 @@ namespace astc_helpers g = y * scale; b = z * scale; } - + // floor_log2 is not correct for the denorm and zero values, but we are going to do a max of this value with the minimum rgb9e5 exponent that will hide these problem cases. - static inline int floor_log2(float x) + static inline int floor_log2(float x) { union float754 { @@ -2325,7 +2691,7 @@ namespace astc_helpers exp_shared += 1; assert(exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP); } - else + else { assert(maxm <= MAX_RGB9E5_MANTISSA); } @@ -2337,7 +2703,7 @@ namespace astc_helpers assert((rm >= 0) && (rm <= MAX_RGB9E5_MANTISSA)); assert((gm >= 0) && (gm <= MAX_RGB9E5_MANTISSA)); assert((bm >= 0) && (bm <= MAX_RGB9E5_MANTISSA)); - + return rm | (gm << 9) | (bm << 18) | (exp_shared << 27); } @@ -2348,7 +2714,7 @@ namespace astc_helpers if (!x) return 17; - + uint32_t n = 0; while ((x & 0x10000) == 0) { @@ -2426,17 +2792,8 @@ namespace astc_helpers return texel; } - // Important: pPixels is either 32-bit/texel or 64-bit/texel. 
- bool decode_block(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode) + static void write_error_block(void* pPixels, uint32_t num_blk_pixels, decode_mode dec_mode) { - assert(is_valid_block_size(blk_width, blk_height)); - - assert(g_dequant_tables.m_endpoints[0].m_ISE_to_val.size()); - if (!g_dequant_tables.m_endpoints[0].m_ISE_to_val.size()) - return false; - - const uint32_t num_blk_pixels = blk_width * blk_height; - // Write block error color if (dec_mode == cDecodeModeHDR16) { @@ -2455,9 +2812,32 @@ namespace astc_helpers for (uint32_t i = 0; i < num_blk_pixels; i++) ((uint32_t*)pPixels)[i] = 0xFFFF00FF; } + } + + // Important: pPixels is either 32-bit/texel or 64-bit/texel. + bool decode_block(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode) + { + assert(is_valid_block_size(blk_width, blk_height)); + + // Basic sanity checking + if (!log_blk.m_dual_plane) + { + assert(log_blk.m_color_component_selector == 0); + } + else + { + assert(log_blk.m_color_component_selector <= 3); + } + + assert(g_dequant_tables.m_endpoints[0].m_ISE_to_val.size()); + if (!g_dequant_tables.m_endpoints[0].m_ISE_to_val.size()) + return false; + const uint32_t num_blk_pixels = blk_width * blk_height; + if (log_blk.m_error_flag) { + write_error_block(pPixels, num_blk_pixels, dec_mode); // Should this return false? It's not an invalid logical block config, though. 
return false; } @@ -2510,7 +2890,7 @@ namespace astc_helpers float r = half_to_float(log_blk.m_solid_color[0]); float g = half_to_float(log_blk.m_solid_color[1]); float b = half_to_float(log_blk.m_solid_color[2]); - + const uint32_t packed = pack_rgb9e5(r, g, b); for (uint32_t i = 0; i < num_blk_pixels; i++) @@ -2518,36 +2898,71 @@ namespace astc_helpers } else { + write_error_block(pPixels, num_blk_pixels, dec_mode); return false; } return true; } - + // Sanity check block's config if ((log_blk.m_grid_width < 2) || (log_blk.m_grid_height < 2)) + { + write_error_block(pPixels, num_blk_pixels, dec_mode); return false; + } + if ((log_blk.m_grid_width > blk_width) || (log_blk.m_grid_height > blk_height)) + { + write_error_block(pPixels, num_blk_pixels, dec_mode); return false; + } if ((log_blk.m_endpoint_ise_range < FIRST_VALID_ENDPOINT_ISE_RANGE) || (log_blk.m_endpoint_ise_range > LAST_VALID_ENDPOINT_ISE_RANGE)) + { + write_error_block(pPixels, num_blk_pixels, dec_mode); return false; + } + if ((log_blk.m_weight_ise_range < FIRST_VALID_WEIGHT_ISE_RANGE) || (log_blk.m_weight_ise_range > LAST_VALID_WEIGHT_ISE_RANGE)) + { + write_error_block(pPixels, num_blk_pixels, dec_mode); return false; + } + if ((log_blk.m_num_partitions < 1) || (log_blk.m_num_partitions > MAX_PARTITIONS)) + { + write_error_block(pPixels, num_blk_pixels, dec_mode); return false; + } + if ((log_blk.m_dual_plane) && (log_blk.m_num_partitions > MAX_DUAL_PLANE_PARTITIONS)) + { + write_error_block(pPixels, num_blk_pixels, dec_mode); return false; + } + if (log_blk.m_partition_id >= NUM_PARTITION_PATTERNS) + { + write_error_block(pPixels, num_blk_pixels, dec_mode); return false; + } + if ((log_blk.m_num_partitions == 1) && (log_blk.m_partition_id > 0)) + { + write_error_block(pPixels, num_blk_pixels, dec_mode); return false; + } + if (log_blk.m_color_component_selector > 3) + { + write_error_block(pPixels, num_blk_pixels, dec_mode); return false; + } const uint32_t total_endpoint_levels = 
get_ise_levels(log_blk.m_endpoint_ise_range); const uint32_t total_weight_levels = get_ise_levels(log_blk.m_weight_ise_range); - + bool is_ldr_endpoints[MAX_PARTITIONS]; // Check CEM's @@ -2555,15 +2970,21 @@ namespace astc_helpers for (uint32_t i = 0; i < log_blk.m_num_partitions; i++) { if (log_blk.m_color_endpoint_modes[i] > 15) + { + write_error_block(pPixels, num_blk_pixels, dec_mode); return false; + } total_cem_vals += get_num_cem_values(log_blk.m_color_endpoint_modes[i]); - + is_ldr_endpoints[i] = is_cem_ldr(log_blk.m_color_endpoint_modes[i]); } if (total_cem_vals > MAX_ENDPOINTS) + { + write_error_block(pPixels, num_blk_pixels, dec_mode); return false; + } const dequant_table& endpoint_dequant_tab = g_dequant_tables.get_endpoint_tab(log_blk.m_endpoint_ise_range); const uint8_t* pEndpoint_dequant = endpoint_dequant_tab.m_ISE_to_val.data(); @@ -2573,21 +2994,28 @@ namespace astc_helpers for (uint32_t i = 0; i < total_cem_vals; i++) { if (log_blk.m_endpoints[i] >= total_endpoint_levels) + { + write_error_block(pPixels, num_blk_pixels, dec_mode); return false; + } + dequantized_endpoints[i] = pEndpoint_dequant[log_blk.m_endpoints[i]]; } - + // Dequantize weights to [0,64] uint8_t dequantized_weights[2][12 * 12]; - + const dequant_table& weight_dequant_tab = g_dequant_tables.get_weight_tab(log_blk.m_weight_ise_range); const uint8_t* pWeight_dequant = weight_dequant_tab.m_ISE_to_val.data(); - + const uint32_t total_weight_vals = (log_blk.m_dual_plane ? 2 : 1) * log_blk.m_grid_width * log_blk.m_grid_height; for (uint32_t i = 0; i < total_weight_vals; i++) { if (log_blk.m_weights[i] >= total_weight_levels) + { + write_error_block(pPixels, num_blk_pixels, dec_mode); return false; + } const uint32_t plane_index = log_blk.m_dual_plane ? (i & 1) : 0; const uint32_t grid_index = log_blk.m_dual_plane ? 
(i >> 1) : i; @@ -2617,10 +3045,9 @@ namespace astc_helpers // Decode texels const bool small_block = num_blk_pixels < 31; - const bool use_precomputed_texel_partitions_4x4 = (blk_width == 4) && (blk_height == 4) && (log_blk.m_num_partitions >= 2) && (log_blk.m_num_partitions <= 3); - const bool use_precomputed_texel_partitions_6x6 = (blk_width == 6) && (blk_height == 6) && (log_blk.m_num_partitions >= 2) && (log_blk.m_num_partitions <= 3); + const bool use_precomputed_texel_partitions = (log_blk.m_num_partitions >= 2) && (log_blk.m_num_partitions <= 3); const uint32_t ccs = log_blk.m_dual_plane ? log_blk.m_color_component_selector : UINT32_MAX; - + bool success = true; if (dec_mode == cDecodeModeRGB9E5) @@ -2631,14 +3058,15 @@ namespace astc_helpers for (uint32_t x = 0; x < blk_width; x++) { const uint32_t pixel_index = x + y * blk_width; - + uint32_t subset = 0; if (log_blk.m_num_partitions > 1) { - if (use_precomputed_texel_partitions_4x4) - subset = get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions); - else if (use_precomputed_texel_partitions_6x6) - subset = get_precompute_texel_partitions_6x6(log_blk.m_partition_id, x, y, log_blk.m_num_partitions); + if (use_precomputed_texel_partitions) + { + subset = get_precomputed_texel_partition(blk_width, blk_height, log_blk.m_partition_id, x, y, log_blk.m_num_partitions); + //assert((int)subset == compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block)); // extra paranoia + } else subset = compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block); } @@ -2680,7 +3108,7 @@ namespace astc_helpers if (is_half_inf_or_nan((half_float)comp[c])) comp[c] = 0x7BFF; } - + } // c uint32_t packed; @@ -2697,21 +3125,22 @@ namespace astc_helpers else if (dec_mode == cDecodeModeHDR16) { // Note: must round towards zero when converting float to half for ASTC (18.19 Weight Application) - + // returns half floats for 
(uint32_t y = 0; y < blk_height; y++) { for (uint32_t x = 0; x < blk_width; x++) { const uint32_t pixel_index = x + y * blk_width; - + uint32_t subset = 0; if (log_blk.m_num_partitions > 1) { - if (use_precomputed_texel_partitions_4x4) - subset = get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions); - else if (use_precomputed_texel_partitions_6x6) - subset = get_precompute_texel_partitions_6x6(log_blk.m_partition_id, x, y, log_blk.m_num_partitions); + if (use_precomputed_texel_partitions) + { + subset = get_precomputed_texel_partition(blk_width, blk_height, log_blk.m_partition_id, x, y, log_blk.m_num_partitions); + //assert((int)subset == compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block)); // extra paranoia + } else subset = compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block); } @@ -2751,13 +3180,13 @@ namespace astc_helpers int he = endpoints[subset][c][1] << 4; int qlog16 = weight_interpolate(le, he, w); - + o = qlog16_to_half(qlog16); if (is_half_inf_or_nan(o)) o = 0x7BFF; } - + ((half_float*)pPixels)[pixel_index * 4 + c] = o; } @@ -2776,17 +3205,18 @@ namespace astc_helpers uint32_t subset = 0; if (log_blk.m_num_partitions > 1) { - if (use_precomputed_texel_partitions_4x4) - subset = get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions); - else if (use_precomputed_texel_partitions_6x6) - subset = get_precompute_texel_partitions_6x6(log_blk.m_partition_id, x, y, log_blk.m_num_partitions); + if (use_precomputed_texel_partitions) + { + subset = get_precomputed_texel_partition(blk_width, blk_height, log_blk.m_partition_id, x, y, log_blk.m_num_partitions); + //assert((int)subset == compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block)); // extra paranoia + } else subset = compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, 
small_block); } if (!is_ldr_endpoints[subset]) { - ((uint32_t*)pPixels)[pixel_index * 4] = 0xFFFF00FF; + ((uint32_t*)pPixels)[pixel_index] = 0xFFFF00FF; success = false; } else @@ -2801,6 +3231,13 @@ namespace astc_helpers // FIXME: the spec is apparently wrong? this matches ARM's and Google's decoder //if ((dec_mode == cDecodeModeSRGB8) && (c <= 2)) // See https://github.com/ARM-software/astc-encoder/issues/447 + // See latest spec with recent (2023-2024) fixes: + // https://raw.githubusercontent.com/KhronosGroup/DataFormat/refs/heads/main/astc.txt + // "For _LDR endpoint modes_, each color component C is calculated from the corresponding 8 - bit endpoint components C~0~and C~1~as follows" - does this mean alpha too? I guess so. (8/15/2025.) + + // 2/22/2026: See ARM errata 3922301 "ASTC decompression incorrectly rounds linear color endpoints when using unorm8 decode mode". (We currently always assume unorm8 decode mode.) + // Our ASTC/XUASTC encoders default to the sRGB decode profile, not linear, so at least our default behavior isn't impacted by this. + // https://documentation-service.arm.com/static/67ca1a5ece2747241fced502?utm_source=chatgpt.com if (dec_mode == cDecodeModeSRGB8) { le = (le << 8) | 0x80; @@ -2814,8 +3251,8 @@ namespace astc_helpers uint32_t k = weight_interpolate(le, he, w); - // FIXME: This is what the spec says to do in LDR mode, but this is not what ARM's decoder does - // See decompress_symbolic_block(), decode_texel() and unorm16_to_sf16. + // FIXME (old comment - before 2023/2024 ARM etc. spec fixes): This is what the spec says to do in LDR mode, but this is not what ARM's decoder does + // See decompress_symbolic_block(), decode_texel() and unorm16_to_sf16. // It seems to effectively divide by 65535.0 and convert to FP16, then back to float, mul by 255.0, add .5 and then convert to 8-bit. 
((uint8_t*)pPixels)[pixel_index * 4 + c] = (uint8_t)(k >> 8); } @@ -2824,10 +3261,428 @@ namespace astc_helpers } // x } // y } - + return success; } + bool is_block_xuastc_ldr(const log_astc_block& log_blk) + { + if (log_blk.m_error_flag) + return false; + + if (log_blk.m_solid_color_flag_ldr) + return true; + + if (log_blk.m_solid_color_flag_hdr) + return false; + + if (log_blk.m_num_partitions > 3) + return false; + + if ((log_blk.m_dual_plane) && (log_blk.m_num_partitions > 1)) + return false; + + // TODO: Check partition pattern ID against unique set. + + for (uint32_t i = 1; i < log_blk.m_num_partitions; i++) + if (log_blk.m_color_endpoint_modes[0] != log_blk.m_color_endpoint_modes[i]) + return false; + + switch (log_blk.m_color_endpoint_modes[0]) + { + case CEM_LDR_LUM_DIRECT: + case CEM_LDR_LUM_ALPHA_DIRECT: + case CEM_LDR_RGB_BASE_SCALE: + case CEM_LDR_RGB_DIRECT: + case CEM_LDR_RGB_BASE_PLUS_OFFSET: + case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A: + case CEM_LDR_RGBA_DIRECT: + case CEM_LDR_RGBA_BASE_PLUS_OFFSET: + { + break; + } + default: + { + return false; + } + } + + return true; + } + + // ~2x faster than decode_block(), but XUASTC LDR only. 
+ // pUpsampled_weights_to_use must be at block res, [0,64], single plane blocks ONLY + bool decode_block_xuastc_ldr(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode, + const uint8_t* pUpsampled_weights_to_use, uint32_t start_x, uint32_t start_y, uint32_t end_x, uint32_t end_y) + { + if (!end_x) + end_x = blk_width; + + if (!end_y) + end_y = blk_height; + + assert(start_x < end_x); + assert(start_y < end_y); + assert(end_x <= blk_width); + assert(end_y <= blk_height); + + assert(g_dequant_tables.m_endpoints[0].m_ISE_to_val.size()); + assert((dec_mode == cDecodeModeSRGB8) || (dec_mode == cDecodeModeLDR8)); + assert(is_valid_block_size(blk_width, blk_height)); + assert(!log_blk.m_error_flag && !log_blk.m_solid_color_flag_hdr); + + if (!log_blk.m_solid_color_flag_ldr) + { + assert(((log_blk.m_num_partitions >= 1) && (log_blk.m_num_partitions <= 3))); + assert((log_blk.m_grid_width >= 2) & (log_blk.m_grid_height >= 2)); + assert((log_blk.m_grid_width <= blk_width) && (log_blk.m_grid_height <= blk_height)); + assert((log_blk.m_grid_width * log_blk.m_grid_height) <= MAX_GRID_WEIGHTS); + assert((log_blk.m_num_partitions > 1) || (log_blk.m_partition_id == 0)); + } + + assert(is_block_xuastc_ldr(log_blk)); + + const uint32_t num_blk_pixels = blk_width * blk_height; + + // Handle solid color blocks + if (log_blk.m_solid_color_flag_ldr) + { + // Convert LDR pixels to 8-bits + uint32_t x; + + ((uint8_t*)&x)[0] = (uint8_t)(log_blk.m_solid_color[0] >> 8); + ((uint8_t*)&x)[1] = (uint8_t)(log_blk.m_solid_color[1] >> 8); + ((uint8_t*)&x)[2] = (uint8_t)(log_blk.m_solid_color[2] >> 8); + ((uint8_t*)&x)[3] = (uint8_t)(log_blk.m_solid_color[3] >> 8); + + uint32_t* pDst = (uint32_t*)pPixels; + + uint32_t i = 0; + while ((i + 3) < num_blk_pixels) + { + pDst[i] = x; + pDst[i + 1] = x; + pDst[i + 2] = x; + pDst[i + 3] = x; + + i += 4; + } + + while (i < num_blk_pixels) + pDst[i++] = x; + + return true; + } + + const dequant_table& 
endpoint_dequant_tab = g_dequant_tables.get_endpoint_tab(log_blk.m_endpoint_ise_range); + const uint8_t* pEndpoint_dequant = endpoint_dequant_tab.m_ISE_to_val.data(); + + const dequant_table& weight_dequant_tab = g_dequant_tables.get_weight_tab(log_blk.m_weight_ise_range); + const uint8_t* pWeight_dequant = weight_dequant_tab.m_ISE_to_val.data(); + + // Check CEM's + const uint32_t num_cem_vals = get_num_cem_values(log_blk.m_color_endpoint_modes[0]); + const uint32_t total_cem_vals = num_cem_vals * log_blk.m_num_partitions; + + assert(total_cem_vals <= MAX_ENDPOINTS); + + // Dequantized endpoints to [0,255] + uint8_t dequantized_endpoints[MAX_ENDPOINTS]; + + for (uint32_t i = 0; i < total_cem_vals; i++) + { + assert(log_blk.m_endpoints[i] < endpoint_dequant_tab.m_ISE_to_val.size_u32()); + dequantized_endpoints[i] = pEndpoint_dequant[log_blk.m_endpoints[i]]; + } + + // Decode CEM's + int endpoints[4][4][2]; // [subset][comp][l/h] + + uint32_t endpoint_val_index = 0; + const uint32_t cem_index = log_blk.m_color_endpoint_modes[0]; + + uint32_t alpha_mask = 0xFF; + + for (uint32_t subset = 0; subset < log_blk.m_num_partitions; subset++) + { + assert(log_blk.m_color_endpoint_modes[subset] == cem_index); + + decode_endpoint(cem_index, &endpoints[subset][0], &dequantized_endpoints[endpoint_val_index]); + + alpha_mask &= endpoints[subset][3][0]; + alpha_mask &= endpoints[subset][3][1]; + + endpoint_val_index += num_cem_vals; + } + + const bool any_alpha = alpha_mask != 255; + + // Dequantize weights to [0,64] + uint8_t upsampled_weights[2][12 * 12]; + + const uint32_t total_weight_vals = (log_blk.m_dual_plane ? 2 : 1) * log_blk.m_grid_width * log_blk.m_grid_height; + + // Upsample weight grid. 
[0,64] weights + const uint8_t(*pUpsampled_weights)[12 * 12]; + + uint8_t dequantized_weights[2][12 * 12]; + + // For simplicity, ignore any passed in weights if dual plane + if ((pUpsampled_weights_to_use) && (!log_blk.m_dual_plane)) + { + // Caller is jamming in already unpacked weights for the first plane to save time + pUpsampled_weights = reinterpret_cast(pUpsampled_weights_to_use); + } + else + { + if (log_blk.m_dual_plane) + { + for (uint32_t i = 0; i < total_weight_vals; i++) + { + const uint32_t plane_index = i & 1; + const uint32_t grid_index = i >> 1; + + assert(log_blk.m_weights[i] < weight_dequant_tab.m_ISE_to_val.size_u32()); + dequantized_weights[plane_index][grid_index] = pWeight_dequant[log_blk.m_weights[i]]; + } + } + else + { + for (uint32_t i = 0; i < total_weight_vals; i++) + { + assert(log_blk.m_weights[i] < weight_dequant_tab.m_ISE_to_val.size_u32()); + dequantized_weights[0][i] = pWeight_dequant[log_blk.m_weights[i]]; + } + } + + pUpsampled_weights = &dequantized_weights[0]; + + if ((log_blk.m_grid_width < blk_width) || (log_blk.m_grid_height < blk_height)) + { + upsample_weight_grid_xuastc_ldr(blk_width, blk_height, + log_blk.m_grid_width, log_blk.m_grid_height, + &dequantized_weights[0][0], &upsampled_weights[0][0], + log_blk.m_dual_plane ? &dequantized_weights[1][0] : nullptr, log_blk.m_dual_plane ? &upsampled_weights[1][0] : nullptr); + + pUpsampled_weights = &upsampled_weights[0]; + } + } + + // Decode texels + const uint32_t ccs = log_blk.m_dual_plane ? log_blk.m_color_component_selector : UINT32_MAX; + + const uint8_t *pPart = &g_texel_partitions[log_blk.m_partition_id][0][0]; // [seed][y][x] + + const bool large_block = (num_blk_pixels >= 31); + uint32_t part_shift = (log_blk.m_num_partitions == 3) ? 
2 : 0; + part_shift += large_block * 4; + + //uint32_t pixel_index = 0; + + if (log_blk.m_num_partitions == 1) + { + // alpha, 1 subset + int le0 = endpoints[0][0][0], he0 = endpoints[0][0][1]; + int le1 = endpoints[0][1][0], he1 = endpoints[0][1][1]; + int le2 = endpoints[0][2][0], he2 = endpoints[0][2][1]; + int le3 = endpoints[0][3][0], he3 = endpoints[0][3][1]; + + if (dec_mode == cDecodeModeSRGB8) + { + le0 = (le0 << 8) | 0x80; he0 = (he0 << 8) | 0x80; + le1 = (le1 << 8) | 0x80; he1 = (he1 << 8) | 0x80; + le2 = (le2 << 8) | 0x80; he2 = (he2 << 8) | 0x80; + le3 = (le3 << 8) | 0x80; he3 = (he3 << 8) | 0x80; + } + else + { + le0 = (le0 << 8) | le0; he0 = (he0 << 8) | he0; + le1 = (le1 << 8) | le1; he1 = (he1 << 8) | he1; + le2 = (le2 << 8) | le2; he2 = (he2 << 8) | he2; + le3 = (le3 << 8) | le3; he3 = (he3 << 8) | he3; + } + + // no subsets + if (!any_alpha) + { + if (!log_blk.m_dual_plane) + { + for (uint32_t y = start_y; y < end_y; y++) + { + for (uint32_t x = start_x; x < end_x; x++) + { + const uint32_t pixel_index = x + y * blk_width; + + const uint32_t w0 = pUpsampled_weights[0][pixel_index]; + const uint32_t w1 = pUpsampled_weights[0][pixel_index]; + const uint32_t w2 = pUpsampled_weights[0][pixel_index]; + + const uint32_t k0 = weight_interpolate(le0, he0, w0); + const uint32_t k1 = weight_interpolate(le1, he1, w1); + const uint32_t k2 = weight_interpolate(le2, he2, w2); + + ((uint8_t*)pPixels)[pixel_index * 4 + 0] = (uint8_t)(k0 >> 8); + ((uint8_t*)pPixels)[pixel_index * 4 + 1] = (uint8_t)(k1 >> 8); + ((uint8_t*)pPixels)[pixel_index * 4 + 2] = (uint8_t)(k2 >> 8); + ((uint8_t*)pPixels)[pixel_index * 4 + 3] = 255; + } // x + } // y + } + else + { + for (uint32_t y = start_y; y < end_y; y++) + { + for (uint32_t x = start_x; x < end_x; x++) + { + const uint32_t pixel_index = x + y * blk_width; + + const uint32_t w0 = pUpsampled_weights[(0 == ccs) ? 1 : 0][pixel_index]; + const uint32_t w1 = pUpsampled_weights[(1 == ccs) ? 
1 : 0][pixel_index]; + const uint32_t w2 = pUpsampled_weights[(2 == ccs) ? 1 : 0][pixel_index]; + + const uint32_t k0 = weight_interpolate(le0, he0, w0); + const uint32_t k1 = weight_interpolate(le1, he1, w1); + const uint32_t k2 = weight_interpolate(le2, he2, w2); + + ((uint8_t*)pPixels)[pixel_index * 4 + 0] = (uint8_t)(k0 >> 8); + ((uint8_t*)pPixels)[pixel_index * 4 + 1] = (uint8_t)(k1 >> 8); + ((uint8_t*)pPixels)[pixel_index * 4 + 2] = (uint8_t)(k2 >> 8); + ((uint8_t*)pPixels)[pixel_index * 4 + 3] = 255; + } // x + } // y + } + } + else // (!any_alpha) + { + for (uint32_t y = start_y; y < end_y; y++) + { + for (uint32_t x = start_x; x < end_x; x++) + { + const uint32_t pixel_index = x + y * blk_width; + + const uint32_t w0 = pUpsampled_weights[(0 == ccs) ? 1 : 0][pixel_index]; + const uint32_t w1 = pUpsampled_weights[(1 == ccs) ? 1 : 0][pixel_index]; + const uint32_t w2 = pUpsampled_weights[(2 == ccs) ? 1 : 0][pixel_index]; + const uint32_t w3 = pUpsampled_weights[(3 == ccs) ? 1 : 0][pixel_index]; + + const uint32_t k0 = weight_interpolate(le0, he0, w0); + const uint32_t k1 = weight_interpolate(le1, he1, w1); + const uint32_t k2 = weight_interpolate(le2, he2, w2); + const uint32_t k3 = weight_interpolate(le3, he3, w3); + + ((uint8_t*)pPixels)[pixel_index * 4 + 0] = (uint8_t)(k0 >> 8); + ((uint8_t*)pPixels)[pixel_index * 4 + 1] = (uint8_t)(k1 >> 8); + ((uint8_t*)pPixels)[pixel_index * 4 + 2] = (uint8_t)(k2 >> 8); + ((uint8_t*)pPixels)[pixel_index * 4 + 3] = (uint8_t)(k3 >> 8); + + } // x + } // y + } + } + else + { + for (uint32_t subset = 0; subset < log_blk.m_num_partitions; subset++) + { + int le0 = endpoints[subset][0][0], he0 = endpoints[subset][0][1]; + int le1 = endpoints[subset][1][0], he1 = endpoints[subset][1][1]; + int le2 = endpoints[subset][2][0], he2 = endpoints[subset][2][1]; + int le3 = endpoints[subset][3][0], he3 = endpoints[subset][3][1]; + + if (dec_mode == cDecodeModeSRGB8) + { + le0 = (le0 << 8) | 0x80; he0 = (he0 << 8) | 0x80; + le1 = (le1 
<< 8) | 0x80; he1 = (he1 << 8) | 0x80; + le2 = (le2 << 8) | 0x80; he2 = (he2 << 8) | 0x80; + le3 = (le3 << 8) | 0x80; he3 = (he3 << 8) | 0x80; + } + else + { + le0 = (le0 << 8) | le0; he0 = (he0 << 8) | he0; + le1 = (le1 << 8) | le1; he1 = (he1 << 8) | he1; + le2 = (le2 << 8) | le2; he2 = (he2 << 8) | he2; + le3 = (le3 << 8) | le3; he3 = (he3 << 8) | he3; + } + + endpoints[subset][0][0] = le0, endpoints[subset][0][1] = he0; + endpoints[subset][1][0] = le1, endpoints[subset][1][1] = he1; + endpoints[subset][2][0] = le2, endpoints[subset][2][1] = he2; + endpoints[subset][3][0] = le3, endpoints[subset][3][1] = he3; + } + + // subsets + if (!any_alpha) + { + // no alpha, sRGB + for (uint32_t y = start_y; y < end_y; y++) + { + for (uint32_t x = start_x; x < end_x; x++) + { + const uint32_t pixel_index = x + y * blk_width; + + const uint32_t v = pPart[y * 12 + x]; + const uint32_t subset = (v >> part_shift) & 3; + + const uint32_t w0 = pUpsampled_weights[(0 == ccs) ? 1 : 0][pixel_index]; + const uint32_t w1 = pUpsampled_weights[(1 == ccs) ? 1 : 0][pixel_index]; + const uint32_t w2 = pUpsampled_weights[(2 == ccs) ? 
1 : 0][pixel_index]; + + int le0 = endpoints[subset][0][0], he0 = endpoints[subset][0][1]; + int le1 = endpoints[subset][1][0], he1 = endpoints[subset][1][1]; + int le2 = endpoints[subset][2][0], he2 = endpoints[subset][2][1]; + + const uint32_t k0 = weight_interpolate(le0, he0, w0); + const uint32_t k1 = weight_interpolate(le1, he1, w1); + const uint32_t k2 = weight_interpolate(le2, he2, w2); + + ((uint8_t*)pPixels)[pixel_index * 4 + 0] = (uint8_t)(k0 >> 8); + ((uint8_t*)pPixels)[pixel_index * 4 + 1] = (uint8_t)(k1 >> 8); + ((uint8_t*)pPixels)[pixel_index * 4 + 2] = (uint8_t)(k2 >> 8); + ((uint8_t*)pPixels)[pixel_index * 4 + 3] = 255; + } // x + } // y + } + else + { + // alpha + for (uint32_t y = start_y; y < end_y; y++) + { + for (uint32_t x = start_x; x < end_x; x++) + { + const uint32_t pixel_index = x + y * blk_width; + + const uint32_t v = pPart[y * 12 + x]; + const uint32_t subset = (v >> part_shift) & 3; + + const uint32_t w0 = pUpsampled_weights[(0 == ccs) ? 1 : 0][pixel_index]; + const uint32_t w1 = pUpsampled_weights[(1 == ccs) ? 1 : 0][pixel_index]; + const uint32_t w2 = pUpsampled_weights[(2 == ccs) ? 1 : 0][pixel_index]; + const uint32_t w3 = pUpsampled_weights[(3 == ccs) ? 
1 : 0][pixel_index]; + + int le0 = endpoints[subset][0][0], he0 = endpoints[subset][0][1]; + int le1 = endpoints[subset][1][0], he1 = endpoints[subset][1][1]; + int le2 = endpoints[subset][2][0], he2 = endpoints[subset][2][1]; + int le3 = endpoints[subset][3][0], he3 = endpoints[subset][3][1]; + + const uint32_t k0 = weight_interpolate(le0, he0, w0); + const uint32_t k1 = weight_interpolate(le1, he1, w1); + const uint32_t k2 = weight_interpolate(le2, he2, w2); + const uint32_t k3 = weight_interpolate(le3, he3, w3); + + ((uint8_t*)pPixels)[pixel_index * 4 + 0] = (uint8_t)(k0 >> 8); + ((uint8_t*)pPixels)[pixel_index * 4 + 1] = (uint8_t)(k1 >> 8); + ((uint8_t*)pPixels)[pixel_index * 4 + 2] = (uint8_t)(k2 >> 8); + ((uint8_t*)pPixels)[pixel_index * 4 + 3] = (uint8_t)(k3 >> 8); + + } // x + } // y + + } + + } // if (log_blk.m_num_partitions == 1) + + return true; + } + //------------------------------------------------ // Physical to logical block decoding @@ -3220,7 +4075,7 @@ namespace astc_helpers return *this; } }; - + static bool decode_void_extent(const uint128& bits, log_astc_block& log_blk) { if (bits.get_bits(10, 2) != 0b11) @@ -3232,9 +4087,9 @@ namespace astc_helpers const uint32_t min_t = bits.next_bits(bit_ofs, 13); const uint32_t max_t = bits.next_bits(bit_ofs, 13); assert(bit_ofs == 64); - + const bool all_extents_all_ones = (min_s == 0x1FFF) && (max_s == 0x1FFF) && (min_t == 0x1FFF) && (max_t == 0x1FFF); - + if (!all_extents_all_ones && ((min_s >= max_s) || (min_t >= max_t))) return false; @@ -3256,7 +4111,7 @@ namespace astc_helpers if (is_half_inf_or_nan(log_blk.m_solid_color[c])) return false; } - + return true; } @@ -3269,7 +4124,7 @@ namespace astc_helpers { // Dp_ofs, P_ofs, W_ofs, W_size, H_ofs, H_size, W_bias, H_bias, p0_ofs, p1_ofs, p2_ofs; { 10, 9, 7, 2, 5, 2, 4, 2, 4, 0, 1 }, // 4 2 - { 10, 9, 7, 2, 5, 2, 8, 2, 4, 0, 1 }, // 8 2 + { 10, 9, 7, 2, 5, 2, 8, 2, 4, 0, 1 }, // 8 2 { 10, 9, 5, 2, 7, 2, 2, 8, 4, 0, 1 }, // 2 8 { 10, 9, 5, 2, 7, 1, 2, 
6, 4, 0, 1 }, // 2 6 @@ -3291,14 +4146,14 @@ namespace astc_helpers // Reserved if ((bits.get_bits(0, 2) == 0) && (bits.get_bits(6, 3) == 0b111)) { - if (bits.get_bits(2, 4) != 0b1111) + if (bits.get_bits(2, 4) != 0b1111) return false; } // Void extent if (bits.get_bits(0, 9) == 0b111111100) return decode_void_extent(bits, log_blk); - + // Check rows const uint32_t x0_2 = bits.get_bits(0, 2), x2_2 = bits.get_bits(2, 2); const uint32_t x5_4 = bits.get_bits(5, 4), x8_1 = bits.get_bits(8, 1); @@ -3344,7 +4199,7 @@ namespace astc_helpers if (r.Dp_ofs >= 0) Dp = bits.get_bits(r.Dp_ofs, 1) != 0; - + if (r.W_size) W += bits.get_bits(r.W_ofs, r.W_size); @@ -3353,7 +4208,7 @@ namespace astc_helpers assert((W >= MIN_GRID_DIM) && (W <= MAX_BLOCK_DIM)); assert((H >= MIN_GRID_DIM) && (H <= MAX_BLOCK_DIM)); - + int p0 = bits.get_bits(r.p0_ofs, 1); int p1 = bits.get_bits(r.p1_ofs, 1); int p2 = bits.get_bits(r.p2_ofs, 1); @@ -3361,10 +4216,10 @@ namespace astc_helpers uint32_t p = p0 | (p1 << 1) | (p2 << 2); if (p < 2) return false; - + log_blk.m_grid_width = (uint8_t)W; log_blk.m_grid_height = (uint8_t)H; - + log_blk.m_weight_ise_range = (uint8_t)((p - 2) + (P * BISE_10_LEVELS)); assert(log_blk.m_weight_ise_range <= LAST_VALID_WEIGHT_ISE_RANGE); @@ -3480,7 +4335,7 @@ namespace astc_helpers static void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t bit_ofs) { assert(num_vals && (ise_range < TOTAL_ISE_RANGES)); - + const uint32_t bits_per_val = g_ise_range_table[ise_range][0]; if (g_ise_range_table[ise_range][1]) @@ -3521,24 +4376,24 @@ namespace astc_helpers return decode_bise(ise_range, pVals, num_vals, bits, bit_ofs); } - + // Decodes a physical ASTC block to a logical ASTC block. // blk_width/blk_height are only used to validate the weight grid's dimensions. 
bool unpack_block(const void* pASTC_block, log_astc_block& log_blk, uint32_t blk_width, uint32_t blk_height) { assert(is_valid_block_size(blk_width, blk_height)); - + const uint8_t* pS = (uint8_t*)pASTC_block; log_blk.clear(); log_blk.m_error_flag = true; - + const uint128 bits( (uint64_t)read_le_dword(pS) | (((uint64_t)read_le_dword(pS + sizeof(uint32_t))) << 32), (uint64_t)read_le_dword(pS + sizeof(uint32_t) * 2) | (((uint64_t)read_le_dword(pS + sizeof(uint32_t) * 3)) << 32)); - + const uint128 rev_bits(bits.get_reversed_bits()); - + if (!decode_config(bits, log_blk)) return false; @@ -3552,16 +4407,16 @@ namespace astc_helpers // Check grid dimensions if ((log_blk.m_grid_width > blk_width) || (log_blk.m_grid_height > blk_height)) return false; - + // Now we have the grid width/height, dual plane, weight ISE range - + const uint32_t total_grid_weights = (log_blk.m_dual_plane ? 2 : 1) * (log_blk.m_grid_width * log_blk.m_grid_height); const uint32_t total_weight_bits = get_ise_sequence_bits(total_grid_weights, log_blk.m_weight_ise_range); - + // 18.24 Illegal Encodings if ((!total_grid_weights) || (total_grid_weights > MAX_GRID_WEIGHTS) || (total_weight_bits < 24) || (total_weight_bits > 96)) return false; - + const uint32_t end_of_weight_bit_ofs = 128 - total_weight_bits; uint32_t total_extra_bits = 0; @@ -3598,9 +4453,9 @@ namespace astc_helpers return false; uint32_t cem_bit_pos = end_of_weight_bit_ofs - total_extra_bits; - + uint32_t c[4] = { 0 }, m[4] = { 0 }; - + cem_bits >>= 2; for (uint32_t i = 0; i < log_blk.m_num_partitions; i++, cem_bits >>= 1) c[i] = cem_bits & 1; @@ -3666,7 +4521,7 @@ namespace astc_helpers // config+num_parts+total_extra_bits (CEM extra+CCS) uint32_t total_config_bits = config_bit_pos + total_extra_bits; - + // Compute number of remaining bits in block const int num_remaining_bits = 128 - (int)total_config_bits - (int)total_weight_bits; if (num_remaining_bits < 0) @@ -3681,6 +4536,7 @@ namespace astc_helpers return false; // Infer 
endpoint ISE range based off the # of values we need to encode, and the # of remaining bits in the block + // TODO: Optimize int endpoint_ise_range = -1; for (int k = 20; k > 0; k--) { @@ -3709,6 +4565,289 @@ namespace astc_helpers return true; } + // Misc. helpers + + uint8_t get_weight(const log_astc_block& log_block, uint32_t plane_index, uint32_t i) + { + const uint32_t num_planes = log_block.m_dual_plane ? 2 : 1; + assert(plane_index < num_planes); + assert(i < (uint32_t)(log_block.m_grid_width * log_block.m_grid_height)); + + const uint32_t idx = i * num_planes + plane_index; + assert(idx < MAX_GRID_WEIGHTS); + + return log_block.m_weights[idx]; + } + + uint8_t &get_weight(log_astc_block& log_block, uint32_t plane_index, uint32_t i) + { + const uint32_t num_planes = log_block.m_dual_plane ? 2 : 1; + assert(plane_index < num_planes); + assert(i < (uint32_t)(log_block.m_grid_width * log_block.m_grid_height)); + + const uint32_t idx = i * num_planes + plane_index; + assert(idx < MAX_GRID_WEIGHTS); + + return log_block.m_weights[idx]; + } + + void extract_weights(const log_astc_block& log_block, uint8_t* pWeights, uint32_t plane_index) + { + const uint32_t num_planes = log_block.m_dual_plane ? 2 : 1; + assert(plane_index < num_planes); + + const uint32_t num_weights = log_block.m_grid_width * log_block.m_grid_height; + for (uint32_t i = 0; i < num_weights; i++) + pWeights[i] = log_block.m_weights[i * num_planes + plane_index]; + } + + void set_weights(log_astc_block& log_block, const uint8_t* pWeights, uint32_t plane_index) + { + const uint32_t num_planes = log_block.m_dual_plane ? 2 : 1; + assert(plane_index < num_planes); + + const uint32_t num_weights = log_block.m_grid_width * log_block.m_grid_height; + for (uint32_t i = 0; i < num_weights; i++) + log_block.m_weights[i * num_planes + plane_index] = pWeights[i]; + } + + uint32_t get_total_weights(const log_astc_block& log_block) + { + return (log_block.m_dual_plane ? 
2 : 1) * (log_block.m_grid_width * log_block.m_grid_height); + } + + // Returns a pointer to the beginning of a partition's/subset's endpoint values. + uint8_t *get_endpoints(log_astc_block& log_block, uint32_t partition_index) + { + assert(partition_index < log_block.m_num_partitions); + + uint32_t ofs = 0; + + for (uint32_t i = 0; i != partition_index; ++i) + ofs += get_num_cem_values(log_block.m_color_endpoint_modes[i]); + + assert(ofs < MAX_ENDPOINTS); + + return log_block.m_endpoints + ofs; + } + + const uint8_t* get_endpoints(const log_astc_block& log_block, uint32_t partition_index) + { + assert(partition_index < log_block.m_num_partitions); + + uint32_t ofs = 0; + + for (uint32_t i = 0; i != partition_index; ++i) + ofs += get_num_cem_values(log_block.m_color_endpoint_modes[i]); + + assert(ofs < MAX_ENDPOINTS); + + return log_block.m_endpoints + ofs; + } + + const char* get_cem_name(uint32_t cem_index) + { + static const char *s_cem_names[16] = + { + "CEM_LDR_LUM_DIRECT (0)", + "CEM_LDR_LUM_BASE_PLUS_OFS (1)", + "CEM_HDR_LUM_LARGE_RANGE (2)", + "CEM_HDR_LUM_SMALL_RANGE (3)", + "CEM_LDR_LUM_ALPHA_DIRECT (4)", + "CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS (5)", + "CEM_LDR_RGB_BASE_SCALE (6)", + "CEM_HDR_RGB_BASE_SCALE (7)", + "CEM_LDR_RGB_DIRECT (8)", + "CEM_LDR_RGB_BASE_PLUS_OFFSET (9)", + "CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A (10)", + "CEM_HDR_RGB (11)", + "CEM_LDR_RGBA_DIRECT (12)", + "CEM_LDR_RGBA_BASE_PLUS_OFFSET (13)", + "CEM_HDR_RGB_LDR_ALPHA (14)", + "CEM_HDR_RGB_HDR_ALPHA (15)" + }; + + assert(cem_index < std::size(s_cem_names)); + const char *p = s_cem_names[cem_index]; + assert(p); + return p; + } + + bool cem_is_ldr_direct(uint32_t cem_index) + { + return (cem_index == CEM_LDR_RGB_DIRECT) || (cem_index == CEM_LDR_RGBA_DIRECT); + } + + bool cem_is_ldr_base_scale(uint32_t cem_index) + { + return (cem_index == CEM_LDR_RGB_BASE_SCALE) || (cem_index == CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A); + } + + bool cem_is_ldr_base_plus_ofs(uint32_t cem_index) + { + return 
(cem_index == CEM_LDR_RGB_BASE_PLUS_OFFSET) || (cem_index == CEM_LDR_RGBA_BASE_PLUS_OFFSET); + } + + bool cem_supports_bc(uint32_t cem) + { + switch (cem) + { + case CEM_LDR_RGB_DIRECT: + case CEM_LDR_RGBA_DIRECT: + case CEM_LDR_RGB_BASE_PLUS_OFFSET: + case CEM_LDR_RGBA_BASE_PLUS_OFFSET: + return true; + default: + break; + } + return false; + } + + // input: + // a=[0,255] + // b=[0,255] + // output: + // a=from, converted to -32 to 31 + // b=to, shifted right by 1 and 1 bit added to MSB, so [0,255] + void bit_transfer_signed_dec(int& a, int& b) + { + assert((a >= 0) && (a <= 255)); + assert((b >= 0) && (b <= 255)); + + b >>= 1; + b |= (a & 0x80); + + a >>= 1; + a &= 0x3F; + if ((a & 0x20) != 0) + a -= 0x40; + } + + // transfers a bit from b to a, prepares a for encoding + // input: + // a=[-32,31] (6-bits, 2's complement) + // b=[0,255] (8-bits) + // output: + // a=[0,255] (preserve top 2 bits) + // b=[0,255] + void bit_transfer_signed_enc(int& a, int& b) + { + assert((a >= -32) && (a <= 31)); + assert((b >= 0) && (b <= 255)); + + // extract MSB of b + bool bit_to_transfer = (b & 0x80) != 0; + b = (b << 1) & 0xFF; // 7 bits to 8 + + a &= 0x3F; // 6 bits + a <<= 1; // 6 to 7 bits + if (bit_to_transfer) + a |= 0x80; // set MSB + } + + // RGB or RGBA direct + bool cem8_or_12_used_blue_contraction(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index) + { + assert((cem_index == CEM_LDR_RGB_DIRECT) || (cem_index == CEM_LDR_RGBA_DIRECT)); + (void)(cem_index); + + const auto& endpoint_dequant_tab = g_dequant_tables.get_endpoint_tab(endpoint_ise_index).m_ISE_to_val; + + uint8_t dequantized_endpoints[6]; + for (uint32_t i = 0; i < 6; i++) + dequantized_endpoints[i] = endpoint_dequant_tab[pEndpoint_vals[i]]; + + uint32_t s0 = dequantized_endpoints[0] + dequantized_endpoints[2] + dequantized_endpoints[4]; + uint32_t s1 = dequantized_endpoints[1] + dequantized_endpoints[3] + dequantized_endpoints[5]; + + return s1 < s0; + } + + // RGB or RGBA base 
plus offset + bool cem9_or_13_used_blue_contraction(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index) + { + assert((cem_index == CEM_LDR_RGB_BASE_PLUS_OFFSET) || (cem_index == CEM_LDR_RGBA_BASE_PLUS_OFFSET)); + (void)(cem_index); + + const auto& endpoint_dequant_tab = g_dequant_tables.get_endpoint_tab(endpoint_ise_index).m_ISE_to_val; + + int dequantized_endpoints[6]; + for (uint32_t i = 0; i < 6; i++) + dequantized_endpoints[i] = endpoint_dequant_tab[pEndpoint_vals[i]]; + + bit_transfer_signed_dec(dequantized_endpoints[1], dequantized_endpoints[0]); + bit_transfer_signed_dec(dequantized_endpoints[3], dequantized_endpoints[2]); + bit_transfer_signed_dec(dequantized_endpoints[5], dequantized_endpoints[4]); + + int s = dequantized_endpoints[1] + dequantized_endpoints[3] + dequantized_endpoints[5]; + + return s < 0; + } + + bool used_blue_contraction(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index) + { + assert(is_cem_ldr(cem_index)); + + bool used_blue_contraction_flag = false; + + if ((cem_index == 8) || (cem_index == 12)) + used_blue_contraction_flag = cem8_or_12_used_blue_contraction(cem_index, pEndpoint_vals, endpoint_ise_index); + else if ((cem_index == 9) || (cem_index == 13)) + used_blue_contraction_flag = cem9_or_13_used_blue_contraction(cem_index, pEndpoint_vals, endpoint_ise_index); + + return used_blue_contraction_flag; + } + + uint32_t get_base_cem_without_alpha(uint32_t cem) + { + assert(is_cem_ldr(cem)); + + switch (cem) + { + case CEM_LDR_LUM_ALPHA_DIRECT: return CEM_LDR_LUM_DIRECT; + case CEM_LDR_RGBA_DIRECT: return CEM_LDR_RGB_DIRECT; + case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A: return CEM_LDR_RGB_BASE_SCALE; + case CEM_LDR_RGBA_BASE_PLUS_OFFSET: return CEM_LDR_RGB_BASE_PLUS_OFFSET; + default: + break; + } + + return cem; + } + + int apply_delta_to_bise_endpoint_val(uint32_t endpoint_ise_range, int ise_val, int delta) + { + if (delta == 0) + return ise_val; + + uint32_t num_ise_levels = 
astc_helpers::get_ise_levels(endpoint_ise_range); + + const auto& ISE_to_rank = astc_helpers::g_dequant_tables.get_endpoint_tab(endpoint_ise_range).m_ISE_to_rank; + const auto& rank_to_ISE = astc_helpers::g_dequant_tables.get_endpoint_tab(endpoint_ise_range).m_rank_to_ISE; + + int cur_rank = ISE_to_rank[ise_val]; + int new_rank = basisu::clamp(cur_rank + delta, 0, (int)num_ise_levels - 1); + + return rank_to_ISE[new_rank]; + } + + void get_astc_block_size_by_index(uint32_t index, uint32_t& width, uint32_t& height) + { + assert(index < NUM_ASTC_BLOCK_SIZES); + + width = g_astc_block_sizes[index][0]; + height = g_astc_block_sizes[index][1]; + } + + int find_astc_block_size_index(uint32_t width, uint32_t height) + { + for (uint32_t i = 0; i < NUM_ASTC_BLOCK_SIZES; i++) + if ((width == g_astc_block_sizes[i][0]) && (height == g_astc_block_sizes[i][1])) + return i; + + return -1; + } + } // namespace astc_helpers #endif //BASISU_ASTC_HELPERS_IMPLEMENTATION diff --git a/external/basis_universal/transcoder/basisu_containers.h b/external/basis_universal/transcoder/basisu_containers.h index 88026c7198..dc816f3d10 100644 --- a/external/basis_universal/transcoder/basisu_containers.h +++ b/external/basis_universal/transcoder/basisu_containers.h @@ -159,8 +159,8 @@ namespace basisu static inline void construct(T** p) { memset(p, 0, sizeof(T*)); } static inline void construct(T** p, T* init) { *p = init; } static inline void construct_array(T** p, size_t n) { memset(p, 0, sizeof(T*) * n); } - static inline void destruct(T** p) { p; } - static inline void destruct_array(T** p, size_t n) { p, n; } + static inline void destruct(T** p) { (void)p; } + static inline void destruct_array(T** p, size_t n) { (void)p, (void)n; } }; #define BASISU_DEFINE_BUILT_IN_TYPE(X) \ @@ -169,8 +169,8 @@ namespace basisu static inline void construct(X* p) { memset(p, 0, sizeof(X)); } \ static inline void construct(X* p, const X& init) { memcpy(p, &init, sizeof(X)); } \ static inline void 
construct_array(X* p, size_t n) { memset(p, 0, sizeof(X) * n); } \ - static inline void destruct(X* p) { p; } \ - static inline void destruct_array(X* p, size_t n) { p, n; } }; + static inline void destruct(X* p) { (void)p; } \ + static inline void destruct_array(X* p, size_t n) { (void)p, (void)n; } }; BASISU_DEFINE_BUILT_IN_TYPE(bool) BASISU_DEFINE_BUILT_IN_TYPE(char) @@ -272,7 +272,7 @@ namespace basisu size_t c = a + b; return c < a; } - + // Returns false on overflow, true if OK. template inline bool can_fit_into_size_t(T val) @@ -294,7 +294,7 @@ namespace basisu template class writable_span; - + template class readable_span { @@ -304,7 +304,7 @@ namespace basisu using const_pointer = const T*; using const_reference = const T&; using const_iterator = const T*; - + inline readable_span() : m_p(nullptr), m_size(0) @@ -941,7 +941,7 @@ namespace basisu inline iterator begin() const { return m_p; } inline iterator end() const { assert(m_p || !m_size); return m_p + m_size; } - + inline const_iterator cbegin() const { return m_p; } inline const_iterator cend() const { assert(m_p || !m_size); return m_p + m_size; } @@ -1506,7 +1506,7 @@ namespace basisu #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wclass-memaccess" +#pragma GCC diagnostic ignored "-Wclass-memaccess" #endif if ((m_p) && (other.m_p)) { @@ -1563,6 +1563,52 @@ namespace basisu set(ws); } + // mostly to ease porting from std::vector, not particularly optimized + inline void assign(size_t new_size, const T& init) + { + assert(!m_p || (&init < m_p) || (&init >= (m_p + m_size))); + + // Blow away existing contents + resize(0); + + if (new_size) + { + resize(new_size); + + for (size_t i = 0; i < new_size; ++i) + m_p[i] = init; + } + } + + // mostly to ease porting from std::vector, not particularly optimized + template + inline void assign(const R* pBegin, const R* pEnd) + { + assert(!m_p || + (reinterpret_cast(pEnd) <= reinterpret_cast(m_p)) || + 
(reinterpret_cast(pBegin) >= reinterpret_cast(m_p + m_size)) + ); + + // Blow away existing contents + resize(0); + + if ((!pBegin) || (!pEnd) || (pEnd <= pBegin)) + { + assert(0); + return; + } + + const size_t new_size = static_cast(static_cast(pEnd - pBegin)); + + if (new_size) + { + resize(new_size); + + for (size_t i = 0; i < new_size; ++i) + m_p[i] = static_cast(*pBegin++); + } + } + // Set contents of vector to contents of the readable span bool set(const readable_span& rs) { @@ -1647,7 +1693,7 @@ namespace basisu { #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wclass-memaccess" +#pragma GCC diagnostic ignored "-Wclass-memaccess" #endif if ((m_p) && (other.m_p)) memcpy((void *)m_p, other.m_p, other.m_size * sizeof(T)); @@ -2147,7 +2193,7 @@ namespace basisu if (!try_insert(p, obj)) container_abort("vector::insert() failed!\n"); } - + // push_front() isn't going to be very fast - it's only here for usability. inline void push_front(const T& obj) { @@ -2228,7 +2274,7 @@ namespace basisu #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wclass-memaccess" +#pragma GCC diagnostic ignored "-Wclass-memaccess" #endif memmove((void *)pDst, pSrc, num_to_move * sizeof(T)); @@ -2239,7 +2285,7 @@ namespace basisu } else { - // Type is not bitwise copyable or movable. + // Type is not bitwise copyable or movable. // Move them down one at a time by using the equals operator, and destroying anything that's left over at the end. T* pDst_end = pDst + num_to_move; @@ -2482,7 +2528,7 @@ namespace basisu { #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wclass-memaccess" +#pragma GCC diagnostic ignored "-Wclass-memaccess" #endif memset(m_p, *reinterpret_cast(&o), m_size); @@ -2770,6 +2816,7 @@ namespace basisu m_grow_threshold = 0; } + // Destroys elements/empties container but doesn't free memory. 
inline void reset() { if (!m_num_valid) @@ -2798,7 +2845,7 @@ namespace basisu } else if (sizeof(node) <= 16) { - memset(&m_values[0], 0, m_values.size_in_bytes()); + memset((void *)&m_values[0], 0, m_values.size_in_bytes()); } else { @@ -2829,6 +2876,11 @@ namespace basisu return m_num_valid; } + inline uint32_t size_u32() + { + return static_cast(m_num_valid); + } + inline size_t get_table_size() { return m_values.size(); @@ -3102,7 +3154,7 @@ namespace basisu { return try_insert(result, std::move(v.first), std::move(v.second)); } - + inline const_iterator find(const Key& k) const { return const_iterator(*this, find_index(k)); @@ -3183,12 +3235,12 @@ namespace basisu static inline void construct_value_type(value_type* pDst, const Key& k, const Value& v) { if (BASISU_IS_BITWISE_COPYABLE(Key)) - memcpy(&pDst->first, &k, sizeof(Key)); + memcpy((void *)&pDst->first, &k, sizeof(Key)); else scalar_type::construct(&pDst->first, k); if (BASISU_IS_BITWISE_COPYABLE(Value)) - memcpy(&pDst->second, &v, sizeof(Value)); + memcpy((void *)&pDst->second, &v, sizeof(Value)); else scalar_type::construct(&pDst->second, v); } @@ -3197,17 +3249,17 @@ namespace basisu { if ((BASISU_IS_BITWISE_COPYABLE(Key)) && (BASISU_IS_BITWISE_COPYABLE(Value))) { - memcpy(pDst, pSrc, sizeof(value_type)); + memcpy((void *)pDst, pSrc, sizeof(value_type)); } else { if (BASISU_IS_BITWISE_COPYABLE(Key)) - memcpy(&pDst->first, &pSrc->first, sizeof(Key)); + memcpy((void *)&pDst->first, &pSrc->first, sizeof(Key)); else scalar_type::construct(&pDst->first, pSrc->first); if (BASISU_IS_BITWISE_COPYABLE(Value)) - memcpy(&pDst->second, &pSrc->second, sizeof(Value)); + memcpy((void *)&pDst->second, &pSrc->second, sizeof(Value)); else scalar_type::construct(&pDst->second, pSrc->second); } @@ -3227,14 +3279,14 @@ namespace basisu if (BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(Key) && BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(Value)) { - memcpy(pDst, pSrc, sizeof(node)); + memcpy((void *)pDst, pSrc, sizeof(node)); 
assert(pDst->state == cStateValid); } else { if (BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(Key)) - memcpy(&pDst->first, &pSrc->first, sizeof(Key)); + memcpy((void*)&pDst->first, &pSrc->first, sizeof(Key)); else { new ((void*)&pDst->first) Key(std::move(pSrc->first)); @@ -3242,7 +3294,7 @@ namespace basisu } if (BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(Value)) - memcpy(&pDst->second, &pSrc->second, sizeof(Value)); + memcpy((void*)&pDst->second, &pSrc->second, sizeof(Value)); else { new ((void*)&pDst->second) Value(std::move(pSrc->second)); @@ -3583,7 +3635,7 @@ namespace basisu // Not checking for is MOVABLE because the caller could later destruct k and/or v (what state do we set them to?) if (BASISU_IS_BITWISE_COPYABLE(Key)) { - memcpy(&pDst->first, &k, sizeof(Key)); + memcpy((void *)&pDst->first, (const void *)&k, sizeof(Key)); } else { @@ -3593,7 +3645,7 @@ namespace basisu if (BASISU_IS_BITWISE_COPYABLE(Value)) { - memcpy(&pDst->second, &v, sizeof(Value)); + memcpy((void *)&pDst->second, (const void*)&v, sizeof(Value)); } else { @@ -3721,11 +3773,11 @@ namespace basisu va_list args; va_start(args, pFmt); -#ifdef _WIN32 +#ifdef _WIN32 vsprintf_s(buf, sizeof(buf), pFmt, args); #else vsnprintf(buf, sizeof(buf), pFmt, args); -#endif +#endif va_end(args); return std::string(buf); @@ -3893,7 +3945,7 @@ namespace basisu std::size_t copy_size = std::min(list.size(), N); std::copy_n(list.begin(), copy_size, m_data); // Copy up to min(list.size(), N) - if (list.size() < N) + if (list.size() < N) { // Initialize the rest of the array std::fill(m_data + copy_size, m_data + N, T{}); @@ -3907,7 +3959,7 @@ namespace basisu return m_data[index]; } - BASISU_FORCE_INLINE const T& operator[](std::size_t index) const + BASISU_FORCE_INLINE const T& operator[](std::size_t index) const { if (index >= N) container_abort("fixed_array: Index out of bounds."); @@ -3950,26 +4002,26 @@ namespace basisu { return writable_span(m_data, N); } - + private: BASISU_FORCE_INLINE void initialize_array() { 
- if constexpr (std::is_integral::value || std::is_floating_point::value) + if constexpr (std::is_integral::value || std::is_floating_point::value) memset(m_data, 0, sizeof(m_data)); - else + else std::fill(m_data, m_data + N, T{}); } BASISU_FORCE_INLINE T& access_element(std::size_t index) { - if (index >= N) + if (index >= N) container_abort("fixed_array: Index out of bounds."); return m_data[index]; } BASISU_FORCE_INLINE const T& access_element(std::size_t index) const { - if (index >= N) + if (index >= N) container_abort("fixed_array: Index out of bounds."); return m_data[index]; } @@ -4046,6 +4098,9 @@ namespace basisu inline uint32_t get_width() const { return m_width; } inline uint32_t get_height() const { return m_height; } + inline uint32_t get_cols() const { return m_width; } + inline uint32_t get_rows() const { return m_height; } + inline const T& operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_values[x + y * m_width]; } inline T& operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_values[x + y * m_width]; } @@ -4054,9 +4109,23 @@ namespace basisu inline const T& operator[] (uint32_t i) const { return m_values[i]; } inline T& operator[] (uint32_t i) { return m_values[i]; } + inline const T& at(int x, int y) const { return (*this)((uint32_t)x, (uint32_t)y); } + inline T& at(int x, int y) { return (*this)((uint32_t)x, (uint32_t)y); } + inline const T& at_clamped(int x, int y) const { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } inline T& at_clamped(int x, int y) { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } + inline const T& at_row_col(int y, int x) const { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } + inline T& at_row_col(int y, int x) { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } + + inline void set_clipped(int x, int y, const T& val) + { + if ( ((uint32_t)x >= m_width) || 
((uint32_t)y >= m_height) ) + return; + + m_values[x + y * m_width] = val; + } + void clear() { m_width = 0; @@ -4141,9 +4210,18 @@ namespace basisu return true; } + vector2D& resize_rows_cols(uint32_t rows, uint32_t cols) + { + return resize(cols, rows); + } + + bool try_resize_rows_cols(uint32_t rows, uint32_t cols) + { + return try_resize(cols, rows); + } + const vector2D& extract_block_clamped(T* pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h) const { - // HACK HACK if (((src_x + w) > m_width) || ((src_y + h) > m_height)) { // Slower clamping case @@ -4165,8 +4243,87 @@ namespace basisu return *this; } + + const vector2D& extract_block_clamped(T* pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h, uint32_t override_height) const + { + assert(override_height && (override_height <= m_height)); + + if (((src_x + w) > m_width) || ((src_y + h) > minimum(m_height, override_height))) + { + // Slower clamping case + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + *pDst++ = at_clamped(src_x + x, minimum(src_y + y, override_height - 1)); + } + else + { + const T* pSrc = &m_values[src_x + src_y * m_width]; + + for (uint32_t y = 0; y < h; y++) + { + memcpy(pDst, pSrc, w * sizeof(T)); + pSrc += m_width; + pDst += w; + } + } + + return *this; + } }; + // Explictly primitive container intended for POD's, simple usage. + // push_back() and resize() will refuse to push anymore and just return when full. + template + class static_vector + { + T m_data[N]; + uint32_t m_size; + + public: + static_vector() : m_size(0) { } + + inline void reserve(size_t reserve_size) + { + (void)(reserve_size); + + assert(reserve_size <= N); + } + + inline void push_back(const T& value) + { + // Should never happen. 
+ if (m_size >= N) + { + assert(0); + fprintf(stderr, "basisu::static_vector overflow!\n"); + return; + } + + m_data[m_size++] = value; + } + + inline std::size_t size() const { return m_size; } + inline uint32_t size_u32() const { return m_size; } + inline constexpr std::size_t capacity() const { return N; } + + inline bool empty() const { return !m_size; } + + inline T& operator[](std::size_t i) { return m_data[i]; } + inline const T& operator[](std::size_t i) const { return m_data[i]; } + + inline void resize(size_t new_size) + { + if (new_size > N) + { + assert(0); + fprintf(stderr, "basisu::static_vector overflow!\n"); + return; + } + + m_size = (uint32_t)new_size; + } + }; + } // namespace basisu namespace std diff --git a/external/basis_universal/transcoder/basisu_containers_impl.h b/external/basis_universal/transcoder/basisu_containers_impl.h index 4c85ed3dfa..2ac13029dc 100644 --- a/external/basis_universal/transcoder/basisu_containers_impl.h +++ b/external/basis_universal/transcoder/basisu_containers_impl.h @@ -14,7 +14,7 @@ namespace basisu #ifdef _MSC_VER __declspec(noreturn) #else - [[noreturn]] + [[noreturn]] #endif void container_abort(const char* pMsg, ...) 
{ @@ -42,12 +42,12 @@ namespace basisu assert(m_size <= m_capacity); assert(min_new_capacity >= m_size); assert(element_size); - + // Basic sanity check min_new_capacity if (!can_fit_into_size_t((uint64_t)min_new_capacity * element_size)) { assert(0); - + if (nofail_flag) return false; @@ -100,7 +100,7 @@ namespace basisu } const size_t desired_size = static_cast(desired_size_u64); - + size_t actual_size = 0; BASISU_NOTE_UNUSED(actual_size); @@ -109,6 +109,7 @@ namespace basisu void* new_p = realloc(m_p, desired_size); if (!new_p) { + fprintf(stderr, "elemental_vector::increase_capacity: Allocation failed!\n"); assert(0); if (nofail_flag) @@ -133,7 +134,9 @@ namespace basisu void* new_p = malloc(desired_size); if (!new_p) { + fprintf(stderr, "elemental_vector::increase_capacity: Allocation failed!\n"); assert(0); + if (nofail_flag) return false; @@ -269,7 +272,7 @@ namespace basisu s.insert(i); k.push_back(i); } - + for (uint32_t i = 0; i < k.size(); i++) { uint32_t r = rand() ^ (rand() << 15); @@ -315,7 +318,7 @@ namespace basisu { typedef basisu::hash_map< uint32_t, basisu::vector > hm; hm q; - + basisu::vector a, b; a.push_back(1); b.push_back(2); diff --git a/external/basis_universal/transcoder/basisu_etc1_mods.inl b/external/basis_universal/transcoder/basisu_etc1_mods.inl new file mode 100644 index 0000000000..572a816d00 --- /dev/null +++ b/external/basis_universal/transcoder/basisu_etc1_mods.inl @@ -0,0 +1,257 @@ +static const uint8_t g_etc1_mod_tabs[255][8] = { +{0,0,0,0,0,0,0,0,}, +{0,0,0,0,0,0,0,0,}, +{0,0,0,0,0,0,0,0,}, +{0,0,0,0,0,0,0,0,}, +{0,0,0,0,0,0,1,1,}, +{0,0,0,0,0,0,0,1,}, +{0,0,0,0,0,0,0,0,}, +{0,0,0,0,0,0,0,0,}, +{0,0,0,0,0,0,0,0,}, +{0,0,0,0,0,0,0,0,}, +{0,0,0,0,0,0,0,0,}, +{0,0,0,0,0,0,0,0,}, +{0,0,0,0,0,0,0,1,}, +{0,0,0,0,0,0,1,1,}, +{0,0,0,0,0,1,1,1,}, +{0,0,0,0,0,1,1,1,}, +{0,0,0,0,1,1,1,1,}, +{0,0,0,0,1,1,1,1,}, +{0,0,0,0,1,1,1,1,}, +{0,0,0,0,1,1,1,1,}, +{0,0,0,1,1,1,1,1,}, +{0,0,0,1,1,1,1,1,}, +{0,0,0,1,1,1,1,1,}, +{0,0,0,1,1,1,1,2,}, 
+{0,0,0,1,1,1,2,2,}, +{0,0,0,1,1,1,2,2,}, +{0,0,0,1,1,2,2,2,}, +{0,0,1,1,1,2,2,2,}, +{0,0,1,1,1,2,2,2,}, +{0,0,1,1,1,2,2,2,}, +{0,0,1,1,1,2,2,2,}, +{0,0,1,1,2,2,2,2,}, +{0,0,1,1,2,2,2,2,}, +{0,0,1,1,2,2,2,2,}, +{0,0,1,1,2,2,2,2,}, +{0,0,1,1,2,2,2,2,}, +{0,0,1,1,2,2,2,3,}, +{0,0,1,1,2,2,3,3,}, +{0,0,1,2,2,2,3,3,}, +{0,0,1,2,2,2,3,3,}, +{0,0,1,2,2,2,3,3,}, +{0,1,1,2,2,3,3,3,}, +{0,1,1,2,2,3,3,3,}, +{0,1,1,2,2,3,3,3,}, +{0,1,1,2,2,3,3,3,}, +{0,1,1,2,2,3,3,3,}, +{0,1,1,2,2,3,3,3,}, +{0,1,1,2,2,3,3,3,}, +{0,1,1,2,3,3,3,3,}, +{0,1,1,2,3,3,3,3,}, +{0,1,1,2,3,3,3,3,}, +{0,1,2,2,3,3,3,3,}, +{0,1,2,2,3,3,3,4,}, +{0,1,2,2,3,3,3,4,}, +{0,1,2,2,3,3,4,4,}, +{0,1,2,2,3,3,4,4,}, +{0,1,2,2,3,3,4,4,}, +{0,1,2,2,3,3,4,4,}, +{0,1,2,2,3,3,4,4,}, +{0,1,2,2,3,4,4,4,}, +{0,1,2,3,3,4,4,4,}, +{0,1,2,3,3,4,4,4,}, +{0,1,2,3,3,4,4,4,}, +{0,1,2,3,3,4,4,4,}, +{0,1,2,3,3,4,4,4,}, +{0,1,2,3,3,4,4,4,}, +{0,1,2,3,3,4,4,4,}, +{0,1,2,3,3,4,4,4,}, +{0,1,2,3,3,4,4,4,}, +{0,1,2,3,3,4,4,4,}, +{0,1,2,3,4,4,4,4,}, +{0,1,2,3,4,4,4,5,}, +{0,1,2,3,4,4,4,5,}, +{0,1,2,3,4,4,4,5,}, +{0,1,2,3,4,4,5,5,}, +{0,1,2,3,4,4,5,5,}, +{0,1,2,3,4,4,5,5,}, +{0,1,2,3,4,4,5,5,}, +{0,2,2,3,4,4,5,5,}, +{0,2,2,3,4,4,5,5,}, +{0,2,2,3,4,4,5,5,}, +{0,2,3,3,4,5,5,5,}, +{0,2,3,3,4,5,5,5,}, +{0,2,3,3,4,5,5,5,}, +{0,2,3,3,4,5,5,5,}, +{1,2,3,3,4,5,5,5,}, +{1,2,3,3,4,5,5,5,}, +{1,2,3,4,4,5,5,5,}, +{1,2,3,4,4,5,5,5,}, +{1,2,3,4,4,5,5,5,}, +{1,2,3,4,4,5,5,5,}, +{1,2,3,4,4,5,5,5,}, +{1,2,3,4,4,5,5,5,}, +{1,2,3,4,4,5,5,5,}, +{1,2,3,4,4,5,5,5,}, +{1,2,3,4,4,5,5,6,}, +{1,2,3,4,5,5,5,6,}, +{1,2,3,4,5,5,5,6,}, +{1,2,3,4,5,5,5,6,}, +{1,2,3,4,5,5,6,6,}, +{1,2,3,4,5,5,6,6,}, +{1,2,3,4,5,5,6,6,}, +{1,2,3,4,5,5,6,6,}, +{1,2,3,4,5,5,6,6,}, +{1,2,3,4,5,5,6,6,}, +{1,2,3,4,5,5,6,6,}, +{1,2,3,4,5,5,6,6,}, +{1,2,3,4,5,5,6,6,}, +{1,2,3,4,5,6,6,6,}, +{1,2,3,4,5,6,6,6,}, +{1,2,3,4,5,6,6,6,}, +{1,2,3,4,5,6,6,6,}, +{1,2,3,4,5,6,6,6,}, +{1,2,3,4,5,6,6,6,}, +{1,2,3,4,5,6,6,6,}, +{1,2,3,4,5,6,6,6,}, +{1,2,4,4,5,6,6,6,}, +{1,2,4,4,5,6,6,6,}, +{1,2,4,4,5,6,6,6,}, 
+{1,2,4,4,5,6,6,6,}, +{1,2,4,5,5,6,6,6,}, +{1,2,4,5,5,6,6,6,}, +{1,3,4,5,5,6,6,6,}, +{1,3,4,5,5,6,6,6,}, +{1,3,4,5,5,6,6,6,}, +{1,3,4,5,5,6,6,6,}, +{1,3,4,5,5,6,6,6,}, +{1,3,4,5,5,6,6,6,}, +{1,3,4,5,6,6,6,6,}, +{1,3,4,5,6,6,6,6,}, +{1,3,4,5,6,6,6,6,}, +{1,3,4,5,6,6,6,6,}, +{1,3,4,5,6,6,6,6,}, +{1,3,4,5,6,6,6,6,}, +{1,3,4,5,6,6,6,6,}, +{1,3,4,5,6,6,6,6,}, +{1,3,4,5,6,6,6,6,}, +{1,3,4,5,6,6,6,6,}, +{1,3,4,5,6,6,6,6,}, +{1,3,4,5,6,6,6,6,}, +{1,3,4,5,6,6,6,6,}, +{1,3,4,5,6,6,6,6,}, +{1,3,4,5,6,6,6,6,}, +{1,3,4,5,6,6,6,6,}, +{1,3,4,5,6,6,6,6,}, +{1,3,4,5,6,6,6,6,}, +{1,3,4,5,6,6,6,6,}, +{1,3,4,5,6,6,6,7,}, +{1,3,4,5,6,6,6,7,}, +{1,3,4,5,6,6,6,7,}, +{1,3,4,5,6,6,6,7,}, +{1,3,4,5,6,6,6,7,}, +{1,3,4,5,6,6,6,7,}, +{1,3,4,5,6,6,6,7,}, +{1,3,4,5,6,6,7,7,}, +{1,3,4,5,6,6,7,7,}, +{1,3,4,5,6,6,7,7,}, +{1,3,4,5,6,6,7,7,}, +{1,3,4,5,6,6,7,7,}, +{1,3,4,6,6,6,7,7,}, +{1,3,5,6,6,6,7,7,}, +{1,3,5,6,6,6,7,7,}, +{1,3,5,6,6,6,7,7,}, +{2,3,5,6,6,6,7,7,}, +{2,3,5,6,6,6,7,7,}, +{2,3,5,6,6,6,7,7,}, +{2,3,5,6,6,6,7,7,}, +{2,3,5,6,6,7,7,7,}, +{2,3,5,6,6,7,7,7,}, +{2,3,5,6,6,7,7,7,}, +{2,3,5,6,6,7,7,7,}, +{2,3,5,6,6,7,7,7,}, +{2,3,5,6,6,7,7,7,}, +{2,3,5,6,6,7,7,7,}, +{2,3,5,6,6,7,7,7,}, +{2,4,5,6,6,7,7,7,}, +{2,4,5,6,6,7,7,7,}, +{2,4,5,6,6,7,7,7,}, +{2,4,5,6,6,7,7,7,}, +{2,4,5,6,6,7,7,7,}, +{2,4,5,6,6,7,7,7,}, +{2,4,5,6,6,7,7,7,}, +{2,4,5,6,6,7,7,7,}, +{2,4,5,6,6,7,7,7,}, +{2,4,5,6,6,7,7,7,}, +{2,4,5,6,6,7,7,7,}, +{2,4,5,6,6,7,7,7,}, +{2,4,5,6,6,7,7,7,}, +{2,4,5,6,6,7,7,7,}, +{2,4,5,6,6,7,7,7,}, +{2,4,5,6,6,7,7,7,}, +{2,4,5,6,6,7,7,7,}, +{2,4,5,6,6,7,7,7,}, +{2,4,5,6,6,7,7,7,}, +{2,4,5,6,6,7,7,7,}, +{2,4,5,6,6,7,7,7,}, +{2,4,5,6,6,7,7,7,}, +{2,4,5,6,7,7,7,7,}, +{2,4,5,6,7,7,7,7,}, +{2,4,5,6,7,7,7,7,}, +{2,4,5,6,7,7,7,7,}, +{2,4,5,6,7,7,7,7,}, +{2,4,5,6,7,7,7,7,}, +{2,4,5,6,7,7,7,7,}, +{2,4,5,6,7,7,7,7,}, +{2,4,5,6,7,7,7,7,}, +{2,4,5,6,7,7,7,7,}, +{2,4,5,6,7,7,7,7,}, +{2,4,5,6,7,7,7,7,}, +{2,4,5,6,7,7,7,7,}, +{2,4,5,6,7,7,7,7,}, +{2,4,5,6,7,7,7,7,}, +{2,4,6,6,7,7,7,7,}, +{2,4,6,6,7,7,7,7,}, 
+{2,4,6,6,7,7,7,7,}, +{2,4,6,6,7,7,7,7,}, +{2,4,6,6,7,7,7,7,}, +{2,4,6,6,7,7,7,7,}, +{2,4,6,6,7,7,7,7,}, +{2,4,6,6,7,7,7,7,}, +{2,4,6,6,7,7,7,7,}, +{2,4,6,6,7,7,7,7,}, +{2,4,6,6,7,7,7,7,}, +{2,4,6,6,7,7,7,7,}, +{2,4,6,6,7,7,7,7,}, +{2,4,6,6,7,7,7,7,}, +{2,4,6,6,7,7,7,7,}, +{2,4,6,6,7,7,7,7,}, +{2,4,6,6,7,7,7,7,}, +{2,4,6,6,7,7,7,7,}, +{2,4,6,6,7,7,7,7,}, +{2,4,6,6,7,7,7,7,}, +{2,4,6,6,7,7,7,7,}, +{2,4,6,6,7,7,7,7,}, +{2,4,6,6,7,7,7,7,}, +{2,4,6,6,7,7,7,7,}, +{2,4,6,6,7,7,7,7,}, +{2,4,6,6,7,7,7,7,}, +{2,4,6,6,7,7,7,7,}, +{2,4,6,6,7,7,7,7,}, +{2,4,6,6,7,7,7,7,}, +{2,4,6,6,7,7,7,7,}, +{2,5,6,6,7,7,7,7,}, +{2,5,6,6,7,7,7,7,}, +{2,5,6,6,7,7,7,7,}, +{2,5,6,6,7,7,7,7,}, +{2,5,6,6,7,7,7,7,}, +{2,5,6,7,7,7,7,7,}, +{2,5,6,7,7,7,7,7,}, +{2,5,6,7,7,7,7,7,}, +{2,5,6,7,7,7,7,7,}, +{2,5,6,7,7,7,7,7,}, +{2,5,6,7,7,7,7,7,}, +{2,5,6,7,7,7,7,7,}, +{2,5,6,7,7,7,7,7,} +}; diff --git a/external/basis_universal/transcoder/basisu_file_headers.h b/external/basis_universal/transcoder/basisu_file_headers.h index ddd117a0c9..10462774f4 100644 --- a/external/basis_universal/transcoder/basisu_file_headers.h +++ b/external/basis_universal/transcoder/basisu_file_headers.h @@ -1,5 +1,5 @@ // basis_file_headers.h -// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2026 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -39,7 +39,7 @@ namespace basist basisu::packed_uint<2> m_orig_height; // The original image height (may not be a multiple of 4 pixels) basisu::packed_uint<2> m_num_blocks_x; // The slice's block X dimensions. Each block is 4x4 or 6x6 pixels. The slice's pixel resolution may or may not be a power of 2. - basisu::packed_uint<2> m_num_blocks_y; // The slice's block Y dimensions. + basisu::packed_uint<2> m_num_blocks_y; // The slice's block Y dimensions. 
basisu::packed_uint<4> m_file_ofs; // Offset from the start of the file to the start of the slice's data basisu::packed_uint<4> m_file_size; // The size of the compressed slice data in bytes @@ -59,16 +59,16 @@ namespace basist // Set if any slices contain alpha (for ETC1S, if the odd slices contain alpha data) cBASISHeaderFlagHasAlphaSlices = 4, - // For ETC1S files, this will be true if the file utilizes a codebook from another .basis file. + // For ETC1S files, this will be true if the file utilizes a codebook from another .basis file. cBASISHeaderFlagUsesGlobalCodebook = 8, - // Set if the texture data is sRGB, otherwise it's linear. + // Set if the texture data is sRGB, otherwise it's linear. // In reality, we have no idea if the texture data is actually linear or sRGB. This is the m_perceptual parameter passed to the compressor. cBASISHeaderFlagSRGB = 16, }; // The image type field attempts to describe how to interpret the image data in a Basis file. - // The encoder library doesn't really do anything special or different with these texture types, this is mostly here for the benefit of the user. + // The encoder library doesn't really do anything special or different with these texture types, this is mostly here for the benefit of the user. 
// We do make sure the various constraints are followed (2DArray/cubemap/videoframes/volume implies that each image has the same resolution and # of mipmap levels, etc., cubemap implies that the # of image slices is a multiple of 6) enum basis_texture_type { @@ -88,14 +88,113 @@ namespace basist enum class basis_tex_format { + // Original LDR formats cETC1S = 0, - cUASTC4x4 = 1, + cUASTC_LDR_4x4 = 1, + + // HDR formats cUASTC_HDR_4x4 = 2, cASTC_HDR_6x6 = 3, - cASTC_HDR_6x6_INTERMEDIATE = 4, + cUASTC_HDR_6x6_INTERMEDIATE = 4, // TODO: rename to UASTC_HDR_6x6 + + // XUASTC (supercompressed) LDR variants (the standard ASTC block sizes) + cXUASTC_LDR_4x4 = 5, + cXUASTC_LDR_5x4 = 6, + cXUASTC_LDR_5x5 = 7, + cXUASTC_LDR_6x5 = 8, + + cXUASTC_LDR_6x6 = 9, + cXUASTC_LDR_8x5 = 10, + cXUASTC_LDR_8x6 = 11, + cXUASTC_LDR_10x5 = 12, + + cXUASTC_LDR_10x6 = 13, + cXUASTC_LDR_8x8 = 14, + cXUASTC_LDR_10x8 = 15, + cXUASTC_LDR_10x10 = 16, + + cXUASTC_LDR_12x10 = 17, + cXUASTC_LDR_12x12 = 18, + + // Standard (non-supercompressed) ASTC LDR variants (the standard ASTC block sizes) + cASTC_LDR_4x4 = 19, + cASTC_LDR_5x4 = 20, + cASTC_LDR_5x5 = 21, + cASTC_LDR_6x5 = 22, + + cASTC_LDR_6x6 = 23, + cASTC_LDR_8x5 = 24, + cASTC_LDR_8x6 = 25, + cASTC_LDR_10x5 = 26, + + cASTC_LDR_10x6 = 27, + cASTC_LDR_8x8 = 28, + cASTC_LDR_10x8 = 29, + cASTC_LDR_10x10 = 30, + + cASTC_LDR_12x10 = 31, + cASTC_LDR_12x12 = 32, + cTotalFormats }; + // True if the basis_tex_format is XUASTC LDR 4x4-12x12. + inline bool basis_tex_format_is_xuastc_ldr(basis_tex_format tex_fmt) + { + return ((uint32_t)tex_fmt >= (uint32_t)basis_tex_format::cXUASTC_LDR_4x4) && ((uint32_t)tex_fmt <= (uint32_t)basis_tex_format::cXUASTC_LDR_12x12); + } + + // True if the basis_tex_format is ASTC LDR 4x4-12x12. 
+ inline bool basis_tex_format_is_astc_ldr(basis_tex_format tex_fmt) + { + return ((uint32_t)tex_fmt >= (uint32_t)basis_tex_format::cASTC_LDR_4x4) && ((uint32_t)tex_fmt <= (uint32_t)basis_tex_format::cASTC_LDR_12x12); + } + + inline void get_basis_tex_format_block_size(basis_tex_format tex_fmt, uint32_t &width, uint32_t &height) + { + switch (tex_fmt) + { + case basis_tex_format::cETC1S: width = 4; height = 4; break; + case basis_tex_format::cUASTC_LDR_4x4: width = 4; height = 4; break; + case basis_tex_format::cUASTC_HDR_4x4: width = 4; height = 4; break; + case basis_tex_format::cASTC_HDR_6x6: width = 6; height = 6; break; + case basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE: width = 6; height = 6; break; + case basis_tex_format::cXUASTC_LDR_4x4: width = 4; height = 4; break; + case basis_tex_format::cXUASTC_LDR_5x4: width = 5; height = 4; break; + case basis_tex_format::cXUASTC_LDR_5x5: width = 5; height = 5; break; + case basis_tex_format::cXUASTC_LDR_6x5: width = 6; height = 5; break; + case basis_tex_format::cXUASTC_LDR_6x6: width = 6; height = 6; break; + case basis_tex_format::cXUASTC_LDR_8x5: width = 8; height = 5; break; + case basis_tex_format::cXUASTC_LDR_8x6: width = 8; height = 6; break; + case basis_tex_format::cXUASTC_LDR_10x5: width = 10; height = 5; break; + case basis_tex_format::cXUASTC_LDR_10x6: width = 10; height = 6; break; + case basis_tex_format::cXUASTC_LDR_8x8: width = 8; height = 8; break; + case basis_tex_format::cXUASTC_LDR_10x8: width = 10; height = 8; break; + case basis_tex_format::cXUASTC_LDR_10x10: width = 10; height = 10; break; + case basis_tex_format::cXUASTC_LDR_12x10: width = 12; height = 10; break; + case basis_tex_format::cXUASTC_LDR_12x12: width = 12; height = 12; break; + case basis_tex_format::cASTC_LDR_4x4: width = 4; height = 4; break; + case basis_tex_format::cASTC_LDR_5x4: width = 5; height = 4; break; + case basis_tex_format::cASTC_LDR_5x5: width = 5; height = 5; break; + case basis_tex_format::cASTC_LDR_6x5: width 
= 6; height = 5; break; + case basis_tex_format::cASTC_LDR_6x6: width = 6; height = 6; break; + case basis_tex_format::cASTC_LDR_8x5: width = 8; height = 5; break; + case basis_tex_format::cASTC_LDR_8x6: width = 8; height = 6; break; + case basis_tex_format::cASTC_LDR_10x5: width = 10; height = 5; break; + case basis_tex_format::cASTC_LDR_10x6: width = 10; height = 6; break; + case basis_tex_format::cASTC_LDR_8x8: width = 8; height = 8; break; + case basis_tex_format::cASTC_LDR_10x8: width = 10; height = 8; break; + case basis_tex_format::cASTC_LDR_10x10: width = 10; height = 10; break; + case basis_tex_format::cASTC_LDR_12x10: width = 12; height = 10; break; + case basis_tex_format::cASTC_LDR_12x12: width = 12; height = 12; break; + default: + assert(0); + width = 0; + height = 0; + break; + } + } + struct basis_file_header { enum @@ -115,7 +214,7 @@ namespace basist basisu::packed_uint<3> m_total_slices; // The total # of compressed slices (1 slice per image, or 2 for alpha .basis files) basisu::packed_uint<3> m_total_images; // The total # of images - + basisu::packed_uint<1> m_tex_format; // enum basis_tex_format basisu::packed_uint<2> m_flags; // enum basist::header_flags basisu::packed_uint<1> m_tex_type; // enum basist::basis_texture_type @@ -125,11 +224,11 @@ namespace basist basisu::packed_uint<4> m_userdata0; // For client use basisu::packed_uint<4> m_userdata1; // For client use - basisu::packed_uint<2> m_total_endpoints; // The number of endpoints in the endpoint codebook + basisu::packed_uint<2> m_total_endpoints; // The number of endpoints in the endpoint codebook basisu::packed_uint<4> m_endpoint_cb_file_ofs; // The compressed endpoint codebook's file offset relative to the start of the file basisu::packed_uint<3> m_endpoint_cb_file_size; // The compressed endpoint codebook's size in bytes - basisu::packed_uint<2> m_total_selectors; // The number of selectors in the endpoint codebook + basisu::packed_uint<2> m_total_selectors; // The number of 
selectors in the endpoint codebook basisu::packed_uint<4> m_selector_cb_file_ofs; // The compressed selectors codebook's file offset relative to the start of the file basisu::packed_uint<3> m_selector_cb_file_size; // The compressed selector codebook's size in bytes @@ -137,7 +236,7 @@ namespace basist basisu::packed_uint<4> m_tables_file_size; // The file size in bytes of the compressed huffman codelength tables basisu::packed_uint<4> m_slice_desc_file_ofs; // The file offset to the slice description array, usually follows the header - + basisu::packed_uint<4> m_extended_file_ofs; // The file offset of the "extended" header and compressed data, for future use basisu::packed_uint<4> m_extended_file_size; // The file size in bytes of the "extended" header and compressed data, for future use }; diff --git a/external/basis_universal/transcoder/basisu_idct.h b/external/basis_universal/transcoder/basisu_idct.h new file mode 100644 index 0000000000..33b77d0013 --- /dev/null +++ b/external/basis_universal/transcoder/basisu_idct.h @@ -0,0 +1,1446 @@ +// ------------------------------------------------------------ +// 1D ORTHONORMAL IDCT (DCT-III), SIZE 2, FLOAT +// out[x*dst_stride] = sum_k C[k][x] * src[k*src_stride] +// C[k][x] = alpha(k) * cos(pi * (2*x+1) * k / (2*N)), +// alpha(0) = sqrt(1/N), alpha(k>0) = sqrt(2/N) +static inline void idct_1d_2( + const float* src, int src_stride, + float* dst, int dst_stride) +{ + float s0 = 0.0f; + float s1 = 0.0f; + + { + float v = src[0 * src_stride]; + if (v != 0.0f) + { + s0 += 7.071067691e-01f * v; + s1 += 7.071067691e-01f * v; + } + } + + { + float v = src[1 * src_stride]; + if (v != 0.0f) + { + s0 += 7.071067691e-01f * v; + s1 += -7.071067691e-01f * v; + } + } + + dst[0 * dst_stride] = s0; + dst[1 * dst_stride] = s1; +} + +// ------------------------------------------------------------ +// 1D ORTHONORMAL IDCT (DCT-III), SIZE 3, FLOAT +// out[x*dst_stride] = sum_k C[k][x] * src[k*src_stride] +// C[k][x] = alpha(k) * cos(pi * 
(2*x+1) * k / (2*N)), +// alpha(0) = sqrt(1/N), alpha(k>0) = sqrt(2/N) +static inline void idct_1d_3( + const float* src, int src_stride, + float* dst, int dst_stride) +{ + float s0 = 0.0f; + float s1 = 0.0f; + float s2 = 0.0f; + + { + float v = src[0 * src_stride]; + if (v != 0.0f) + { + s0 += 5.773502588e-01f * v; + s1 += 5.773502588e-01f * v; + s2 += 5.773502588e-01f * v; + } + } + + { + float v = src[1 * src_stride]; + if (v != 0.0f) + { + s0 += 7.071067691e-01f * v; + s2 += -7.071068883e-01f * v; + } + } + + { + float v = src[2 * src_stride]; + if (v != 0.0f) + { + s0 += 4.082482755e-01f * v; + s1 += -8.164966106e-01f * v; + s2 += 4.082486033e-01f * v; + } + } + + dst[0 * dst_stride] = s0; + dst[1 * dst_stride] = s1; + dst[2 * dst_stride] = s2; +} + +// ------------------------------------------------------------ +// 1D ORTHONORMAL IDCT (DCT-III), SIZE 4, FLOAT +// out[x*dst_stride] = sum_k C[k][x] * src[k*src_stride] +// C[k][x] = alpha(k) * cos(pi * (2*x+1) * k / (2*N)), +// alpha(0) = sqrt(1/N), alpha(k>0) = sqrt(2/N) +static inline void idct_1d_4( + const float* src, int src_stride, + float* dst, int dst_stride) +{ + float s0 = 0.0f; + float s1 = 0.0f; + float s2 = 0.0f; + float s3 = 0.0f; + + { + float v = src[0 * src_stride]; + if (v != 0.0f) + { + s0 += 5.000000000e-01f * v; + s1 += 5.000000000e-01f * v; + s2 += 5.000000000e-01f * v; + s3 += 5.000000000e-01f * v; + } + } + + { + float v = src[1 * src_stride]; + if (v != 0.0f) + { + s0 += 6.532814503e-01f * v; + s1 += 2.705980539e-01f * v; + s2 += -2.705981135e-01f * v; + s3 += -6.532815099e-01f * v; + } + } + + { + float v = src[2 * src_stride]; + if (v != 0.0f) + { + s0 += 4.999999702e-01f * v; + s1 += -4.999999702e-01f * v; + s2 += -4.999999106e-01f * v; + s3 += 5.000001788e-01f * v; + } + } + + { + float v = src[3 * src_stride]; + if (v != 0.0f) + { + s0 += 2.705980539e-01f * v; + s1 += -6.532814503e-01f * v; + s2 += 6.532815099e-01f * v; + s3 += -2.705983818e-01f * v; + } + } + + dst[0 * dst_stride] 
= s0; + dst[1 * dst_stride] = s1; + dst[2 * dst_stride] = s2; + dst[3 * dst_stride] = s3; +} + +// ------------------------------------------------------------ +// 1D ORTHONORMAL IDCT (DCT-III), SIZE 5, FLOAT +// out[x*dst_stride] = sum_k C[k][x] * src[k*src_stride] +// C[k][x] = alpha(k) * cos(pi * (2*x+1) * k / (2*N)), +// alpha(0) = sqrt(1/N), alpha(k>0) = sqrt(2/N) +static inline void idct_1d_5( + const float* src, int src_stride, + float* dst, int dst_stride) +{ + float s0 = 0.0f; + float s1 = 0.0f; + float s2 = 0.0f; + float s3 = 0.0f; + float s4 = 0.0f; + + { + float v = src[0 * src_stride]; + if (v != 0.0f) + { + s0 += 4.472135901e-01f * v; + s1 += 4.472135901e-01f * v; + s2 += 4.472135901e-01f * v; + s3 += 4.472135901e-01f * v; + s4 += 4.472135901e-01f * v; + } + } + + { + float v = src[1 * src_stride]; + if (v != 0.0f) + { + s0 += 6.015009880e-01f * v; + s1 += 3.717480302e-01f * v; + s3 += -3.717481494e-01f * v; + s4 += -6.015009284e-01f * v; + } + } + + { + float v = src[2 * src_stride]; + if (v != 0.0f) + { + s0 += 5.116672516e-01f * v; + s1 += -1.954395324e-01f * v; + s2 += -6.324555278e-01f * v; + s3 += -1.954392791e-01f * v; + s4 += 5.116672516e-01f * v; + } + } + + { + float v = src[3 * src_stride]; + if (v != 0.0f) + { + s0 += 3.717480302e-01f * v; + s1 += -6.015009284e-01f * v; + s3 += 6.015008688e-01f * v; + s4 += -3.717483282e-01f * v; + } + } + + { + float v = src[4 * src_stride]; + if (v != 0.0f) + { + s0 += 1.954394877e-01f * v; + s1 += -5.116672516e-01f * v; + s2 += 6.324555278e-01f * v; + s3 += -5.116675496e-01f * v; + s4 += 1.954394132e-01f * v; + } + } + + dst[0 * dst_stride] = s0; + dst[1 * dst_stride] = s1; + dst[2 * dst_stride] = s2; + dst[3 * dst_stride] = s3; + dst[4 * dst_stride] = s4; +} + +// ------------------------------------------------------------ +// 1D ORTHONORMAL IDCT (DCT-III), SIZE 6, FLOAT +// out[x*dst_stride] = sum_k C[k][x] * src[k*src_stride] +// C[k][x] = alpha(k) * cos(pi * (2*x+1) * k / (2*N)), +// alpha(0) = 
sqrt(1/N), alpha(k>0) = sqrt(2/N) +static inline void idct_1d_6( + const float* src, int src_stride, + float* dst, int dst_stride) +{ + float s0 = 0.0f; + float s1 = 0.0f; + float s2 = 0.0f; + float s3 = 0.0f; + float s4 = 0.0f; + float s5 = 0.0f; + + { + float v = src[0 * src_stride]; + if (v != 0.0f) + { + s0 += 4.082483053e-01f * v; + s1 += 4.082483053e-01f * v; + s2 += 4.082483053e-01f * v; + s3 += 4.082483053e-01f * v; + s4 += 4.082483053e-01f * v; + s5 += 4.082483053e-01f * v; + } + } + + { + float v = src[1 * src_stride]; + if (v != 0.0f) + { + s0 += 5.576775074e-01f * v; + s1 += 4.082482755e-01f * v; + s2 += 1.494291872e-01f * v; + s3 += -1.494293064e-01f * v; + s4 += -4.082482755e-01f * v; + s5 += -5.576775670e-01f * v; + } + } + + { + float v = src[2 * src_stride]; + if (v != 0.0f) + { + s0 += 4.999999702e-01f * v; + s2 += -5.000000596e-01f * v; + s3 += -4.999999106e-01f * v; + s5 += 5.000000596e-01f * v; + } + } + + { + float v = src[3 * src_stride]; + if (v != 0.0f) + { + s0 += 4.082482755e-01f * v; + s1 += -4.082482755e-01f * v; + s2 += -4.082483053e-01f * v; + s3 += 4.082484245e-01f * v; + s4 += 4.082480669e-01f * v; + s5 += -4.082485437e-01f * v; + } + } + + { + float v = src[4 * src_stride]; + if (v != 0.0f) + { + s0 += 2.886750996e-01f * v; + s1 += -5.773502588e-01f * v; + s2 += 2.886753380e-01f * v; + s3 += 2.886748910e-01f * v; + s4 += -5.773502588e-01f * v; + s5 += 2.886753976e-01f * v; + } + } + + { + float v = src[5 * src_stride]; + if (v != 0.0f) + { + s0 += 1.494291872e-01f * v; + s1 += -4.082483053e-01f * v; + s2 += 5.576775074e-01f * v; + s3 += -5.576776266e-01f * v; + s4 += 4.082483053e-01f * v; + s5 += -1.494295001e-01f * v; + } + } + + dst[0 * dst_stride] = s0; + dst[1 * dst_stride] = s1; + dst[2 * dst_stride] = s2; + dst[3 * dst_stride] = s3; + dst[4 * dst_stride] = s4; + dst[5 * dst_stride] = s5; +} + +// ------------------------------------------------------------ +// 1D ORTHONORMAL IDCT (DCT-III), SIZE 7, FLOAT +// out[x*dst_stride] 
= sum_k C[k][x] * src[k*src_stride] +// C[k][x] = alpha(k) * cos(pi * (2*x+1) * k / (2*N)), +// alpha(0) = sqrt(1/N), alpha(k>0) = sqrt(2/N) +static inline void idct_1d_7( + const float* src, int src_stride, + float* dst, int dst_stride) +{ + float s0 = 0.0f; + float s1 = 0.0f; + float s2 = 0.0f; + float s3 = 0.0f; + float s4 = 0.0f; + float s5 = 0.0f; + float s6 = 0.0f; + + { + float v = src[0 * src_stride]; + if (v != 0.0f) + { + s0 += 3.779644668e-01f * v; + s1 += 3.779644668e-01f * v; + s2 += 3.779644668e-01f * v; + s3 += 3.779644668e-01f * v; + s4 += 3.779644668e-01f * v; + s5 += 3.779644668e-01f * v; + s6 += 3.779644668e-01f * v; + } + } + + { + float v = src[1 * src_stride]; + if (v != 0.0f) + { + s0 += 5.211208463e-01f * v; + s1 += 4.179065228e-01f * v; + s2 += 2.319205552e-01f * v; + s4 += -2.319206595e-01f * v; + s5 += -4.179066122e-01f * v; + s6 += -5.211208463e-01f * v; + } + } + + { + float v = src[2 * src_stride]; + if (v != 0.0f) + { + s0 += 4.815880954e-01f * v; + s1 += 1.189424619e-01f * v; + s2 += -3.332694173e-01f * v; + s3 += -5.345224738e-01f * v; + s4 += -3.332692087e-01f * v; + s5 += 1.189427450e-01f * v; + s6 += 4.815880954e-01f * v; + } + } + + { + float v = src[3 * src_stride]; + if (v != 0.0f) + { + s0 += 4.179065228e-01f * v; + s1 += -2.319206595e-01f * v; + s2 += -5.211208463e-01f * v; + s4 += 5.211208463e-01f * v; + s5 += 2.319205403e-01f * v; + s6 += -4.179067314e-01f * v; + } + } + + { + float v = src[4 * src_stride]; + if (v != 0.0f) + { + s0 += 3.332692981e-01f * v; + s1 += -4.815880954e-01f * v; + s2 += -1.189422309e-01f * v; + s3 += 5.345224738e-01f * v; + s4 += -1.189426631e-01f * v; + s5 += -4.815878570e-01f * v; + s6 += 3.332692981e-01f * v; + } + } + + { + float v = src[5 * src_stride]; + if (v != 0.0f) + { + s0 += 2.319205552e-01f * v; + s1 += -5.211208463e-01f * v; + s2 += 4.179064631e-01f * v; + s4 += -4.179064035e-01f * v; + s5 += 5.211209059e-01f * v; + s6 += -2.319207191e-01f * v; + } + } + + { + float v = src[6 * 
src_stride]; + if (v != 0.0f) + { + s0 += 1.189424619e-01f * v; + s1 += -3.332692087e-01f * v; + s2 += 4.815881252e-01f * v; + s3 += -5.345224738e-01f * v; + s4 += 4.815881550e-01f * v; + s5 += -3.332694471e-01f * v; + s6 += 1.189431697e-01f * v; + } + } + + dst[0 * dst_stride] = s0; + dst[1 * dst_stride] = s1; + dst[2 * dst_stride] = s2; + dst[3 * dst_stride] = s3; + dst[4 * dst_stride] = s4; + dst[5 * dst_stride] = s5; + dst[6 * dst_stride] = s6; +} + +// ------------------------------------------------------------ +// 1D ORTHONORMAL IDCT (DCT-III), SIZE 8, FLOAT +// out[x*dst_stride] = sum_k C[k][x] * src[k*src_stride] +// C[k][x] = alpha(k) * cos(pi * (2*x+1) * k / (2*N)), +// alpha(0) = sqrt(1/N), alpha(k>0) = sqrt(2/N) +static inline void idct_1d_8( + const float* src, int src_stride, + float* dst, int dst_stride) +{ + float s0 = 0.0f; + float s1 = 0.0f; + float s2 = 0.0f; + float s3 = 0.0f; + float s4 = 0.0f; + float s5 = 0.0f; + float s6 = 0.0f; + float s7 = 0.0f; + + { + float v = src[0 * src_stride]; + if (v != 0.0f) + { + s0 += 3.535533845e-01f * v; + s1 += 3.535533845e-01f * v; + s2 += 3.535533845e-01f * v; + s3 += 3.535533845e-01f * v; + s4 += 3.535533845e-01f * v; + s5 += 3.535533845e-01f * v; + s6 += 3.535533845e-01f * v; + s7 += 3.535533845e-01f * v; + } + } + + { + float v = src[1 * src_stride]; + if (v != 0.0f) + { + s0 += 4.903926253e-01f * v; + s1 += 4.157347977e-01f * v; + s2 += 2.777850926e-01f * v; + s3 += 9.754511714e-02f * v; + s4 += -9.754516184e-02f * v; + s5 += -2.777851820e-01f * v; + s6 += -4.157348275e-01f * v; + s7 += -4.903926551e-01f * v; + } + } + + { + float v = src[2 * src_stride]; + if (v != 0.0f) + { + s0 += 4.619397521e-01f * v; + s1 += 1.913417131e-01f * v; + s2 += -1.913417578e-01f * v; + s3 += -4.619398117e-01f * v; + s4 += -4.619397521e-01f * v; + s5 += -1.913415641e-01f * v; + s6 += 1.913418025e-01f * v; + s7 += 4.619397819e-01f * v; + } + } + + { + float v = src[3 * src_stride]; + if (v != 0.0f) + { + s0 += 
4.157347977e-01f * v; + s1 += -9.754516184e-02f * v; + s2 += -4.903926551e-01f * v; + s3 += -2.777850032e-01f * v; + s4 += 2.777852118e-01f * v; + s5 += 4.903926253e-01f * v; + s6 += 9.754503518e-02f * v; + s7 += -4.157348871e-01f * v; + } + } + + { + float v = src[4 * src_stride]; + if (v != 0.0f) + { + s0 += 3.535533845e-01f * v; + s1 += -3.535533845e-01f * v; + s2 += -3.535533249e-01f * v; + s3 += 3.535535038e-01f * v; + s4 += 3.535533845e-01f * v; + s5 += -3.535536230e-01f * v; + s6 += -3.535532653e-01f * v; + s7 += 3.535534143e-01f * v; + } + } + + { + float v = src[5 * src_stride]; + if (v != 0.0f) + { + s0 += 2.777850926e-01f * v; + s1 += -4.903926551e-01f * v; + s2 += 9.754520655e-02f * v; + s3 += 4.157346785e-01f * v; + s4 += -4.157348871e-01f * v; + s5 += -9.754510969e-02f * v; + s6 += 4.903926551e-01f * v; + s7 += -2.777854204e-01f * v; + } + } + + { + float v = src[6 * src_stride]; + if (v != 0.0f) + { + s0 += 1.913417131e-01f * v; + s1 += -4.619397521e-01f * v; + s2 += 4.619397819e-01f * v; + s3 += -1.913419515e-01f * v; + s4 += -1.913414896e-01f * v; + s5 += 4.619396627e-01f * v; + s6 += -4.619398713e-01f * v; + s7 += 1.913419515e-01f * v; + } + } + + { + float v = src[7 * src_stride]; + if (v != 0.0f) + { + s0 += 9.754511714e-02f * v; + s1 += -2.777850032e-01f * v; + s2 += 4.157346785e-01f * v; + s3 += -4.903925955e-01f * v; + s4 += 4.903927147e-01f * v; + s5 += -4.157347977e-01f * v; + s6 += 2.777855694e-01f * v; + s7 += -9.754577279e-02f * v; + } + } + + dst[0 * dst_stride] = s0; + dst[1 * dst_stride] = s1; + dst[2 * dst_stride] = s2; + dst[3 * dst_stride] = s3; + dst[4 * dst_stride] = s4; + dst[5 * dst_stride] = s5; + dst[6 * dst_stride] = s6; + dst[7 * dst_stride] = s7; +} + +// ------------------------------------------------------------ +// 1D ORTHONORMAL IDCT (DCT-III), SIZE 9, FLOAT +// out[x*dst_stride] = sum_k C[k][x] * src[k*src_stride] +// C[k][x] = alpha(k) * cos(pi * (2*x+1) * k / (2*N)), +// alpha(0) = sqrt(1/N), alpha(k>0) = sqrt(2/N) 
+static inline void idct_1d_9( + const float* src, int src_stride, + float* dst, int dst_stride) +{ + float s0 = 0.0f; + float s1 = 0.0f; + float s2 = 0.0f; + float s3 = 0.0f; + float s4 = 0.0f; + float s5 = 0.0f; + float s6 = 0.0f; + float s7 = 0.0f; + float s8 = 0.0f; + + { + float v = src[0 * src_stride]; + if (v != 0.0f) + { + s0 += 3.333333433e-01f * v; + s1 += 3.333333433e-01f * v; + s2 += 3.333333433e-01f * v; + s3 += 3.333333433e-01f * v; + s4 += 3.333333433e-01f * v; + s5 += 3.333333433e-01f * v; + s6 += 3.333333433e-01f * v; + s7 += 3.333333433e-01f * v; + s8 += 3.333333433e-01f * v; + } + } + + { + float v = src[1 * src_stride]; + if (v != 0.0f) + { + s0 += 4.642428160e-01f * v; + s1 += 4.082482755e-01f * v; + s2 += 3.030129671e-01f * v; + s3 += 1.612297893e-01f * v; + s5 += -1.612298936e-01f * v; + s6 += -3.030129969e-01f * v; + s7 += -4.082482755e-01f * v; + s8 += -4.642428458e-01f * v; + } + } + + { + float v = src[2 * src_stride]; + if (v != 0.0f) + { + s0 += 4.429753423e-01f * v; + s1 += 2.357022464e-01f * v; + s2 += -8.185859025e-02f * v; + s3 += -3.611168861e-01f * v; + s4 += -4.714045227e-01f * v; + s5 += -3.611167669e-01f * v; + s6 += -8.185851574e-02f * v; + s7 += 2.357022166e-01f * v; + s8 += 4.429753721e-01f * v; + } + } + + { + float v = src[3 * src_stride]; + if (v != 0.0f) + { + s0 += 4.082482755e-01f * v; + s2 += -4.082482755e-01f * v; + s3 += -4.082482159e-01f * v; + s5 += 4.082483649e-01f * v; + s6 += 4.082482755e-01f * v; + s8 += -4.082485437e-01f * v; + } + } + + { + float v = src[4 * src_stride]; + if (v != 0.0f) + { + s0 += 3.611168265e-01f * v; + s1 += -2.357022911e-01f * v; + s2 += -4.429753125e-01f * v; + s3 += 8.185874671e-02f * v; + s4 += 4.714045227e-01f * v; + s5 += 8.185835928e-02f * v; + s6 += -4.429753721e-01f * v; + s7 += -2.357023507e-01f * v; + s8 += 3.611169457e-01f * v; + } + } + + { + float v = src[5 * src_stride]; + if (v != 0.0f) + { + s0 += 3.030129671e-01f * v; + s1 += -4.082482755e-01f * v; + s2 += 
-1.612298042e-01f * v; + s3 += 4.642428458e-01f * v; + s5 += -4.642428160e-01f * v; + s6 += 1.612296849e-01f * v; + s7 += 4.082482159e-01f * v; + s8 += -3.030129373e-01f * v; + } + } + + { + float v = src[6 * src_stride]; + if (v != 0.0f) + { + s0 += 2.357022464e-01f * v; + s1 += -4.714045227e-01f * v; + s2 += 2.357022166e-01f * v; + s3 += 2.357020825e-01f * v; + s4 += -4.714045227e-01f * v; + s5 += 2.357024848e-01f * v; + s6 += 2.357022017e-01f * v; + s7 += -4.714045227e-01f * v; + s8 += 2.357031256e-01f * v; + } + } + + { + float v = src[7 * src_stride]; + if (v != 0.0f) + { + s0 += 1.612297893e-01f * v; + s1 += -4.082482159e-01f * v; + s2 += 4.642428458e-01f * v; + s3 += -3.030130565e-01f * v; + s5 += 3.030129075e-01f * v; + s6 += -4.642427862e-01f * v; + s7 += 4.082485735e-01f * v; + s8 += -1.612301171e-01f * v; + } + } + + { + float v = src[8 * src_stride]; + if (v != 0.0f) + { + s0 += 8.185850084e-02f * v; + s1 += -2.357022166e-01f * v; + s2 += 3.611166775e-01f * v; + s3 += -4.429752231e-01f * v; + s4 += 4.714045227e-01f * v; + s5 += -4.429754615e-01f * v; + s6 += 3.611168563e-01f * v; + s7 += -2.357021123e-01f * v; + s8 += 8.185899258e-02f * v; + } + } + + dst[0 * dst_stride] = s0; + dst[1 * dst_stride] = s1; + dst[2 * dst_stride] = s2; + dst[3 * dst_stride] = s3; + dst[4 * dst_stride] = s4; + dst[5 * dst_stride] = s5; + dst[6 * dst_stride] = s6; + dst[7 * dst_stride] = s7; + dst[8 * dst_stride] = s8; +} + +// ------------------------------------------------------------ +// 1D ORTHONORMAL IDCT (DCT-III), SIZE 10, FLOAT +// out[x*dst_stride] = sum_k C[k][x] * src[k*src_stride] +// C[k][x] = alpha(k) * cos(pi * (2*x+1) * k / (2*N)), +// alpha(0) = sqrt(1/N), alpha(k>0) = sqrt(2/N) +static inline void idct_1d_10( + const float* src, int src_stride, + float* dst, int dst_stride) +{ + float s0 = 0.0f; + float s1 = 0.0f; + float s2 = 0.0f; + float s3 = 0.0f; + float s4 = 0.0f; + float s5 = 0.0f; + float s6 = 0.0f; + float s7 = 0.0f; + float s8 = 0.0f; + float s9 = 
0.0f; + + { + float v = src[0 * src_stride]; + if (v != 0.0f) + { + s0 += 3.162277639e-01f * v; + s1 += 3.162277639e-01f * v; + s2 += 3.162277639e-01f * v; + s3 += 3.162277639e-01f * v; + s4 += 3.162277639e-01f * v; + s5 += 3.162277639e-01f * v; + s6 += 3.162277639e-01f * v; + s7 += 3.162277639e-01f * v; + s8 += 3.162277639e-01f * v; + s9 += 3.162277639e-01f * v; + } + } + + { + float v = src[1 * src_stride]; + if (v != 0.0f) + { + s0 += 4.417076707e-01f * v; + s1 += 3.984702229e-01f * v; + s2 += 3.162277639e-01f * v; + s3 += 2.030306906e-01f * v; + s4 += 6.995963305e-02f * v; + s5 += -6.995966285e-02f * v; + s6 += -2.030307651e-01f * v; + s7 += -3.162277639e-01f * v; + s8 += -3.984702528e-01f * v; + s9 += -4.417076707e-01f * v; + } + } + + { + float v = src[2 * src_stride]; + if (v != 0.0f) + { + s0 += 4.253254235e-01f * v; + s1 += 2.628655434e-01f * v; + s3 += -2.628656328e-01f * v; + s4 += -4.253253937e-01f * v; + s5 += -4.253253639e-01f * v; + s6 += -2.628654838e-01f * v; + s8 += 2.628656626e-01f * v; + s9 += 4.253254235e-01f * v; + } + } + + { + float v = src[3 * src_stride]; + if (v != 0.0f) + { + s0 += 3.984702229e-01f * v; + s1 += 6.995963305e-02f * v; + s2 += -3.162277639e-01f * v; + s3 += -4.417076409e-01f * v; + s4 += -2.030306011e-01f * v; + s5 += 2.030307949e-01f * v; + s6 += 4.417076707e-01f * v; + s7 += 3.162277639e-01f * v; + s8 += -6.995979697e-02f * v; + s9 += -3.984701931e-01f * v; + } + } + + { + float v = src[4 * src_stride]; + if (v != 0.0f) + { + s0 += 3.618034124e-01f * v; + s1 += -1.381966174e-01f * v; + s2 += -4.472135901e-01f * v; + s3 += -1.381964386e-01f * v; + s4 += 3.618033826e-01f * v; + s5 += 3.618032932e-01f * v; + s6 += -1.381967962e-01f * v; + s7 += -4.472135901e-01f * v; + s8 += -1.381963789e-01f * v; + s9 += 3.618034124e-01f * v; + } + } + + { + float v = src[5 * src_stride]; + if (v != 0.0f) + { + s0 += 3.162277639e-01f * v; + s1 += -3.162277639e-01f * v; + s2 += -3.162277043e-01f * v; + s3 += 3.162278533e-01f * v; + s4 += 
3.162277639e-01f * v; + s5 += -3.162276745e-01f * v; + s6 += -3.162276447e-01f * v; + s7 += 3.162280619e-01f * v; + s8 += 3.162278533e-01f * v; + s9 += -3.162281811e-01f * v; + } + } + + { + float v = src[6 * src_stride]; + if (v != 0.0f) + { + s0 += 2.628655434e-01f * v; + s1 += -4.253253937e-01f * v; + s3 += 4.253253639e-01f * v; + s4 += -2.628657520e-01f * v; + s5 += -2.628654242e-01f * v; + s6 += 4.253254235e-01f * v; + s8 += -4.253252745e-01f * v; + s9 += 2.628654540e-01f * v; + } + } + + { + float v = src[7 * src_stride]; + if (v != 0.0f) + { + s0 += 2.030306906e-01f * v; + s1 += -4.417076409e-01f * v; + s2 += 3.162278533e-01f * v; + s3 += 6.995949894e-02f * v; + s4 += -3.984701633e-01f * v; + s5 += 3.984702528e-01f * v; + s6 += -6.996008009e-02f * v; + s7 += -3.162274361e-01f * v; + s8 += 4.417077899e-01f * v; + s9 += -2.030310780e-01f * v; + } + } + + { + float v = src[8 * src_stride]; + if (v != 0.0f) + { + s0 += 1.381965876e-01f * v; + s1 += -3.618033826e-01f * v; + s2 += 4.472135901e-01f * v; + s3 += -3.618035913e-01f * v; + s4 += 1.381965429e-01f * v; + s5 += 1.381962299e-01f * v; + s6 += -3.618031442e-01f * v; + s7 += 4.472135901e-01f * v; + s8 += -3.618036509e-01f * v; + s9 += 1.381966770e-01f * v; + } + } + + { + float v = src[9 * src_stride]; + if (v != 0.0f) + { + s0 += 6.995963305e-02f * v; + s1 += -2.030306011e-01f * v; + s2 += 3.162277639e-01f * v; + s3 += -3.984701633e-01f * v; + s4 += 4.417076409e-01f * v; + s5 += -4.417076409e-01f * v; + s6 += 3.984701931e-01f * v; + s7 += -3.162280619e-01f * v; + s8 += 2.030308247e-01f * v; + s9 += -6.995939463e-02f * v; + } + } + + dst[0 * dst_stride] = s0; + dst[1 * dst_stride] = s1; + dst[2 * dst_stride] = s2; + dst[3 * dst_stride] = s3; + dst[4 * dst_stride] = s4; + dst[5 * dst_stride] = s5; + dst[6 * dst_stride] = s6; + dst[7 * dst_stride] = s7; + dst[8 * dst_stride] = s8; + dst[9 * dst_stride] = s9; +} + +// ------------------------------------------------------------ +// 1D ORTHONORMAL IDCT (DCT-III), 
SIZE 11, FLOAT +// out[x*dst_stride] = sum_k C[k][x] * src[k*src_stride] +// C[k][x] = alpha(k) * cos(pi * (2*x+1) * k / (2*N)), +// alpha(0) = sqrt(1/N), alpha(k>0) = sqrt(2/N) +static inline void idct_1d_11( + const float* src, int src_stride, + float* dst, int dst_stride) +{ + float s0 = 0.0f; + float s1 = 0.0f; + float s2 = 0.0f; + float s3 = 0.0f; + float s4 = 0.0f; + float s5 = 0.0f; + float s6 = 0.0f; + float s7 = 0.0f; + float s8 = 0.0f; + float s9 = 0.0f; + float s10 = 0.0f; + + { + float v = src[0 * src_stride]; + if (v != 0.0f) + { + s0 += 3.015113473e-01f * v; + s1 += 3.015113473e-01f * v; + s2 += 3.015113473e-01f * v; + s3 += 3.015113473e-01f * v; + s4 += 3.015113473e-01f * v; + s5 += 3.015113473e-01f * v; + s6 += 3.015113473e-01f * v; + s7 += 3.015113473e-01f * v; + s8 += 3.015113473e-01f * v; + s9 += 3.015113473e-01f * v; + s10 += 3.015113473e-01f * v; + } + } + + { + float v = src[1 * src_stride]; + if (v != 0.0f) + { + s0 += 4.220612943e-01f * v; + s1 += 3.878683746e-01f * v; + s2 += 3.222526908e-01f * v; + s3 += 2.305300087e-01f * v; + s4 += 1.201311573e-01f * v; + s6 += -1.201311946e-01f * v; + s7 += -2.305300087e-01f * v; + s8 += -3.222527206e-01f * v; + s9 += -3.878683746e-01f * v; + s10 += -4.220612943e-01f * v; + } + } + + { + float v = src[2 * src_stride]; + if (v != 0.0f) + { + s0 += 4.091291726e-01f * v; + s1 += 2.792335451e-01f * v; + s2 += 6.068321317e-02f * v; + s3 += -1.771336049e-01f * v; + s4 += -3.587117195e-01f * v; + s5 += -4.264014363e-01f * v; + s6 += -3.587116897e-01f * v; + s7 += -1.771335900e-01f * v; + s8 += 6.068333238e-02f * v; + s9 += 2.792335451e-01f * v; + s10 += 4.091292024e-01f * v; + } + } + + { + float v = src[3 * src_stride]; + if (v != 0.0f) + { + s0 += 3.878683746e-01f * v; + s1 += 1.201311573e-01f * v; + s2 += -2.305300087e-01f * v; + s3 += -4.220612943e-01f * v; + s4 += -3.222526908e-01f * v; + s6 += 3.222527504e-01f * v; + s7 += 4.220612645e-01f * v; + s8 += 2.305298299e-01f * v; + s9 += -1.201310679e-01f * v; 
+ s10 += -3.878685534e-01f * v; + } + } + + { + float v = src[4 * src_stride]; + if (v != 0.0f) + { + s0 += 3.587117195e-01f * v; + s1 += -6.068325043e-02f * v; + s2 += -4.091292024e-01f * v; + s3 += -2.792334855e-01f * v; + s4 += 1.771336049e-01f * v; + s5 += 4.264014363e-01f * v; + s6 += 1.771334559e-01f * v; + s7 += -2.792335153e-01f * v; + s8 += -4.091291428e-01f * v; + s9 += -6.068325043e-02f * v; + s10 += 3.587118387e-01f * v; + } + } + + { + float v = src[5 * src_stride]; + if (v != 0.0f) + { + s0 += 3.222526908e-01f * v; + s1 += -2.305300087e-01f * v; + s2 += -3.878683448e-01f * v; + s3 += 1.201313213e-01f * v; + s4 += 4.220612645e-01f * v; + s6 += -4.220612943e-01f * v; + s7 += -1.201310530e-01f * v; + s8 += 3.878682852e-01f * v; + s9 += 2.305295914e-01f * v; + s10 += -3.222530484e-01f * v; + } + } + + { + float v = src[6 * src_stride]; + if (v != 0.0f) + { + s0 += 2.792335451e-01f * v; + s1 += -3.587117195e-01f * v; + s2 += -1.771335900e-01f * v; + s3 += 4.091292024e-01f * v; + s4 += 6.068318710e-02f * v; + s5 += -4.264014363e-01f * v; + s6 += 6.068341061e-02f * v; + s7 += 4.091290832e-01f * v; + s8 += -1.771339774e-01f * v; + s9 += -3.587118387e-01f * v; + s10 += 2.792341411e-01f * v; + } + } + + { + float v = src[7 * src_stride]; + if (v != 0.0f) + { + s0 += 2.305300087e-01f * v; + s1 += -4.220612943e-01f * v; + s2 += 1.201313213e-01f * v; + s3 += 3.222525418e-01f * v; + s4 += -3.878685534e-01f * v; + s6 += 3.878683150e-01f * v; + s7 += -3.222530484e-01f * v; + s8 += -1.201303899e-01f * v; + s9 += 4.220611751e-01f * v; + s10 += -2.305305302e-01f * v; + } + } + + { + float v = src[8 * src_stride]; + if (v != 0.0f) + { + s0 += 1.771335304e-01f * v; + s1 += -4.091291726e-01f * v; + s2 += 3.587118089e-01f * v; + s3 += -6.068347394e-02f * v; + s4 += -2.792334855e-01f * v; + s5 += 4.264014363e-01f * v; + s6 += -2.792337239e-01f * v; + s7 += -6.068337709e-02f * v; + s8 += 3.587115407e-01f * v; + s9 += -4.091291726e-01f * v; + s10 += 1.771339774e-01f * v; + } + 
} + + { + float v = src[9 * src_stride]; + if (v != 0.0f) + { + s0 += 1.201311573e-01f * v; + s1 += -3.222526908e-01f * v; + s2 += 4.220612645e-01f * v; + s3 += -3.878685534e-01f * v; + s4 += 2.305301726e-01f * v; + s6 += -2.305298299e-01f * v; + s7 += 3.878681958e-01f * v; + s8 += -4.220613837e-01f * v; + s9 += 3.222527504e-01f * v; + s10 += -1.201314703e-01f * v; + } + } + + { + float v = src[10 * src_stride]; + if (v != 0.0f) + { + s0 += 6.068321317e-02f * v; + s1 += -1.771335900e-01f * v; + s2 += 2.792334557e-01f * v; + s3 += -3.587115407e-01f * v; + s4 += 4.091290832e-01f * v; + s5 += -4.264014363e-01f * v; + s6 += 4.091292620e-01f * v; + s7 += -3.587118387e-01f * v; + s8 += 2.792330980e-01f * v; + s9 += -1.771344692e-01f * v; + s10 += 6.068423390e-02f * v; + } + } + + dst[0 * dst_stride] = s0; + dst[1 * dst_stride] = s1; + dst[2 * dst_stride] = s2; + dst[3 * dst_stride] = s3; + dst[4 * dst_stride] = s4; + dst[5 * dst_stride] = s5; + dst[6 * dst_stride] = s6; + dst[7 * dst_stride] = s7; + dst[8 * dst_stride] = s8; + dst[9 * dst_stride] = s9; + dst[10 * dst_stride] = s10; +} + +// ------------------------------------------------------------ +// 1D ORTHONORMAL IDCT (DCT-III), SIZE 12, FLOAT +// out[x*dst_stride] = sum_k C[k][x] * src[k*src_stride] +// C[k][x] = alpha(k) * cos(pi * (2*x+1) * k / (2*N)), +// alpha(0) = sqrt(1/N), alpha(k>0) = sqrt(2/N) +static inline void idct_1d_12( + const float* src, int src_stride, + float* dst, int dst_stride) +{ + float s0 = 0.0f; + float s1 = 0.0f; + float s2 = 0.0f; + float s3 = 0.0f; + float s4 = 0.0f; + float s5 = 0.0f; + float s6 = 0.0f; + float s7 = 0.0f; + float s8 = 0.0f; + float s9 = 0.0f; + float s10 = 0.0f; + float s11 = 0.0f; + + { + float v = src[0 * src_stride]; + if (v != 0.0f) + { + s0 += 2.886751294e-01f * v; + s1 += 2.886751294e-01f * v; + s2 += 2.886751294e-01f * v; + s3 += 2.886751294e-01f * v; + s4 += 2.886751294e-01f * v; + s5 += 2.886751294e-01f * v; + s6 += 2.886751294e-01f * v; + s7 += 
2.886751294e-01f * v; + s8 += 2.886751294e-01f * v; + s9 += 2.886751294e-01f * v; + s10 += 2.886751294e-01f * v; + s11 += 2.886751294e-01f * v; + } + } + + { + float v = src[1 * src_stride]; + if (v != 0.0f) + { + s0 += 4.047556818e-01f * v; + s1 += 3.771722317e-01f * v; + s2 += 3.238851428e-01f * v; + s3 += 2.485257983e-01f * v; + s4 += 1.562298536e-01f * v; + s5 += 5.328707024e-02f * v; + s6 += -5.328710750e-02f * v; + s7 += -1.562298536e-01f * v; + s8 += -2.485258281e-01f * v; + s9 += -3.238851428e-01f * v; + s10 += -3.771722913e-01f * v; + s11 += -4.047556818e-01f * v; + } + } + + { + float v = src[2 * src_stride]; + if (v != 0.0f) + { + s0 += 3.943375647e-01f * v; + s1 += 2.886751294e-01f * v; + s2 += 1.056623980e-01f * v; + s3 += -1.056624874e-01f * v; + s4 += -2.886751294e-01f * v; + s5 += -3.943375945e-01f * v; + s6 += -3.943375647e-01f * v; + s7 += -2.886751592e-01f * v; + s8 += -1.056624129e-01f * v; + s9 += 1.056624204e-01f * v; + s10 += 2.886752486e-01f * v; + s11 += 3.943375647e-01f * v; + } + } + + { + float v = src[3 * src_stride]; + if (v != 0.0f) + { + s0 += 3.771722317e-01f * v; + s1 += 1.562298536e-01f * v; + s2 += -1.562298536e-01f * v; + s3 += -3.771722913e-01f * v; + s4 += -3.771722317e-01f * v; + s5 += -1.562297344e-01f * v; + s6 += 1.562299281e-01f * v; + s7 += 3.771722615e-01f * v; + s8 += 3.771722019e-01f * v; + s9 += 1.562297940e-01f * v; + s10 += -1.562300622e-01f * v; + s11 += -3.771722317e-01f * v; + } + } + + { + float v = src[4 * src_stride]; + if (v != 0.0f) + { + s0 += 3.535533845e-01f * v; + s2 += -3.535534441e-01f * v; + s3 += -3.535533547e-01f * v; + s5 += 3.535534739e-01f * v; + s6 += 3.535533845e-01f * v; + s8 += -3.535534143e-01f * v; + s9 += -3.535534143e-01f * v; + s11 += 3.535532951e-01f * v; + } + } + + { + float v = src[5 * src_stride]; + if (v != 0.0f) + { + s0 += 3.238851428e-01f * v; + s1 += -1.562298536e-01f * v; + s2 += -4.047556818e-01f * v; + s3 += -5.328698456e-02f * v; + s4 += 3.771722615e-01f * v; + s5 += 
2.485257536e-01f * v; + s6 += -2.485258281e-01f * v; + s7 += -3.771722317e-01f * v; + s8 += 5.328687653e-02f * v; + s9 += 4.047557116e-01f * v; + s10 += 1.562295407e-01f * v; + s11 += -3.238854110e-01f * v; + } + } + + { + float v = src[6 * src_stride]; + if (v != 0.0f) + { + s0 += 2.886751294e-01f * v; + s1 += -2.886751294e-01f * v; + s2 += -2.886751592e-01f * v; + s3 += 2.886752486e-01f * v; + s4 += 2.886749804e-01f * v; + s5 += -2.886753380e-01f * v; + s6 += -2.886750400e-01f * v; + s7 += 2.886751592e-01f * v; + s8 += 2.886749506e-01f * v; + s9 += -2.886752486e-01f * v; + s10 += -2.886748612e-01f * v; + s11 += 2.886750698e-01f * v; + } + } + + { + float v = src[7 * src_stride]; + if (v != 0.0f) + { + s0 += 2.485257983e-01f * v; + s1 += -3.771722913e-01f * v; + s2 += -5.328698456e-02f * v; + s3 += 4.047556818e-01f * v; + s4 += -1.562300622e-01f * v; + s5 += -3.238852322e-01f * v; + s6 += 3.238853514e-01f * v; + s7 += 1.562295407e-01f * v; + s8 += -4.047557414e-01f * v; + s9 += 5.328752100e-02f * v; + s10 += 3.771720827e-01f * v; + s11 += -2.485256344e-01f * v; + } + } + + { + float v = src[8 * src_stride]; + if (v != 0.0f) + { + s0 += 2.041241378e-01f * v; + s1 += -4.082483053e-01f * v; + s2 += 2.041243017e-01f * v; + s3 += 2.041239887e-01f * v; + s4 += -4.082483053e-01f * v; + s5 += 2.041243464e-01f * v; + s6 += 2.041241080e-01f * v; + s7 += -4.082483053e-01f * v; + s8 += 2.041242570e-01f * v; + s9 += 2.041241974e-01f * v; + s10 += -4.082483053e-01f * v; + s11 += 2.041237950e-01f * v; + } + } + + { + float v = src[9 * src_stride]; + if (v != 0.0f) + { + s0 += 1.562298536e-01f * v; + s1 += -3.771722317e-01f * v; + s2 += 3.771722615e-01f * v; + s3 += -1.562300622e-01f * v; + s4 += -1.562296748e-01f * v; + s5 += 3.771723211e-01f * v; + s6 += -3.771723509e-01f * v; + s7 += 1.562300622e-01f * v; + s8 += 1.562293023e-01f * v; + s9 += -3.771721721e-01f * v; + s10 += 3.771724999e-01f * v; + s11 += -1.562300622e-01f * v; + } + } + + { + float v = src[10 * src_stride]; + 
if (v != 0.0f) + { + s0 += 1.056623980e-01f * v; + s1 += -2.886751592e-01f * v; + s2 += 3.943375647e-01f * v; + s3 += -3.943376541e-01f * v; + s4 += 2.886751592e-01f * v; + s5 += -1.056626216e-01f * v; + s6 += -1.056624576e-01f * v; + s7 += 2.886750400e-01f * v; + s8 += -3.943376839e-01f * v; + s9 += 3.943377137e-01f * v; + s10 += -2.886756361e-01f * v; + s11 += 1.056632623e-01f * v; + } + } + + { + float v = src[11 * src_stride]; + if (v != 0.0f) + { + s0 += 5.328707024e-02f * v; + s1 += -1.562297344e-01f * v; + s2 += 2.485257536e-01f * v; + s3 += -3.238852322e-01f * v; + s4 += 3.771723211e-01f * v; + s5 += -4.047556818e-01f * v; + s6 += 4.047556818e-01f * v; + s7 += -3.771722913e-01f * v; + s8 += 3.238852024e-01f * v; + s9 += -2.485264540e-01f * v; + s10 += 1.562305540e-01f * v; + s11 += -5.328702182e-02f * v; + } + } + + dst[0 * dst_stride] = s0; + dst[1 * dst_stride] = s1; + dst[2 * dst_stride] = s2; + dst[3 * dst_stride] = s3; + dst[4 * dst_stride] = s4; + dst[5 * dst_stride] = s5; + dst[6 * dst_stride] = s6; + dst[7 * dst_stride] = s7; + dst[8 * dst_stride] = s8; + dst[9 * dst_stride] = s9; + dst[10 * dst_stride] = s10; + dst[11 * dst_stride] = s11; +} diff --git a/external/basis_universal/transcoder/basisu_transcoder.cpp b/external/basis_universal/transcoder/basisu_transcoder.cpp index b22b75716c..a50ff4c620 100644 --- a/external/basis_universal/transcoder/basisu_transcoder.cpp +++ b/external/basis_universal/transcoder/basisu_transcoder.cpp @@ -1,5 +1,5 @@ // basisu_transcoder.cpp -// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2026 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -107,6 +107,10 @@ #define BASISD_SUPPORT_ASTC 1 #endif +#ifndef BASISD_SUPPORT_XUASTC +#define BASISD_SUPPORT_XUASTC 1 +#endif + // Note that if BASISD_SUPPORT_ATC is enabled, BASISD_SUPPORT_DXT5A should also be enabled for alpha support. #ifndef BASISD_SUPPORT_ATC #define BASISD_SUPPORT_ATC 1 @@ -161,6 +165,7 @@ #define BASISD_WRITE_NEW_ETC2_EAC_R11_TABLES 0 #ifndef BASISD_ENABLE_DEBUG_FLAGS + // DO NOT CHECK IN #define BASISD_ENABLE_DEBUG_FLAGS 0 #endif @@ -177,6 +182,14 @@ using namespace basist::astc_6x6_hdr; #endif +#if BASISD_IS_BIG_ENDIAN +const uint32_t BASISD_COLOR_RGBA_A_MASK = 0x000000FF; +const uint32_t BASISD_COLOR_RGBA_RGB_MASK = ~BASISD_COLOR_RGBA_A_MASK; +#else +const uint32_t BASISD_COLOR_RGBA_A_MASK = 0xFF000000; +const uint32_t BASISD_COLOR_RGBA_RGB_MASK = ~BASISD_COLOR_RGBA_A_MASK; +#endif + namespace basisu { bool g_debug_printf; @@ -188,7 +201,7 @@ namespace basisu void debug_printf(const char* pFmt, ...) { -#if BASISU_FORCE_DEVEL_MESSAGES +#if BASISU_FORCE_DEVEL_MESSAGES g_debug_printf = true; #endif if (g_debug_printf) @@ -202,7 +215,7 @@ namespace basisu void debug_puts(const char* p) { -#if BASISU_FORCE_DEVEL_MESSAGES +#if BASISU_FORCE_DEVEL_MESSAGES g_debug_printf = true; #endif if (g_debug_printf) @@ -235,6 +248,14 @@ namespace basist g_debug_flags = f; #endif } + + // Used by arith encoder/decoder + namespace arith_fastbits_f32 + { + bool g_initialized; + float g_lut_edge[TABLE_SIZE + 1]; // samples at m = 1 + i/TABLE_SIZE (for linear) + + } // namespace arith_fastbits_f32 inline uint16_t byteswap_uint16(uint16_t v) { @@ -312,6 +333,8 @@ namespace basist res[i] = basisu::lerp(a[i], b[i], s); return res; } + + inline float norm() const { return dot(*this); } }; uint16_t crc16(const void* r, size_t size, uint16_t crc) @@ -329,6 +352,62 @@ namespace basist return static_cast(~crc); } + uint32_t hash_hsieh(const uint8_t* pBuf, size_t len) + { + if (!pBuf || !len) + return 0; + + uint32_t h = static_cast(len); + + const uint32_t 
bytes_left = len & 3; + len >>= 2; + + while (len--) + { + const uint16_t* pWords = reinterpret_cast(pBuf); + + h += pWords[0]; + + const uint32_t t = (pWords[1] << 11) ^ h; + h = (h << 16) ^ t; + + pBuf += sizeof(uint32_t); + + h += h >> 11; + } + + switch (bytes_left) + { + case 1: + h += *reinterpret_cast(pBuf); + h ^= h << 10; + h += h >> 1; + break; + case 2: + h += *reinterpret_cast(pBuf); + h ^= h << 11; + h += h >> 17; + break; + case 3: + h += *reinterpret_cast(pBuf); + h ^= h << 16; + h ^= (static_cast(pBuf[sizeof(uint16_t)])) << 18; + h += h >> 11; + break; + default: + break; + } + + h ^= h << 3; + h += h >> 5; + h ^= h << 4; + h += h >> 17; + h ^= h << 25; + h += h >> 6; + + return h; + } + struct vec4F { float c[4]; @@ -338,7 +417,7 @@ namespace basist float operator[] (uint32_t index) const { assert(index < 4); return c[index]; } float& operator[] (uint32_t index) { assert(index < 4); return c[index]; } }; - + enum etc_constants { cETC1BytesPerBlock = 8U, @@ -411,14 +490,14 @@ namespace basist //const uint8_t g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 }; const uint8_t g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 }; - + static const uint8_t g_etc_5_to_8[32] = { 0, 8, 16, 24, 33, 41, 49, 57, 66, 74, 82, 90, 99, 107, 115, 123, 132, 140, 148, 156, 165, 173, 181, 189, 198, 206, 214, 222, 231, 239, 247, 255 }; struct decoder_etc_block { // big endian uint64: // bit ofs: 56 48 40 32 24 16 8 0 - // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 + // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 union { uint64_t m_uint64; @@ -686,7 +765,7 @@ namespace basist { return (m_bytes[3] & 2) != 0; } - + inline uint32_t get_inten_table(uint32_t subblock_id) const { assert(subblock_id < 2); @@ -701,7 +780,7 @@ namespace basist const uint32_t b = get_byte_bits(cETC1DeltaColor3BBitOffset, 3); return static_cast(b | (g << 3U) | (r << 6U)); } - + void get_block_colors(color32* pBlock_colors, uint32_t subblock_index) const { color32 b; @@ -819,7 
+898,7 @@ namespace basist g = c.g; b = c.b; } - + static void unpack_color5(color32& result, uint16_t packed_color5, bool scaled) { result = unpack_color5(packed_color5, scaled, 255); @@ -948,7 +1027,7 @@ namespace basist static void get_block_color5_r(const color32& base_color5, uint32_t inten_table, uint32_t index, uint32_t &r) { assert(index < 4); - + uint32_t br = (base_color5.r << 3) | (base_color5.r >> 2); const int* pInten_table = g_etc1_inten_tables[inten_table]; @@ -1124,7 +1203,7 @@ namespace basist { 1, 2, 2, 2 }, { 1, 2, 3, 3 }, }; - + static uint8_t g_etc1_to_dxt1_selector_mappings_raw_dxt1_256[NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS][256]; static uint8_t g_etc1_to_dxt1_selector_mappings_raw_dxt1_inv_256[NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS][256]; @@ -1515,9 +1594,9 @@ namespace basist return best_err; } #endif // BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES - + static -#if !BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES +#if !BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES const #endif etc1_g_to_eac_conversion s_etc1_g_to_etc2_a8[32 * 8][NUM_ETC2_EAC_SELECTOR_RANGES] = @@ -2006,7 +2085,7 @@ namespace basist void uastc_init(); #endif -#if BASISD_SUPPORT_UASTC_HDR +#if BASISD_SUPPORT_UASTC_HDR namespace astc_6x6_hdr { static void init_quantize_tables(); @@ -2021,19 +2100,24 @@ namespace basist } #endif - static bool g_transcoder_initialized; + namespace astc_ldr_t + { + void init_transcoding_tables(); + } + static bool g_transcoder_initialized; + // Library global initialization. Requires ~9 milliseconds when compiled and executed natively on a Core i7 2.2 GHz. // If this is too slow, these computed tables can easilky be moved to be compiled in. 
void basisu_transcoder_init() { if (g_transcoder_initialized) { - BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Called more than once\n"); + BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Called more than once\n"); return; } - - BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Initializing (this is not an error)\n"); + + BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Initializing (this is not an error)\n"); #if BASISD_SUPPORT_UASTC uastc_init(); @@ -2041,7 +2125,8 @@ namespace basist #if BASISD_SUPPORT_UASTC_HDR // TODO: Examine this, optimize for startup time/mem utilization. - astc_helpers::init_tables(true); + // XUASTC LDR decompressors need the rank tables + astc_helpers::init_tables(); astc_hdr_core_init(); #endif @@ -2049,7 +2134,7 @@ namespace basist #if BASISD_SUPPORT_ASTC transcoder_init_astc(); #endif - + #if BASISD_WRITE_NEW_ASTC_TABLES create_etc1_to_astc_conversion_table_0_47(); create_etc1_to_astc_conversion_table_0_255(); @@ -2159,11 +2244,25 @@ namespace basist astc_6x6_hdr::init_quantize_tables(); fast_encode_bc6h_init(); #endif - + #if BASISD_SUPPORT_BC7_MODE5 bc7_mode_5_encoder::encode_bc7_mode5_init(); #endif +#if BASISD_SUPPORT_XUASTC + // TODO: XUASTC support macro + astc_ldr_t::init(); + + astc_ldr_t::init_transcoding_tables(); + + // Used by arith encoder/decoder + arith_fastbits_f32::init(); + + // Used by astc ldr transcoding + bc7f::init(); + etc1f::init(); +#endif + g_transcoder_initialized = true; } @@ -2315,7 +2414,7 @@ namespace basist std::swap(l, h); pSelectors_xlat_256 = &g_etc1_to_dxt1_selector_mappings_raw_dxt1_inv_256[best_mapping][0]; } - + pDst_block->set_low_color(static_cast(l)); pDst_block->set_high_color(static_cast(h)); @@ -2475,7 +2574,7 @@ namespace basist fxt1_block* pBlock = static_cast(pDst); // CC_MIXED is basically DXT1 with different encoding tricks. - // So transcode ETC1S to DXT1, then transcode that to FXT1 which is easy and nearly lossless. 
+ // So transcode ETC1S to DXT1, then transcode that to FXT1 which is easy and nearly lossless. // (It's not completely lossless because FXT1 rounds in its color lerps while DXT1 doesn't, but it should be good enough.) dxt1_block blk; convert_etc1s_to_dxt1(&blk, pEndpoints, pSelectors, false); @@ -2488,7 +2587,7 @@ namespace basist uint32_t g0 = color0.g & 1; uint32_t g1 = color1.g & 1; - + color0.g >>= 1; color1.g >>= 1; @@ -2496,7 +2595,7 @@ namespace basist blk.m_selectors[1] = conv_dxt1_to_fxt1_sels(blk.m_selectors[1]); blk.m_selectors[2] = conv_dxt1_to_fxt1_sels(blk.m_selectors[2]); blk.m_selectors[3] = conv_dxt1_to_fxt1_sels(blk.m_selectors[3]); - + if ((blk.get_selector(0, 0) >> 1) != (g0 ^ g1)) { std::swap(color0, color1); @@ -2510,7 +2609,7 @@ namespace basist if (fxt1_subblock == 0) { - pBlock->m_hi.m_mode = 1; + pBlock->m_hi.m_mode = 1; pBlock->m_hi.m_alpha = 0; pBlock->m_hi.m_glsb = g1 | (g1 << 1); pBlock->m_hi.m_r0 = color0.r; @@ -2831,7 +2930,7 @@ namespace basist { uint32_t r; decoder_etc_block::get_block_color5_r(base_color, inten_table, low_selector, r); - + pDst_block->set_low_alpha(r); pDst_block->set_high_alpha(r); pDst_block->m_selectors[0] = 0; @@ -2914,7 +3013,7 @@ namespace basist static const uint8_t g_pvrtc_4[16] = { 0,16,33,49,66,82,99,115,140,156,173,189,206,222,239,255 }; static const uint8_t g_pvrtc_3[8] = { 0,33,74,107,148,181,222,255 }; static const uint8_t g_pvrtc_alpha[9] = { 0,34,68,102,136,170,204,238,255 }; - + static const uint8_t g_pvrtc_5_floor[256] = { 0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3, @@ -2938,7 +3037,7 @@ namespace basist 24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,27,27,27,27,27,27,27,27,28, 28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,30,30,30,30,30,30,30,30,31,31,31,31,31,31,31,31 }; - + static const uint8_t g_pvrtc_4_floor[256] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, @@ -2962,7 +3061,7 @@ namespace basist 
12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14, 14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15 }; - + static const uint8_t g_pvrtc_3_floor[256] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, @@ -2986,7 +3085,7 @@ namespace basist 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 }; - + static const uint8_t g_pvrtc_alpha_floor[256] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, @@ -3093,10 +3192,10 @@ namespace basist } assert((r < 32) && (g < 32) && (b < 32) && (a < 16)); - + return color32(r, g, b, a); } - + inline color32 get_endpoint_8888(uint32_t endpoint_index) const { assert(endpoint_index < 2); @@ -3143,7 +3242,7 @@ namespace basist a = g_pvrtc_alpha[a]; } - + return color32(r, g, b, a); } @@ -3152,7 +3251,7 @@ namespace basist color32 c(get_endpoint_8888(endpoint_index)); return c.r + c.g + c.b + c.a; } - + inline uint32_t get_opaque_endpoint_l0() const { uint32_t packed = m_endpoints & 0xFFFE; @@ -3267,7 +3366,7 @@ namespace basist else m_endpoints = (m_endpoints & 0xFFFF0000U) | packed; } - + // opaque endpoints: 554 or 555 // transparent endpoints: 3443 or 3444 inline void set_endpoint_raw(uint32_t endpoint_index, const color32& c, bool opaque_endpoint) @@ -3320,7 +3419,7 @@ namespace basist else m_endpoints = (m_endpoints & 0xFFFF0000U) | packed; } - + inline void set_endpoint_floor(uint32_t endpoint_index, const color32& c) { assert(endpoint_index < 2); @@ -3430,12 +3529,6 @@ namespace basist }; #endif - struct pvrtc1_temp_block - { - decoder_etc_block m_etc1_block; - uint32_t m_pvrtc_endpoints; - }; - static inline uint32_t get_opaque_endpoint_l0(uint32_t endpoints) { uint32_t packed = endpoints; @@ -3545,7 +3638,7 @@ namespace basist for (int ey = 0; ey < 3; ey++) { - int by = y + ey - 1; + int by = y + ey - 1; const uint32_t* pE = 
&pPVRTC_endpoints[(by & y_mask) * num_blocks_x]; @@ -3553,7 +3646,7 @@ namespace basist for (int ex = 0; ex < 3; ex++) { - int bx = 0 + ex - 1; + int bx = 0 + ex - 1; const uint32_t e = pE[bx & x_mask]; @@ -3702,8 +3795,8 @@ namespace basist } static void fixup_pvrtc1_4_modulation_rgba( - const decoder_etc_block* pETC_Blocks, - const uint32_t* pPVRTC_endpoints, + const decoder_etc_block* pETC_Blocks, + const uint32_t* pPVRTC_endpoints, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, void *pAlpha_blocks, const endpoint* pEndpoints, const selector* pSelectors) { @@ -3726,7 +3819,7 @@ namespace basist for (int ey = 0; ey < 3; ey++) { - int by = y + ey - 1; + int by = y + ey - 1; const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x]; @@ -3734,7 +3827,7 @@ namespace basist for (int ex = 0; ex < 3; ex++) { - int bx = 0 + ex - 1; + int bx = 0 + ex - 1; const uint32_t e = pE[bx & x_mask]; @@ -3748,13 +3841,13 @@ namespace basist for (int x = 0; x < static_cast(num_blocks_x); x++, block_index++) { const decoder_etc_block& src_block = pETC_Blocks[block_index]; - + const uint16_t* pSrc_alpha_block = reinterpret_cast(static_cast(pAlpha_blocks) + x + (y * num_blocks_x)); const endpoint* pAlpha_endpoints = &pEndpoints[pSrc_alpha_block[0]]; const selector* pAlpha_selectors = &pSelectors[pSrc_alpha_block[1]]; - + const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1); - + uint32_t swizzled = x_swizzle | y_swizzle; if (num_blocks_x != num_blocks_y) { @@ -3897,7 +3990,7 @@ namespace basist const uint32_t NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES = sizeof(g_etc1_to_bc7_m5_selector_ranges) / sizeof(g_etc1_to_bc7_m5_selector_ranges[0]); static uint32_t g_etc1_to_bc7_m5_selector_range_index[4][4]; - + const uint32_t NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS = 10; static const uint8_t g_etc1_to_bc7_m5_selector_mappings[NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS][4] = { @@ -3919,11 +4012,11 @@ namespace basist uint8_t m_hi; 
uint16_t m_err; }; - + static const etc1_to_bc7_m5_solution g_etc1_to_bc7_m5_color[32 * 8 * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS * NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES] = { #include "basisu_transcoder_tables_bc7_m5_color.inc" }; - + static dxt_selector_range g_etc1_to_bc7_m5a_selector_ranges[] = { { 0, 3 }, @@ -3948,7 +4041,7 @@ namespace basist { #include "basisu_transcoder_tables_bc7_m5_alpha.inc" }; - + static inline uint32_t set_block_bits(uint8_t* pBytes, uint32_t val, uint32_t num_bits, uint32_t cur_ofs) { assert(num_bits < 32); @@ -3973,83 +4066,6 @@ namespace basist return cur_ofs; } - struct bc7_mode_5 - { - union - { - struct - { - uint64_t m_mode : 6; - uint64_t m_rot : 2; - - uint64_t m_r0 : 7; - uint64_t m_r1 : 7; - uint64_t m_g0 : 7; - uint64_t m_g1 : 7; - uint64_t m_b0 : 7; - uint64_t m_b1 : 7; - uint64_t m_a0 : 8; - uint64_t m_a1_0 : 6; - - } m_lo; - - uint64_t m_lo_bits; - }; - - union - { - struct - { - uint64_t m_a1_1 : 2; - - // bit 2 - uint64_t m_c00 : 1; - uint64_t m_c10 : 2; - uint64_t m_c20 : 2; - uint64_t m_c30 : 2; - - uint64_t m_c01 : 2; - uint64_t m_c11 : 2; - uint64_t m_c21 : 2; - uint64_t m_c31 : 2; - - uint64_t m_c02 : 2; - uint64_t m_c12 : 2; - uint64_t m_c22 : 2; - uint64_t m_c32 : 2; - - uint64_t m_c03 : 2; - uint64_t m_c13 : 2; - uint64_t m_c23 : 2; - uint64_t m_c33 : 2; - - // bit 33 - uint64_t m_a00 : 1; - uint64_t m_a10 : 2; - uint64_t m_a20 : 2; - uint64_t m_a30 : 2; - - uint64_t m_a01 : 2; - uint64_t m_a11 : 2; - uint64_t m_a21 : 2; - uint64_t m_a31 : 2; - - uint64_t m_a02 : 2; - uint64_t m_a12 : 2; - uint64_t m_a22 : 2; - uint64_t m_a32 : 2; - - uint64_t m_a03 : 2; - uint64_t m_a13 : 2; - uint64_t m_a23 : 2; - uint64_t m_a33 : 2; - - } m_hi; - - uint64_t m_hi_bits; - }; - }; - #if BASISD_WRITE_NEW_BC7_MODE5_TABLES static void create_etc1_to_bc7_m5_color_conversion_table() { @@ -4095,7 +4111,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1_to_bc7_m5_selector_mappings[m][s]]; int err_scale = 1; - // Special case 
when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -4174,7 +4190,7 @@ namespace basist int mapping_err = block_colors[s].g - colors[k]; mapping_err *= mapping_err; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) mapping_err *= 5; @@ -4185,7 +4201,7 @@ namespace basist best_k = k; } } // k - + total_err += best_mapping_err; output_selectors |= (best_k << (s * 2)); } // s @@ -4200,7 +4216,7 @@ namespace basist } // lo } // hi - + fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, best_output_selectors); n++; if ((n & 31) == 31) @@ -4239,7 +4255,7 @@ namespace basist {127,104},{126,105},{126,106},{127,106},{127,107},{126,108},{125,109},{127,109},{126,110},{126,111},{127,111},{127,112},{126,113},{126,114},{127,114},{127,115}, {126,116},{126,117},{127,117},{127,118},{126,119},{126,120},{127,120},{127,121},{126,122},{126,123},{127,123},{127,124},{126,125},{126,126},{127,126},{127,127} }; - + static void transcoder_init_bc7_mode5() { #if 0 @@ -4267,9 +4283,9 @@ namespace basist } } // hi - + } // lo - + printf("{%u,%u},", g_bc7_m5_equals_1[i].m_hi, g_bc7_m5_equals_1[i].m_lo); if ((i & 15) == 15) printf("\n"); } 
@@ -4293,7 +4309,7 @@ namespace basist static void convert_etc1s_to_bc7_m5_color(void* pDst, const endpoint* pEndpoints, const selector* pSelector) { bc7_mode_5* pDst_block = static_cast(pDst); - + // First ensure the block is cleared to all 0's static_cast(pDst)[0] = 0; static_cast(pDst)[1] = 0; @@ -4419,7 +4435,7 @@ namespace basist pDst_block->m_lo.m_r1 = pTable_r[best_mapping].m_lo; pDst_block->m_lo.m_g1 = pTable_g[best_mapping].m_lo; pDst_block->m_lo.m_b1 = pTable_b[best_mapping].m_lo; - + s_inv = 3; } else @@ -4440,7 +4456,7 @@ namespace basist for (uint32_t x = 0; x < 4; x++) { const uint32_t s = pSelector->get_selector(x, y); - + const uint32_t os = pSelectors_xlat[s] ^ s_inv; output_bits |= (os << output_bit_ofs); @@ -4470,7 +4486,7 @@ namespace basist pDst_block->m_lo.m_a0 = r; pDst_block->m_lo.m_a1_0 = r & 63; pDst_block->m_hi.m_a1_1 = r >> 6; - + return; } else if (pSelector->m_num_unique_selectors == 2) @@ -4520,7 +4536,7 @@ namespace basist } const uint32_t selector_range_table = g_etc1_to_bc7_m5a_selector_range_index[low_selector][high_selector]; - + const etc1_g_to_bc7_m5a_conversion* pTable = &g_etc1_g_to_bc7_m5a[inten_table * (32 * NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES) + base_color_r * NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES + selector_range_table]; pDst_block->m_lo.m_a0 = pTable->m_lo; @@ -4561,10 +4577,12 @@ namespace basist set_block_bits((uint8_t*)pDst, output_bits, 31, 97); } +#if 0 static inline vec3F rgb_to_ycocg(const vec3F& rgb) { return vec3F(rgb.dot(vec3F(0.25f, 0.5f, 0.25f)), rgb.dot(vec3F(0.5f, 0.0f, -0.5f)), rgb.dot(vec3F(-0.25f, 0.5f, -0.25f))); } +#endif static inline vec2F rgb_to_cocg(const vec3F& rgb) { @@ -4576,11 +4594,14 @@ namespace basist return vec3F(ycocg.dot(vec3F(1.0f, 1.0f, -1.0f)), ycocg.dot(vec3F(1.0f, 0.0f, 1.0f)), ycocg.dot(vec3F(1.0f, -1.0f, -1.0f))); } +#if 0 static inline vec3F color32_to_vec3F(const color32& c) { return vec3F(c.r, c.g, c.b); } +#endif +#if 0 static inline vec3F color5_to_ycocg(const endpoint& e) { 
const int r = (e.m_color5[0] << 3) | (e.m_color5[0] >> 2); @@ -4588,6 +4609,7 @@ namespace basist const int b = (e.m_color5[2] << 3) | (e.m_color5[2] >> 2); return rgb_to_ycocg(vec3F((float)r, (float)g, (float)b)); } +#endif static inline vec2F color5_to_cocg(const endpoint& e) { @@ -4620,7 +4642,7 @@ namespace basist const bool hq_bc7_mode_5_encoder_mode = false; const int CHROMA_THRESH = 10; - + uint32_t total_filtered_blocks = 0; BASISU_NOTE_UNUSED(total_filtered_blocks); @@ -4629,7 +4651,7 @@ namespace basist for (int bx = 0; bx < (int)num_blocks_x; bx++) { vec2F center_cocg(color5_to_cocg(pEndpoints[decoded_endpoints(bx, by)])); - + //bool filter_flag = false; for (int dy = -1; dy <= 1; dy++) { @@ -4667,7 +4689,7 @@ namespace basist total_filtered_blocks++; bc7_mode_5* pDst_block = (bc7_mode_5*)(static_cast(pDst_blocks) + (bx + by * output_row_pitch_in_blocks_or_pixels) * sizeof(bc7_mode_5)); - + //memset(pDst_block, 0x80, 16); int lr = bc7_7_to_8(pDst_block->m_lo.m_r0); @@ -4691,7 +4713,7 @@ namespace basist float block_y_vals[16]; // [y][x] float y_sum = 0.0f, y_sum_sq = 0.0f; - + for (uint32_t i = 0; i < 16; i++) { uint32_t sel = sel_bits & (i ? 
3 : 1); @@ -4700,7 +4722,7 @@ namespace basist block_y_vals[i] = y; y_sum += y; y_sum_sq += y * y; - + } // i const float S = 1.0f / 16.0f; @@ -4723,14 +4745,14 @@ namespace basist const float fy = ((float)((bpy + 2) & 3) + .5f) * (1.0f / 4.0f); const int ubx = bx + ((bpx - 2) >> 2); - + vec2F a(get_endpoint_cocg_clamped(ubx, uby, decoded_endpoints, pEndpoints)); vec2F b(get_endpoint_cocg_clamped(ubx + 1, uby, decoded_endpoints, pEndpoints)); vec2F c(get_endpoint_cocg_clamped(ubx, uby + 1, decoded_endpoints, pEndpoints)); vec2F d(get_endpoint_cocg_clamped(ubx + 1, uby + 1, decoded_endpoints, pEndpoints)); assert((fx >= 0) && (fx <= 1.0f) && (fy >= 0) && (fy <= 1.0f)); - + // TODO: Could merge this into 4 muls on each corner by weights vec2F ab = vec2F::lerp(a, b, fx); vec2F cd = vec2F::lerp(c, d, fx); @@ -4747,7 +4769,7 @@ namespace basist } // y bc7_mode_5_encoder::encode_bc7_mode_5_block(pDst_block, block_to_pack, hq_bc7_mode_5_encoder_mode); - + } // bx } // by @@ -5136,7 +5158,7 @@ namespace basist } #endif // BASISD_SUPPORT_ETC2_EAC_RG11 -// ASTC + // ASTC struct etc1_to_astc_solution { uint8_t m_lo; @@ -5183,7 +5205,7 @@ namespace basist // The best selector mapping to use given a base base+inten table and used selector range for converting grayscale data. static uint8_t g_etc1_to_astc_best_grayscale_mapping[32][8][NUM_ETC1_TO_ASTC_SELECTOR_RANGES]; - + #if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY static const etc1_to_astc_solution g_etc1_to_astc_0_255[32 * 8 * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS * NUM_ETC1_TO_ASTC_SELECTOR_RANGES] = { #include "basisu_transcoder_tables_astc_0_255.inc" @@ -5248,7 +5270,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1_to_astc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. 
In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 8; @@ -5269,7 +5291,7 @@ namespace basist mapping_best_high[m] = best_hi; mapping_best_err[m] = best_err; highest_best_err = basisu::maximum(highest_best_err, best_err); - + } // m for (uint32_t m = 0; m < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; m++) @@ -5345,7 +5367,7 @@ namespace basist { int err = block_colors[s].g - colors[g_etc1_to_astc_selector_mappings[m][s]]; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. int err_scale = 1; if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) @@ -5374,9 +5396,9 @@ namespace basist uint64_t err = mapping_best_err[m]; err = basisu::minimum(err, 0xFFFF); - + fprintf(pFile, "{%u,%u,%u},", mapping_best_low[m], mapping_best_high[m], (uint32_t)err); - + n++; if ((n & 31) == 31) fprintf(pFile, "\n"); @@ -5459,14 +5481,14 @@ namespace basist struct astc_block_params { // 2 groups of 5, but only a max of 8 are used (RRGGBBAA00) - uint8_t m_endpoints[10]; + uint8_t m_endpoints[10]; uint8_t m_weights[32]; }; - - // Packs a single format ASTC block using Color Endpoint Mode 12 (LDR RGBA direct), endpoint BISE range 13, 2-bit weights (range 2). 
+ + // Packs a single format ASTC block using Color Endpoint Mode 12 (LDR RGBA direct), endpoint BISE range 13, 2-bit weights (range 2). // We're always going to output blocks containing alpha, even if the input doesn't have alpha, for simplicity. // Each block always has 4x4 weights, uses range 13 BISE encoding on the endpoints (0-47), and each weight ranges from 0-3. This encoding should be roughly equal in quality vs. BC1 for color. - // 8 total endpoints, stored as RGBA LH LH LH LH order, each ranging from 0-47. + // 8 total endpoints, stored as RGBA LH LH LH LH order, each ranging from 0-47. // Note the input [0,47] endpoint values are not linear - they are encoded as outlined in the ASTC spec: // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-endpoint-unquantization // 32 total weights, stored as 16 CA CA, each ranging from 0-3. @@ -5488,7 +5510,7 @@ namespace basist astc_encode_trits(pOutput, pBlock->m_endpoints + 5, bit_pos, 4); // Pack 32 2-bit weights, which are stored from the top down into the block in opposite bit order. - + for (uint32_t i = 0; i < 32; i++) { static const uint8_t s_reverse_bits[4] = { 0, 2, 1, 3 }; @@ -5497,7 +5519,7 @@ namespace basist } } - // CEM mode 12 (LDR RGBA Direct), 8-bit endpoints, 1-bit weights + // CEM mode 12 (LDR RGBA Direct), 8-bit endpoints, 1-bit weights // This ASTC mode is basically block truncation coding (BTC) using 1-bit weights and 8-bit/component endpoints - very convenient. 
static void astc_pack_block_cem_12_weight_range0(uint32_t* pOutput, const astc_block_params* pBlock) { @@ -5535,7 +5557,7 @@ namespace basist // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#_block_mode pBytes[0] = 0x42; pBytes[1] = 0x84; pBytes[2] = 0x00; pBytes[3] = 0x00; pBytes[4] = 0x00; pBytes[5] = 0x00; pBytes[6] = 0x00; pBytes[7] = 0xc0; - + pOutput[2] = 0; pOutput[3] = 0; @@ -5561,7 +5583,7 @@ namespace basist // Write constant block mode, color component selector, number of partitions, color endpoint mode // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#_block_mode pBytes[0] = 0x42; pBytes[1] = 0x00; pBytes[2] = 0x01; pBytes[3] = 0x00; - + pOutput[1] = 0; pOutput[2] = 0; pOutput[3] = 0; @@ -5589,7 +5611,7 @@ namespace basist { uint8_t m_lo, m_hi; } g_astc_single_color_encoding_1[256]; - + static void transcoder_init_astc() { for (uint32_t base_color = 0; base_color < 32; base_color++) @@ -5667,7 +5689,7 @@ namespace basist g_ise_to_unquant[bit | (trit << 4)] = unq; } } - + // Compute table used for optimal single color encoding. for (int i = 0; i < 256; i++) { @@ -5682,9 +5704,9 @@ namespace basist int l = lo_v | (lo_v << 8); int h = hi_v | (hi_v << 8); - + int v = ((l * (64 - 21) + (h * 21) + 32) / 64) >> 8; - + int e = abs(v - i); if (e < lowest_e) @@ -5706,7 +5728,7 @@ namespace basist for (int lo = 0; lo < 48; lo++) { const int lo_v = g_ise_to_unquant[lo]; - + int e = abs(lo_v - i); if (e < lowest_e) @@ -5721,7 +5743,7 @@ namespace basist // Converts opaque or color+alpha ETC1S block to ASTC 4x4. // This function tries to use the best ASTC mode given the block's actual contents. 
- static void convert_etc1s_to_astc_4x4(void* pDst_block, const endpoint* pEndpoints, const selector* pSelector, + static void convert_etc1s_to_astc_4x4(void* pDst_block, const endpoint* pEndpoints, const selector* pSelector, bool transcode_alpha, const endpoint *pEndpoint_codebook, const selector *pSelector_codebook) { astc_block_params blk; @@ -5765,7 +5787,7 @@ namespace basist // See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-void-extent-blocks uint32_t r, g, b; decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b); - + uint32_t* pOutput = static_cast(pDst_block); uint8_t* pBytes = reinterpret_cast(pDst_block); @@ -5785,7 +5807,7 @@ namespace basist } else if ((pSelector->m_num_unique_selectors <= 2) && (num_unique_alpha_selectors <= 2)) { - // Both color and alpha use <= 2 unique selectors each. + // Both color and alpha use <= 2 unique selectors each. // Use block truncation coding, which is lossless with ASTC (8-bit endpoints, 1-bit weights). color32 block_colors[4]; decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table); @@ -5832,7 +5854,7 @@ namespace basist { uint32_t s = alpha_selectors.get_selector(x, y); s = (s == alpha_high_selector) ? 1 : 0; - + blk.m_weights[(x + y * 4) * 2 + 1] = static_cast(s); } // x } // y @@ -5865,12 +5887,12 @@ namespace basist return; } - + // Either alpha and/or color use > 2 unique selectors each, so we must do something more complex. - + #if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY // The optional higher quality modes use 8-bits endpoints vs. [0,47] endpoints. - + // If the block's base color is grayscale, all pixels are grayscale, so encode the block as Luminance+Alpha. 
if ((base_color.r == base_color.g) && (base_color.r == base_color.b)) { @@ -5904,7 +5926,7 @@ namespace basist { // Convert ETC1S alpha const uint32_t alpha_selector_range_table = g_etc1_to_astc_selector_range_index[alpha_low_selector][alpha_high_selector]; - + //[32][8][RANGES][MAPPING] const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc_0_255[(alpha_inten_table * 32 + alpha_base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + alpha_selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; @@ -5912,7 +5934,7 @@ namespace basist blk.m_endpoints[2] = pTable_g[best_mapping].m_lo; blk.m_endpoints[3] = pTable_g[best_mapping].m_hi; - + const uint8_t* pSelectors_xlat = &g_etc1_to_astc_selector_mappings[best_mapping][0]; for (uint32_t y = 0; y < 4; y++) @@ -5956,10 +5978,10 @@ namespace basist { // Convert ETC1S alpha const uint32_t selector_range_table = g_etc1_to_astc_selector_range_index[low_selector][high_selector]; - + //[32][8][RANGES][MAPPING] const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc_0_255[(inten_table * 32 + base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; - + const uint32_t best_mapping = g_etc1_to_astc_best_grayscale_mapping_0_255[base_color.g][inten_table][selector_range_table]; blk.m_endpoints[0] = pTable_g[best_mapping].m_lo; @@ -6101,7 +6123,7 @@ namespace basist { // Convert ETC1S alpha const uint32_t alpha_selector_range_table = g_etc1_to_astc_selector_range_index[alpha_low_selector][alpha_high_selector]; - + //[32][8][RANGES][MAPPING] const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc[(alpha_inten_table * 32 + alpha_base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + alpha_selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; @@ -6145,7 +6167,7 @@ namespace basist const uint32_t r = block_colors[low_selector].r; const uint32_t g = 
block_colors[low_selector].g; const uint32_t b = block_colors[low_selector].b; - + blk.m_endpoints[0] = g_astc_single_color_encoding_1[r].m_lo; blk.m_endpoints[1] = g_astc_single_color_encoding_1[r].m_hi; @@ -6247,7 +6269,7 @@ namespace basist blk.m_endpoints[4] = pTable_b[best_mapping].m_lo; blk.m_endpoints[5] = pTable_b[best_mapping].m_hi; - + int s0 = g_ise_to_unquant[blk.m_endpoints[0]] + g_ise_to_unquant[blk.m_endpoints[2]] + g_ise_to_unquant[blk.m_endpoints[4]]; int s1 = g_ise_to_unquant[blk.m_endpoints[1]] + g_ise_to_unquant[blk.m_endpoints[3]] + g_ise_to_unquant[blk.m_endpoints[5]]; bool invert = false; @@ -6412,8 +6434,8 @@ namespace basist static void transcoder_init_atc() { prepare_atc_single_color_table(g_pvrtc2_match45_equals_1, 16, 32, 1); - prepare_atc_single_color_table(g_atc_match55_equals_1, 32, 32, 1); - prepare_atc_single_color_table(g_atc_match56_equals_1, 32, 64, 1); + prepare_atc_single_color_table(g_atc_match55_equals_1, 32, 32, 1); + prepare_atc_single_color_table(g_atc_match56_equals_1, 32, 64, 1); prepare_atc_single_color_table(g_pvrtc2_match4, 1, 16, 3); prepare_atc_single_color_table(g_atc_match5, 1, 32, 3); @@ -6467,7 +6489,7 @@ namespace basist pBlock->set_low_color(g_atc_match55_equals_1[r].m_lo, g_atc_match56_equals_1[g].m_lo, g_atc_match55_equals_1[b].m_lo); pBlock->set_high_color(g_atc_match55_equals_1[r].m_hi, g_atc_match56_equals_1[g].m_hi, g_atc_match55_equals_1[b].m_hi); - + pBlock->m_sels[0] = 0x55; pBlock->m_sels[1] = 0x55; pBlock->m_sels[2] = 0x55; @@ -6602,7 +6624,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. 
In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -6676,7 +6698,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -6706,7 +6728,7 @@ namespace basist } // inten fclose(pFile); - + // PVRTC2 45 fopen_s(&pFile, "basisu_transcoder_tables_pvrtc2_45.inc", "w"); @@ -6751,7 +6773,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -6828,7 +6850,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. 
In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -6905,7 +6927,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -6982,7 +7004,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. 
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -7110,12 +7132,12 @@ namespace basist { uint8_t m_l, m_h; } g_pvrtc2_trans_match44[256]; - + static struct { uint8_t m_l, m_h; } g_pvrtc2_alpha_match33[256]; - + static struct { uint8_t m_l, m_h; @@ -7125,7 +7147,7 @@ namespace basist { uint8_t m_l, m_h; } g_pvrtc2_alpha_match33_3[256]; - + // PVRTC2 can be forced to look like a slightly weaker variant of ATC/BC1, so that's what we do here for simplicity. static void convert_etc1s_to_pvrtc2_rgb(void* pDst, const endpoint* pEndpoints, const selector* pSelector) { @@ -7237,7 +7259,7 @@ namespace basist pBlock->m_modulation[3] = (uint8_t)sels3; } } - + static inline vec4F* vec4F_set_scalar(vec4F* pV, float x) { pV->c[0] = x; pV->c[1] = x; pV->c[2] = x; pV->c[3] = x; return pV; } static inline vec4F* vec4F_set(vec4F* pV, float x, float y, float z, float w) { pV->c[0] = x; pV->c[1] = y; pV->c[2] = z; pV->c[3] = w; return pV; } static inline vec4F* vec4F_saturate_in_place(vec4F* pV) { pV->c[0] = saturate(pV->c[0]); pV->c[1] = saturate(pV->c[1]); pV->c[2] = saturate(pV->c[2]); pV->c[3] = saturate(pV->c[3]); return pV; } @@ -7255,9 +7277,9 @@ namespace basist } static inline int sq(int x) { return x * x; } - - // PVRTC2 is a slightly borked format for alpha: In Non-Interpolated mode, the way AlphaB8 is expanded from 4 to 8 bits means it can never be 0. - // This is actually very bad, because on 100% transparent blocks which have non-trivial color pixels, part of the color channel will leak into alpha! + + // PVRTC2 is a slightly borked format for alpha: In Non-Interpolated mode, the way AlphaB8 is expanded from 4 to 8 bits means it can never be 0. + // This is actually very bad, because on 100% transparent blocks which have non-trivial color pixels, part of the color channel will leak into alpha! // And there's nothing straightforward we can do because using the other modes is too expensive/complex. 
I can see why Apple didn't adopt it. static void convert_etc1s_to_pvrtc2_rgba(void* pDst, const endpoint* pEndpoints, const selector* pSelector, const endpoint* pEndpoint_codebook, const selector* pSelector_codebook) { @@ -7312,13 +7334,13 @@ namespace basist const uint32_t high_selector = pSelector->m_hi_selector; const int num_unique_color_selectors = pSelector->m_num_unique_selectors; - + // We need to reencode the block at the pixel level, unfortunately, from two ETC1S planes. // Do 4D incremental PCA, project all pixels to this hyperline, then quantize to packed endpoints and compute the modulation values. const int br = (base_color.r << 3) | (base_color.r >> 2); const int bg = (base_color.g << 3) | (base_color.g >> 2); const int bb = (base_color.b << 3) | (base_color.b >> 2); - + color32 block_cols[4]; for (uint32_t i = 0; i < 4; i++) { @@ -7347,14 +7369,14 @@ namespace basist decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b); // Mod 0 - uint32_t lr0 = (r * 15 + 128) / 255, lg0 = (g * 15 + 128) / 255, lb0 = (b * 7 + 128) / 255; + uint32_t lr0 = (r * 15 + 128) / 255, lg0 = (g * 15 + 128) / 255, lb0 = (b * 7 + 128) / 255; uint32_t la0 = g_pvrtc2_alpha_match33_0[constant_alpha_val].m_l; uint32_t cr0 = (lr0 << 1) | (lr0 >> 3); uint32_t cg0 = (lg0 << 1) | (lg0 >> 3); uint32_t cb0 = (lb0 << 2) | (lb0 >> 1); uint32_t ca0 = (la0 << 1); - + cr0 = (cr0 << 3) | (cr0 >> 2); cg0 = (cg0 << 3) | (cg0 >> 2); cb0 = (cb0 << 3) | (cb0 >> 2); @@ -7383,14 +7405,14 @@ namespace basist uint32_t cg3 = (lg3 << 1) | (lg3 >> 3); uint32_t cb3 = (lb3 << 1) | (lb3 >> 3); uint32_t ca3 = (la3 << 1) | 1; - + cr3 = (cr3 << 3) | (cr3 >> 2); cg3 = (cg3 << 3) | (cg3 >> 2); cb3 = (cb3 << 3) | (cb3 >> 2); ca3 = (ca3 << 4) | ca3; uint32_t err3 = sq(cr3 - r) + sq(cg3 - g) + sq(cb3 - b) + sq(ca3 - constant_alpha_val) * 2; - + // Mod 1 uint32_t lr1 = g_pvrtc2_trans_match44[r].m_l, lg1 = g_pvrtc2_trans_match44[g].m_l, lb1 = g_pvrtc2_trans_match34[b].m_l; uint32_t hr1 = 
g_pvrtc2_trans_match44[r].m_h, hg1 = g_pvrtc2_trans_match44[g].m_h, hb1 = g_pvrtc2_trans_match34[b].m_h; @@ -7465,7 +7487,7 @@ namespace basist // It's a solid color block. uint32_t low_a = block_cols[alpha_selectors.m_lo_selector].a; uint32_t high_a = block_cols[alpha_selectors.m_hi_selector].a; - + const float S = 1.0f / 255.0f; vec4F_set(&minColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, low_a * S); vec4F_set(&maxColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, high_a * S); @@ -7477,7 +7499,7 @@ namespace basist vec4F_set(&minColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, constant_alpha_val * S); vec4F_set(&maxColor, block_cols[high_selector].r * S, block_cols[high_selector].g * S, block_cols[high_selector].b * S, constant_alpha_val * S); } - // See if any of the block colors got clamped - if so the principle axis got distorted (it's no longer just the ETC1S luma axis). + // See if any of the block colors got clamped - if so the principle axis got distorted (it's no longer just the ETC1S luma axis). // To keep quality up we need to use full 4D PCA in this case. 
else if ((block_cols[low_selector].c[0] == 0) || (block_cols[high_selector].c[0] == 255) || (block_cols[low_selector].c[1] == 0) || (block_cols[high_selector].c[1] == 255) || @@ -7528,7 +7550,7 @@ namespace basist } vec4F_normalize_in_place(&axis); - + if (vec4F_dot(&axis, &axis) < .5f) vec4F_set_scalar(&axis, .5f); @@ -7628,10 +7650,10 @@ namespace basist // 4433 4443 color32 trialMinColor, trialMaxColor; - + trialMinColor.set_clamped((int)(minColor.c[0] * 15.0f + .5f), (int)(minColor.c[1] * 15.0f + .5f), (int)(minColor.c[2] * 7.0f + .5f), (int)(minColor.c[3] * 7.0f + .5f)); trialMaxColor.set_clamped((int)(maxColor.c[0] * 15.0f + .5f), (int)(maxColor.c[1] * 15.0f + .5f), (int)(maxColor.c[2] * 15.0f + .5f), (int)(maxColor.c[3] * 7.0f + .5f)); - + pBlock->set_trans_low_color(trialMinColor.r, trialMinColor.g, trialMinColor.b, trialMinColor.a); pBlock->set_trans_high_color(trialMaxColor.r, trialMaxColor.g, trialMaxColor.b, trialMaxColor.a); @@ -7704,7 +7726,7 @@ namespace basist } } } - + static void transcoder_init_pvrtc2() { for (uint32_t v = 0; v < 256; v++) @@ -7810,7 +7832,7 @@ namespace basist g_pvrtc2_trans_match34[v].m_l = (uint8_t)best_l; g_pvrtc2_trans_match34[v].m_h = (uint8_t)best_h; } - + for (uint32_t v = 0; v < 256; v++) { int best_l = 0, best_h = 0, lowest_err = INT_MAX; @@ -7844,12 +7866,12 @@ namespace basist #endif // BASISD_SUPPORT_PVRTC2 //------------------------------------------------------------------------------------------------ - + // BC7 mode 5 RGB encoder #if BASISD_SUPPORT_BC7_MODE5 namespace bc7_mode_5_encoder - { + { static float g_mode5_rgba_midpoints[128]; void encode_bc7_mode5_init() @@ -8124,10 +8146,10 @@ namespace basist } int block_max_var = basisu::maximum(icov[0], icov[3], icov[5]); // not divided by 16, i.e. 
scaled by 16 - + // TODO: Tune this const int32_t SIMPLE_BLOCK_THRESH = 10 * 16; - + if ((!hq_mode) && (block_max_var < SIMPLE_BLOCK_THRESH)) { const int L = 16, H = 239; @@ -8168,7 +8190,7 @@ namespace basist saxis_g = (int)(alt_xg * m); saxis_b = (int)(alt_xb * m); } - + saxis_r = (int)((uint32_t)saxis_r << 4U); saxis_g = (int)((uint32_t)saxis_g << 4U); saxis_b = (int)((uint32_t)saxis_b << 4U); @@ -8320,7 +8342,7 @@ namespace basist sym_codec.stop(); m_local_selectors.resize(num_selectors); - + if (!sym_codec.init(pSelectors_data, selectors_data_size)) { BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 5\n"); @@ -8345,7 +8367,7 @@ namespace basist BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: hybrid global selector codebooks are unsupported\n"); return false; } - + const bool used_raw_encoding = (sym_codec.get_bits(1) == 1); if (used_raw_encoding) @@ -8526,7 +8548,7 @@ namespace basist if (!output_rows_in_pixels) output_rows_in_pixels = orig_height; } - + basisu::vector* pPrev_frame_indices = nullptr; if (is_video) { @@ -8554,12 +8576,12 @@ namespace basist } approx_move_to_front selector_history_buf(m_selector_history_buf_size); - + uint32_t cur_selector_rle_count = 0; decoder_etc_block block; memset(&block, 0, sizeof(block)); - + //block.set_flip_bit(true); // Setting the flip bit to false to be compatible with the Khronos KDFS. 
block.set_flip_bit(false); @@ -8595,7 +8617,7 @@ namespace basist if (!endpoints.size() || !selectors.size()) { BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: global codebooks must be unpacked first\n"); - + if (pPVRTC_work_mem) free(pPVRTC_work_mem); @@ -8606,7 +8628,7 @@ namespace basist const uint32_t SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX = m_selector_history_buf_size + SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX; #if BASISD_SUPPORT_BC7_MODE5 - const bool bc7_chroma_filtering = ((decode_flags & cDecodeFlagsNoETC1SChromaFiltering) == 0) && + const bool bc7_chroma_filtering = ((decode_flags & cDecodeFlagsNoETC1SChromaFiltering) == 0) && ((fmt == block_format::cBC7_M5_COLOR) || (fmt == block_format::cBC7)); basisu::vector2D decoded_endpoints; @@ -8831,7 +8853,7 @@ namespace basist case block_format::cETC1: { decoder_etc_block* pDst_block = reinterpret_cast(static_cast(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes); - + block.set_base5_color(decoder_etc_block::pack_color5(pEndpoints->m_color5, false)); block.set_inten_table(0, pEndpoints->m_inten5); block.set_inten_table(1, pEndpoints->m_inten5); @@ -8882,7 +8904,7 @@ namespace basist const uint32_t low_selector = pSelector->m_lo_selector; const uint32_t high_selector = pSelector->m_hi_selector; - // Get block's RGB bounding box + // Get block's RGB bounding box color32 block_colors[2]; decoder_etc_block::get_block_colors5_bounds(block_colors, base_color, inten_table, low_selector, high_selector); @@ -8898,7 +8920,7 @@ namespace basist pPVRTC_endpoints[block_x + block_y * num_blocks_x] = temp.m_endpoints; #else assert(0); -#endif +#endif break; } @@ -8906,7 +8928,7 @@ namespace basist { #if BASISD_SUPPORT_PVRTC1 assert(pAlpha_blocks); - + block.set_base5_color(decoder_etc_block::pack_color5(pEndpoints->m_color5, false)); block.set_inten_table(0, pEndpoints->m_inten5); block.set_inten_table(1, pEndpoints->m_inten5); @@ -8914,7 
+8936,7 @@ namespace basist ((decoder_etc_block*)pPVRTC_work_mem)[block_x + block_y * num_blocks_x] = block; - // Get block's RGBA bounding box + // Get block's RGBA bounding box const color32& base_color = pEndpoints->m_color5; const uint32_t inten_table = pEndpoints->m_inten5; const uint32_t low_selector = pSelector->m_lo_selector; @@ -8949,7 +8971,7 @@ namespace basist pPVRTC_endpoints[block_x + block_y * num_blocks_x] = temp.m_endpoints; #else assert(0); -#endif +#endif break; } @@ -8990,7 +9012,7 @@ namespace basist #endif break; } - case block_format::cASTC_4x4: + case block_format::cASTC_LDR_4x4: { #if BASISD_SUPPORT_ASTC void* pDst_block = static_cast(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes; @@ -9039,7 +9061,7 @@ namespace basist assert(transcode_alpha); void* pDst_block = static_cast(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes; - + convert_etc1s_to_pvrtc2_rgba(pDst_block, pEndpoints, pSelector, &endpoints[0], &selectors[0]); #endif break; @@ -9055,10 +9077,10 @@ namespace basist { assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes); uint8_t* pDst_pixels = static_cast(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t); - + const uint32_t max_x = basisu::minimum(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); const uint32_t max_y = basisu::minimum(4, (int)output_rows_in_pixels - (int)block_y * 4); - + int colors[4]; decoder_etc_block::get_block_colors5_g(colors, pEndpoints->m_color5, pEndpoints->m_inten5); @@ -9072,7 +9094,7 @@ namespace basist pDst_pixels[3+4] = static_cast(colors[(s >> 2) & 3]); pDst_pixels[3+8] = static_cast(colors[(s >> 4) & 3]); pDst_pixels[3+12] = static_cast(colors[(s >> 6) & 3]); - + pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t); } } @@ -9101,7 +9123,7 @@ namespace basist color32 
colors[4]; decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5); - + for (uint32_t y = 0; y < max_y; y++) { const uint32_t s = pSelector->m_selectors[y]; @@ -9222,7 +9244,7 @@ namespace basist cur = byteswap_uint16(cur); cur = (cur & 0xF) | packed_colors[(s >> (x * 2)) & 3]; - + if (BASISD_IS_BIG_ENDIAN) cur = byteswap_uint16(cur); @@ -9322,7 +9344,7 @@ namespace basist if (endpoint_pred_repeat_count != 0) { BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: endpoint_pred_repeat_count != 0. The file is corrupted or this is a bug\n"); - + if (pPVRTC_work_mem) free(pPVRTC_work_mem); @@ -9353,22 +9375,19 @@ namespace basist } bool basis_validate_output_buffer_size( - basis_tex_format source_format, transcoder_texture_format target_format, uint32_t output_blocks_buf_size_in_blocks_or_pixels, uint32_t orig_width, uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels, uint32_t output_rows_in_pixels) { - BASISU_NOTE_UNUSED(source_format); - if (basis_transcoder_format_is_uncompressed(target_format)) { // Assume the output buffer is orig_width by orig_height if (!output_row_pitch_in_blocks_or_pixels) output_row_pitch_in_blocks_or_pixels = orig_width; - if (!output_rows_in_pixels) + if (!output_rows_in_pixels) output_rows_in_pixels = orig_height; // Now make sure the output buffer is large enough, or we'll overwrite memory. @@ -9380,11 +9399,12 @@ namespace basist } else { + // Take into account the destination format's block width/height. const uint32_t dst_block_width = basis_get_block_width(target_format); const uint32_t dst_block_height = basis_get_block_height(target_format); //const uint32_t bytes_per_block = basis_get_bytes_per_block_or_pixel(target_format); - - // Take into account the destination format's block width/height. + + // Compute how many blocks should be in the output. 
const uint32_t num_dst_blocks_x = (orig_width + dst_block_width - 1) / dst_block_width; const uint32_t num_dst_blocks_y = (orig_height + dst_block_height - 1) / dst_block_height; const uint32_t total_dst_blocks = num_dst_blocks_x * num_dst_blocks_y; @@ -9402,7 +9422,7 @@ namespace basist return true; } - + uint32_t basis_compute_transcoded_image_size_in_bytes(transcoder_texture_format target_format, uint32_t orig_width, uint32_t orig_height) { assert(orig_width && orig_height); @@ -9418,7 +9438,7 @@ namespace basist const uint32_t bytes_per_slice = bytes_per_line * orig_height; return bytes_per_slice; } - + // Compressed formats are 2D arrays of blocks. const uint32_t bytes_per_block = basis_get_bytes_per_block_or_pixel(target_format); @@ -9489,12 +9509,12 @@ namespace basist // Switch to PVRTC1 RGB if the input doesn't have alpha. target_format = transcoder_texture_format::cTFPVRTC1_4_RGB; } - + const bool transcode_alpha_data_to_opaque_formats = (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0; const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format); const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y; - - if (!basis_validate_output_buffer_size(basis_tex_format::cETC1S, target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels)) + + if (!basis_validate_output_buffer_size(target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels)) { BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output buffer size too small\n"); return false; @@ -9520,7 +9540,7 @@ namespace basist { //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); status = 
transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cETC1, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); - + if (!status) { BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC1 failed\n"); @@ -9645,7 +9665,7 @@ namespace basist if (basis_file_has_alpha_slices) { - // First decode the alpha data + // First decode the alpha data //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_A8, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cETC2_EAC_A8, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); } @@ -9683,8 +9703,8 @@ namespace basist return false; #else assert(bytes_per_block_or_pixel == 16); - - // First decode the alpha data + + // First decode the alpha data if (basis_file_has_alpha_slices) { //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); @@ -9754,7 +9774,7 @@ namespace basist break; #endif } - case transcoder_texture_format::cTFASTC_4x4_RGBA: + case transcoder_texture_format::cTFASTC_LDR_4x4_RGBA: { #if !BASISD_SUPPORT_ASTC BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: ASTC unsupported\n"); @@ -9771,13 +9791,13 @@ namespace basist { // Now decode the color data and transcode to ASTC. 
The transcoder function will read the alpha selector data from the output texture as it converts and // transcode both the alpha and color data at the same time to ASTC. - //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cASTC_4x4, 16, decode_flags | cDecodeFlagsOutputHasAlphaIndices, output_row_pitch_in_blocks_or_pixels, pState); - status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cASTC_4x4, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, true, nullptr, output_rows_in_pixels, decode_flags); + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cASTC_LDR_4x4, 16, decode_flags | cDecodeFlagsOutputHasAlphaIndices, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cASTC_LDR_4x4, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, true, nullptr, output_rows_in_pixels, decode_flags); } } else - //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cASTC_4x4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cASTC_4x4, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, 
block_format::cASTC_LDR_4x4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cASTC_LDR_4x4, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); if (!status) { @@ -9813,7 +9833,7 @@ namespace basist #else assert(bytes_per_block_or_pixel == 16); - // First decode the alpha data + // First decode the alpha data if (basis_file_has_alpha_slices) { //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); @@ -9873,7 +9893,7 @@ namespace basist } else { - // Now decode the color data and transcode to PVRTC2 RGBA. + // Now decode the color data and transcode to PVRTC2 RGBA. //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGBA, bytes_per_block_or_pixel, decode_flags | cDecodeFlagsOutputHasAlphaIndices, output_row_pitch_in_blocks_or_pixels, pState); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cPVRTC2_4_RGBA, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, true, nullptr, output_rows_in_pixels, decode_flags); } @@ -9894,7 +9914,7 @@ namespace basist { // Raw 32bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory. 
- // First decode the alpha data + // First decode the alpha data if (basis_file_has_alpha_slices) //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cA32, sizeof(uint32_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cA32, sizeof(uint32_t), false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); @@ -9935,7 +9955,7 @@ namespace basist { // Raw 16bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory. - // First decode the alpha data + // First decode the alpha data if (basis_file_has_alpha_slices) //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGBA4444_ALPHA, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cRGBA4444_ALPHA, sizeof(uint16_t), false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags); @@ -10039,14 +10059,15 @@ namespace basist } //------------------------------------------------------------------------------------------------ - + // UASTC LDR 4x4 transcoder + //------------------------------------------------------------------------------------------------ basisu_lowlevel_uastc_ldr_4x4_transcoder::basisu_lowlevel_uastc_ldr_4x4_transcoder() { } bool basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_slice( void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const 
uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, - uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels, basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags) { @@ -10107,7 +10128,7 @@ namespace basist for (uint32_t block_y = 0; block_y < num_blocks_y; ++block_y) { void* pDst_block = (uint8_t*)pDst_blocks + block_y * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes; - + for (uint32_t block_x = 0; block_x < num_blocks_x; ++block_x, ++pSource_block, pDst_block = (uint8_t *)pDst_block + output_block_or_pixel_stride_in_bytes) { switch (fmt) @@ -10143,7 +10164,7 @@ namespace basist } case block_format::cBC4: { - if (channel0 < 0) + if (channel0 < 0) channel0 = 0; status = transcode_uastc_to_bc4(*pSource_block, pDst_block, high_quality, channel0); break; @@ -10163,7 +10184,7 @@ namespace basist status = transcode_uastc_to_bc7(*pSource_block, pDst_block); break; } - case block_format::cASTC_4x4: + case block_format::cASTC_LDR_4x4: { status = transcode_uastc_to_astc(*pSource_block, pDst_block); break; @@ -10306,7 +10327,7 @@ namespace basist return false; #endif } - + bool basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image( transcoder_texture_format target_format, void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, @@ -10328,7 +10349,7 @@ namespace basist { BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: source data buffer too small\n"); return false; - } + } if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGB) || (target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA)) { @@ -10350,12 +10371,12 @@ namespace basist const uint32_t 
bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format); //const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y; - if (!basis_validate_output_buffer_size(basis_tex_format::cUASTC4x4, target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels)) + if (!basis_validate_output_buffer_size(target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels)) { BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: output buffer size too small\n"); return false; } - + bool status = false; // UASTC4x4 @@ -10363,10 +10384,9 @@ namespace basist { case transcoder_texture_format::cTFETC1_RGB: { - //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC1, bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); - + if (!status) { BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to ETC1 failed\n"); @@ -10375,7 +10395,6 @@ namespace basist } case transcoder_texture_format::cTFETC2_RGBA: { - //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_RGBA, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC2_RGBA, bytes_per_block_or_pixel, false, has_alpha, 
orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); if (!status) @@ -10387,7 +10406,6 @@ namespace basist case transcoder_texture_format::cTFBC1_RGB: { // TODO: ETC1S allows BC1 from alpha channel. That doesn't seem actually useful, though. - //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC1, bytes_per_block_or_pixel, true, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); if (!status) @@ -10398,7 +10416,6 @@ namespace basist } case transcoder_texture_format::cTFBC3_RGBA: { - //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC3, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC3, bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); if (!status) @@ -10409,9 +10426,6 @@ namespace basist } case transcoder_texture_format::cTFBC4_R: { - //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState, - // nullptr, 0, - // ((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 
3 : 0); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC4, bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, ((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 3 : 0, -1, decode_flags); @@ -10423,9 +10437,6 @@ namespace basist } case transcoder_texture_format::cTFBC5_RG: { - //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC5, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState, - // nullptr, 0, - // 0, 3); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC5, bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, 0, 3, decode_flags); @@ -10438,7 +10449,6 @@ namespace basist case transcoder_texture_format::cTFBC7_RGBA: case transcoder_texture_format::cTFBC7_ALT: { - //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC7, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC7, bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags); if (!status) @@ -10449,7 +10459,6 @@ namespace basist } case transcoder_texture_format::cTFPVRTC1_4_RGB: { - //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, 
pState); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cPVRTC1_4_RGB, bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags); if (!status) @@ -10460,7 +10469,6 @@ namespace basist } case transcoder_texture_format::cTFPVRTC1_4_RGBA: { - //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGBA, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cPVRTC1_4_RGBA, bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags); if (!status) @@ -10469,10 +10477,9 @@ namespace basist } break; } - case transcoder_texture_format::cTFASTC_4x4_RGBA: + case transcoder_texture_format::cTFASTC_LDR_4x4_RGBA: { - //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cASTC_4x4, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cASTC_4x4, + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cASTC_LDR_4x4, bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags); if (!status) { @@ -10483,29 +10490,26 @@ namespace basist case transcoder_texture_format::cTFATC_RGB: case transcoder_texture_format::cTFATC_RGBA: { - 
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: UASTC->ATC currently unsupported\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: UASTC LDR 4x4->ATC currently unsupported\n"); return false; } case transcoder_texture_format::cTFFXT1_RGB: { - BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: UASTC->FXT1 currently unsupported\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: UASTC LDR 4x4->FXT1 currently unsupported\n"); return false; } case transcoder_texture_format::cTFPVRTC2_4_RGB: { - BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: UASTC->PVRTC2 currently unsupported\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: UASTC LDR 4x4->PVRTC2 currently unsupported\n"); return false; } case transcoder_texture_format::cTFPVRTC2_4_RGBA: { - BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: UASTC->PVRTC2 currently unsupported\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: UASTC LDR 4x4->PVRTC2 currently unsupported\n"); return false; } case transcoder_texture_format::cTFETC2_EAC_R11: { - //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState, - // nullptr, 0, - // ((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 3 : 0); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, ((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 
3 : 0, -1, decode_flags); @@ -10517,9 +10521,6 @@ namespace basist } case transcoder_texture_format::cTFETC2_EAC_RG11: { - //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_RG11, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState, - // nullptr, 0, - // 0, 3); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC2_EAC_RG11, bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, 0, 3, decode_flags); @@ -10531,7 +10532,6 @@ namespace basist } case transcoder_texture_format::cTFRGBA32: { - //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGBA32, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA32, bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags); if (!status) @@ -10542,7 +10542,6 @@ namespace basist } case transcoder_texture_format::cTFRGB565: { - //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGB565, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB565, bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags); if (!status) @@ -10553,7 +10552,6 @@ namespace basist } 
case transcoder_texture_format::cTFBGR565: { - //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBGR565, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBGR565, bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags); if (!status) @@ -10564,7 +10562,6 @@ namespace basist } case transcoder_texture_format::cTFRGBA4444: { - //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGBA4444, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA4444, bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags); if (!status) @@ -10583,9 +10580,10 @@ namespace basist return status; } - + + //------------------------------------------------------------------------------------------------ + // UASTC HDR 4x4 transcoding //------------------------------------------------------------------------------------------------ - // UASTC HDR 4x4 basisu_lowlevel_uastc_hdr_4x4_transcoder::basisu_lowlevel_uastc_hdr_4x4_transcoder() { @@ -10593,7 +10591,7 @@ namespace basist bool basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_slice( void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, - uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, + uint32_t 
output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels, basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags) { @@ -10642,7 +10640,7 @@ namespace basist bool status = false; // TODO: Optimize pure memcpy() case. - + for (uint32_t block_y = 0; block_y < num_blocks_y; ++block_y) { void* pDst_block = (uint8_t*)pDst_blocks + block_y * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes; @@ -10677,7 +10675,7 @@ namespace basist uint32_t blk_texels[4][4]; status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeRGB9E5); - + if (status) { const uint32_t max_x = basisu::minimum(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); @@ -10691,7 +10689,7 @@ namespace basist } // y } } - + break; } case block_format::cRGBA_HALF: @@ -10703,7 +10701,7 @@ namespace basist half_float* pDst_pixels = reinterpret_cast( static_cast(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(half_float) * 4 ); - + half_float blk_texels[4][4][4]; status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeHDR16); @@ -10769,7 +10767,7 @@ namespace basist if (!status) { - BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_slice: Transcoder failed to unpack a UASTC HDR block - this is a bug, or the data was corrupted\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_slice: Transcoder failed to unpack a UASTC HDR block - this is a bug, or the data was corrupted\n"); return false; } @@ -10825,7 +10823,7 @@ namespace basist const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format); //const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y; - if 
(!basis_validate_output_buffer_size(basis_tex_format::cUASTC_HDR_4x4, target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels)) + if (!basis_validate_output_buffer_size(target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels)) { BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_image: output buffer size too small\n"); return false; @@ -10962,7 +10960,7 @@ namespace basist assert(((orig_width + 5) / 6) == num_blocks_x); assert(((orig_height + 5) / 6) == num_blocks_y); - + if (fmt == block_format::cBC6H) { const uint32_t num_dst_blocks_x = (orig_width + 3) / 4; @@ -10987,7 +10985,7 @@ namespace basist fast_bc6h_params bc6h_enc_params; const bool hq_flag = (decode_flags & cDecodeFlagsHighQuality) != 0; bc6h_enc_params.m_max_2subset_pats_to_try = hq_flag ? 1 : 0; - + for (uint32_t src_block_y = 0; src_block_y < num_blocks_y; src_block_y += 2) { const uint32_t num_inner_blocks_y = basisu::minimum(2, num_blocks_y - src_block_y); @@ -11003,7 +11001,7 @@ namespace basist const astc_blk* pS = pSource_block + (src_block_y + iy) * num_blocks_x + (src_block_x + ix); half_float blk_texels[6][6][4]; - + astc_helpers::log_astc_block log_blk; status = astc_helpers::unpack_block(pS, log_blk, 6, 6); if (!status) @@ -11011,7 +11009,7 @@ namespace basist BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_slice: Transcoder failed to unpack a ASTC HDR block - this is a bug, or the data was corrupted\n"); return false; } - + status = astc_helpers::decode_block(log_blk, blk_texels, 6, 6, astc_helpers::cDecodeModeHDR16); if (!status) { @@ -11026,14 +11024,14 @@ namespace basist unpacked_blocks[iy * 6 + y][ix * 6 + x][0] = blk_texels[y][x][0]; unpacked_blocks[iy * 6 + y][ix * 6 + x][1] = blk_texels[y][x][1]; unpacked_blocks[iy * 6 + y][ix * 6 + x][2] = blk_texels[y][x][2]; - + 
} // x } // y } // ix } // iy - + const uint32_t dst_x = src_block_x * 6; assert((dst_x & 3) == 0); const uint32_t dst_block_x = dst_x >> 2; @@ -11066,10 +11064,10 @@ namespace basist src_pixels[y][x][0] = unpacked_blocks[src_pixel_y][src_pixel_x][0]; src_pixels[y][x][1] = unpacked_blocks[src_pixel_y][src_pixel_x][1]; src_pixels[y][x][2] = unpacked_blocks[src_pixel_y][src_pixel_x][2]; - + } // x } // y - + astc_6x6_hdr::fast_encode_bc6h(&src_pixels[0][0][0], pDst_block, bc6h_enc_params); } // dx @@ -11078,7 +11076,7 @@ namespace basist } // block_x } // block_y - + status = true; } else @@ -11204,7 +11202,7 @@ namespace basist if (!status) { - BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_slice: Transcoder failed to unpack a ASTC HDR block - this is a bug, or the data was corrupted\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_slice: Transcoder failed to unpack a ASTC HDR block - this is a bug, or the data was corrupted\n"); return false; } @@ -11261,7 +11259,7 @@ namespace basist const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format); //const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y; - if (!basis_validate_output_buffer_size(basis_tex_format::cASTC_HDR_6x6, target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels)) + if (!basis_validate_output_buffer_size(target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels)) { BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_image: output buffer size too small\n"); return false; @@ -11334,14 +11332,14 @@ namespace basist } //------------------------------------------------------------------------------------------------ - // ASTC 6x6 HDR intermediate + // UASTC 6x6 HDR intermediate - 
basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder() + basisu_lowlevel_uastc_hdr_6x6_intermediate_transcoder::basisu_lowlevel_uastc_hdr_6x6_intermediate_transcoder() { } // num_blocks_x/num_blocks_y are source 6x6 blocks - bool basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice( + bool basisu_lowlevel_uastc_hdr_6x6_intermediate_transcoder::transcode_slice( void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels, @@ -11359,7 +11357,7 @@ namespace basist assert(g_transcoder_initialized); if (!g_transcoder_initialized) { - BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: Transcoder not globally initialized.\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_6x6_intermediate_transcoder::transcode_slice: Transcoder not globally initialized.\n"); return false; } @@ -11372,14 +11370,14 @@ namespace basist bool dec_status = astc_6x6_hdr::decode_6x6_hdr(pImage_data, image_data_size, decoded_blocks, dec_width, dec_height); if (!dec_status) { - BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: decode_6x6_hdr() failed.\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_6x6_intermediate_transcoder::transcode_slice: decode_6x6_hdr() failed.\n"); return false; } if ((dec_width != orig_width) || (dec_height != orig_height) || (decoded_blocks.get_width() != num_blocks_x) || (decoded_blocks.get_height() != num_blocks_y)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: unexpected decoded width/height\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_6x6_intermediate_transcoder::transcode_slice: 
unexpected decoded width/height\n"); return false; } @@ -11422,20 +11420,20 @@ namespace basist } else if (output_row_pitch_in_blocks_or_pixels < num_dst_blocks_x) { - BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: output_row_pitch_in_blocks_or_pixels is too low\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_6x6_intermediate_transcoder::transcode_slice: output_row_pitch_in_blocks_or_pixels is too low\n"); return false; } if (output_block_or_pixel_stride_in_bytes != sizeof(bc6h_block)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: invalid output_block_or_pixel_stride_in_bytes\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_6x6_intermediate_transcoder::transcode_slice: invalid output_block_or_pixel_stride_in_bytes\n"); return false; } fast_bc6h_params bc6h_enc_params; const bool hq_flag = (decode_flags & cDecodeFlagsHighQuality) != 0; bc6h_enc_params.m_max_2subset_pats_to_try = hq_flag ? 
1 : 0; - + for (uint32_t src_block_y = 0; src_block_y < num_blocks_y; src_block_y += 2) { const uint32_t num_inner_blocks_y = basisu::minimum(2, num_blocks_y - src_block_y); @@ -11456,14 +11454,14 @@ namespace basist status = astc_helpers::unpack_block(pS, log_blk, 6, 6); if (!status) { - BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: Transcoder failed to unpack a ASTC HDR block - this is a bug, or the data was corrupted\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_6x6_intermediate_transcoder::transcode_slice: Transcoder failed to unpack a ASTC HDR block - this is a bug, or the data was corrupted\n"); return false; } status = astc_helpers::decode_block(log_blk, blk_texels, 6, 6, astc_helpers::cDecodeModeHDR16); if (!status) { - BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: Transcoder failed to unpack a ASTC HDR block - this is a bug, or the data was corrupted\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_6x6_intermediate_transcoder::transcode_slice: Transcoder failed to unpack a ASTC HDR block - this is a bug, or the data was corrupted\n"); return false; } @@ -11516,7 +11514,7 @@ namespace basist } // x } // y - + astc_6x6_hdr::fast_encode_bc6h(&src_pixels[0][0][0], pDst_block, bc6h_enc_params); } // dx @@ -11651,7 +11649,7 @@ namespace basist if (!status) { - BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: Transcoder failed to unpack a ASTC HDR block - this is a bug, or the data was corrupted\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_6x6_intermediate_transcoder::transcode_slice: Transcoder failed to unpack a ASTC HDR block - this is a bug, or the data was corrupted\n"); return false; } @@ -11662,7 +11660,7 @@ namespace basist return true; #else - BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: ASTC HDR is unsupported\n"); + 
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_6x6_intermediate_transcoder::transcode_slice: ASTC HDR is unsupported\n"); BASISU_NOTE_UNUSED(decode_flags); BASISU_NOTE_UNUSED(channel0); @@ -11681,7 +11679,7 @@ namespace basist #endif } - bool basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image( + bool basisu_lowlevel_uastc_hdr_6x6_intermediate_transcoder::transcode_image( transcoder_texture_format target_format, void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, const uint8_t* pCompressed_data, uint32_t compressed_data_length, @@ -11701,16 +11699,16 @@ namespace basist if (((uint64_t)slice_offset + slice_length) > (uint64_t)compressed_data_length) { - BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: source data buffer too small\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_6x6_intermediate_transcoder::transcode_image: source data buffer too small\n"); return false; } const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format); //const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y; - if (!basis_validate_output_buffer_size(basis_tex_format::cASTC_HDR_6x6, target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels)) + if (!basis_validate_output_buffer_size(target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: output buffer size too small\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_6x6_intermediate_transcoder::transcode_image: output buffer size too small\n"); return false; } @@ -11725,7 +11723,7 @@ namespace basist if (!status) { - BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: transcode_slice() to ASTC_HDR 
failed\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_6x6_intermediate_transcoder::transcode_image: transcode_slice() to ASTC_HDR failed\n"); } break; } @@ -11735,7 +11733,7 @@ namespace basist bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); if (!status) { - BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: transcode_slice() to BC6H failed\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_6x6_intermediate_transcoder::transcode_image: transcode_slice() to BC6H failed\n"); } break; } @@ -11745,7 +11743,7 @@ namespace basist bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); if (!status) { - BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: transcode_slice() to RGB_HALF failed\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_6x6_intermediate_transcoder::transcode_image: transcode_slice() to RGB_HALF failed\n"); } break; } @@ -11755,7 +11753,7 @@ namespace basist bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); if (!status) { - BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_6x6_intermediate_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n"); } break; } @@ -11765,14 +11763,14 @@ namespace basist bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1 , decode_flags); if (!status) { - 
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_6x6_intermediate_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n"); } break; } default: { assert(0); - BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: Invalid format\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_6x6_intermediate_transcoder::transcode_image: Invalid format\n"); break; } } @@ -11781,7 +11779,7 @@ namespace basist } //------------------------------------------------------------------------------------------------ - + basisu_transcoder::basisu_transcoder() : m_ready_to_transcode(false) { @@ -11809,7 +11807,7 @@ namespace basist return false; } } -#endif +#endif return true; } @@ -11896,7 +11894,7 @@ namespace basist return false; } } - + // This flag dates back to pre-Basis Universal, when .basis supported full ETC1 too. if ((pHeader->m_flags & cBASISHeaderFlagETC1S) == 0) { @@ -11912,7 +11910,7 @@ namespace basist return false; } } - + if ((pHeader->m_slice_desc_file_ofs >= data_size) || ((data_size - pHeader->m_slice_desc_file_ofs) < (sizeof(basis_slice_desc) * pHeader->m_total_slices)) ) @@ -12028,20 +12026,20 @@ namespace basist image_info.m_image_index = image_index; image_info.m_total_levels = total_levels; - + image_info.m_alpha_flag = false; // For ETC1S, if anything has alpha all images have alpha. For UASTC, we only report alpha when the image actually has alpha. 
if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) - image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; + image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; else image_info.m_alpha_flag = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0; image_info.m_iframe_flag = (slice_desc.m_flags & cSliceDescFlagsFrameIsIFrame) != 0; - + const uint32_t block_width = basis_tex_format_get_block_width((basis_tex_format)((uint32_t)pHeader->m_tex_format)); const uint32_t block_height = basis_tex_format_get_block_height((basis_tex_format)((uint32_t)pHeader->m_tex_format)); - + image_info.m_width = slice_desc.m_num_blocks_x * block_width; image_info.m_height = slice_desc.m_num_blocks_y * block_height; image_info.m_orig_width = slice_desc.m_orig_width; @@ -12161,13 +12159,13 @@ namespace basist image_info.m_image_index = image_index; image_info.m_level_index = level_index; - + // For ETC1S, if anything has alpha all images have alpha. For UASTC, we only report alpha when the image actually has alpha. 
if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; else image_info.m_alpha_flag = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0; - + const uint32_t block_width = basis_tex_format_get_block_width((basis_tex_format)((uint32_t)pHeader->m_tex_format)); const uint32_t block_height = basis_tex_format_get_block_height((basis_tex_format)((uint32_t)pHeader->m_tex_format)); @@ -12230,8 +12228,9 @@ namespace basist file_info.m_tex_format = static_cast(static_cast(pHeader->m_tex_format)); file_info.m_etc1s = (pHeader->m_tex_format == (int)basis_tex_format::cETC1S); - + file_info.m_y_flipped = (pHeader->m_flags & cBASISHeaderFlagYFlipped) != 0; + file_info.m_srgb = (pHeader->m_flags & cBASISHeaderFlagSRGB) != 0; file_info.m_has_alpha_slices = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; const uint32_t total_slices = pHeader->m_total_slices; @@ -12302,7 +12301,7 @@ namespace basist return true; } - + bool basisu_transcoder::start_transcoding(const void* pData, uint32_t data_size) { if (!validate_header_quick(pData, data_size)) @@ -12410,7 +12409,7 @@ namespace basist m_lowlevel_etc1s_decoder.clear(); } } - + m_ready_to_transcode = true; return true; @@ -12421,7 +12420,7 @@ namespace basist m_lowlevel_etc1s_decoder.clear(); m_ready_to_transcode = false; - + return true; } @@ -12459,6 +12458,8 @@ namespace basist const basis_slice_desc& slice_desc = reinterpret_cast(pDataU8 + pHeader->m_slice_desc_file_ofs)[slice_index]; + const uint32_t dst_block_width = get_block_width(fmt), dst_block_height = get_block_height(fmt); + if (basis_block_format_is_uncompressed(fmt)) { // Assume the output buffer is orig_width by orig_height @@ -12487,33 +12488,23 @@ namespace basist return false; } } - else if (fmt == block_format::cASTC_HDR_6x6) - { - const uint32_t num_blocks_6x6_x = (slice_desc.m_orig_width + 5) / 6; - const uint32_t num_blocks_6x6_y = (slice_desc.m_orig_height + 5) / 
6; - const uint32_t total_blocks_6x6 = num_blocks_6x6_x * num_blocks_6x6_y; - - if (output_blocks_buf_size_in_blocks_or_pixels < total_blocks_6x6) - { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: output_blocks_buf_size_in_blocks_or_pixels < total_blocks_6x6\n"); - return false; - } - } else { - // must be a 4x4 pixel block format - const uint32_t num_blocks_4x4_x = (slice_desc.m_orig_width + 3) / 4; - const uint32_t num_blocks_4x4_y = (slice_desc.m_orig_height + 3) / 4; - const uint32_t total_4x4_blocks = num_blocks_4x4_x * num_blocks_4x4_y; + const uint32_t dst_num_blocks_x = (slice_desc.m_orig_width + dst_block_width - 1) / dst_block_width; + const uint32_t dst_num_blocks_y = (slice_desc.m_orig_height + dst_block_height - 1) / dst_block_height; + const uint32_t dst_total_blocks = dst_num_blocks_x * dst_num_blocks_y; - if (output_blocks_buf_size_in_blocks_or_pixels < total_4x4_blocks) + if (output_blocks_buf_size_in_blocks_or_pixels < dst_total_blocks) { BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: output_blocks_buf_size_in_blocks_or_pixels < total_blocks\n"); return false; } } + + const bool is_xuastc_ldr = basis_tex_format_is_xuastc_ldr((basis_tex_format)(uint32_t)pHeader->m_tex_format); + const bool is_astc_ldr = basis_tex_format_is_astc_ldr((basis_tex_format)(uint32_t)pHeader->m_tex_format); - if ((pHeader->m_tex_format == (uint32_t)basis_tex_format::cETC1S) || (pHeader->m_tex_format == (uint32_t)basis_tex_format::cUASTC4x4)) + if ((pHeader->m_tex_format == (uint32_t)basis_tex_format::cETC1S) || (pHeader->m_tex_format == (uint32_t)basis_tex_format::cUASTC_LDR_4x4) || is_xuastc_ldr || is_astc_ldr) { if ((fmt == block_format::cPVRTC1_4_RGB) || (fmt == block_format::cPVRTC1_4_RGBA)) { @@ -12538,16 +12529,18 @@ namespace basist BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: invalid slice_desc.m_file_size, or passed in buffer too small\n"); return false; } - + if (pHeader->m_tex_format == (int)basis_tex_format::cASTC_HDR_6x6) { + // 
ASTC HDR 6x6 return m_lowlevel_astc_6x6_hdr_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y, pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size, fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); } - else if (pHeader->m_tex_format == (int)basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE) + else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE) { + // UASTC HDR 6x6 return m_lowlevel_astc_6x6_hdr_intermediate_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y, pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size, fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState, @@ -12555,20 +12548,33 @@ namespace basist } else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC_HDR_4x4) { + // UASTC HDR 4x4 return m_lowlevel_uastc_4x4_hdr_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y, pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size, fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); } - else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4) + else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC_LDR_4x4) + { + // UASTC LDR 4x4 + return m_lowlevel_uastc_ldr_4x4_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y, + pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size, + fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & 
cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState, + output_rows_in_pixels, channel0, channel1, decode_flags); + } + else if ((is_xuastc_ldr) || (is_astc_ldr)) { - return m_lowlevel_uastc_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y, + // XUASTC LDR 4x4-12x12 or ASTC LDR 4x4-12x12 + const bool use_astc_srgb_decode_profile = (pHeader->m_flags & cBASISHeaderFlagSRGB) != 0; + + return m_lowlevel_xuastc_ldr_decoder.transcode_slice((basis_tex_format)(uint32_t)pHeader->m_tex_format, use_astc_srgb_decode_profile, pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y, pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size, fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); } else { + // must be ETC1S return m_lowlevel_etc1s_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y, pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size, fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState, @@ -12647,7 +12653,7 @@ namespace basist if (!output_row_pitch_in_blocks_or_pixels) output_row_pitch_in_blocks_or_pixels = num_blocks_x; - + if ((fmt == block_format::cETC2_EAC_A8) || (fmt == block_format::cETC2_EAC_R11)) { #if BASISD_SUPPORT_ETC2_EAC_A8 @@ -12733,7 +12739,7 @@ namespace basist if (slice_index < 0) { BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: failed finding slice index\n"); - // Unable to find the requested image/level + // Unable to find the requested image/level return false; } @@ -12742,7 +12748,7 @@ namespace basist // Switch to PVRTC1 RGB if the input doesn't have alpha. 
fmt = transcoder_texture_format::cTFPVRTC1_4_RGB; } - + if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) { if (pSlice_descs[slice_index].m_flags & cSliceDescFlagsHasAlpha) @@ -12779,10 +12785,13 @@ namespace basist } } } - + bool status = false; - if ((pHeader->m_tex_format == (int)basis_tex_format::cETC1S) || (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)) + const bool is_xuastc_ldr = basis_tex_format_is_xuastc_ldr((basis_tex_format)(uint32_t)pHeader->m_tex_format); + const bool is_astc_ldr = basis_tex_format_is_astc_ldr((basis_tex_format)(uint32_t)pHeader->m_tex_format); + + if ((pHeader->m_tex_format == (int)basis_tex_format::cETC1S) || (pHeader->m_tex_format == (int)basis_tex_format::cUASTC_LDR_4x4) || is_xuastc_ldr || is_astc_ldr) { // Only do this on 4x4 LDR formats that supports transcoding to PVRTC1. const uint32_t total_slice_blocks = pSlice_descs[slice_index].m_num_blocks_x * pSlice_descs[slice_index].m_num_blocks_y; @@ -12790,28 +12799,28 @@ namespace basist if (((fmt == transcoder_texture_format::cTFPVRTC1_4_RGB) || (fmt == transcoder_texture_format::cTFPVRTC1_4_RGBA)) && (output_blocks_buf_size_in_blocks_or_pixels > total_slice_blocks)) { // The transcoder doesn't write beyond total_slice_blocks, so we need to clear the rest ourselves. - // For GL usage, PVRTC1 4bpp image size is (max(width, 8)* max(height, 8) * 4 + 7) / 8. + // For GL usage, PVRTC1 4bpp image size is (max(width, 8)* max(height, 8) * 4 + 7) / 8. // However, for KTX and internally in Basis this formula isn't used, it's just ((width+3)/4) * ((height+3)/4) * bytes_per_block_or_pixel. This is all the transcoder actually writes to memory. 
memset(static_cast(pOutput_blocks) + total_slice_blocks * bytes_per_block_or_pixel, 0, (output_blocks_buf_size_in_blocks_or_pixels - total_slice_blocks) * bytes_per_block_or_pixel); } } - + if (pHeader->m_tex_format == (int)basis_tex_format::cASTC_HDR_6x6) { + // ASTC HDR 6x6 const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index]; - // Use the container independent image transcode method. status = m_lowlevel_astc_6x6_hdr_decoder.transcode_image(fmt, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, (const uint8_t*)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index, pSlice_desc->m_file_ofs, pSlice_desc->m_file_size, decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); } - else if (pHeader->m_tex_format == (int)basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE) + else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE) { + // UASTC HDR 6x6 const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index]; - // Use the container independent image transcode method. status = m_lowlevel_astc_6x6_hdr_intermediate_decoder.transcode_image(fmt, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, (const uint8_t*)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index, @@ -12820,28 +12829,50 @@ namespace basist } else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC_HDR_4x4) { + // UASTC HDR 4x4 const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index]; - // Use the container independent image transcode method. 
status = m_lowlevel_uastc_4x4_hdr_decoder.transcode_image(fmt, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, (const uint8_t*)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index, pSlice_desc->m_file_ofs, pSlice_desc->m_file_size, decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); } - else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4) + else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC_LDR_4x4) { + // UASTC LDR 4x4 const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index]; - // Use the container independent image transcode method. - status = m_lowlevel_uastc_decoder.transcode_image(fmt, + status = m_lowlevel_uastc_ldr_4x4_decoder.transcode_image(fmt, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, (const uint8_t*)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index, pSlice_desc->m_file_ofs, pSlice_desc->m_file_size, decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); } - else + else if (is_xuastc_ldr || is_astc_ldr) + { + // XUASTC LDR 4x4-12x12 or ASTC LDR 4x4-12x12 + const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index]; + + const bool use_astc_srgb_decode_profile = (pHeader->m_flags & cBASISHeaderFlagSRGB) != 0; + + status = m_lowlevel_xuastc_ldr_decoder.transcode_image((basis_tex_format)(uint32_t)pHeader->m_tex_format, use_astc_srgb_decode_profile, fmt, + pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, + (const uint8_t*)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, 
pSlice_desc->m_level_index, + pSlice_desc->m_file_ofs, pSlice_desc->m_file_size, + decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); + } + else { + // ETC1S + + // sanity check + if (pHeader->m_tex_format != (uint32_t)basis_tex_format::cETC1S) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: unsupported texture format\n"); + return false; + } + // ETC1S const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index]; const basis_slice_desc* pAlpha_slice_desc = basis_file_has_alpha_slices ? &pSlice_descs[slice_index + 1] : nullptr; @@ -12866,14 +12897,14 @@ namespace basist decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); } // if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4) - + if (!status) { BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: Returning false\n"); } else { - //BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: Returning true\n"); + //BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: Returning true\n"); } return status; @@ -12899,7 +12930,22 @@ namespace basist case transcoder_texture_format::cTFETC2_RGBA: case transcoder_texture_format::cTFBC3_RGBA: case transcoder_texture_format::cTFBC5_RG: - case transcoder_texture_format::cTFASTC_4x4_RGBA: + + case transcoder_texture_format::cTFASTC_LDR_4x4_RGBA: + case transcoder_texture_format::cTFASTC_LDR_5x4_RGBA: + case transcoder_texture_format::cTFASTC_LDR_5x5_RGBA: + case transcoder_texture_format::cTFASTC_LDR_6x5_RGBA: + case transcoder_texture_format::cTFASTC_LDR_6x6_RGBA: + case transcoder_texture_format::cTFASTC_LDR_8x5_RGBA: + case transcoder_texture_format::cTFASTC_LDR_8x6_RGBA: + case transcoder_texture_format::cTFASTC_LDR_10x5_RGBA: + case transcoder_texture_format::cTFASTC_LDR_10x6_RGBA: + case 
transcoder_texture_format::cTFASTC_LDR_8x8_RGBA: + case transcoder_texture_format::cTFASTC_LDR_10x8_RGBA: + case transcoder_texture_format::cTFASTC_LDR_10x10_RGBA: + case transcoder_texture_format::cTFASTC_LDR_12x10_RGBA: + case transcoder_texture_format::cTFASTC_LDR_12x12_RGBA: + case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: case transcoder_texture_format::cTFASTC_HDR_6x6_RGBA: case transcoder_texture_format::cTFATC_RGBA: @@ -12939,7 +12985,7 @@ namespace basist case transcoder_texture_format::cTFETC2_RGBA: return "ETC2_RGBA"; case transcoder_texture_format::cTFBC3_RGBA: return "BC3_RGBA"; case transcoder_texture_format::cTFBC5_RG: return "BC5_RG"; - case transcoder_texture_format::cTFASTC_4x4_RGBA: return "ASTC_RGBA"; + case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: return "ASTC_HDR_4X4_RGBA"; case transcoder_texture_format::cTFASTC_HDR_6x6_RGBA: return "ASTC_HDR_6X6_RGBA"; case transcoder_texture_format::cTFATC_RGB: return "ATC_RGB"; @@ -12957,6 +13003,22 @@ namespace basist case transcoder_texture_format::cTFETC2_EAC_R11: return "ETC2_EAC_R11"; case transcoder_texture_format::cTFETC2_EAC_RG11: return "ETC2_EAC_RG11"; case transcoder_texture_format::cTFBC6H: return "BC6H"; + + case transcoder_texture_format::cTFASTC_LDR_4x4_RGBA: return "ASTC_LDR_4X4_RGBA"; + case transcoder_texture_format::cTFASTC_LDR_5x4_RGBA: return "ASTC_LDR_5X4_RGBA"; + case transcoder_texture_format::cTFASTC_LDR_5x5_RGBA: return "ASTC_LDR_5X5_RGBA"; + case transcoder_texture_format::cTFASTC_LDR_6x5_RGBA: return "ASTC_LDR_6X5_RGBA"; + case transcoder_texture_format::cTFASTC_LDR_6x6_RGBA: return "ASTC_LDR_6X6_RGBA"; + case transcoder_texture_format::cTFASTC_LDR_8x5_RGBA: return "ASTC_LDR_8X5_RGBA"; + case transcoder_texture_format::cTFASTC_LDR_8x6_RGBA: return "ASTC_LDR_8X6_RGBA"; + case transcoder_texture_format::cTFASTC_LDR_10x5_RGBA: return "ASTC_LDR_10X5_RGBA"; + case transcoder_texture_format::cTFASTC_LDR_10x6_RGBA: return "ASTC_LDR_10X6_RGBA"; + case 
transcoder_texture_format::cTFASTC_LDR_8x8_RGBA: return "ASTC_LDR_8X8_RGBA"; + case transcoder_texture_format::cTFASTC_LDR_10x8_RGBA: return "ASTC_LDR_10X8_RGBA"; + case transcoder_texture_format::cTFASTC_LDR_10x10_RGBA: return "ASTC_LDR_10X10_RGBA"; + case transcoder_texture_format::cTFASTC_LDR_12x10_RGBA: return "ASTC_LDR_12X10_RGBA"; + case transcoder_texture_format::cTFASTC_LDR_12x12_RGBA: return "ASTC_LDR_12X12_RGBA"; + default: assert(0); BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n"); @@ -12965,6 +13027,51 @@ namespace basist return ""; } + const char* basis_get_tex_format_name(basis_tex_format fmt) + { + switch (fmt) + { + case basis_tex_format::cETC1S: return "ETC1S"; break; + case basis_tex_format::cUASTC_LDR_4x4: return "UASTC LDR 4x4"; break; + case basis_tex_format::cUASTC_HDR_4x4: return "UASTC_HDR_4x4"; break; + case basis_tex_format::cASTC_HDR_6x6: return "ASTC_HDR_6x6"; break; + case basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE: return "UASTC_HDR_6x6"; break; + case basis_tex_format::cXUASTC_LDR_4x4: return "XUASTC LDR 4x4"; break; + case basis_tex_format::cXUASTC_LDR_5x4: return "XUASTC LDR 5x4"; break; + case basis_tex_format::cXUASTC_LDR_5x5: return "XUASTC LDR 5x5"; break; + case basis_tex_format::cXUASTC_LDR_6x5: return "XUASTC LDR 6x5"; break; + case basis_tex_format::cXUASTC_LDR_6x6: return "XUASTC LDR 6x6"; break; + case basis_tex_format::cXUASTC_LDR_8x5: return "XUASTC LDR 8x5"; break; + case basis_tex_format::cXUASTC_LDR_8x6: return "XUASTC LDR 8x6"; break; + case basis_tex_format::cXUASTC_LDR_10x5: return "XUASTC LDR 10x5"; break; + case basis_tex_format::cXUASTC_LDR_10x6: return "XUASTC LDR 10x6"; break; + case basis_tex_format::cXUASTC_LDR_8x8: return "XUASTC LDR 8x8"; break; + case basis_tex_format::cXUASTC_LDR_10x8: return "XUASTC LDR 10x8"; break; + case basis_tex_format::cXUASTC_LDR_10x10: return "XUASTC LDR 10x10"; break; + case basis_tex_format::cXUASTC_LDR_12x10: return "XUASTC LDR 12x10"; break; + case 
basis_tex_format::cXUASTC_LDR_12x12: return "XUASTC LDR 12x12"; break; + case basis_tex_format::cASTC_LDR_4x4: return "ASTC LDR 4x4"; break; + case basis_tex_format::cASTC_LDR_5x4: return "ASTC LDR 5x4"; break; + case basis_tex_format::cASTC_LDR_5x5: return "ASTC LDR 5x5"; break; + case basis_tex_format::cASTC_LDR_6x5: return "ASTC LDR 6x5"; break; + case basis_tex_format::cASTC_LDR_6x6: return "ASTC LDR 6x6"; break; + case basis_tex_format::cASTC_LDR_8x5: return "ASTC LDR 8x5"; break; + case basis_tex_format::cASTC_LDR_8x6: return "ASTC LDR 8x6"; break; + case basis_tex_format::cASTC_LDR_10x5: return "ASTC LDR 10x5"; break; + case basis_tex_format::cASTC_LDR_10x6: return "ASTC LDR 10x6"; break; + case basis_tex_format::cASTC_LDR_8x8: return "ASTC LDR 8x8"; break; + case basis_tex_format::cASTC_LDR_10x8: return "ASTC LDR 10x8"; break; + case basis_tex_format::cASTC_LDR_10x10: return "ASTC LDR 10x10"; break; + case basis_tex_format::cASTC_LDR_12x10: return "ASTC LDR 12x10"; break; + case basis_tex_format::cASTC_LDR_12x12: return "ASTC LDR 12x12"; break; + default: + assert(0); + BASISU_DEVEL_ERROR("basis_get_tex_format_name: Invalid parameter\n"); + break; + } + return ""; + } + const char* basis_get_block_format_name(block_format fmt) { switch (fmt) @@ -12976,7 +13083,22 @@ namespace basist case block_format::cBC7: return "BC7"; case block_format::cETC2_RGBA: return "ETC2_RGBA"; case block_format::cBC3: return "BC3"; - case block_format::cASTC_4x4: return "ASTC_4x4"; + + case block_format::cASTC_LDR_4x4: return "ASTC_LDR_4x4"; + case block_format::cASTC_LDR_5x4: return "ASTC_LDR_5x4"; + case block_format::cASTC_LDR_5x5: return "ASTC_LDR_5x5"; + case block_format::cASTC_LDR_6x5: return "ASTC_LDR_6x5"; + case block_format::cASTC_LDR_6x6: return "ASTC_LDR_6x6"; + case block_format::cASTC_LDR_8x5: return "ASTC_LDR_8x5"; + case block_format::cASTC_LDR_8x6: return "ASTC_LDR_8x6"; + case block_format::cASTC_LDR_10x5: return "ASTC_LDR_10x5"; + case 
block_format::cASTC_LDR_10x6: return "ASTC_LDR_10x6"; + case block_format::cASTC_LDR_8x8: return "ASTC_LDR_8x8"; + case block_format::cASTC_LDR_10x8: return "ASTC_LDR_10x8"; + case block_format::cASTC_LDR_10x10: return "ASTC_LDR_10x10"; + case block_format::cASTC_LDR_12x10: return "ASTC_LDR_12x10"; + case block_format::cASTC_LDR_12x12: return "ASTC_LDR_12x12"; + case block_format::cATC_RGB: return "ATC_RGB"; case block_format::cRGBA32: return "RGBA32"; case block_format::cRGB565: return "RGB565"; @@ -13027,7 +13149,22 @@ namespace basist { case transcoder_texture_format::cTFETC2_RGBA: case transcoder_texture_format::cTFBC3_RGBA: - case transcoder_texture_format::cTFASTC_4x4_RGBA: + + case transcoder_texture_format::cTFASTC_LDR_4x4_RGBA: + case transcoder_texture_format::cTFASTC_LDR_5x4_RGBA: + case transcoder_texture_format::cTFASTC_LDR_5x5_RGBA: + case transcoder_texture_format::cTFASTC_LDR_6x5_RGBA: + case transcoder_texture_format::cTFASTC_LDR_6x6_RGBA: + case transcoder_texture_format::cTFASTC_LDR_8x5_RGBA: + case transcoder_texture_format::cTFASTC_LDR_8x6_RGBA: + case transcoder_texture_format::cTFASTC_LDR_10x5_RGBA: + case transcoder_texture_format::cTFASTC_LDR_10x6_RGBA: + case transcoder_texture_format::cTFASTC_LDR_8x8_RGBA: + case transcoder_texture_format::cTFASTC_LDR_10x8_RGBA: + case transcoder_texture_format::cTFASTC_LDR_10x10_RGBA: + case transcoder_texture_format::cTFASTC_LDR_12x10_RGBA: + case transcoder_texture_format::cTFASTC_LDR_12x12_RGBA: + case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: // technically this ASTC HDR format supports alpha, but we currently don't exploit that in our encoders case transcoder_texture_format::cTFASTC_HDR_6x6_RGBA: // technically this ASTC HDR format supports alpha, but we currently don't exploit that in our encoders case transcoder_texture_format::cTFBC7_RGBA: @@ -13062,6 +13199,33 @@ namespace basist return false; } + bool basis_is_transcoder_texture_format_astc(transcoder_texture_format fmt) + { + switch 
(fmt) + { + case transcoder_texture_format::cTFASTC_LDR_4x4_RGBA: + case transcoder_texture_format::cTFASTC_LDR_5x4_RGBA: + case transcoder_texture_format::cTFASTC_LDR_5x5_RGBA: + case transcoder_texture_format::cTFASTC_LDR_6x5_RGBA: + case transcoder_texture_format::cTFASTC_LDR_6x6_RGBA: + case transcoder_texture_format::cTFASTC_LDR_8x5_RGBA: + case transcoder_texture_format::cTFASTC_LDR_8x6_RGBA: + case transcoder_texture_format::cTFASTC_LDR_10x5_RGBA: + case transcoder_texture_format::cTFASTC_LDR_10x6_RGBA: + case transcoder_texture_format::cTFASTC_LDR_8x8_RGBA: + case transcoder_texture_format::cTFASTC_LDR_10x8_RGBA: + case transcoder_texture_format::cTFASTC_LDR_10x10_RGBA: + case transcoder_texture_format::cTFASTC_LDR_12x10_RGBA: + case transcoder_texture_format::cTFASTC_LDR_12x12_RGBA: + case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: + case transcoder_texture_format::cTFASTC_HDR_6x6_RGBA: + return true; + default: + break; + } + return false; + } + basisu::texture_format basis_get_basisu_texture_format(transcoder_texture_format fmt) { switch (fmt) @@ -13076,7 +13240,22 @@ namespace basist case transcoder_texture_format::cTFETC2_RGBA: return basisu::texture_format::cETC2_RGBA; case transcoder_texture_format::cTFBC3_RGBA: return basisu::texture_format::cBC3; case transcoder_texture_format::cTFBC5_RG: return basisu::texture_format::cBC5; - case transcoder_texture_format::cTFASTC_4x4_RGBA: return basisu::texture_format::cASTC_LDR_4x4; + + case transcoder_texture_format::cTFASTC_LDR_4x4_RGBA: return basisu::texture_format::cASTC_LDR_4x4; + case transcoder_texture_format::cTFASTC_LDR_5x4_RGBA: return basisu::texture_format::cASTC_LDR_5x4; + case transcoder_texture_format::cTFASTC_LDR_5x5_RGBA: return basisu::texture_format::cASTC_LDR_5x5; + case transcoder_texture_format::cTFASTC_LDR_6x5_RGBA: return basisu::texture_format::cASTC_LDR_6x5; + case transcoder_texture_format::cTFASTC_LDR_6x6_RGBA: return basisu::texture_format::cASTC_LDR_6x6; + case 
transcoder_texture_format::cTFASTC_LDR_8x5_RGBA: return basisu::texture_format::cASTC_LDR_8x5; + case transcoder_texture_format::cTFASTC_LDR_8x6_RGBA: return basisu::texture_format::cASTC_LDR_8x6; + case transcoder_texture_format::cTFASTC_LDR_10x5_RGBA: return basisu::texture_format::cASTC_LDR_10x5; + case transcoder_texture_format::cTFASTC_LDR_10x6_RGBA: return basisu::texture_format::cASTC_LDR_10x6; + case transcoder_texture_format::cTFASTC_LDR_8x8_RGBA: return basisu::texture_format::cASTC_LDR_8x8; + case transcoder_texture_format::cTFASTC_LDR_10x8_RGBA: return basisu::texture_format::cASTC_LDR_10x8; + case transcoder_texture_format::cTFASTC_LDR_10x10_RGBA: return basisu::texture_format::cASTC_LDR_10x10; + case transcoder_texture_format::cTFASTC_LDR_12x10_RGBA: return basisu::texture_format::cASTC_LDR_12x10; + case transcoder_texture_format::cTFASTC_LDR_12x12_RGBA: return basisu::texture_format::cASTC_LDR_12x12; + case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: return basisu::texture_format::cASTC_HDR_4x4; case transcoder_texture_format::cTFASTC_HDR_6x6_RGBA: return basisu::texture_format::cASTC_HDR_6x6; case transcoder_texture_format::cTFBC6H: return basisu::texture_format::cBC6HUnsigned; @@ -13142,14 +13321,14 @@ namespace basist } return false; } - + uint32_t basis_get_uncompressed_bytes_per_pixel(transcoder_texture_format fmt) { switch (fmt) { case transcoder_texture_format::cTFRGBA32: case transcoder_texture_format::cTFRGB_9E5: - return sizeof(uint32_t); + return sizeof(uint32_t); case transcoder_texture_format::cTFRGB565: case transcoder_texture_format::cTFBGR565: case transcoder_texture_format::cTFRGBA4444: @@ -13163,27 +13342,57 @@ namespace basist } return 0; } - - uint32_t basis_get_block_width(transcoder_texture_format tex_type) + + uint32_t basis_get_block_width(transcoder_texture_format fmt) { - switch (tex_type) + switch (fmt) { case transcoder_texture_format::cTFFXT1_RGB: return 8; case transcoder_texture_format::cTFASTC_HDR_6x6_RGBA: return 
6; + + case transcoder_texture_format::cTFASTC_LDR_5x4_RGBA: return 5; + case transcoder_texture_format::cTFASTC_LDR_5x5_RGBA: return 5; + case transcoder_texture_format::cTFASTC_LDR_6x5_RGBA: return 6; + case transcoder_texture_format::cTFASTC_LDR_6x6_RGBA: return 6; + case transcoder_texture_format::cTFASTC_LDR_8x5_RGBA: return 8; + case transcoder_texture_format::cTFASTC_LDR_8x6_RGBA: return 8; + case transcoder_texture_format::cTFASTC_LDR_10x5_RGBA: return 10; + case transcoder_texture_format::cTFASTC_LDR_10x6_RGBA: return 10; + case transcoder_texture_format::cTFASTC_LDR_8x8_RGBA: return 8; + case transcoder_texture_format::cTFASTC_LDR_10x8_RGBA: return 10; + case transcoder_texture_format::cTFASTC_LDR_10x10_RGBA: return 10; + case transcoder_texture_format::cTFASTC_LDR_12x10_RGBA: return 12; + case transcoder_texture_format::cTFASTC_LDR_12x12_RGBA: return 12; + default: break; } return 4; } - uint32_t basis_get_block_height(transcoder_texture_format tex_type) + uint32_t basis_get_block_height(transcoder_texture_format fmt) { - switch (tex_type) + switch (fmt) { case transcoder_texture_format::cTFASTC_HDR_6x6_RGBA: return 6; + + case transcoder_texture_format::cTFASTC_LDR_5x5_RGBA: return 5; + case transcoder_texture_format::cTFASTC_LDR_6x5_RGBA: return 5; + case transcoder_texture_format::cTFASTC_LDR_6x6_RGBA: return 6; + case transcoder_texture_format::cTFASTC_LDR_8x5_RGBA: return 5; + case transcoder_texture_format::cTFASTC_LDR_8x6_RGBA: return 6; + case transcoder_texture_format::cTFASTC_LDR_10x5_RGBA: return 5; + case transcoder_texture_format::cTFASTC_LDR_10x6_RGBA: return 6; + case transcoder_texture_format::cTFASTC_LDR_8x8_RGBA: return 8; + case transcoder_texture_format::cTFASTC_LDR_10x8_RGBA: return 8; + case transcoder_texture_format::cTFASTC_LDR_10x10_RGBA: return 10; + case transcoder_texture_format::cTFASTC_LDR_12x10_RGBA: return 10; + case transcoder_texture_format::cTFASTC_LDR_12x12_RGBA: return 12; + + default: break; } @@ -13195,8 +13404,36 
@@ namespace basist switch (fmt) { case basis_tex_format::cASTC_HDR_6x6: - case basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE: + case basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE: return 6; + case basis_tex_format::cXUASTC_LDR_4x4: return 4; + case basis_tex_format::cXUASTC_LDR_5x4: return 5; + case basis_tex_format::cXUASTC_LDR_5x5: return 5; + case basis_tex_format::cXUASTC_LDR_6x5: return 6; + case basis_tex_format::cXUASTC_LDR_6x6: return 6; + case basis_tex_format::cXUASTC_LDR_8x5: return 8; + case basis_tex_format::cXUASTC_LDR_8x6: return 8; + case basis_tex_format::cXUASTC_LDR_10x5: return 10; + case basis_tex_format::cXUASTC_LDR_10x6: return 10; + case basis_tex_format::cXUASTC_LDR_8x8: return 8; + case basis_tex_format::cXUASTC_LDR_10x8: return 10; + case basis_tex_format::cXUASTC_LDR_10x10: return 10; + case basis_tex_format::cXUASTC_LDR_12x10: return 12; + case basis_tex_format::cXUASTC_LDR_12x12: return 12; + case basis_tex_format::cASTC_LDR_4x4: return 4; + case basis_tex_format::cASTC_LDR_5x4: return 5; + case basis_tex_format::cASTC_LDR_5x5: return 5; + case basis_tex_format::cASTC_LDR_6x5: return 6; + case basis_tex_format::cASTC_LDR_6x6: return 6; + case basis_tex_format::cASTC_LDR_8x5: return 8; + case basis_tex_format::cASTC_LDR_8x6: return 8; + case basis_tex_format::cASTC_LDR_10x5: return 10; + case basis_tex_format::cASTC_LDR_10x6: return 10; + case basis_tex_format::cASTC_LDR_8x8: return 8; + case basis_tex_format::cASTC_LDR_10x8: return 10; + case basis_tex_format::cASTC_LDR_10x10: return 10; + case basis_tex_format::cASTC_LDR_12x10: return 12; + case basis_tex_format::cASTC_LDR_12x12: return 12; default: break; } @@ -13208,8 +13445,36 @@ namespace basist switch (fmt) { case basis_tex_format::cASTC_HDR_6x6: - case basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE: + case basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE: return 6; + case basis_tex_format::cXUASTC_LDR_4x4: return 4; + case basis_tex_format::cXUASTC_LDR_5x4: return 4; + case 
basis_tex_format::cXUASTC_LDR_5x5: return 5; + case basis_tex_format::cXUASTC_LDR_6x5: return 5; + case basis_tex_format::cXUASTC_LDR_6x6: return 6; + case basis_tex_format::cXUASTC_LDR_8x5: return 5; + case basis_tex_format::cXUASTC_LDR_8x6: return 6; + case basis_tex_format::cXUASTC_LDR_10x5: return 5; + case basis_tex_format::cXUASTC_LDR_10x6: return 6; + case basis_tex_format::cXUASTC_LDR_8x8: return 8; + case basis_tex_format::cXUASTC_LDR_10x8: return 8; + case basis_tex_format::cXUASTC_LDR_10x10: return 10; + case basis_tex_format::cXUASTC_LDR_12x10: return 10; + case basis_tex_format::cXUASTC_LDR_12x12: return 12; + case basis_tex_format::cASTC_LDR_4x4: return 4; + case basis_tex_format::cASTC_LDR_5x4: return 4; + case basis_tex_format::cASTC_LDR_5x5: return 5; + case basis_tex_format::cASTC_LDR_6x5: return 5; + case basis_tex_format::cASTC_LDR_6x6: return 6; + case basis_tex_format::cASTC_LDR_8x5: return 5; + case basis_tex_format::cASTC_LDR_8x6: return 6; + case basis_tex_format::cASTC_LDR_10x5: return 5; + case basis_tex_format::cASTC_LDR_10x6: return 6; + case basis_tex_format::cASTC_LDR_8x8: return 8; + case basis_tex_format::cASTC_LDR_10x8: return 8; + case basis_tex_format::cASTC_LDR_10x10: return 10; + case basis_tex_format::cASTC_LDR_12x10: return 10; + case basis_tex_format::cASTC_LDR_12x12: return 12; default: break; } @@ -13222,7 +13487,7 @@ namespace basist { case basis_tex_format::cUASTC_HDR_4x4: case basis_tex_format::cASTC_HDR_6x6: - case basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE: + case basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE: return true; default: break; @@ -13230,9 +13495,136 @@ namespace basist return false; } + // Given a basis_tex_format (mode or codec), return the corresponding ASTC texture_format with the proper block size from 4x4-12x12. 
+ basisu::texture_format basis_get_texture_format_from_xuastc_or_astc_ldr_basis_tex_format(basis_tex_format fmt) + { + switch (fmt) + { + case basis_tex_format::cXUASTC_LDR_4x4: + case basis_tex_format::cASTC_LDR_4x4: + return basisu::texture_format::cASTC_LDR_4x4; + case basis_tex_format::cXUASTC_LDR_5x4: + case basis_tex_format::cASTC_LDR_5x4: + return basisu::texture_format::cASTC_LDR_5x4; + case basis_tex_format::cXUASTC_LDR_5x5: + case basis_tex_format::cASTC_LDR_5x5: + return basisu::texture_format::cASTC_LDR_5x5; + case basis_tex_format::cXUASTC_LDR_6x5: + case basis_tex_format::cASTC_LDR_6x5: + return basisu::texture_format::cASTC_LDR_6x5; + case basis_tex_format::cXUASTC_LDR_6x6: + case basis_tex_format::cASTC_LDR_6x6: + return basisu::texture_format::cASTC_LDR_6x6; + case basis_tex_format::cXUASTC_LDR_8x5: + case basis_tex_format::cASTC_LDR_8x5: + return basisu::texture_format::cASTC_LDR_8x5; + case basis_tex_format::cXUASTC_LDR_8x6: + case basis_tex_format::cASTC_LDR_8x6: + return basisu::texture_format::cASTC_LDR_8x6; + case basis_tex_format::cXUASTC_LDR_10x5: + case basis_tex_format::cASTC_LDR_10x5: + return basisu::texture_format::cASTC_LDR_10x5; + case basis_tex_format::cXUASTC_LDR_10x6: + case basis_tex_format::cASTC_LDR_10x6: + return basisu::texture_format::cASTC_LDR_10x6; + case basis_tex_format::cXUASTC_LDR_8x8: + case basis_tex_format::cASTC_LDR_8x8: + return basisu::texture_format::cASTC_LDR_8x8; + case basis_tex_format::cXUASTC_LDR_10x8: + case basis_tex_format::cASTC_LDR_10x8: + return basisu::texture_format::cASTC_LDR_10x8; + case basis_tex_format::cXUASTC_LDR_10x10: + case basis_tex_format::cASTC_LDR_10x10: + return basisu::texture_format::cASTC_LDR_10x10; + case basis_tex_format::cXUASTC_LDR_12x10: + case basis_tex_format::cASTC_LDR_12x10: + return basisu::texture_format::cASTC_LDR_12x10; + case basis_tex_format::cXUASTC_LDR_12x12: + case basis_tex_format::cASTC_LDR_12x12: + return basisu::texture_format::cASTC_LDR_12x12; + default: + 
assert(0); + return basisu::texture_format::cInvalidTextureFormat; + } + } + + // Given any basis_tex_format (mode or codec), return the corresponding transcoder_texture_format with the proper ASTC block size from 4x4-12x12. + transcoder_texture_format basis_get_transcoder_texture_format_from_xuastc_or_astc_ldr_basis_tex_format(basis_tex_format fmt) + { + switch (fmt) + { + // XUASTC 4x4-12x12 and ASTC 4x4-12x12 + case basis_tex_format::cXUASTC_LDR_4x4: + case basis_tex_format::cASTC_LDR_4x4: + return transcoder_texture_format::cTFASTC_LDR_4x4_RGBA; + case basis_tex_format::cXUASTC_LDR_5x4: + case basis_tex_format::cASTC_LDR_5x4: + return transcoder_texture_format::cTFASTC_LDR_5x4_RGBA; + case basis_tex_format::cXUASTC_LDR_5x5: + case basis_tex_format::cASTC_LDR_5x5: + return transcoder_texture_format::cTFASTC_LDR_5x5_RGBA; + case basis_tex_format::cXUASTC_LDR_6x5: + case basis_tex_format::cASTC_LDR_6x5: + return transcoder_texture_format::cTFASTC_LDR_6x5_RGBA; + case basis_tex_format::cXUASTC_LDR_6x6: + case basis_tex_format::cASTC_LDR_6x6: + return transcoder_texture_format::cTFASTC_LDR_6x6_RGBA; + case basis_tex_format::cXUASTC_LDR_8x5: + case basis_tex_format::cASTC_LDR_8x5: + return transcoder_texture_format::cTFASTC_LDR_8x5_RGBA; + case basis_tex_format::cXUASTC_LDR_8x6: + case basis_tex_format::cASTC_LDR_8x6: + return transcoder_texture_format::cTFASTC_LDR_8x6_RGBA; + case basis_tex_format::cXUASTC_LDR_10x5: + case basis_tex_format::cASTC_LDR_10x5: + return transcoder_texture_format::cTFASTC_LDR_10x5_RGBA; + case basis_tex_format::cXUASTC_LDR_10x6: + case basis_tex_format::cASTC_LDR_10x6: + return transcoder_texture_format::cTFASTC_LDR_10x6_RGBA; + case basis_tex_format::cXUASTC_LDR_8x8: + case basis_tex_format::cASTC_LDR_8x8: + return transcoder_texture_format::cTFASTC_LDR_8x8_RGBA; + case basis_tex_format::cXUASTC_LDR_10x8: + case basis_tex_format::cASTC_LDR_10x8: + return transcoder_texture_format::cTFASTC_LDR_10x8_RGBA; + case 
basis_tex_format::cXUASTC_LDR_10x10: + case basis_tex_format::cASTC_LDR_10x10: + return transcoder_texture_format::cTFASTC_LDR_10x10_RGBA; + case basis_tex_format::cXUASTC_LDR_12x10: + case basis_tex_format::cASTC_LDR_12x10: + return transcoder_texture_format::cTFASTC_LDR_12x10_RGBA; + case basis_tex_format::cXUASTC_LDR_12x12: + case basis_tex_format::cASTC_LDR_12x12: + return transcoder_texture_format::cTFASTC_LDR_12x12_RGBA; + + // ETC1S/UASTC LDR 4x4 + case basis_tex_format::cETC1S: + case basis_tex_format::cUASTC_LDR_4x4: + return transcoder_texture_format::cTFASTC_LDR_4x4_RGBA; + + // HDR formats + case basis_tex_format::cUASTC_HDR_4x4: + return transcoder_texture_format::cTFASTC_HDR_4x4_RGBA; + + case basis_tex_format::cASTC_HDR_6x6: + case basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE: + return transcoder_texture_format::cTFASTC_HDR_6x6_RGBA; + + default: + assert(0); + return transcoder_texture_format::cTFASTC_LDR_4x4_RGBA; + } + } + + transcoder_texture_format basis_get_transcoder_texture_format_from_basis_tex_format(basis_tex_format fmt) + { + return basis_get_transcoder_texture_format_from_xuastc_or_astc_ldr_basis_tex_format(fmt); + } + + // For a given basis_tex_format (mode or codec), is the specified transcoder_texture_format supported? bool basis_is_format_supported(transcoder_texture_format tex_type, basis_tex_format fmt) { - if ((fmt == basis_tex_format::cASTC_HDR_6x6) || (fmt == basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE)) + if ((fmt == basis_tex_format::cASTC_HDR_6x6) || (fmt == basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE)) { // RDO UASTC HDR 6x6, or our custom intermediate format #if BASISD_SUPPORT_UASTC_HDR @@ -13266,37 +13658,182 @@ namespace basist } #endif } - else if (fmt == basis_tex_format::cUASTC4x4) + else if (fmt == basis_tex_format::cUASTC_LDR_4x4) { // UASTC LDR 4x4 #if BASISD_SUPPORT_UASTC + // IMPORTANT : This is defined as the formats which DON'T support UASTC LDR 4x4 transcoding. 
switch (tex_type) { - // These niche formats aren't currently supported for UASTC - everything else is. + // These niche formats aren't currently supported for UASTC LDR 4x4 - everything else is. case transcoder_texture_format::cTFPVRTC2_4_RGB: case transcoder_texture_format::cTFPVRTC2_4_RGBA: case transcoder_texture_format::cTFATC_RGB: case transcoder_texture_format::cTFATC_RGBA: case transcoder_texture_format::cTFFXT1_RGB: - // UASTC LDR doesn't support transcoding to HDR formats + // UASTC LDR 4x4 doesn't support transcoding to HDR formats case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: case transcoder_texture_format::cTFASTC_HDR_6x6_RGBA: case transcoder_texture_format::cTFBC6H: case transcoder_texture_format::cTFRGBA_HALF: case transcoder_texture_format::cTFRGB_HALF: case transcoder_texture_format::cTFRGB_9E5: + case transcoder_texture_format::cTFASTC_LDR_5x4_RGBA: + case transcoder_texture_format::cTFASTC_LDR_5x5_RGBA: + case transcoder_texture_format::cTFASTC_LDR_6x5_RGBA: + case transcoder_texture_format::cTFASTC_LDR_6x6_RGBA: + case transcoder_texture_format::cTFASTC_LDR_8x5_RGBA: + case transcoder_texture_format::cTFASTC_LDR_8x6_RGBA: + case transcoder_texture_format::cTFASTC_LDR_10x5_RGBA: + case transcoder_texture_format::cTFASTC_LDR_10x6_RGBA: + case transcoder_texture_format::cTFASTC_LDR_8x8_RGBA: + case transcoder_texture_format::cTFASTC_LDR_10x8_RGBA: + case transcoder_texture_format::cTFASTC_LDR_10x10_RGBA: + case transcoder_texture_format::cTFASTC_LDR_12x10_RGBA: + case transcoder_texture_format::cTFASTC_LDR_12x12_RGBA: return false; default: return true; } #endif } + else if ( (basis_tex_format_is_xuastc_ldr(fmt)) || (basis_tex_format_is_astc_ldr(fmt)) ) + { + // XUASTC LDR 4x4-12x12 or ASTC LDR 4x4-12x12 + switch (tex_type) + { + case transcoder_texture_format::cTFBC1_RGB: + case transcoder_texture_format::cTFBC3_RGBA: + case transcoder_texture_format::cTFBC4_R: + case transcoder_texture_format::cTFBC5_RG: + case 
transcoder_texture_format::cTFBC7_RGBA: + case transcoder_texture_format::cTFETC1_RGB: + case transcoder_texture_format::cTFETC2_RGBA: + case transcoder_texture_format::cTFETC2_EAC_R11: + case transcoder_texture_format::cTFETC2_EAC_RG11: + case transcoder_texture_format::cTFPVRTC1_4_RGB: + case transcoder_texture_format::cTFPVRTC1_4_RGBA: + // Uncompressed formats + case transcoder_texture_format::cTFRGBA32: + case transcoder_texture_format::cTFRGB565: + case transcoder_texture_format::cTFBGR565: + case transcoder_texture_format::cTFRGBA4444: + return true; + default: + break; + } + + // Ensure they're using the block size for ASTC LDR that matches the XUASTC format's block size. + switch (fmt) + { + case basis_tex_format::cXUASTC_LDR_4x4: + case basis_tex_format::cASTC_LDR_4x4: + { + if (tex_type == transcoder_texture_format::cTFASTC_LDR_4x4_RGBA) + return true; + break; + } + case basis_tex_format::cXUASTC_LDR_5x4: + case basis_tex_format::cASTC_LDR_5x4: + { + if (tex_type == transcoder_texture_format::cTFASTC_LDR_5x4_RGBA) + return true; + break; + } + case basis_tex_format::cXUASTC_LDR_5x5: + case basis_tex_format::cASTC_LDR_5x5: + { + if (tex_type == transcoder_texture_format::cTFASTC_LDR_5x5_RGBA) + return true; + break; + } + case basis_tex_format::cXUASTC_LDR_6x5: + case basis_tex_format::cASTC_LDR_6x5: + { + if (tex_type == transcoder_texture_format::cTFASTC_LDR_6x5_RGBA) + return true; + break; + } + case basis_tex_format::cXUASTC_LDR_6x6: + case basis_tex_format::cASTC_LDR_6x6: + { + if (tex_type == transcoder_texture_format::cTFASTC_LDR_6x6_RGBA) + return true; + break; + } + case basis_tex_format::cXUASTC_LDR_8x5: + case basis_tex_format::cASTC_LDR_8x5: + { + if (tex_type == transcoder_texture_format::cTFASTC_LDR_8x5_RGBA) + return true; + break; + } + case basis_tex_format::cXUASTC_LDR_8x6: + case basis_tex_format::cASTC_LDR_8x6: + { + if (tex_type == transcoder_texture_format::cTFASTC_LDR_8x6_RGBA) + return true; + break; + } + case 
basis_tex_format::cXUASTC_LDR_10x5: + case basis_tex_format::cASTC_LDR_10x5: + { + if (tex_type == transcoder_texture_format::cTFASTC_LDR_10x5_RGBA) + return true; + break; + } + case basis_tex_format::cXUASTC_LDR_10x6: + case basis_tex_format::cASTC_LDR_10x6: + { + if (tex_type == transcoder_texture_format::cTFASTC_LDR_10x6_RGBA) + return true; + break; + } + case basis_tex_format::cXUASTC_LDR_8x8: + case basis_tex_format::cASTC_LDR_8x8: + { + if (tex_type == transcoder_texture_format::cTFASTC_LDR_8x8_RGBA) + return true; + break; + } + case basis_tex_format::cXUASTC_LDR_10x8: + case basis_tex_format::cASTC_LDR_10x8: + { + if (tex_type == transcoder_texture_format::cTFASTC_LDR_10x8_RGBA) + return true; + break; + } + case basis_tex_format::cXUASTC_LDR_10x10: + case basis_tex_format::cASTC_LDR_10x10: + { + if (tex_type == transcoder_texture_format::cTFASTC_LDR_10x10_RGBA) + return true; + break; + } + case basis_tex_format::cXUASTC_LDR_12x10: + case basis_tex_format::cASTC_LDR_12x10: + { + if (tex_type == transcoder_texture_format::cTFASTC_LDR_12x10_RGBA) + return true; + break; + } + case basis_tex_format::cXUASTC_LDR_12x12: + case basis_tex_format::cASTC_LDR_12x12: + { + if (tex_type == transcoder_texture_format::cTFASTC_LDR_12x12_RGBA) + return true; + break; + } + default: + break; + } + } else { // ETC1S switch (tex_type) { - // ETC1 and uncompressed are always supported. + // ETC1 and uncompressed are always supported. 
case transcoder_texture_format::cTFETC1_RGB: case transcoder_texture_format::cTFRGBA32: case transcoder_texture_format::cTFRGB565: @@ -13330,8 +13867,8 @@ namespace basist case transcoder_texture_format::cTFETC2_RGBA: return true; #endif -#if BASISD_SUPPORT_ASTC - case transcoder_texture_format::cTFASTC_4x4_RGBA: +#if BASISD_SUPPORT_ASTC + case transcoder_texture_format::cTFASTC_LDR_4x4_RGBA: return true; #endif #if BASISD_SUPPORT_ATC @@ -13361,9 +13898,9 @@ namespace basist return false; } - // ------------------------------------------------------------------------------------------------------ + // ------------------------------------------------------------------------------------------------------ // UASTC LDR 4x4 - // ------------------------------------------------------------------------------------------------------ + // ------------------------------------------------------------------------------------------------------ #if BASISD_SUPPORT_UASTC const astc_bc7_common_partition2_desc g_astc_bc7_common_partitions2[TOTAL_ASTC_BC7_COMMON_PARTITIONS2] = @@ -14088,7 +14625,7 @@ namespace basist if (group_size) { - // Range has trits or quints - pack each group of 5 or 3 values + // Range has trits or quints - pack each group of 5 or 3 values const int total_groups = (group_size == 5) ? 
((num_vals + 4) / 5) : ((num_vals + 2) / 3); for (int group_index = 0; group_index < total_groups; group_index++) @@ -14380,7 +14917,7 @@ namespace basist bool unpack_uastc(const uastc_block& blk, unpacked_uastc_block& unpacked, bool blue_contract_check, bool read_hints) { //memset(&unpacked, 0, sizeof(unpacked)); - + #if 0 uint8_t table[128]; memset(table, 0xFF, sizeof(table)); @@ -14436,7 +14973,7 @@ namespace basist return true; } - + if (read_hints) { if (g_uastc_mode_has_bc1_hint0[mode]) @@ -14469,7 +15006,7 @@ namespace basist } else bit_ofs += g_uastc_mode_total_hint_bits[mode]; - + uint32_t subsets = 1; switch (mode) { @@ -14682,7 +15219,7 @@ namespace basist { // All other modes have <= 64 weight bits. uint64_t bits; - + // Read the weight bits if ((BASISD_IS_BIG_ENDIAN) || (!BASISD_USE_UNALIGNED_WORD_READS)) bits = read_bits64(blk.m_bytes, bit_ofs, basisu::minimum(64, 128 - (int)bit_ofs)); @@ -14690,31 +15227,31 @@ namespace basist { bits = blk.m_dwords[2]; bits |= (((uint64_t)blk.m_dwords[3]) << 32U); - + if (bit_ofs >= 64U) bits >>= (bit_ofs - 64U); else { assert(bit_ofs >= 56U); - + uint32_t bits_needed = 64U - bit_ofs; bits <<= bits_needed; bits |= (blk.m_bytes[7] >> (8U - bits_needed)); } } - + bit_ofs = 0; const uint32_t mask = (1U << weight_bits) - 1U; const uint32_t anchor_mask = (1U << (weight_bits - 1U)) - 1U; - + if (total_planes == 2) { // Dual plane modes always have a single subset, and the first 2 weights are anchors. unpacked.m_astc.m_weights[0] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask); bit_ofs += (weight_bits - 1); - + unpacked.m_astc.m_weights[1] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask); bit_ofs += (weight_bits - 1); @@ -14732,7 +15269,7 @@ namespace basist if (weight_bits == 4) { assert(bit_ofs == 0); - + // Specialize the most common case: 4-bit weights. 
unpacked.m_astc.m_weights[0] = (uint8_t)((uint32_t)(bits) & 7); unpacked.m_astc.m_weights[1] = (uint8_t)((uint32_t)(bits >> 3) & 15); @@ -14991,7 +15528,7 @@ namespace basist return unpack_uastc(unpacked_blk, pPixels, srgb); } - // Determines the best shared pbits to use to encode xl/xh + // Determines the best shared pbit to use to encode xl/xh static void determine_shared_pbits( uint32_t total_comps, uint32_t comp_bits, float xl[4], float xh[4], color_quad_u8& bestMinColor, color_quad_u8& bestMaxColor, uint32_t best_pbits[2]) @@ -15195,6 +15732,8 @@ namespace basist } uint32_t best_pbits[2]; + basisu::clear_obj(best_pbits); + color_quad_u8 bestMinColor, bestMaxColor; determine_unique_pbits((total_comps == 2) ? 4 : total_comps, 7, xl, xh, bestMinColor, bestMaxColor, best_pbits); @@ -15278,7 +15817,7 @@ namespace basist } case 2: { - // 2. DualPlane: 0, WeightRange : 5 (8), Subsets : 2, EndpointRange : 8 (16) - BC7 MODE1 + // 2. DualPlane: 0, WeightRange : 5 (8), Subsets : 2, EndpointRange : 8 (16) - BC7 MODE1 dst_blk.m_mode = 1; dst_blk.m_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_bc7; @@ -15505,6 +16044,7 @@ namespace basist case UASTC_MODE_INDEX_SOLID_COLOR: { // Void-Extent: Solid Color RGBA (BC7 MODE5 or MODE6) + // TODO: Why prefer mode 6 here? Mode 5 is lossless. const color32& solid_color = unpacked_src_blk.m_solid_color; uint32_t best_err0 = g_bc7_mode_6_optimal_endpoints[solid_color.r][0].m_error + g_bc7_mode_6_optimal_endpoints[solid_color.g][0].m_error + @@ -16217,7 +16757,7 @@ namespace basist bool flip = pack_etc1_y_estimate_flipped(&block_y[0][0], upper_avg, lower_avg, left_avg, right_avg); // non-flipped: | | - // vs. + // vs. 
// flipped: -- // -- @@ -16828,7 +17368,7 @@ namespace basist static const uint8_t s_uastc2_to_bc1[4] = { 0, 2, 3, 1 }; static const uint8_t s_uastc1_to_bc1[2] = { 0, 1 }; const uint8_t* s_uastc_to_bc1_weights[6] = { nullptr, s_uastc1_to_bc1, s_uastc2_to_bc1, s_uastc3_to_bc1, s_uastc4_to_bc1, s_uastc5_to_bc1 }; - + void encode_bc4(void* pDst, const uint8_t* pPixels, uint32_t stride) { uint32_t min0_v, max0_v, min1_v, max1_v,min2_v, max2_v, min3_v, max3_v; @@ -16916,7 +17456,7 @@ namespace basist a2 |= (s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)] << 12U); a3 |= (s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)] << 12U); } - + { const int v0 = pPixels[8 * stride] * 14 + bias; const int v1 = pPixels[9 * stride] * 14 + bias; @@ -16940,7 +17480,7 @@ namespace basist } const uint64_t f = a0 | a1 | a2 | a3; - + pDst_bytes[2] = (uint8_t)f; pDst_bytes[3] = (uint8_t)(f >> 8U); pDst_bytes[4] = (uint8_t)(f >> 16U); @@ -16963,7 +17503,7 @@ namespace basist int dots[4]; for (uint32_t i = 0; i < 4; i++) dots[i] = (int)block_r[i] * ar + (int)block_g[i] * ag + (int)block_b[i] * ab; - + int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3]; ar *= 2; ag *= 2; ab *= 2; @@ -16972,7 +17512,7 @@ namespace basist { const int d = pSrc_pixels[i].r * ar + pSrc_pixels[i].g * ag + pSrc_pixels[i].b * ab; static const uint8_t s_sels[4] = { 3, 2, 1, 0 }; - + // Rounding matters here! // d <= t0: <=, not <, to the later LS step "sees" a wider range of selectors. It matters for quality. sels[i] = s_sels[(d <= t0) + (d < t1) + (d < t2)]; @@ -17013,11 +17553,11 @@ namespace basist sels[i+3] = s_sels[(d3 <= t0) + (d3 < t1) + (d3 < t2)]; } } - + static bool compute_least_squares_endpoints_rgb(const color32* pColors, const uint8_t* pSelectors, vec3F* pXl, vec3F* pXh) { // Derived from bc7enc16's LS function. 
- // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf + // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf // I did this in matrix form first, expanded out all the ops, then optimized it a bit. uint32_t uq00_r = 0, uq10_r = 0, ut_r = 0, uq00_g = 0, uq10_g = 0, ut_g = 0, uq00_b = 0, uq10_b = 0, ut_b = 0; @@ -17091,7 +17631,7 @@ namespace basist return true; } - void encode_bc1_solid_block(void* pDst, uint32_t fr, uint32_t fg, uint32_t fb) + void encode_bc1_solid_block(void* pDst, uint32_t fr, uint32_t fg, uint32_t fb) { dxt1_block* pDst_block = static_cast(pDst); @@ -17143,19 +17683,19 @@ namespace basist { const color32* pSrc_pixels = (const color32*)pPixels; dxt1_block* pDst_block = static_cast(pDst); - + int avg_r = -1, avg_g = 0, avg_b = 0; int lr = 0, lg = 0, lb = 0, hr = 0, hg = 0, hb = 0; uint8_t sels[16]; - + const bool use_sels = (flags & cEncodeBC1UseSelectors) != 0; if (use_sels) { // Caller is jamming in their own selectors for us to try. const uint32_t s = pDst_block->m_selectors[0] | (pDst_block->m_selectors[1] << 8) | (pDst_block->m_selectors[2] << 16) | (pDst_block->m_selectors[3] << 24); - + static const uint8_t s_sel_tran[4] = { 0, 3, 1, 2 }; - + for (uint32_t i = 0; i < 16; i++) sels[i] = s_sel_tran[(s >> (i * 2)) & 3]; } @@ -17167,13 +17707,13 @@ namespace basist for (j = 1; j < 16; j++) if ((pSrc_pixels[j].r != fr) || (pSrc_pixels[j].g != fg) || (pSrc_pixels[j].b != fb)) break; - + if (j == 16) { encode_bc1_solid_block(pDst, fr, fg, fb); return; } - + // Select 2 colors along the principle axis. (There must be a faster/simpler way.) int total_r = fr, total_g = fg, total_b = fb; int max_r = fr, max_g = fg, max_b = fb; @@ -17207,7 +17747,7 @@ namespace basist float cov[6]; for (uint32_t i = 0; i < 6; i++) cov[i] = static_cast(icov[i])* (1.0f / 255.0f); - + #if 0 // Seems silly to use full PCA to choose 2 colors. The diff in avg. 
PSNR between using PCA vs. not is small (~.025 difference). // TODO: Try 2 or 3 different normalized diagonal vectors, choose the one that results in the largest dot delta @@ -17239,7 +17779,7 @@ namespace basist saxis_b = (int)(xb * m); } #endif - + int low_dot = INT_MAX, high_dot = INT_MIN, low_c = 0, high_c = 0; for (uint32_t i = 0; i < 16; i++) { @@ -17263,7 +17803,7 @@ namespace basist hr = to_5(pSrc_pixels[high_c].r); hg = to_6(pSrc_pixels[high_c].g); hb = to_5(pSrc_pixels[high_c].b); - + bc1_find_sels(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels); } // if (use_sels) @@ -17310,13 +17850,13 @@ namespace basist hg = basisu::clamp((int)((xh.c[1]) * (63.0f / 255.0f) + .5f), 0, 63); hb = basisu::clamp((int)((xh.c[2]) * (31.0f / 255.0f) + .5f), 0, 31); } - + bc1_find_sels(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels); } uint32_t lc16 = dxt1_block::pack_unscaled_color(lr, lg, lb); uint32_t hc16 = dxt1_block::pack_unscaled_color(hr, hg, hb); - + // Always forbid 3 color blocks if (lc16 == hc16) { @@ -17368,7 +17908,7 @@ namespace basist pDst_block->m_selectors[3] = (uint8_t)(packed_sels >> 24) ^ invert_mask; } } - + void encode_bc1_alt(void* pDst, const uint8_t* pPixels, uint32_t flags) { const color32* pSrc_pixels = (const color32*)pPixels; @@ -17417,8 +17957,8 @@ namespace basist min_r = basisu::minimum(min_r, r); min_g = basisu::minimum(min_g, g); min_b = basisu::minimum(min_b, b); total_r += r; total_g += g; total_b += b; } - - if (grayscale_flag) + + if (grayscale_flag) { // Grayscale blocks are a common enough case to specialize. 
if ((max_r - min_r) < 2) @@ -17735,7 +18275,7 @@ namespace basist // Always forbid 3 color blocks uint16_t lc16 = (uint16_t)b.get_low_color(); uint16_t hc16 = (uint16_t)b.get_high_color(); - + uint8_t mask = 0; // Make l > h @@ -17965,7 +18505,7 @@ namespace basist blk.m_base = static_cast(a); blk.m_table = 13; blk.m_multiplier = 0; - + memcpy(blk.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4)); return; @@ -18655,7 +19195,7 @@ namespace basist if (!unpack_uastc(pSrc_blocks[x + y * num_blocks_x], block_pixels, false)) return false; - // Get block's RGB bounding box + // Get block's RGB bounding box color32 low_color(255, 255, 255, 255), high_color(0, 0, 0, 0); if (from_alpha) @@ -18714,7 +19254,7 @@ namespace basist if (!unpack_uastc(pSrc_blocks[x + y * num_blocks_x], block_pixels, false)) return false; - // Get block's RGBA bounding box + // Get block's RGBA bounding box color32 low_color(255, 255, 255, 255), high_color(0, 0, 0, 0); for (uint32_t i = 0; i < 16; i++) @@ -18830,9 +19370,9 @@ namespace basist #endif // #if BASISD_SUPPORT_UASTC -// ------------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------------ // KTX2 -// ------------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------------ #if BASISD_SUPPORT_KTX2 const uint8_t g_ktx2_file_identifier[12] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x32, 0x30, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A }; @@ -18854,8 +19394,8 @@ namespace basist m_key_values.clear(); memset((void *)&m_etc1s_header, 0, sizeof(m_etc1s_header)); m_etc1s_image_descs.clear(); - m_astc_6x6_intermediate_image_descs.clear(); - + m_slice_offset_len_descs.clear(); + m_format = basist::basis_tex_format::cETC1S; m_dfd_color_model = 0; @@ -18867,14 +19407,19 @@ namespace basist 
m_dfd_chan1 = KTX2_DF_CHANNEL_UASTC_RGB; m_etc1s_transcoder.clear(); - + m_def_transcoder_state.clear(); - + m_has_alpha = false; m_is_video = false; m_ldr_hdr_upconversion_nit_multiplier = 0.0f; } + static bool is_vk_format_astc_ldr(uint32_t fmt) + { + return (fmt >= KTX2_FORMAT_ASTC_4x4_UNORM_BLOCK) && (fmt <= KTX2_FORMAT_ASTC_12x12_SRGB_BLOCK); + } + bool ktx2_transcoder::init(const void* pData, uint32_t data_size) { clear(); @@ -18904,9 +19449,10 @@ namespace basist memcpy((void *)&m_header, pData, sizeof(m_header)); // Check for supported VK formats. We may also need to parse the DFD. - if ((m_header.m_vk_format != KTX2_VK_FORMAT_UNDEFINED) && - (m_header.m_vk_format != basist::KTX2_FORMAT_ASTC_4x4_SFLOAT_BLOCK) && - (m_header.m_vk_format != basist::KTX2_FORMAT_ASTC_6x6_SFLOAT_BLOCK)) + if ((m_header.m_vk_format != KTX2_VK_FORMAT_UNDEFINED) && + (m_header.m_vk_format != basist::KTX2_FORMAT_ASTC_4x4_SFLOAT_BLOCK) && + (m_header.m_vk_format != basist::KTX2_FORMAT_ASTC_6x6_SFLOAT_BLOCK) && + !is_vk_format_astc_ldr(m_header.m_vk_format)) { BASISU_DEVEL_ERROR("ktx2_transcoder::init: KTX2 file must be in ETC1S or UASTC LDR/HDR format\n"); return false; @@ -18943,7 +19489,7 @@ namespace basist return false; } } - + // 3.7 levelCount: "levelCount=0 is allowed, except for block-compressed formats" if (m_header.m_level_count < 1) { @@ -18958,22 +19504,22 @@ namespace basist return false; } - if (m_header.m_supercompression_scheme > KTX2_SS_ZSTANDARD) + if ((m_header.m_supercompression_scheme == KTX2_SS_UASTC_HDR_6x6I) || + (m_header.m_supercompression_scheme == KTX2_SS_XUASTC_LDR)) + { + // standard UASTC HDR 6x6i file (as adopted by khronos, not our initial v1.6/v2.0 release), or XUASTC LDR - DFD colormodels unchanged however + } + else if (m_header.m_supercompression_scheme > KTX2_SS_ZSTANDARD) { BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid/unsupported supercompression or file is corrupted or invalid\n"); return false; } - if (m_header.m_supercompression_scheme 
== KTX2_SS_BASISLZ) + // Sanity check SGD offset/length + if ((m_header.m_supercompression_scheme == KTX2_SS_BASISLZ) || + (m_header.m_supercompression_scheme == KTX2_SS_UASTC_HDR_6x6I) || + (m_header.m_supercompression_scheme == KTX2_SS_XUASTC_LDR)) { -#if 0 - if (m_header.m_sgd_byte_length <= sizeof(ktx2_etc1s_global_data_header)) - { - BASISU_DEVEL_ERROR("ktx2_transcoder::init: Supercompression global data is too small\n"); - return false; - } -#endif - if (m_header.m_sgd_byte_offset.get_uint64() < sizeof(ktx2_header)) { BASISU_DEVEL_ERROR("ktx2_transcoder::init: Supercompression global data offset is too low\n"); @@ -19002,7 +19548,7 @@ namespace basist } memcpy((void *)&m_levels[0], m_pData + sizeof(ktx2_header), level_index_size_in_bytes); - + // Sanity check the level offsets and byte sizes for (uint32_t i = 0; i < m_levels.size(); i++) { @@ -19022,25 +19568,29 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level offset and/or length\n"); return false; } - + const uint64_t MAX_SANE_LEVEL_UNCOMP_SIZE = 2048ULL * 1024ULL * 1024ULL; - + if (m_levels[i].m_uncompressed_byte_length.get_uint64() >= MAX_SANE_LEVEL_UNCOMP_SIZE) { BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level offset (too large)\n"); return false; } - if (m_header.m_supercompression_scheme == KTX2_SS_BASISLZ) + if ((m_header.m_supercompression_scheme == KTX2_SS_BASISLZ) || + (m_header.m_supercompression_scheme == KTX2_SS_UASTC_HDR_6x6I) || + (m_header.m_supercompression_scheme == KTX2_SS_XUASTC_LDR)) { + // Our supercompressed codec formats: Uncompressed length should be 0 if (m_levels[i].m_uncompressed_byte_length.get_uint64()) { BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid uncompressed length (0)\n"); return false; } } - else if (m_header.m_supercompression_scheme >= KTX2_SS_ZSTANDARD) + else if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD) { + // Uses Zstandard supercompression, ensure uncompressed length is valid. 
if (!m_levels[i].m_uncompressed_byte_length.get_uint64()) { BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid uncompressed length (1)\n"); @@ -19061,7 +19611,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid DFD offset and/or length\n"); return false; } - + const uint8_t* pDFD = m_pData + m_header.m_dfd_byte_offset; if (!m_dfd.try_resize(m_header.m_dfd_byte_length)) @@ -19071,17 +19621,16 @@ namespace basist } memcpy(m_dfd.data(), pDFD, m_header.m_dfd_byte_length); - - // This is all hard coded for only ETC1S and UASTC. + uint32_t dfd_total_size = basisu::read_le_dword(pDFD); - + // 3.10.3: Sanity check if (dfd_total_size != m_header.m_dfd_byte_length) { BASISU_DEVEL_ERROR("ktx2_transcoder::init: DFD size validation failed (1)\n"); return false; } - + // 3.10.3: More sanity checking if (m_header.m_kvd_byte_length) { @@ -19094,12 +19643,16 @@ namespace basist const uint32_t dfd_bits = basisu::read_le_dword(pDFD + 3 * sizeof(uint32_t)); const uint32_t sample_channel0 = basisu::read_le_dword(pDFD + 7 * sizeof(uint32_t)); - + const uint32_t texel_block_dimensions = basisu::read_le_dword(pDFD + 4 * sizeof(uint32_t)); + m_dfd_color_model = dfd_bits & 255; m_dfd_color_prims = (ktx2_df_color_primaries)((dfd_bits >> 8) & 255); m_dfd_transfer_func = (dfd_bits >> 16) & 255; m_dfd_flags = (dfd_bits >> 24) & 255; + const uint32_t block_width = (texel_block_dimensions & 0xFF) + 1; + const uint32_t block_height = ((texel_block_dimensions >> 8) & 0xFF) + 1; + // See 3.10.1.Restrictions if ((m_dfd_transfer_func != KTX2_KHR_DF_TRANSFER_LINEAR) && (m_dfd_transfer_func != KTX2_KHR_DF_TRANSFER_SRGB)) { @@ -19107,7 +19660,59 @@ namespace basist return false; } - if (m_dfd_color_model == KTX2_KDF_DF_MODEL_ETC1S) + if (is_vk_format_astc_ldr(m_header.m_vk_format)) + { + // ASTC LDR 4x4-12x12 + // We usually read the DFD and decide the format from there. This decides off the VK format. 
+ if (m_dfd_color_model != KTX2_KDF_DF_MODEL_ASTC) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid DFD color model (expected ASTC)\n"); + return false; + } + + uint32_t vk_fmt = m_header.m_vk_format; + const bool is_srgb_fmt = (vk_fmt & 1) == 0; + + if (is_srgb_fmt) + vk_fmt--; + + switch (vk_fmt) + { + case KTX2_FORMAT_ASTC_4x4_UNORM_BLOCK: m_format = basis_tex_format::cASTC_LDR_4x4; break; + case KTX2_FORMAT_ASTC_5x4_UNORM_BLOCK: m_format = basis_tex_format::cASTC_LDR_5x4; break; + case KTX2_FORMAT_ASTC_5x5_UNORM_BLOCK: m_format = basis_tex_format::cASTC_LDR_5x5; break; + case KTX2_FORMAT_ASTC_6x5_UNORM_BLOCK: m_format = basis_tex_format::cASTC_LDR_6x5; break; + case KTX2_FORMAT_ASTC_6x6_UNORM_BLOCK: m_format = basis_tex_format::cASTC_LDR_6x6; break; + case KTX2_FORMAT_ASTC_8x5_UNORM_BLOCK: m_format = basis_tex_format::cASTC_LDR_8x5; break; + case KTX2_FORMAT_ASTC_8x6_UNORM_BLOCK: m_format = basis_tex_format::cASTC_LDR_8x6; break; + case KTX2_FORMAT_ASTC_8x8_UNORM_BLOCK: m_format = basis_tex_format::cASTC_LDR_8x8; break; + case KTX2_FORMAT_ASTC_10x5_UNORM_BLOCK: m_format = basis_tex_format::cASTC_LDR_10x5; break; + case KTX2_FORMAT_ASTC_10x6_UNORM_BLOCK: m_format = basis_tex_format::cASTC_LDR_10x6; break; + case KTX2_FORMAT_ASTC_10x8_UNORM_BLOCK: m_format = basis_tex_format::cASTC_LDR_10x8; break; + case KTX2_FORMAT_ASTC_10x10_UNORM_BLOCK: m_format = basis_tex_format::cASTC_LDR_10x10; break; + case KTX2_FORMAT_ASTC_12x10_UNORM_BLOCK: m_format = basis_tex_format::cASTC_LDR_12x10; break; + case KTX2_FORMAT_ASTC_12x12_UNORM_BLOCK: m_format = basis_tex_format::cASTC_LDR_12x12; break; + default: + assert(0); + return false; + } + + // Sanity check the vkformat's astc block size vs. the DFD's. 
+ uint32_t actual_block_width = 0, actual_block_height = 0; + get_basis_tex_format_block_size(m_format, actual_block_width, actual_block_height); + if ((actual_block_width != block_width) || (actual_block_height != block_height)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: vkFormat's ASTC block size is not in sync with the DFD's block dimensions\n"); + return false; + } + + m_dfd_samples = 1; + m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15); + + // We're assuming "DATA" means RGBA so it has alpha. + m_has_alpha = (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RGBA) || (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RRRG); + } + else if (m_dfd_color_model == KTX2_KDF_DF_MODEL_ETC1S) { if (m_header.m_vk_format != basist::KTX2_VK_FORMAT_UNDEFINED) { @@ -19116,11 +19721,11 @@ namespace basist } m_format = basist::basis_tex_format::cETC1S; - + // 3.10.2: "Whether the image has 1 or 2 slices can be determined from the DFD's sample count." // If m_has_alpha is true it may be 2-channel RRRG or 4-channel RGBA, but we let the caller deal with that. m_has_alpha = (m_header.m_dfd_byte_length == 60); - + m_dfd_samples = m_has_alpha ? 2 : 1; m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15); @@ -19138,11 +19743,11 @@ namespace basist return false; } - m_format = basist::basis_tex_format::cUASTC4x4; + m_format = basist::basis_tex_format::cUASTC_LDR_4x4; m_dfd_samples = 1; m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15); - + // We're assuming "DATA" means RGBA so it has alpha. 
m_has_alpha = (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RGBA) || (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RRRG); } @@ -19180,8 +19785,10 @@ namespace basist m_has_alpha = false; } - else if (m_dfd_color_model == KTX2_KDF_DF_MODEL_ASTC_HDR_6X6_INTERMEDIATE) + else if (m_dfd_color_model == KTX2_KDF_DF_MODEL_UASTC_HDR_6X6_INTERMEDIATE) { + // Note: The supercompression scheme may be BASISLZ if it's an old format (v1.6/v2.0) file + // Custom variable block size ASTC HDR 6x6 texture data. if (m_header.m_vk_format != basist::KTX2_VK_FORMAT_UNDEFINED) { @@ -19189,20 +19796,59 @@ namespace basist return false; } - m_format = basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE; + m_format = basist::basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE; m_dfd_samples = 1; m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15); m_has_alpha = false; } + else if (m_dfd_color_model == KTX2_KDF_DF_MODEL_XUASTC_LDR_INTERMEDIATE) + { + if (m_header.m_vk_format != basist::KTX2_VK_FORMAT_UNDEFINED) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid header vkFormat\n"); + return false; + } + + // Extract ASTC block dimensions from texel_block_dimensions, validate, select basis_tex_format. 
+ m_format = basist::basis_tex_format::cETC1S; // bogus value to start + +#define BUT_BLOCK_SIZE(x, y, t) if ((block_width == (x)) && (block_height == (y))) { m_format = (t); } + BUT_BLOCK_SIZE(4, 4, basis_tex_format::cXUASTC_LDR_4x4); + BUT_BLOCK_SIZE(5, 4, basis_tex_format::cXUASTC_LDR_5x4); + BUT_BLOCK_SIZE(5, 5, basis_tex_format::cXUASTC_LDR_5x5); + BUT_BLOCK_SIZE(6, 5, basis_tex_format::cXUASTC_LDR_6x5); + BUT_BLOCK_SIZE(6, 6, basis_tex_format::cXUASTC_LDR_6x6); + BUT_BLOCK_SIZE(8, 5, basis_tex_format::cXUASTC_LDR_8x5); + BUT_BLOCK_SIZE(8, 6, basis_tex_format::cXUASTC_LDR_8x6); + BUT_BLOCK_SIZE(10, 5, basis_tex_format::cXUASTC_LDR_10x5); + BUT_BLOCK_SIZE(10, 6, basis_tex_format::cXUASTC_LDR_10x6); + BUT_BLOCK_SIZE(8, 8, basis_tex_format::cXUASTC_LDR_8x8); + BUT_BLOCK_SIZE(10, 8, basis_tex_format::cXUASTC_LDR_10x8); + BUT_BLOCK_SIZE(10, 10, basis_tex_format::cXUASTC_LDR_10x10); + BUT_BLOCK_SIZE(12, 10, basis_tex_format::cXUASTC_LDR_12x10); + BUT_BLOCK_SIZE(12, 12, basis_tex_format::cXUASTC_LDR_12x12); +#undef BUT_BLOCK_SIZE + + if (m_format == basist::basis_tex_format::cETC1S) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Unsupported XUASTC LDR block dimensions (not valid ASTC)\n"); + return false; + } + + m_dfd_samples = 1; + m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15); + + m_has_alpha = (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RGBA) || (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RRRG); + } else { // Unsupported DFD color model. 
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Unsupported DFD color model\n"); return false; } - + if (!read_key_values()) { BASISU_DEVEL_ERROR("ktx2_transcoder::init: read_key_values() failed\n"); @@ -19261,7 +19907,7 @@ namespace basist return nullptr; } - + bool ktx2_transcoder::start_transcoding() { if (!m_pData) @@ -19270,14 +19916,16 @@ namespace basist return false; } - if (m_header.m_supercompression_scheme == KTX2_SS_BASISLZ) + // In standard KTX2 file, KTX2_SS_BASISLZ would ONLY be ETC1S, but in v1.6 and v2.0 it could also mean UASTC HDR 6x6i or XUASTC LDR. + // We support our older files, too. + if (m_header.m_supercompression_scheme == KTX2_SS_BASISLZ) { if (m_format == basis_tex_format::cETC1S) { // Check if we've already decompressed the ETC1S global data. If so don't unpack it again. if (!m_etc1s_transcoder.get_endpoints().empty()) return true; - + if (!decompress_etc1s_global_data()) { BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: decompress_etc1s_global_data() failed\n"); @@ -19301,14 +19949,16 @@ namespace basist } } } - else if (m_format == basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE) + // check for old-style (non-standard) KTX2 files written by v1.6/v2.0 + else if ( (m_format == basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE) || basis_tex_format_is_xuastc_ldr(m_format) ) { - if (m_astc_6x6_intermediate_image_descs.size()) + // UASTC HDR 6x6 and XUASTC LDR 4x4-12x12 require an array of slice offset/len structs to determine where the compressed data starts for each independent compressed texture slice. 
+ if (m_slice_offset_len_descs.size()) return true; - if (!read_astc_6x6_hdr_intermediate_global_data()) + if (!read_slice_offset_len_global_data(false)) { - BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: read_astc_6x6_hdr_intermediate_global_data() failed\n"); + BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: read_slice_offset_len_global_data() failed\n"); return false; } } @@ -19318,6 +19968,18 @@ namespace basist return false; } } + else if ((m_header.m_supercompression_scheme == KTX2_SS_UASTC_HDR_6x6I) || (m_header.m_supercompression_scheme == KTX2_SS_XUASTC_LDR)) + { + // UASTC HDR 6x6 and XUASTC LDR 4x4-12x12 require an array of slice offset/len structs to determine where the compressed data starts for each independent compressed texture slice. + if (m_slice_offset_len_descs.size()) + return true; + + if (!read_slice_offset_len_global_data(true)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: read_slice_offset_len_global_data() failed\n"); + return false; + } + } else if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD) { #if !BASISD_SUPPORT_KTX2_ZSTD @@ -19356,7 +20018,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: layer_index >= maximum(m_header.m_layer_count, 1)\n"); return false; } - + const uint32_t level_width = basisu::maximum(m_header.m_pixel_width >> level_index, 1); const uint32_t level_height = basisu::maximum(m_header.m_pixel_height >> level_index, 1); @@ -19380,7 +20042,7 @@ namespace basist level_info.m_total_blocks = num_blocks_x * num_blocks_y; level_info.m_alpha_flag = m_has_alpha; level_info.m_iframe_flag = false; - + if (m_etc1s_image_descs.size()) { const uint32_t etc1s_image_index = @@ -19393,9 +20055,9 @@ namespace basist return true; } - + bool ktx2_transcoder::transcode_image_level( - uint32_t level_index, uint32_t layer_index, uint32_t face_index, + uint32_t level_index, uint32_t layer_index, uint32_t face_index, void* pOutput_blocks, uint32_t 
output_blocks_buf_size_in_blocks_or_pixels, basist::transcoder_texture_format fmt, uint32_t decode_flags, uint32_t output_row_pitch_in_blocks_or_pixels, uint32_t output_rows_in_pixels, int channel0, int channel1, @@ -19403,16 +20065,16 @@ namespace basist { if (!m_pData) { - BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: Must call init() first\n"); + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_level: Must call init() first\n"); return false; } if (!pState) pState = &m_def_transcoder_state; - + if (level_index >= m_levels.size()) { - BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: level_index >= m_levels.size()\n"); + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_level: level_index >= m_levels.size()\n"); return false; } @@ -19420,34 +20082,34 @@ namespace basist { if (face_index >= 6) { - BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: face_index >= 6\n"); + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_level: face_index >= 6\n"); return false; } } else if (face_index != 0) { - BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: face_index != 0\n"); + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_level: face_index != 0\n"); return false; } if (layer_index >= basisu::maximum(m_header.m_layer_count, 1)) { - BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: layer_index >= maximum(m_header.m_layer_count, 1)\n"); + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_level: layer_index >= maximum(m_header.m_layer_count, 1)\n"); return false; } const uint8_t* pComp_level_data = m_pData + m_levels[level_index].m_byte_offset.get_uint64(); uint64_t comp_level_data_size = m_levels[level_index].m_byte_length.get_uint64(); - + const uint8_t* pUncomp_level_data = pComp_level_data; uint64_t uncomp_level_data_size = comp_level_data_size; if (uncomp_level_data_size > UINT32_MAX) { - BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: uncomp_level_data_size > UINT32_MAX\n"); + 
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_level: uncomp_level_data_size > UINT32_MAX\n"); return false; } - + if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD) { // Check if we've already decompressed this level's supercompressed data. @@ -19456,7 +20118,7 @@ namespace basist // Uncompress the entire level's supercompressed data. if (!decompress_level_data(level_index, pState->m_level_uncomp_data)) { - BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: decompress_level_data() failed\n"); + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_level: decompress_level_data() failed\n"); return false; } pState->m_uncomp_data_level_index = level_index; @@ -19465,18 +20127,19 @@ namespace basist pUncomp_level_data = pState->m_level_uncomp_data.data(); uncomp_level_data_size = pState->m_level_uncomp_data.size(); } - + const uint32_t level_width = basisu::maximum(m_header.m_pixel_width >> level_index, 1); const uint32_t level_height = basisu::maximum(m_header.m_pixel_height >> level_index, 1); const uint32_t num_blocks4_x = (level_width + 3) >> 2; const uint32_t num_blocks4_y = (level_height + 3) >> 2; - + if (m_format == basist::basis_tex_format::cETC1S) { + // ETC1S // Ensure start_transcoding() was called. 
if (m_etc1s_transcoder.get_endpoints().empty()) { - BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: must call start_transcoding() first\n"); + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_level: must call start_transcoding() first\n"); return false; } @@ -19484,12 +20147,11 @@ namespace basist (level_index * basisu::maximum(m_header.m_layer_count, 1) * m_header.m_face_count) + layer_index * m_header.m_face_count + face_index; - + // Sanity check if (etc1s_image_index >= m_etc1s_image_descs.size()) { - BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: etc1s_image_index >= m_etc1s_image_descs.size()\n"); - assert(0); + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_level: etc1s_image_index >= m_etc1s_image_descs.size()\n"); return false; } @@ -19504,15 +20166,16 @@ namespace basist decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, &pState->m_transcoder_state, output_rows_in_pixels)) { - BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: ETC1S transcode_image() failed, this is either a bug or the file is corrupted/invalid\n"); + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_level: ETC1S transcode_image() failed, this is either a bug or the file is corrupted/invalid\n"); return false; } } - else if (m_format == basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE) + else if (m_format == basist::basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE) { - if (!m_astc_6x6_intermediate_image_descs.size()) + // UASTC HDR 6x6i + if (!m_slice_offset_len_descs.size()) { - BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: must call start_transcoding() first\n"); + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_level: must call start_transcoding() first\n"); return false; } @@ -19525,102 +20188,219 @@ namespace basist face_index; // Sanity check - if (image_index >= m_astc_6x6_intermediate_image_descs.size()) + if (image_index >= m_slice_offset_len_descs.size()) { - 
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: Invalid image_index\n"); - assert(0); + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_level: Invalid image_index\n"); return false; } - const ktx2_astc_hdr_6x6_intermediate_image_desc& image_desc = m_astc_6x6_intermediate_image_descs[image_index]; - + const ktx2_slice_offset_len_desc_orig& image_desc = m_slice_offset_len_descs[image_index]; + if (!m_astc_hdr_6x6_intermediate_transcoder.transcode_image(fmt, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, m_pData, m_data_size, num_blocks6_x, num_blocks6_y, level_width, level_height, level_index, - m_levels[level_index].m_byte_offset.get_uint64() + image_desc.m_rgb_slice_byte_offset, image_desc.m_rgb_slice_byte_length, + m_levels[level_index].m_byte_offset.get_uint64() + image_desc.m_slice_byte_offset, image_desc.m_slice_byte_length, decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1)) { - BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: ASTC 6x6 HDR transcode_image() failed, this is either a bug or the file is corrupted/invalid\n"); + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_level: ASTC 6x6 HDR transcode_image() failed, this is either a bug or the file is corrupted/invalid\n"); return false; } } else if (m_format == basist::basis_tex_format::cASTC_HDR_6x6) { + // plain ASTC HDR 6x6 const uint32_t num_blocks6_x = (level_width + 5) / 6; const uint32_t num_blocks6_y = (level_height + 5) / 6; // Compute length and offset to uncompressed 2D UASTC texture data, given the face/layer indices. 
assert(uncomp_level_data_size == m_levels[level_index].m_uncompressed_byte_length.get_uint64()); - const uint32_t total_2D_image_size = num_blocks6_x * num_blocks6_y * sizeof(astc_helpers::astc_block); - const uint32_t uncomp_ofs = (layer_index * m_header.m_face_count + face_index) * total_2D_image_size; + const uint64_t total_2D_image_size = (uint64_t)num_blocks6_x * num_blocks6_y * sizeof(astc_helpers::astc_block); + + const uint64_t uncomp_ofs = (layer_index * m_header.m_face_count + face_index) * total_2D_image_size; + + if ((total_2D_image_size > UINT32_MAX) || ((size_t)uncomp_ofs != uncomp_ofs)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_level: size too large\n"); + return false; + } // Sanity checks if (uncomp_ofs >= uncomp_level_data_size) { - BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: uncomp_ofs >= total_2D_image_size\n"); + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_level: uncomp_ofs >= total_2D_image_size\n"); return false; } if ((uncomp_level_data_size - uncomp_ofs) < total_2D_image_size) { - BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: (uncomp_level_data_size - uncomp_ofs) < total_2D_image_size\n"); + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_level: (uncomp_level_data_size - uncomp_ofs) < total_2D_image_size\n"); return false; } + assert(total_2D_image_size <= UINT32_MAX); + if (!m_astc_hdr_6x6_transcoder.transcode_image(fmt, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, - (const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks6_x, num_blocks6_y, level_width, level_height, level_index, + (const uint8_t*)pUncomp_level_data + (size_t)uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks6_x, num_blocks6_y, level_width, level_height, level_index, + 0, (uint32_t)total_2D_image_size, + decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1)) + { + 
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_level: ASTC 6x6 HDR transcode_image() failed, this is either a bug or the file is corrupted/invalid\n"); + return false; + } + } + else if (basis_tex_format_is_astc_ldr(m_format)) + { + // ASTC LDR 4x4-12x12 + const uint32_t block_width = get_block_width(), block_height = get_block_height(); + + const uint32_t num_blocks_x = (level_width + block_width - 1) / block_width; + const uint32_t num_blocks_y = (level_height + block_height - 1) / block_height; + + //assert(uncomp_level_data_size == m_levels[level_index].m_uncompressed_byte_length.get_uint64()); + if (uncomp_level_data_size != m_levels[level_index].m_uncompressed_byte_length.get_uint64()) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_level: m_uncompressed_byte_length is invalid\n"); + return false; + } + + const uint64_t total_2D_image_size = (uint64_t)num_blocks_x * num_blocks_y * sizeof(astc_helpers::astc_block); + + const uint64_t uncomp_ofs = (layer_index * m_header.m_face_count + face_index) * total_2D_image_size; + + if ((total_2D_image_size > UINT32_MAX) || ((size_t)uncomp_ofs != uncomp_ofs)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_level: size too large\n"); + return false; + } + + // Sanity checks + if (uncomp_ofs >= uncomp_level_data_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_level: uncomp_ofs >= total_2D_image_size\n"); + return false; + } + + if ((uncomp_level_data_size - uncomp_ofs) < total_2D_image_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_level: (uncomp_level_data_size - uncomp_ofs) < total_2D_image_size\n"); + return false; + } + + assert(total_2D_image_size <= UINT32_MAX); + + // if the header's vkformat is odd, it's linear, even is sRGB + const bool uses_astc_src_decode_profile = ((uint32_t)m_header.m_vk_format & 1) == 0; + + if (!m_xuastc_ldr_transcoder.transcode_image(m_format, uses_astc_src_decode_profile, fmt, + pOutput_blocks, 
output_blocks_buf_size_in_blocks_or_pixels, + (const uint8_t*)pUncomp_level_data + (size_t)uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks_x, num_blocks_y, level_width, level_height, level_index, 0, (uint32_t)total_2D_image_size, decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1)) { - BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: ASTC 6x6 HDR transcode_image() failed, this is either a bug or the file is corrupted/invalid\n"); + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_level: ASTC 6x6 HDR transcode_image() failed, this is either a bug or the file is corrupted/invalid\n"); + return false; + } + + } + else if (basis_tex_format_is_xuastc_ldr(m_format)) + { + // XUASTC LDR 4x4-12x12 + if (!m_slice_offset_len_descs.size()) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_level: must call start_transcoding() first\n"); + return false; + } + + const uint32_t block_width = get_block_width(), block_height = get_block_height(); + + const uint32_t num_blocks_x = (level_width + block_width - 1) / block_width; + const uint32_t num_blocks_y = (level_height + block_height - 1) / block_height; + + const uint32_t image_index = + (level_index * basisu::maximum(m_header.m_layer_count, 1) * m_header.m_face_count) + + layer_index * m_header.m_face_count + + face_index; + + // Sanity check + if (image_index >= m_slice_offset_len_descs.size()) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_level: Invalid image_index\n"); + return false; + } + + const ktx2_slice_offset_len_desc_orig& image_desc = m_slice_offset_len_descs[image_index]; + + // XUASTC LDR has its own tiny header at the start of the compressed data with this profile bit, so it'll use that for decoding if needed. 
+ bool uses_astc_src_decode_profile = true; + + if (!m_xuastc_ldr_transcoder.transcode_image(m_format, uses_astc_src_decode_profile, fmt, + pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, + m_pData, m_data_size, num_blocks_x, num_blocks_y, level_width, level_height, level_index, + m_levels[level_index].m_byte_offset.get_uint64() + image_desc.m_slice_byte_offset, image_desc.m_slice_byte_length, + decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_level: XUASTC LDR transcode_image() failed, this is either a bug or the file is corrupted/invalid\n"); return false; } + } - else if ((m_format == basist::basis_tex_format::cUASTC4x4) || + else if ((m_format == basist::basis_tex_format::cUASTC_LDR_4x4) || (m_format == basist::basis_tex_format::cUASTC_HDR_4x4)) { + // UASTC LDR 4x4 and UASTC HDR 4x4 + // Compute length and offset to uncompressed 2D UASTC texture data, given the face/layer indices. 
assert(uncomp_level_data_size == m_levels[level_index].m_uncompressed_byte_length.get_uint64()); - const uint32_t total_2D_image_size = num_blocks4_x * num_blocks4_y * KTX2_UASTC_BLOCK_SIZE; + const uint64_t total_2D_image_size = (uint64_t)num_blocks4_x * num_blocks4_y * KTX2_UASTC_BLOCK_SIZE; + + const uint64_t uncomp_ofs = (layer_index * m_header.m_face_count + face_index) * total_2D_image_size; - const uint32_t uncomp_ofs = (layer_index * m_header.m_face_count + face_index) * total_2D_image_size; + if ((total_2D_image_size > UINT32_MAX) || ((size_t)uncomp_ofs != uncomp_ofs)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_level: size too large\n"); + return false; + } // Sanity checks if (uncomp_ofs >= uncomp_level_data_size) { - BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: uncomp_ofs >= total_2D_image_size\n"); + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_level: uncomp_ofs >= total_2D_image_size\n"); return false; } if ((uncomp_level_data_size - uncomp_ofs) < total_2D_image_size) { - BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: (uncomp_level_data_size - uncomp_ofs) < total_2D_image_size\n"); + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_level: (uncomp_level_data_size - uncomp_ofs) < total_2D_image_size\n"); return false; } + assert(total_2D_image_size <= UINT32_MAX); + if (m_format == basist::basis_tex_format::cUASTC_HDR_4x4) { + // UASTC HDR 4x4 if (!m_uastc_hdr_transcoder.transcode_image(fmt, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, (const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks4_x, num_blocks4_y, level_width, level_height, level_index, 0, (uint32_t)total_2D_image_size, decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1)) { - BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: UASTC HDR transcode_image() failed, this is either a bug or the file is 
corrupted/invalid\n"); + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_level: UASTC HDR transcode_image() failed, this is either a bug or the file is corrupted/invalid\n"); return false; } } else { - if (!m_uastc_transcoder.transcode_image(fmt, + // UASTC LDR 4x4 + if (!m_uastc_ldr_transcoder.transcode_image(fmt, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, (const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks4_x, num_blocks4_y, level_width, level_height, level_index, 0, (uint32_t)total_2D_image_size, decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1)) { - BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: UASTC transcode_image() failed, this is either a bug or the file is corrupted/invalid\n"); + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_level: UASTC transcode_image() failed, this is either a bug or the file is corrupted/invalid\n"); return false; } } @@ -19628,19 +20408,19 @@ namespace basist else { // Shouldn't get here. 
- BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: Internal error\n"); + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_level: Internal error\n"); assert(0); return false; } return true; } - + bool ktx2_transcoder::decompress_level_data(uint32_t level_index, basisu::uint8_vec& uncomp_data) { const uint8_t* pComp_data = m_levels[level_index].m_byte_offset.get_uint64() + m_pData; const uint64_t comp_size = m_levels[level_index].m_byte_length.get_uint64(); - + const uint64_t uncomp_size = m_levels[level_index].m_uncompressed_byte_length.get_uint64(); if (((size_t)comp_size) != comp_size) @@ -19659,7 +20439,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Out of memory\n"); return false; } - + if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD) { #if BASISD_SUPPORT_KTX2_ZSTD @@ -19675,6 +20455,8 @@ namespace basist return false; } #else + BASISU_NOTE_UNUSED(pComp_data); + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: File uses Zstd supercompression, but Zstd support was not enabled at compile time (BASISD_SUPPORT_KTX2_ZSTD is 0)\n"); return false; #endif @@ -19683,38 +20465,59 @@ namespace basist return true; } - bool ktx2_transcoder::read_astc_6x6_hdr_intermediate_global_data() + bool ktx2_transcoder::read_slice_offset_len_global_data(bool read_std_structs) { const uint32_t image_count = basisu::maximum(m_header.m_layer_count, 1) * m_header.m_face_count * m_header.m_level_count; assert(image_count); - + const uint8_t* pSrc = m_pData + m_header.m_sgd_byte_offset.get_uint64(); - if (m_header.m_sgd_byte_length.get_uint64() != image_count * sizeof(ktx2_astc_hdr_6x6_intermediate_image_desc)) + m_slice_offset_len_descs.resize(image_count); + + // SGD offset/length already sanity checked to be inside the file and after the KTX2 header. 
+ if (read_std_structs) { - BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_astc_6x6_hdr_intermediate_global_data: Invalid global data length\n"); - return false; - } + if (m_header.m_sgd_byte_length.get_uint64() != image_count * sizeof(ktx2_slice_offset_len_desc_std)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_slice_offset_len_global_data: Invalid global data length (0)\n"); + return false; + } - m_astc_6x6_intermediate_image_descs.resize(image_count); + const ktx2_slice_offset_len_desc_std* pSrc_std_descs = reinterpret_cast(pSrc); - memcpy((void *)m_astc_6x6_intermediate_image_descs.data(), pSrc, sizeof(ktx2_astc_hdr_6x6_intermediate_image_desc) * image_count); + for (uint32_t i = 0; i < image_count; i++) + { + // TODO: Ignoring type (profile) for now, but we could check it + m_slice_offset_len_descs[i].m_slice_byte_offset = pSrc_std_descs[i].m_slice_byte_offset; + m_slice_offset_len_descs[i].m_slice_byte_length = pSrc_std_descs[i].m_slice_byte_length; + } + } + else + { + if (m_header.m_sgd_byte_length.get_uint64() != image_count * sizeof(ktx2_slice_offset_len_desc_orig)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_slice_offset_len_global_data: Invalid global data length (1)\n"); + return false; + } + + memcpy((void*)m_slice_offset_len_descs.data(), pSrc, sizeof(ktx2_slice_offset_len_desc_orig) * image_count); + } // Sanity check the image descs for (uint32_t i = 0; i < image_count; i++) { // transcode_image() will validate the slice offsets/lengths before transcoding. 
- if (!m_astc_6x6_intermediate_image_descs[i].m_rgb_slice_byte_length) + if (!m_slice_offset_len_descs[i].m_slice_byte_length) { - BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_astc_6x6_hdr_intermediate_global_data: image descs sanity check failed (1)\n"); + BASISU_DEVEL_ERROR("ktx2_transcoder::read_slice_offset_len_global_data: image descs sanity check failed (1)\n"); return false; } } return true; } - + bool ktx2_transcoder::decompress_etc1s_global_data() { // Note: we don't actually support 3D textures in here yet @@ -19753,13 +20556,13 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: SGD byte length is too small, file is invalid or corrupted\n"); return false; } - + if (!m_etc1s_image_descs.try_resize(image_count)) { BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: Out of memory\n"); return false; } - + memcpy((void *)m_etc1s_image_descs.data(), pSrc, sizeof(ktx2_etc1s_image_desc) * image_count); pSrc += sizeof(ktx2_etc1s_image_desc) * image_count; @@ -19793,7 +20596,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: decode_tables() failed, file is invalid or corrupted\n"); return false; } - + if (!m_etc1s_transcoder.decode_palettes( m_etc1s_header.m_endpoint_count, pEndpoint_data, m_etc1s_header.m_endpoints_byte_length, m_etc1s_header.m_selector_count, pSelector_data, m_etc1s_header.m_selectors_byte_length)) @@ -19801,7 +20604,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: decode_palettes() failed, file is likely corrupted\n"); return false; } - + return true; } @@ -19823,7 +20626,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Invalid KVD byte offset\n"); return false; } - + if ((m_header.m_kvd_byte_offset.get_uint64() + m_header.m_kvd_byte_length.get_uint64()) > m_data_size) { BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Invalid KVD byte offset and/or length\n"); @@ -19842,7 +20645,7 @@ 
namespace basist while (src_left > sizeof(uint32_t)) { uint32_t l = basisu::read_le_dword(pSrc); - + pSrc += sizeof(uint32_t); src_left -= sizeof(uint32_t); @@ -19863,7 +20666,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n"); return false; } - + basisu::uint8_vec& key_data = m_key_values.back().m_key; basisu::uint8_vec& value_data = m_key_values.back().m_value; @@ -19892,7 +20695,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n"); return false; } - + if (!value_data.try_resize(l)) { BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n"); @@ -19928,7 +20731,7 @@ namespace basist return true; } - + #endif // BASISD_SUPPORT_KTX2 bool basisu_transcoder_supports_ktx2() @@ -19995,10 +20798,10 @@ namespace basist basist::half_float result = (basist::half_float)((s << 15) | (e << 10) | m); return result; } - + //------------------------------------------------------------------------------------------------ // HDR support - // + // // Originally from bc6h_enc.cpp // BC6H decoder fuzzed vs. 
DirectXTex's for unsigned/signed @@ -20027,7 +20830,7 @@ namespace basist const bc6h_bit_layout g_bc6h_bit_layouts[NUM_BC6H_MODES][MAX_BC6H_LAYOUT_INDEX] = { // comp_index, subset*2+lh_index, last_bit, first_bit - //------------------------ mode 0: 2 subsets, Weight bits: 46 bits, Endpoint bits: 75 bits (10.555, 10.555, 10.555), delta + //------------------------ mode 0: 2 subsets, Weight bits: 46 bits, Endpoint bits: 75 bits (10.555, 10.555, 10.555), delta { { 1, 2, 4, -1 }, { 2, 2, 4, -1 }, { 2, 3, 4, -1 }, { 0, 0, 9, 0 }, { 1, 0, 9, 0 }, { 2, 0, 9, 0 }, { 0, 1, 4, 0 }, { 1, 3, 4, -1 }, { 1, 2, 3, 0 }, { 1, 1, 4, 0 }, { 2, 3, 0, -1 }, { 1, 3, 3, 0 }, { 2, 1, 4, 0 }, { 2, 3, 1, -1 }, { 2, 2, 3, 0 }, { 0, 2, 4, 0 }, { 2, 3, 2, -1 }, { 0, 3, 4, 0 }, { 2, 3, 3, -1 }, { 3, -1, 4, 0 }, {-1, 0, 0, 0} }, @@ -20078,7 +20881,7 @@ namespace basist { { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 3, 0 },{ 0, 0, 10, 15 },{ 1, 1, 3, 0 },{ 1, 0, 10, 15 },{ 2, 1, 3, 0 },{ 2, 0, 10, 15 }, {-1, 0, 0, 0} } }; - // The same as the first 32 2-subset patterns in BC7. + // The same as the first 32 2-subset patterns in BC7. // Bit 7 is a flag indicating that the weight uses 1 less bit than usual. const uint8_t g_bc6h_2subset_patterns[TOTAL_BC6H_PARTITION_PATTERNS][4][4] = // [pat][y][x] { @@ -20102,7 +20905,7 @@ namespace basist const uint8_t g_bc6h_weight3[8] = { 0, 9, 18, 27, 37, 46, 55, 64 }; const uint8_t g_bc6h_weight4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; - + static inline void write_bits(uint64_t val, uint32_t num_bits, uint32_t& bit_pos, uint64_t& l, uint64_t& h) { assert((num_bits) && (num_bits < 64) && (bit_pos < 128)); @@ -20231,15 +21034,18 @@ namespace basist return unq; } #endif - + // 6,7,8,9,10,11,12 const uint32_t BC6H_BLOG_TAB_MIN = 6; const uint32_t BC6H_BLOG_TAB_MAX = 12; //const uint32_t BC6H_BLOG_TAB_NUM = BC6H_BLOG_TAB_MAX - BC6H_BLOG_TAB_MIN + 1; - + // Handles 16, or 6-12 bits. Others assert. 
static inline uint32_t half_to_blog_tab(half_float h, uint32_t num_bits) { + BASISU_NOTE_UNUSED(BC6H_BLOG_TAB_MIN); + BASISU_NOTE_UNUSED(BC6H_BLOG_TAB_MAX); + assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT); assert((num_bits == 16) || ((num_bits >= BC6H_BLOG_TAB_MIN) && (num_bits <= BC6H_BLOG_TAB_MAX))); @@ -20255,7 +21061,7 @@ namespace basist else { assert((num_bits >= BC6H_BLOG_TAB_MIN) && (num_bits <= BC6H_BLOG_TAB_MAX)); - + // Note: This used to be done using a table lookup, but it required ~224KB of tables. This isn't quite as accurate, but the error is very slight (+-1 half values as ints). return bc6h_half_to_blog(h, num_bits); } @@ -20385,7 +21191,7 @@ namespace basist log_blk.m_mode = mode; pack_bc6h_block(*pPacked_block, log_blk); - + return; } @@ -20785,13 +21591,13 @@ namespace basist half_float endpoints[3][2]; endpoints[0][0] = pColor[0]; endpoints[0][1] = pColor[0]; - + endpoints[1][0] = pColor[1]; endpoints[1][1] = pColor[1]; endpoints[2][0] = pColor[2]; endpoints[2][1] = pColor[2]; - + bc6h_enc_block_1subset_4bit_weights(pPacked_block, endpoints, weights); return true; @@ -20828,7 +21634,7 @@ namespace basist static inline int astc_hdr_sign_extend(int src, int num_src_bits) { - assert(basisu::in_range(num_src_bits, 2, 31)); + assert(basisu::is_in_range(num_src_bits, 2, 31)); const bool negative = (src & (1 << (num_src_bits - 1))) != 0; if (negative) @@ -21069,7 +21875,7 @@ namespace basist if (ohm & 0x12) vb1 |= (x3 << 7); const int shamt = (mode >> 1) ^ 3; - + va = (uint32_t)va << shamt; vb0 = (uint32_t)vb0 << shamt; vb1 = (uint32_t)vb1 << shamt; @@ -21139,7 +21945,7 @@ namespace basist { assert(g_astc_hdr_core_initialized); assert((best_blk.m_weight_ise_range >= 1) && (best_blk.m_weight_ise_range <= 8)); - + if (best_blk.m_weight_ise_range == 5) { // Use 3-bit BC6H weights which are a perfect match for 3-bit ASTC weights, but encode 1-subset as 2 equal subsets @@ -21221,7 +22027,7 @@ namespace basist assert(g_astc_hdr_core_initialized); 
assert(best_blk.m_num_partitions == 2); assert(common_part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2); - + half_float bc6h_endpoints[2][3][2]; // [subset][comp][lh_index] // UASTC HDR checks @@ -21230,7 +22036,7 @@ namespace basist return false; if ((best_blk.m_color_endpoint_modes[0] != 7) && (best_blk.m_color_endpoint_modes[0] != 11)) return false; - + if (best_blk.m_color_endpoint_modes[0] == 7) { if (!(((best_blk.m_weight_ise_range == 1) && (best_blk.m_endpoint_ise_range == 20)) || @@ -21355,7 +22161,7 @@ namespace basist assert(0); return false; } - + if (log_blk.m_solid_color_flag_ldr) { // Don't support LDR solid colors. @@ -21371,7 +22177,7 @@ namespace basist // Only support 4x4 grid sizes if ((log_blk.m_grid_width != 4) || (log_blk.m_grid_height != 4)) return false; - + // Don't support dual plane encoding if (log_blk.m_dual_plane) return false; @@ -21379,11 +22185,11 @@ namespace basist if (log_blk.m_num_partitions == 1) { // Handle 1 partition (or subset) - + // UASTC HDR checks if ((log_blk.m_weight_ise_range < 1) || (log_blk.m_weight_ise_range > 8)) return false; - + int e[2][3]; bool success; @@ -21429,7 +22235,7 @@ namespace basist for (uint32_t i = 0; i < 2; i++) if (is_half_inf_or_nan(h_e[0][i]) || is_half_inf_or_nan(h_e[1][i]) || is_half_inf_or_nan(h_e[2][i])) return false; - + // Transcode to bc6h if (!transcode_bc6h_1subset(h_e, log_blk, dst_blk)) return false; @@ -21442,7 +22248,7 @@ namespace basist return false; assert(common_bc7_pat_index < (int)basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2); - + if (!transcode_bc6h_2subsets(common_bc7_pat_index, log_blk, dst_blk)) return false; } @@ -21454,7 +22260,7 @@ namespace basist return true; } - + // ASTC 6x6 support namespace astc_6x6_hdr { @@ -21582,7 +22388,7 @@ namespace basist // 3x3 { false, 7, 3, 3, 3, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_8_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_8_LEVELS, 0, 0 }, - // 6x4 + // 6x4 { false, 7, 3, 6, 4, 
astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 }, { false, 7, 3, 4, 6, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 }, @@ -21610,7 +22416,7 @@ namespace basist { false, 7, 3, 5, 4, astc_helpers::BISE_40_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_40_LEVELS, astc_helpers::BISE_3_LEVELS, 0, 0 }, { false, 7, 3, 4, 5, astc_helpers::BISE_40_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_40_LEVELS, astc_helpers::BISE_3_LEVELS, 0, 0 }, }; - + const reuse_xy_delta g_reuse_xy_deltas[NUM_REUSE_XY_DELTAS] = { { -1, 0 }, { -2, 0 }, { -3, 0 }, { -4, 0 }, @@ -21656,27 +22462,27 @@ namespace basist int bit = get_bit(src_val, src_bit); dst |= (bit << dst_bit); } - - // Valid for weight ISE ranges 12-192 levels. Preserves upper 2 or 3 bits post-quantization. - static uint8_t g_quantize_tables_preserve2[astc_helpers::TOTAL_ISE_RANGES - 1][256]; - static uint8_t g_quantize_tables_preserve3[astc_helpers::TOTAL_ISE_RANGES - 1][256]; + + // Valid for weight ISE ranges 6-192 or 8-192 levels. Preserves upper 2 or 3 bits post-quantization. 
+ uint8_t g_quantize_tables_preserve2[21 - 1][256]; // astc_helpers::TOTAL_ISE_RANGES=21, valid for >= BISE_6_LEVELS + uint8_t g_quantize_tables_preserve3[21 - 1][256]; // valid for >= BISE_8_LEVELS const uint32_t g_part2_unique_index_to_seed[NUM_UNIQUE_PARTITIONS2] = { - 86, 959, 936, 476, 1007, 672, 447, 423, 488, 422, 273, 65, 267, 786, 585, 195, 108, 731, 878, 812, 264, 125, 868, 581, 258, 390, 549, 872, 661, 352, 645, 543, 988, - 906, 903, 616, 482, 529, 3, 286, 272, 303, 151, 504, 498, 260, 79, 66, 608, 769, 305, 610, 1014, 967, 835, 789, 7, 951, 691, 15, 763, 976, 438, 314, 601, 673, 177, - 252, 615, 436, 220, 899, 623, 433, 674, 278, 797, 107, 847, 114, 470, 760, 821, 490, 329, 945, 387, 471, 225, 172, 83, 418, 966, 439, 316, 247, 43, 343, 625, 798, - 1, 61, 73, 307, 136, 474, 42, 664, 1013, 249, 389, 227, 374, 121, 48, 538, 226, 309, 554, 802, 834, 335, 495, 10, 955, 461, 293, 508, 153, 101, 63, 139, 31, 687, - 132, 174, 324, 545, 289, 39, 178, 594, 963, 854, 222, 323, 998, 964, 598, 475, 720, 1019, 983, 91, 703, 614, 394, 612, 281, 207, 930, 758, 586, 128, 517, 426, 306, - 168, 713, 36, 458, 876, 368, 780, 5, 9, 214, 109, 553, 726, 175, 103, 753, 684, 44, 665, 53, 500, 367, 611, 119, 732, 639, 326, 203, 156, 686, 910, 255, 62, 392, 591, - 112, 88, 213, 19, 1022, 478, 90, 486, 799, 702, 730, 414, 99, 1008, 142, 886, 373, 216, 69, 393, 299, 648, 415, 822, 912, 110, 567, 550, 693, 2, 138, 59, 271, 562, 295, - 714, 719, 199, 893, 831, 1006, 662, 235, 262, 78, 51, 902, 298, 190, 169, 583, 347, 890, 958, 909, 49, 987, 696, 633, 480, 50, 764, 826, 1023, 1016, 437, 891, 774, 257, - 724, 791, 526, 593, 690, 638, 858, 895, 794, 995, 130, 87, 877, 819, 318, 649, 376, 211, 284, 937, 370, 688, 229, 994, 115, 842, 60, 521, 95, 694, 804, 146, 754, 487, 55, - 17, 770, 450, 223, 4, 137, 911, 236, 683, 523, 47, 181, 24, 270, 602, 736, 11, 355, 148, 351, 762, 1009, 16, 210, 619, 805, 874, 807, 887, 403, 999, 810, 27, 402, 551, 135, - 778, 33, 409, 993, 71, 363, 159, 183, 
77, 596, 670, 380, 968, 811, 404, 348, 539, 158, 578, 196, 621, 68, 530, 193, 100, 167, 919, 353, 366, 327, 643, 948, 518, 756, 801, 558, - 28, 705, 116, 94, 898, 453, 622, 647, 231, 445, 652, 230, 191, 277, 292, 254, 198, 766, 386, 232, 29, 70, 942, 740, 291, 607, 411, 496, 839, 8, 675, 319, 742, 21, 547, 627, 716, - 663, 23, 914, 631, 595, 499, 685, 950, 510, 54, 587, 432, 45, 646, 25, 122, 947, 171, 862, 441, 808, 722, 14, 74, 658, 129, 266, 1001, 534, 395, 527, 250, 206, 237, 67, 897, 634, - 572, 569, 533, 37, 341, 89, 463, 419, 75, 134, 283, 943, 519, 362, 144, 681, 407, 954, 131, 455, 934, 46, 513, 339, 194, 361, 606, 852, 546, 655, 1015, 147, 506, 240, 56, 836, 76, + 86, 959, 936, 476, 1007, 672, 447, 423, 488, 422, 273, 65, 267, 786, 585, 195, 108, 731, 878, 812, 264, 125, 868, 581, 258, 390, 549, 872, 661, 352, 645, 543, 988, + 906, 903, 616, 482, 529, 3, 286, 272, 303, 151, 504, 498, 260, 79, 66, 608, 769, 305, 610, 1014, 967, 835, 789, 7, 951, 691, 15, 763, 976, 438, 314, 601, 673, 177, + 252, 615, 436, 220, 899, 623, 433, 674, 278, 797, 107, 847, 114, 470, 760, 821, 490, 329, 945, 387, 471, 225, 172, 83, 418, 966, 439, 316, 247, 43, 343, 625, 798, + 1, 61, 73, 307, 136, 474, 42, 664, 1013, 249, 389, 227, 374, 121, 48, 538, 226, 309, 554, 802, 834, 335, 495, 10, 955, 461, 293, 508, 153, 101, 63, 139, 31, 687, + 132, 174, 324, 545, 289, 39, 178, 594, 963, 854, 222, 323, 998, 964, 598, 475, 720, 1019, 983, 91, 703, 614, 394, 612, 281, 207, 930, 758, 586, 128, 517, 426, 306, + 168, 713, 36, 458, 876, 368, 780, 5, 9, 214, 109, 553, 726, 175, 103, 753, 684, 44, 665, 53, 500, 367, 611, 119, 732, 639, 326, 203, 156, 686, 910, 255, 62, 392, 591, + 112, 88, 213, 19, 1022, 478, 90, 486, 799, 702, 730, 414, 99, 1008, 142, 886, 373, 216, 69, 393, 299, 648, 415, 822, 912, 110, 567, 550, 693, 2, 138, 59, 271, 562, 295, + 714, 719, 199, 893, 831, 1006, 662, 235, 262, 78, 51, 902, 298, 190, 169, 583, 347, 890, 958, 909, 49, 987, 696, 633, 480, 50, 764, 826, 1023, 1016, 
437, 891, 774, 257, + 724, 791, 526, 593, 690, 638, 858, 895, 794, 995, 130, 87, 877, 819, 318, 649, 376, 211, 284, 937, 370, 688, 229, 994, 115, 842, 60, 521, 95, 694, 804, 146, 754, 487, 55, + 17, 770, 450, 223, 4, 137, 911, 236, 683, 523, 47, 181, 24, 270, 602, 736, 11, 355, 148, 351, 762, 1009, 16, 210, 619, 805, 874, 807, 887, 403, 999, 810, 27, 402, 551, 135, + 778, 33, 409, 993, 71, 363, 159, 183, 77, 596, 670, 380, 968, 811, 404, 348, 539, 158, 578, 196, 621, 68, 530, 193, 100, 167, 919, 353, 366, 327, 643, 948, 518, 756, 801, 558, + 28, 705, 116, 94, 898, 453, 622, 647, 231, 445, 652, 230, 191, 277, 292, 254, 198, 766, 386, 232, 29, 70, 942, 740, 291, 607, 411, 496, 839, 8, 675, 319, 742, 21, 547, 627, 716, + 663, 23, 914, 631, 595, 499, 685, 950, 510, 54, 587, 432, 45, 646, 25, 122, 947, 171, 862, 441, 808, 722, 14, 74, 658, 129, 266, 1001, 534, 395, 527, 250, 206, 237, 67, 897, 634, + 572, 569, 533, 37, 341, 89, 463, 419, 75, 134, 283, 943, 519, 362, 144, 681, 407, 954, 131, 455, 934, 46, 513, 339, 194, 361, 606, 852, 546, 655, 1015, 147, 506, 240, 56, 836, 76, 98, 600, 430, 388, 980, 695, 817, 279, 58, 215, 149, 170, 531, 870, 18, 727, 154, 26, 938, 929, 302, 697, 452, 218, 700, 524, 828, 751, 869, 217, 440, 354 }; @@ -21707,7 +22513,9 @@ namespace basist static void init_quantize_tables() { - for (uint32_t ise_range = astc_helpers::BISE_192_LEVELS; ise_range >= astc_helpers::BISE_12_LEVELS; ise_range--) + // 9/15/2025 changed lower range for LDR + // for (uint32_t ise_range = astc_helpers::BISE_192_LEVELS; ise_range >= astc_helpers::BISE_12_LEVELS; ise_range--) + for (uint32_t ise_range = astc_helpers::BISE_192_LEVELS; ise_range >= astc_helpers::BISE_6_LEVELS; ise_range--) { const uint32_t num_levels = astc_helpers::get_ise_levels(ise_range); const auto& ise_to_val_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_range).m_ISE_to_val; @@ -21739,6 +22547,7 @@ namespace basist g_quantize_tables_preserve2[ise_range][desired_val] = 
(uint8_t)best_ise_val; } + if (ise_range >= astc_helpers::BISE_8_LEVELS) { uint32_t best_err = UINT32_MAX; int best_ise_val = -1; @@ -21894,7 +22703,7 @@ namespace basist #endif } - void copy_weight_grid(bool dual_plane, uint32_t grid_x, uint32_t grid_y, const uint8_t* transcode_weights, astc_helpers::log_astc_block& decomp_blk) + void copy_weight_grid(bool dual_plane, uint32_t grid_x, uint32_t grid_y, const uint8_t* transcode_weights, astc_helpers::log_astc_block& decomp_blk, bool orig_behavior) { assert(decomp_blk.m_weight_ise_range >= astc_helpers::BISE_2_LEVELS); assert(decomp_blk.m_weight_ise_range <= astc_helpers::BISE_32_LEVELS); @@ -21913,11 +22722,12 @@ namespace basist compute_upsample_weights(4, 4, 2, 2, weights); - for (uint32_t y = 0; y < 4; y++) + for (uint32_t dy = 0; dy < 4; dy++) { - for (uint32_t x = 0; x < 4; x++) + for (uint32_t dx = 0; dx < 4; dx++) { - const astc_helpers::weighted_sample& sample = weights[x + y * 4]; + const astc_helpers::weighted_sample& sample = weights[dx + dy * 4]; + const int sx = sample.m_src_x, sy = sample.m_src_y; uint32_t total_weight = 8; @@ -21928,7 +22738,19 @@ namespace basist if (!sample.m_weights[yo][xo]) continue; - total_weight += dequant_weight[transcode_weights[basisu::in_bounds((x + xo) + (y + yo) * grid_x, 0, grid_x * grid_y)]] * sample.m_weights[yo][xo]; + // 10/17/2025 - bugfix. Orig release would always sample the 1st or 2nd weight here. Minor issue - encoder would have detected it, hurting R-D performance a tiny bit but not encoding/decoding correctness. + // However, this fix does cause decoding divergence from original encodes. The divergence seems minor and can only happen at higher lambdas. + if (orig_behavior) + { + // Original, incorrect, but ultimately harmless behavior. + total_weight += dequant_weight[transcode_weights[basisu::is_in_bounds((dx + xo) + (dy + yo) * grid_x, 0, grid_x * grid_y)]] * sample.m_weights[yo][xo]; + } + else + { + // Correct behavior. 
+ assert(basisu::is_in_bounds((sx + xo) + (sy + yo) * grid_x, 0, grid_x * grid_y)); + total_weight += dequant_weight[transcode_weights[(sx + xo) + (sy + yo) * grid_x]] * sample.m_weights[yo][xo]; + } } // x } // y @@ -21936,7 +22758,7 @@ namespace basist assert(total_weight <= 64); - decomp_blk.m_weights[x + y * 4] = quant_weight[total_weight]; + decomp_blk.m_weights[dx + dy * 4] = quant_weight[total_weight]; } } } @@ -22063,7 +22885,7 @@ namespace basist assert((cem == 7) || (cem == 11)); return (cem == 11) ? basist::NUM_MODE11_ENDPOINTS : basist::NUM_MODE7_ENDPOINTS; } - + const uint32_t g_bc6h_weights4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; #if 0 @@ -22147,7 +22969,7 @@ namespace basist float res = (float)fast_float_to_half_no_clamp_neg_nan_or_inf(fabsf(f)) * ((f < 0.0f) ? -1.0f : 1.0f); return res; } - + // Supports positive and denormals only. No NaN or Inf. static BASISU_FORCE_INLINE float fast_half_to_float_pos_not_inf_or_nan(basist::half_float h) { @@ -22180,13 +23002,12 @@ namespace basist static const int FAST_BC6H_STD_DEV_THRESH = 256; static const int FAST_BC6H_COMPLEX_STD_DEV_THRESH = 512; static const int FAST_BC6H_VERY_COMPLEX_STD_DEV_THRESH = 2048; - - + static double assign_weights_4( const vec3F* pFloat_pixels, const float* pPixel_scales, uint8_t* pWeights, int min_r, int min_g, int min_b, - int max_r, int max_g, int max_b, int64_t block_max_var, bool try_2subsets_flag, + int max_r, int max_g, int max_b, int64_t block_max_var, bool try_2subsets_flag, const fast_bc6h_params& params) { float cr[16], cg[16], cb[16]; @@ -22304,7 +23125,7 @@ namespace basist pWeights[i] = (uint8_t)best_idx; - // Giesen's MRSSE (Mean Relative Sum of Squared Errors). + // Giesen's MRSSE (Mean Relative Sum of Squared Errors). // Our ASTC HDR encoder uses slightly slower approx. MSLE, and it's too late/risky to eval the difference vs. MRSSE on the larger ASTC HDR blocks. 
float err = basisu::squaref(qr - cr[best_idx]) + basisu::squaref(qg - cg[best_idx]) + basisu::squaref(qb - cb[best_idx]); total_err += err * pPixel_scales[i]; @@ -22379,9 +23200,11 @@ namespace basist const fast_bc6h_params& params) { BASISU_NOTE_UNUSED(block_max_var); + float fmin_r = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)min_r); float fmin_g = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)min_g); float fmin_b = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)min_b); + float fmax_r = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)max_r); float fmax_g = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)max_g); float fmax_b = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)max_b); @@ -22476,7 +23299,7 @@ namespace basist float best_error = basisu::squaref(subset_cr[subset][0] - qr) + basisu::squaref(subset_cg[subset][0] - qg) + basisu::squaref(subset_cb[subset][0] - qb); uint32_t best_idx = 0; - + for (uint32_t j = 1; j < 8; j++) { float e = basisu::squaref(subset_cr[subset][j] - qr) + basisu::squaref(subset_cg[subset][j] - qg) + basisu::squaref(subset_cb[subset][j] - qb); @@ -22549,7 +23372,7 @@ namespace basist static basist::vec4F g_bc6h_ls_weights_3[8]; static basist::vec4F g_bc6h_ls_weights_4[16]; - + const uint32_t BC6H_NUM_PATS = 32; static uint32_t g_bc6h_pats2[BC6H_NUM_PATS]; @@ -22615,7 +23438,7 @@ namespace basist static void bc6h_quant_endpoints( uint32_t min_hr, uint32_t min_hg, uint32_t min_hb, uint32_t max_hr, uint32_t max_hg, uint32_t max_hb, - uint32_t& min_r, uint32_t& min_g, uint32_t& min_b, uint32_t& max_r, uint32_t& max_g, uint32_t& max_b, + uint32_t& min_r, uint32_t& min_g, uint32_t& min_b, uint32_t& max_r, uint32_t& max_g, uint32_t& max_b, int bits) { min_r = basist::bc6h_half_to_blog((basist::half_float)min_hr, bits); @@ -22641,7 +23464,7 @@ namespace basist max_hb = bc6h_convert_to_half(bc6h_dequantize(max_bb, bits)); } - static BASISU_FORCE_INLINE int popcount32(uint32_t 
x) + static BASISU_FORCE_INLINE int popcount32(uint32_t x) { #if defined(__EMSCRIPTEN__) || defined(__clang__) || defined(__GNUC__) return __builtin_popcount(x); @@ -22649,7 +23472,7 @@ namespace basist return __popcnt(x); #else int count = 0; - while (x) + while (x) { x &= (x - 1); ++count; @@ -22662,23 +23485,23 @@ namespace basist { return (x >= 0.0f) ? (int)(x + 0.5f) : (int)(x - 0.5f); } - + static void fast_encode_bc6h_2subsets_pattern( uint32_t best_pat_index, uint32_t best_pat_bits, const basist::half_float* pPixels, const vec3F* pFloat_pixels, const float* pPixel_scales, double& cur_error, basist::bc6h_logical_block& log_blk, int64_t block_max_var, - int mean_r, int mean_g, int mean_b, + int mean_r, int mean_g, int mean_b, const fast_bc6h_params& params) { BASISU_NOTE_UNUSED(block_max_var); - + uint32_t subset_means[2][3] = { { 0 } }; for (uint32_t i = 0; i < 16; i++) { const uint32_t subset_index = (best_pat_bits >> i) & 1; const uint32_t r = pPixels[i * 3 + 0], g = pPixels[i * 3 + 1], b = pPixels[i * 3 + 2]; - + subset_means[subset_index][0] += r; subset_means[subset_index][1] += g; subset_means[subset_index][2] += b; @@ -22731,7 +23554,7 @@ namespace basist subset_axis[subset_index].set(axis_r, axis_g, axis_b); } // s - + float subset_min_dot[2] = { basisu::BIG_FLOAT_VAL, basisu::BIG_FLOAT_VAL }; float subset_max_dot[2] = { -basisu::BIG_FLOAT_VAL, -basisu::BIG_FLOAT_VAL }; int subset_min_idx[2] = { 0 }, subset_max_idx[2] = { 0 }; @@ -22848,7 +23671,7 @@ namespace basist if (params.m_num_diff_endpoint_modes_to_try) { // ordered from largest base bits to least - static const int s_bc6h_mode_order2[2] = { 5, 1 }; + static const int s_bc6h_mode_order2[2] = { 5, 1 }; static const int s_bc6h_mode_order4[4] = { 0, 5, 7, 1 }; static const int s_bc6h_mode_order9[9] = { 2, 3, 4, 0, 5, 6, 7, 8, 1 }; @@ -22956,10 +23779,10 @@ namespace basist trial_log_blk.m_mode = bc6h_mode_index; trial_log_blk.m_partition_pattern = best_pat_index; - + 
memcpy(trial_log_blk.m_endpoints, abs_blog_endpoints, sizeof(trial_log_blk.m_endpoints)); memcpy(trial_log_blk.m_weights, trial_weights, 16); - + if (trial_log_blk.m_weights[0] & 4) { for (uint32_t c = 0; c < 3; c++) @@ -22986,7 +23809,7 @@ namespace basist trial_log_blk.m_weights[i] = 7 - trial_log_blk.m_weights[i]; } } - + if (bc6h_mode_index != BC6H_2SUBSET_ABS_ENDPOINT_MODE) { const uint32_t num_delta_bits[3] = { g_bc6h_mode_sig_bits[bc6h_mode_index][1], g_bc6h_mode_sig_bits[bc6h_mode_index][2], g_bc6h_mode_sig_bits[bc6h_mode_index][3] }; @@ -23027,7 +23850,7 @@ namespace basist const basist::half_float* pPixels, const vec3F* pFloat_pixels, const float* pPixel_scales, double& cur_error, basist::bc6h_logical_block& log_blk, int64_t block_max_var, - int mean_r, int mean_g, int mean_b, float block_axis_r, float block_axis_g, float block_axis_b, + int mean_r, int mean_g, int mean_b, float block_axis_r, float block_axis_g, float block_axis_b, const fast_bc6h_params& params) { assert((params.m_max_2subset_pats_to_try > 0) && (params.m_max_2subset_pats_to_try <= BC6H_NUM_PATS)); @@ -23048,7 +23871,7 @@ namespace basist } return; } - + uint32_t desired_pat_bits = 0; for (uint32_t i = 0; i < 16; i++) { @@ -23126,13 +23949,13 @@ namespace basist uint32_t omin_r = UINT32_MAX, omin_g = UINT32_MAX, omin_b = UINT32_MAX; uint32_t omax_r = 0, omax_g = 0, omax_b = 0; uint32_t total_r = 0, total_g = 0, total_b = 0; - + for (uint32_t i = 0; i < 16; i++) { uint32_t r = pPixels[i * 3 + 0]; uint32_t g = pPixels[i * 3 + 1]; uint32_t b = pPixels[i * 3 + 2]; - + total_r += r; total_g += g; total_b += b; @@ -23157,13 +23980,13 @@ namespace basist log_blk.m_endpoints[2][0] = basist::bc6h_half_to_blog16((basist::half_float)omin_b); log_blk.m_endpoints[2][1] = 0; - + log_blk.m_mode = 13; pack_bc6h_block(*pBlock, log_blk); return; } - + uint32_t min_r, min_g, min_b, max_r, max_g, max_b; int mean_r = (total_r + 8) / 16; @@ -23185,9 +24008,9 @@ namespace basist icov[4] += g * b; icov[5] += b 
* b; } - + int64_t block_max_var = basisu::maximum(icov[0], icov[3], icov[5]); // not divided by 16, i.e. scaled by 16 - + if (block_max_var < (FAST_BC6H_STD_DEV_THRESH * FAST_BC6H_STD_DEV_THRESH * 16)) { // Simple block @@ -23204,7 +24027,7 @@ namespace basist bc6h_quant_dequant_endpoints(min_r, min_g, min_b, max_r, max_g, max_b, 10); assign_weights_simple_4(pPixels, log_blk.m_weights, min_r, min_g, min_b, max_r, max_g, max_b, block_max_var, params); - + log_blk.m_endpoints[0][0] = basist::bc6h_half_to_blog((basist::half_float)min_r, 10); log_blk.m_endpoints[0][1] = basist::bc6h_half_to_blog((basist::half_float)max_r, 10); @@ -23282,7 +24105,7 @@ namespace basist uint32_t min_idx = 0, max_idx = 0; float min_dot = basisu::BIG_FLOAT_VAL, max_dot = -basisu::BIG_FLOAT_VAL; - + for (uint32_t i = 0; i < 16; i++) { float r = (float)pPixels[i * 3 + 0]; @@ -23318,12 +24141,13 @@ namespace basist max_g = pPixels[max_idx * 3 + 1]; max_b = pPixels[max_idx * 3 + 2]; + //assert((max_r < MAX_HALF_FLOAT_AS_INT_BITS) && (max_g < MAX_HALF_FLOAT_AS_INT_BITS) && (max_b < MAX_HALF_FLOAT_AS_INT_BITS)); assert((max_r <= MAX_HALF_FLOAT_AS_INT_BITS) && (max_g <= MAX_HALF_FLOAT_AS_INT_BITS) && (max_b <= MAX_HALF_FLOAT_AS_INT_BITS)); bc6h_quant_dequant_endpoints(min_r, min_g, min_b, max_r, max_g, max_b, 10); cur_err = assign_weights_4(float_pixels, pixel_scales, log_blk.m_weights, min_r, min_g, min_b, max_r, max_g, max_b, block_max_var, try_2subsets, params); - + const uint32_t MAX_LS_PASSES = params.m_hq_ls ? 
2 : 1; for (uint32_t pass = 0; pass < MAX_LS_PASSES; pass++) { @@ -23399,7 +24223,7 @@ namespace basist min_b = trial_min_b; max_b = trial_max_b; - + memcpy(log_blk.m_weights, trial_weights, 16); } else @@ -23441,7 +24265,7 @@ namespace basist std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][1]); } } - + if ((params.m_max_2subset_pats_to_try > 0) && ((try_2subsets) && (block_max_var > (FAST_BC6H_COMPLEX_STD_DEV_THRESH * FAST_BC6H_COMPLEX_STD_DEV_THRESH * 16)))) { fast_encode_bc6h_2subsets(pPixels, float_pixels, pixel_scales, cur_err, log_blk, block_max_var, mean_r, mean_g, mean_b, axis_r, axis_g, axis_b, params); @@ -23467,7 +24291,12 @@ namespace basist if (!decoder.init(pComp_data, comp_data_size)) return false; - if (decoder.get_bits(16) != 0xABCD) + bool orig_behavior = false; + + uint32_t hdr_sig = decoder.get_bits(16); + if (hdr_sig == UASTC_6x6_HDR_SIG0) + orig_behavior = true; + else if (hdr_sig != UASTC_6x6_HDR_SIG1) return false; width = decoder.get_bits(16); @@ -23649,7 +24478,7 @@ namespace basist uint8_t transcode_weights[MAX_BLOCK_W * MAX_BLOCK_H * 2]; requantize_astc_weights(total_grid_weights, log_blk.m_weights, log_blk.m_weight_ise_range, transcode_weights, decomp_blk.m_weight_ise_range); - copy_weight_grid(log_blk.m_dual_plane, log_blk.m_grid_width, log_blk.m_grid_height, transcode_weights, decomp_blk); + copy_weight_grid(log_blk.m_dual_plane, log_blk.m_grid_width, log_blk.m_grid_height, transcode_weights, decomp_blk, orig_behavior); #else assert(log_blk.m_user_mode < TOTAL_BLOCK_MODE_DECS); const block_mode_desc& bmd = g_block_mode_descs[(uint32_t)log_blk.m_user_mode]; @@ -23683,7 +24512,7 @@ namespace basist uint8_t transcode_weights[BLOCK_W * BLOCK_H * 2]; requantize_astc_weights(total_grid_weights, log_blk.m_weights, log_blk.m_weight_ise_range, transcode_weights, bmd.m_transcode_weight_ise_range); - copy_weight_grid(bmd.m_dp, bmd.m_grid_x, bmd.m_grid_y, transcode_weights, decomp_blk); + copy_weight_grid(bmd.m_dp, bmd.m_grid_x, 
bmd.m_grid_y, transcode_weights, decomp_blk, orig_behavior); #endif status = astc_helpers::pack_astc_block(phys_blk, decomp_blk); if (!status) @@ -23769,7 +24598,7 @@ namespace basist uint8_t transcode_weights[BLOCK_W * BLOCK_H * 2]; requantize_astc_weights(total_grid_weights, log_blk.m_weights, bmd.m_weight_ise_range, transcode_weights, bmd.m_transcode_weight_ise_range); - copy_weight_grid(bmd.m_dp, bmd.m_grid_x, bmd.m_grid_y, transcode_weights, decomp_blk); + copy_weight_grid(bmd.m_dp, bmd.m_grid_x, bmd.m_grid_y, transcode_weights, decomp_blk, orig_behavior); status = astc_helpers::pack_astc_block(phys_blk, decomp_blk); if (!status) @@ -23867,7 +24696,7 @@ namespace basist uint8_t transcode_weights[BLOCK_W * BLOCK_H * 2]; requantize_astc_weights(total_grid_weights, log_blk.m_weights, bmd.m_weight_ise_range, transcode_weights, bmd.m_transcode_weight_ise_range); - copy_weight_grid(bmd.m_dp, bmd.m_grid_x, bmd.m_grid_y, transcode_weights, decomp_blk); + copy_weight_grid(bmd.m_dp, bmd.m_grid_x, bmd.m_grid_y, transcode_weights, decomp_blk, orig_behavior); status = astc_helpers::pack_astc_block(phys_blk, decomp_blk); if (!status) @@ -23951,7 +24780,7 @@ namespace basist uint8_t transcode_weights[BLOCK_W * BLOCK_H * 2]; requantize_astc_weights(total_grid_weights, log_blk.m_weights, bmd.m_weight_ise_range, transcode_weights, bmd.m_transcode_weight_ise_range); - copy_weight_grid(bmd.m_dp, bmd.m_grid_x, bmd.m_grid_y, transcode_weights, decomp_blk); + copy_weight_grid(bmd.m_dp, bmd.m_grid_x, bmd.m_grid_y, transcode_weights, decomp_blk, orig_behavior); status = astc_helpers::pack_astc_block(phys_blk, decomp_blk); if (!status) @@ -23999,4 +24828,17026 @@ namespace basist #endif // BASISD_SUPPORT_UASTC_HDR -} // namespace basist +#if BASISD_SUPPORT_XUASTC +namespace astc_ldr_t +{ + bool g_initialized; + astc_block_grid_data_hash_t g_astc_block_grid_data_hash; + + // Used for quickly bumping up or down quantized, 2 complement+shifted base+offset delta values without disturbing 
the MSB. + static basisu::vector g_base_ofs_nudges[astc_helpers::BISE_256_LEVELS + 1][2]; // [endpoint_ise_range][pos=0, neg=1] + + const int s_unique_ldr_index_to_astc_cem[6] = + { + astc_helpers::CEM_LDR_LUM_DIRECT, + astc_helpers::CEM_LDR_LUM_ALPHA_DIRECT, + astc_helpers::CEM_LDR_RGB_BASE_SCALE, + astc_helpers::CEM_LDR_RGB_DIRECT, + astc_helpers::CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A, + astc_helpers::CEM_LDR_RGBA_DIRECT + }; + + static void compute_base_ofs_requantize_tabs() + { + for (uint32_t e_ise_range = astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE; e_ise_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE; e_ise_range++) + { + const uint32_t num_levels = astc_helpers::get_ise_levels(e_ise_range); + + for (uint32_t pos_or_neg = 0; pos_or_neg < 2; pos_or_neg++) + { + g_base_ofs_nudges[e_ise_range][pos_or_neg].resize(num_levels); + + const int delta = pos_or_neg ? -1 : 1; + + for (uint32_t cur_ise = 0; cur_ise < num_levels; cur_ise++) + { + int cur_dequant = astc_helpers::g_dequant_tables.get_endpoint_tab(e_ise_range).m_ISE_to_val[cur_ise]; + + int cur_a = cur_dequant, cur_b = 0; + astc_helpers::bit_transfer_signed_dec(cur_a, cur_b); + + int best_err = INT_MAX; + uint32_t best_trial_ise = 0; + + for (uint32_t trial_ise = 0; trial_ise < num_levels; trial_ise++) + { + int trial_dequant = astc_helpers::g_dequant_tables.get_endpoint_tab(e_ise_range).m_ISE_to_val[trial_ise]; + + int trial_a = trial_dequant, trial_b = 0; + astc_helpers::bit_transfer_signed_dec(trial_a, trial_b); + + // ensure the transferred bit hasn't changed + if (cur_b != trial_b) + continue; + + // skip if the decoded delta hasn't changed at all + if (trial_a == cur_a) + continue; + + // do they want to nudge neg or pos + if (delta < 0) + { + // neg nudge, but trial delta is higher + if (trial_a > cur_a) + continue; + } + else + { + // pos nudge, but trial delta is lower + if (trial_a < cur_a) + continue; + } + + int e = basisu::iabs(trial_a - cur_a); + if (e < best_err) + { + best_err = e; + 
best_trial_ise = trial_ise; + } + } // trial_ise + + if (best_err == INT_MAX) + { + //fmt_printf("Failed nudge: eise:{}, delta: {}, curise:{}, cura:{}, curb:{}\n", e_ise_range, delta, cur_ise, cur_a, cur_b); + + // Failed to nudge, leave it unchanged + best_trial_ise = cur_ise; + } + + g_base_ofs_nudges[e_ise_range][pos_or_neg][cur_ise] = (uint8_t)best_trial_ise; + + } // cur_ise + + } // pos_or_neg + + } // e_ise_range + } + + void init() + { + if (g_initialized) + return; + + g_initialized = true; + + init_astc_block_grid_data_hash(); + + compute_base_ofs_requantize_tabs(); + } + + color_rgba blue_contract_enc(color_rgba orig, bool& did_clamp, int encoded_b) + { + color_rgba enc; + + int tr = orig.r * 2 - encoded_b; + int tg = orig.g * 2 - encoded_b; + if ((tr < 0) || (tr > 255) || (tg < 0) || (tg > 255)) + did_clamp = true; + + enc.r = (uint8_t)basisu::clamp(tr, 0, 255); + enc.g = (uint8_t)basisu::clamp(tg, 0, 255); + enc.b = (uint8_t)orig.b; + enc.a = orig.a; + return enc; + } + + color_rgba blue_contract_dec(int enc_r, int enc_g, int enc_b, int enc_a) + { + color_rgba dec; + dec.r = (uint8_t)((enc_r + enc_b) >> 1); + dec.g = (uint8_t)((enc_g + enc_b) >> 1); + dec.b = (uint8_t)enc_b; + dec.a = (uint8_t)enc_a; + return dec; + } + + static inline int quant_preserve2(uint32_t ise_range, uint32_t v) + { + if (ise_range == astc_helpers::BISE_256_LEVELS) + return v; + + assert(ise_range >= astc_helpers::BISE_6_LEVELS); + + return basist::astc_6x6_hdr::g_quantize_tables_preserve2[ise_range][v]; + } + + //---------------------------------------------------------------------------------- + // Requantize endpoints, but preserves blue contraction and base+ofs bits as much as possible. + + // Blue contraction should be preserved almost always if quantizing down, except with base+ofs (extremely to incredibly rare). + // endpoints never swapped for base+ofs + // NOTE: Cannot use any floating point math for determinism across compilers. 
+ bool requantize_ise_endpoints(uint32_t cem, + uint32_t src_ise_endpoint_range, const uint8_t* pSrc_endpoints, + uint32_t dst_ise_endpoint_range, uint8_t* pDst_endpoints) + { + if (!astc_helpers::is_cem_ldr(cem)) + { + assert(0); + return false; + } + + const uint32_t num_endpoint_vals = astc_helpers::get_num_cem_values(cem); + assert(num_endpoint_vals <= astc_helpers::MAX_CEM_ENDPOINT_VALS); + + if (src_ise_endpoint_range == dst_ise_endpoint_range) + { + memcpy(pDst_endpoints, pSrc_endpoints, num_endpoint_vals); + return true; + } + + uint8_t dequantized_src_vals_temp[astc_helpers::MAX_CEM_ENDPOINT_VALS]; + const uint8_t* pDequantized_src_vals = pSrc_endpoints; + + if (src_ise_endpoint_range != astc_helpers::BISE_256_LEVELS) + { + const auto& dequant_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(src_ise_endpoint_range).m_ISE_to_val; + + for (uint32_t i = 0; i < num_endpoint_vals; i++) + dequantized_src_vals_temp[i] = dequant_tab[pSrc_endpoints[i]]; + + pDequantized_src_vals = dequantized_src_vals_temp; + } + + if (dst_ise_endpoint_range == astc_helpers::BISE_256_LEVELS) + { + memcpy(pDst_endpoints, pDequantized_src_vals, num_endpoint_vals); + return true; + } + + const auto& dst_quant_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(dst_ise_endpoint_range).m_val_to_ise; + + if ((cem == astc_helpers::CEM_LDR_RGB_BASE_PLUS_OFFSET) || (cem == astc_helpers::CEM_LDR_RGBA_BASE_PLUS_OFFSET)) + { + const auto& dst_dequant_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(dst_ise_endpoint_range).m_ISE_to_val; + + for (uint32_t i = 0; i < num_endpoint_vals; i++) + { + // preserve v1,v3,v5,v7, which have 2 MSB's that need to be preserved during requant + if (i & 1) + pDst_endpoints[i] = (uint8_t)quant_preserve2(dst_ise_endpoint_range, pDequantized_src_vals[i]); + else + pDst_endpoints[i] = dst_quant_tab[pDequantized_src_vals[i]]; + } + +#ifdef _DEBUG + { + const auto& src_dequant_tab = 
astc_helpers::g_dequant_tables.get_endpoint_tab(src_ise_endpoint_range).m_ISE_to_val; + + // ensure MSB's did not change + for (uint32_t i = 0; i < num_endpoint_vals; i++) + { + int src_v = src_dequant_tab[pSrc_endpoints[i]]; + int dst_v = dst_dequant_tab[pDst_endpoints[i]]; + assert((src_v & 128) == (dst_v & 128)); + + if (i & 1) + { + assert((src_v & 64) == (dst_v & 64)); + } + } + } +#endif + + const bool src_used_blue_contract = astc_helpers::used_blue_contraction(cem, pSrc_endpoints, src_ise_endpoint_range); + // src delta sum was < 0 if it used blue contraction, >= 0 if it did NOT + + int v0 = dst_dequant_tab[pDst_endpoints[0]], v1 = dst_dequant_tab[pDst_endpoints[1]]; + int v2 = dst_dequant_tab[pDst_endpoints[2]], v3 = dst_dequant_tab[pDst_endpoints[3]]; + int v4 = dst_dequant_tab[pDst_endpoints[4]], v5 = dst_dequant_tab[pDst_endpoints[5]]; + + astc_helpers::bit_transfer_signed_dec(v1, v0); + astc_helpers::bit_transfer_signed_dec(v3, v2); + astc_helpers::bit_transfer_signed_dec(v5, v4); + + int s = v1 + v3 + v5; + bool quant_used_blue_contraction = (s < 0); + + // Kind of a dumb algorithm, but it only tries 2-3 times in random testing. + //const uint32_t MAX_TRIES = 10; + const uint32_t MAX_TRIES = 5; + + uint32_t tries = 0; + + if (src_used_blue_contract != quant_used_blue_contraction) + { + int nudge_delta = quant_used_blue_contraction ? 1 : -1; + + uint32_t cur_c_rover = 2; // b first + + for (tries = 0; tries < MAX_TRIES; tries++) + { + for (uint32_t j = 0; j < 3; j++) + { + const uint32_t i = (cur_c_rover + j) % 3; + + // This will either nudge the delta, or fail because it's either at the [-32,31] limit or it can't go further in the desired delta direction due to quantization limits + uint32_t new_ise_v = g_base_ofs_nudges[dst_ise_endpoint_range][(nudge_delta < 0) ? 
1 : 0][pDst_endpoints[1 + i * 2]]; + + if (new_ise_v != pDst_endpoints[1 + i * 2]) + { + // It changed, so a successful nudge, but the base MSB should be preserved + pDst_endpoints[1 + i * 2] = (uint8_t)new_ise_v; + break; + } + } + + v0 = dst_dequant_tab[pDst_endpoints[0]], v1 = dst_dequant_tab[pDst_endpoints[1]]; + v2 = dst_dequant_tab[pDst_endpoints[2]], v3 = dst_dequant_tab[pDst_endpoints[3]]; + v4 = dst_dequant_tab[pDst_endpoints[4]], v5 = dst_dequant_tab[pDst_endpoints[5]]; + + astc_helpers::bit_transfer_signed_dec(v1, v0); + astc_helpers::bit_transfer_signed_dec(v3, v2); + astc_helpers::bit_transfer_signed_dec(v5, v4); + + s = v1 + v3 + v5; + quant_used_blue_contraction = (s < 0); + + if (src_used_blue_contract == quant_used_blue_contraction) + break; + + ++cur_c_rover; + + } // tries + } + + if (tries < MAX_TRIES) + { + assert(astc_helpers::used_blue_contraction(cem, pDst_endpoints, dst_ise_endpoint_range) == astc_helpers::used_blue_contraction(cem, pSrc_endpoints, src_ise_endpoint_range)); + } + else + { + // It failed to adjust, ultimately harmless as we have RGB(A) direct anyway (and at this likely very low quant level, it won't matter). + // TODO: We could try more adjustments, but this seems extremely unlikely to be worth the trouble after random testing. 
+ +#if BASISU_ASTC_LDR_DEBUG_MSGS + static bool s_msg_printed = false; + if (!s_msg_printed) + fmt_debug_printf("requantize_ise_endpoints: blue contraction enforcement failed\n"); +#endif + } + +#ifdef _DEBUG + { + const auto& src_dequant_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(src_ise_endpoint_range).m_ISE_to_val; + + // ensure MSB's did not change + for (uint32_t i = 0; i < num_endpoint_vals; i++) + { + int src_v = src_dequant_tab[pSrc_endpoints[i]]; + int dst_v = dst_dequant_tab[pDst_endpoints[i]]; + assert((src_v & 128) == (dst_v & 128)); + } + } +#endif + } + else if ((cem == astc_helpers::CEM_LDR_RGB_DIRECT) || (cem == astc_helpers::CEM_LDR_RGBA_DIRECT)) + { + const auto& dst_dequant_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(dst_ise_endpoint_range).m_ISE_to_val; + + // See if the original colors were blue contracted + uint32_t s0 = pDequantized_src_vals[0] + pDequantized_src_vals[2] + pDequantized_src_vals[4]; + uint32_t s1 = pDequantized_src_vals[1] + pDequantized_src_vals[3] + pDequantized_src_vals[5]; + + const bool orig_used_blue_contract = s1 < s0; + + for (uint32_t i = 0; i < num_endpoint_vals; i++) + pDst_endpoints[i] = dst_quant_tab[pDequantized_src_vals[i]]; + + uint32_t dequant_s0 = dst_dequant_tab[pDst_endpoints[0]] + dst_dequant_tab[pDst_endpoints[2]] + dst_dequant_tab[pDst_endpoints[4]]; + uint32_t dequant_s1 = dst_dequant_tab[pDst_endpoints[1]] + dst_dequant_tab[pDst_endpoints[3]] + dst_dequant_tab[pDst_endpoints[5]]; + + const bool quant_used_blue_contract = dequant_s1 < dequant_s0; + + if (orig_used_blue_contract != quant_used_blue_contract) + { + if (dequant_s0 == dequant_s1) + { + assert(orig_used_blue_contract); + assert(!quant_used_blue_contract); + + // swapping won't work because sums are equal, so force dst to use blue contraction by nudgling a component + // original s1=requant_s0 + + if (dequant_s1) + { + // decrease s1 + for (uint32_t i = 0; i < 3; i++) + { + uint32_t new_ise_v = 
astc_helpers::apply_delta_to_bise_endpoint_val(dst_ise_endpoint_range, pDst_endpoints[1 + i * 2], -1); + if (new_ise_v != pDst_endpoints[1 + i * 2]) + { + pDst_endpoints[1 + i * 2] = (uint8_t)new_ise_v; + break; + } + } + } + else + { + // both are 0, increase s0 + for (uint32_t i = 0; i < 3; i++) + { + uint32_t new_ise_val = astc_helpers::apply_delta_to_bise_endpoint_val(dst_ise_endpoint_range, pDst_endpoints[i * 2], 1); + if (new_ise_val != pDst_endpoints[i * 2]) + { + pDst_endpoints[i * 2] = (uint8_t)new_ise_val; + break; + } + } + } + } + else + { + std::swap(pDst_endpoints[0], pDst_endpoints[1]); + std::swap(pDst_endpoints[2], pDst_endpoints[3]); + std::swap(pDst_endpoints[4], pDst_endpoints[5]); + + if (cem == astc_helpers::CEM_LDR_RGBA_DIRECT) + std::swap(pDst_endpoints[6], pDst_endpoints[7]); + } + } + + assert(astc_helpers::used_blue_contraction(cem, pDst_endpoints, dst_ise_endpoint_range) == astc_helpers::used_blue_contraction(cem, pSrc_endpoints, src_ise_endpoint_range)); + } + else + { + for (uint32_t i = 0; i < num_endpoint_vals; i++) + pDst_endpoints[i] = dst_quant_tab[pDequantized_src_vals[i]]; + +#ifdef _DEBUG + { + const auto& src_dequant_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(src_ise_endpoint_range).m_ISE_to_val; + const auto& dst_dequant_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(dst_ise_endpoint_range).m_ISE_to_val; + + // ensure MSB's did not change + for (uint32_t i = 0; i < num_endpoint_vals; i++) + { + int src_v = src_dequant_tab[pSrc_endpoints[i]]; + int dst_v = dst_dequant_tab[pDst_endpoints[i]]; + assert((src_v & 128) == (dst_v & 128)); + } + } +#endif + } + + return true; + } + + // First packs base+ofs to ise20 (always enforcing blue contraction), then quantizes down (preserving blue contraction whenever possible, which might possibly not be in extreme quantizations). + // NOTE: Cannot use any floating point math for determinism across compilers. 
+ bool pack_base_offset( + uint32_t cem_index, uint32_t dst_ise_endpoint_range, uint8_t* pPacked_endpoints, + const color_rgba& l, const color_rgba& h, + bool use_blue_contraction, bool auto_disable_blue_contraction_if_clamped, + bool& blue_contraction_clamped_flag, bool& base_ofs_clamped_flag, bool& endpoints_swapped) + { + blue_contraction_clamped_flag = false; + base_ofs_clamped_flag = false; + endpoints_swapped = false; + + if ((cem_index != astc_helpers::CEM_LDR_RGB_BASE_PLUS_OFFSET) && (cem_index != astc_helpers::CEM_LDR_RGBA_BASE_PLUS_OFFSET)) + { + assert(0); + return false; + } + + color_rgba pack_l(l), pack_h(h); + + if (use_blue_contraction) + { + color_rgba enc_l(blue_contract_enc(pack_l, blue_contraction_clamped_flag, pack_l.b)); + color_rgba enc_h(blue_contract_enc(pack_h, blue_contraction_clamped_flag, pack_h.b)); + + if ((blue_contraction_clamped_flag) && (auto_disable_blue_contraction_if_clamped)) + { + use_blue_contraction = false; + } + else + { + pack_h = enc_l; + pack_l = enc_h; + + endpoints_swapped = true; + } + } + + int dr = 0, dg = 0, db = 0, da = 0; + bool pack_uses_blue_contraction = false; + int low_clamp = -32; + + // first 2 passes try with swapping by asymmetric clamping, then next 2 try with symmetric clamping, should always succeed + for (uint32_t pass = 0; pass < 4; pass++) + { + // Take previous CEM's values and try to encode to base+offset as best we can, it may clamp + int orig_dr = pack_h.r - pack_l.r, orig_dg = pack_h.g - pack_l.g, orig_db = pack_h.b - pack_l.b, orig_da = pack_h.a - pack_l.a; + + base_ofs_clamped_flag = false; + + dr = basisu::clamp(orig_dr, low_clamp, 31); + if (dr != orig_dr) base_ofs_clamped_flag = true; + + dg = basisu::clamp(orig_dg, low_clamp, 31); + if (dg != orig_dg) base_ofs_clamped_flag = true; + + db = basisu::clamp(orig_db, low_clamp, 31); + if (db != orig_db) base_ofs_clamped_flag = true; + + da = basisu::clamp(orig_da, low_clamp, 31); + if (da != orig_da) base_ofs_clamped_flag = true; + + int s 
= dr + dg + db; + + pack_uses_blue_contraction = s < 0; + + if (pack_uses_blue_contraction == use_blue_contraction) + break; + + if (s == 0) + { + assert(!pack_uses_blue_contraction); + assert(use_blue_contraction); + + // !pack_uses_blue_contraction here, sum=0, so force sum negative + if (db > -32) + db--; + else if (dr > -32) + dr--; + else if (dg > -32) + dg--; + else + { + // they can't be all -32 (or negative), otherwise sum couldn't be 0 + assert(0); + } + + assert((dr + dg + db) < 0); + + pack_uses_blue_contraction = true; + + break; + } + + if (pass == 3) + { + // theoretically unreachable + assert(0); + break; + } + + if (pass == 1) + { + // Try 2 more swap passes, but enforce a symmetric clamp range - this *should* work. + low_clamp = -31; + } + + std::swap(pack_l, pack_h); + endpoints_swapped = !endpoints_swapped; + + } // pass + + int v0 = pack_l.r, v2 = pack_l.g, v4 = pack_l.b; + int v1 = dr, v3 = dg, v5 = db; + + // lossless at 8-bits + astc_helpers::bit_transfer_signed_enc(v1, v0); + astc_helpers::bit_transfer_signed_enc(v3, v2); + astc_helpers::bit_transfer_signed_enc(v5, v4); + + int v6 = 0, v7 = 0; + if (astc_helpers::does_cem_have_alpha(cem_index)) + { + v6 = pack_l.a; + v7 = da; + + astc_helpers::bit_transfer_signed_enc(v7, v6); + } + + uint8_t new_endpoints8[astc_helpers::MAX_CEM_ENDPOINT_VALS]; + + new_endpoints8[0] = (uint8_t)v0; + new_endpoints8[1] = (uint8_t)v1; + + new_endpoints8[2] = (uint8_t)v2; + new_endpoints8[3] = (uint8_t)v3; + + new_endpoints8[4] = (uint8_t)v4; + new_endpoints8[5] = (uint8_t)v5; + + if (astc_helpers::does_cem_have_alpha(cem_index)) + { + new_endpoints8[6] = (uint8_t)v6; + new_endpoints8[7] = (uint8_t)v7; + } + + // This should always succeed. 
+ assert(astc_helpers::used_blue_contraction(cem_index, new_endpoints8, astc_helpers::BISE_256_LEVELS) == use_blue_contraction); + + // requant predicted 256 level endpoints to current endpoint quant level, this will nearly always (if not always) succeed + bool status = requantize_ise_endpoints( + cem_index, astc_helpers::BISE_256_LEVELS, new_endpoints8, + dst_ise_endpoint_range, pPacked_endpoints); + + // can't assert because requant to a very low quant level could have failed to preserve blue contraction (in practice, super rare, perhaps impossible - still determining) + //assert(astc_helpers::used_blue_contraction(cem_index, pPredicted_endpoints, cur_blk.m_endpoint_ise_range) == pack_uses_blue_contraction); + + return status; + } + + // converts a previous block's endpoints, using any supported LDR CEM/endpoint quant level, into a new CEM/endpoint quant level + // used for prediction or *potentially* coding purposes + // will return num_dst_endpoint_vals residuals in cur_blk's endpoint level quant + // NOTE: Cannot use any floating point math for determinism across compilers. 
+ bool convert_endpoints_across_cems( + uint32_t prev_cem, uint32_t prev_endpoint_ise_range, const uint8_t* pPrev_endpoints, + uint32_t dst_cem, uint32_t dst_endpoint_ise_range, uint8_t* pDst_endpoints, + bool always_repack, + bool use_blue_contraction, bool auto_disable_blue_contraction_if_clamped, + bool& blue_contraction_clamped_flag, bool& base_ofs_clamped_flag) + { + blue_contraction_clamped_flag = false; + base_ofs_clamped_flag = false; + + const uint32_t num_dst_endpoint_vals = astc_helpers::get_num_cem_values(dst_cem); + + const auto& dst_quant_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(dst_endpoint_ise_range).m_val_to_ise; + const auto& dst_dequant_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(dst_endpoint_ise_range).m_ISE_to_val; + + if ((prev_cem == dst_cem) && (!always_repack)) + { + // CEM's are precisely equal + // Requantize prev block's endpoints into the current block's quant levels, and we're done + return requantize_ise_endpoints( + prev_cem, prev_endpoint_ise_range, pPrev_endpoints, + dst_endpoint_ise_range, pDst_endpoints); + } + + // CEM's cannot be precisely equal now, compute base CEM's (removing alpha from consideration) + if (!always_repack) + { + // this path preserves the original's blue contraction status + const uint32_t prev_base_cem = astc_helpers::get_base_cem_without_alpha(prev_cem); + const uint32_t dst_base_cem = astc_helpers::get_base_cem_without_alpha(dst_cem); + + // prev cem has alpha, cur cem doesn't, but otherwise modes identical, so it's being stripped + if ((prev_base_cem == dst_base_cem) && (!astc_helpers::does_cem_have_alpha(dst_cem))) + { + assert(astc_helpers::does_cem_have_alpha(prev_cem)); + assert(astc_helpers::get_num_cem_values(prev_base_cem) == num_dst_endpoint_vals); + + // Requantize prev block's endpoints into the current block's quant levels, but ignore the alpha values (which are always the last 2 entries) + return requantize_ise_endpoints( + prev_base_cem, prev_endpoint_ise_range, 
pPrev_endpoints, + dst_endpoint_ise_range, pDst_endpoints); + } + + // if prev cem doesn't have alpha, but the current cem does, but otherwise modes are identical, so add sane alpha (both 255) and hope for best in the prediction + if ((prev_base_cem == dst_base_cem) && (astc_helpers::does_cem_have_alpha(dst_cem))) + { + assert(!astc_helpers::does_cem_have_alpha(prev_base_cem)); + + // requant previous endpoints to current endpoint quant level + bool status = requantize_ise_endpoints( + prev_base_cem, prev_endpoint_ise_range, pPrev_endpoints, + dst_endpoint_ise_range, pDst_endpoints); + + if (!status) + return false; + + // just plug in 255 to both alphas + const int ise_a_val = dst_quant_tab[255]; + + switch (dst_cem) + { + case astc_helpers::CEM_LDR_LUM_ALPHA_DIRECT: + { + assert(num_dst_endpoint_vals == 4); + pDst_endpoints[2] = (uint8_t)ise_a_val; + pDst_endpoints[3] = (uint8_t)ise_a_val; + break; + } + case astc_helpers::CEM_LDR_RGBA_DIRECT: + { + assert(num_dst_endpoint_vals == 8); + pDst_endpoints[6] = (uint8_t)ise_a_val; + pDst_endpoints[7] = (uint8_t)ise_a_val; + break; + } + case astc_helpers::CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A: + { + assert(num_dst_endpoint_vals == 6); + pDst_endpoints[4] = (uint8_t)ise_a_val; + pDst_endpoints[5] = (uint8_t)ise_a_val; + break; + } + case astc_helpers::CEM_LDR_RGBA_BASE_PLUS_OFFSET: + { + assert(num_dst_endpoint_vals == 8); + + // alphas should decode to 255, 255 + pDst_endpoints[6] = (uint8_t)ise_a_val; // base + pDst_endpoints[7] = (uint8_t)dst_quant_tab[128]; // offset, top bit should be preserved + break; + } + default: + assert(0); + break; + } + + return true; + } + + } // !always_repack + + // Here the CEM's are not even in the same class. + // Do something reasonable to convert to the current block's CEM encoding and hope the residual distribution is reasonable. 
+ + // fully decode the endpoints, undoing any quant, blue contraction or bit transfers + color_rgba prev_l, prev_h; + decode_endpoints(prev_cem, pPrev_endpoints, prev_endpoint_ise_range, prev_l, prev_h); + + uint8_t new_endpoints8[astc_helpers::MAX_CEM_ENDPOINT_VALS] = { 0 }; + + // now pack the endpoints to the desired CEM + switch (dst_cem) + { + case astc_helpers::CEM_LDR_LUM_DIRECT: + case astc_helpers::CEM_LDR_LUM_ALPHA_DIRECT: + { + // Take previous endpoints and convert to luma/alpha low/high. + new_endpoints8[0] = (prev_l.r + prev_l.g + prev_l.b + 1) / 3; + new_endpoints8[1] = (prev_h.r + prev_h.g + prev_h.b + 1) / 3; + + if (dst_cem == astc_helpers::CEM_LDR_LUM_ALPHA_DIRECT) + { + new_endpoints8[2] = prev_l.a; + new_endpoints8[3] = prev_h.a; + } + + // ensure L new_endpoints8[1]) + { + std::swap(new_endpoints8[0], new_endpoints8[1]); + std::swap(new_endpoints8[2], new_endpoints8[3]); + } + } + + // requant predicted 256 level endpoints to current endpoint quant level + return requantize_ise_endpoints(dst_cem, astc_helpers::BISE_256_LEVELS, new_endpoints8, dst_endpoint_ise_range, pDst_endpoints); + } + + case astc_helpers::CEM_LDR_RGB_DIRECT: + case astc_helpers::CEM_LDR_RGBA_DIRECT: + { + // Take previous endpoints and convert to rgb(a) direct, explictly preserving previous ordering (to preserve the previous's endpoints usage of blue contraction, if it used it). 
+ new_endpoints8[0] = prev_l.r; + new_endpoints8[1] = prev_h.r; + + new_endpoints8[2] = prev_l.g; + new_endpoints8[3] = prev_h.g; + + new_endpoints8[4] = prev_l.b; + new_endpoints8[5] = prev_h.b; + + if (dst_cem == astc_helpers::CEM_LDR_RGBA_DIRECT) + { + new_endpoints8[6] = prev_l.a; + new_endpoints8[7] = prev_h.a; + } + + if (use_blue_contraction) + { + color_rgba enc_l(blue_contract_enc(prev_l, blue_contraction_clamped_flag, dst_dequant_tab[dst_quant_tab[prev_l.b]])); + color_rgba enc_h(blue_contract_enc(prev_h, blue_contraction_clamped_flag, dst_dequant_tab[dst_quant_tab[prev_h.b]])); + + if ((auto_disable_blue_contraction_if_clamped) && (blue_contraction_clamped_flag)) + { + use_blue_contraction = false; + } + else + { + new_endpoints8[0] = enc_h.r; + new_endpoints8[1] = enc_l.r; + + new_endpoints8[2] = enc_h.g; + new_endpoints8[3] = enc_l.g; + + new_endpoints8[4] = enc_h.b; + new_endpoints8[5] = enc_l.b; + + if (dst_cem == astc_helpers::CEM_LDR_RGBA_DIRECT) + { + new_endpoints8[6] = prev_h.a; + new_endpoints8[7] = prev_l.a; + } + } + } + + uint32_t s0 = new_endpoints8[0] + new_endpoints8[2] + new_endpoints8[4]; + uint32_t s1 = new_endpoints8[1] + new_endpoints8[3] + new_endpoints8[5]; + bool pack_used_blue_contraction = s1 < s0; + + if (pack_used_blue_contraction != use_blue_contraction) + { + if (s0 == s1) + { + assert(!pack_used_blue_contraction); + + // swapping won't work because sums are equal, so force dst to use blue contraction by nudgling a component + // require s1 (hc.r + hc.g + hc.b)) + { + std::swap(lc, hc); + } + } + + new_endpoints8[0] = hc.r; + new_endpoints8[1] = hc.g; + new_endpoints8[2] = hc.b; + +#if 0 + // TODO: remove FP here + vec3F lf((float)lc.r, (float)lc.g, (float)lc.b); + vec3F hf((float)hc.r, (float)hc.g, (float)hc.b); + + const float MAX_S = 255.0f / 256.0f; + + float scale = MAX_S; + + float d = lf.dot(hf); + float nrm = hf.norm(); + if (nrm > 0.0f) + scale = d / nrm; + + scale = basisu::clamp(scale, 0.0f, MAX_S); + + 
new_endpoints8[3] = (uint8_t)basisu::clamp((int)std::round(scale * 256.0f), 0, 255); // explictly not 255.0f, but 256.0f, decoder divides scale by 256.0f +#endif + + { + int id = (lc.r * hc.r) + (lc.g * hc.g) + (lc.b * hc.b); + int inrm = (hc.r * hc.r) + (hc.g * hc.g) + (hc.b * hc.b); + + const int IMAX_S = (1024 * 255) / 256; + + int iscale = IMAX_S; + if (inrm > 0) + iscale = (id * 1024) / inrm; + + iscale = basisu::clamp(iscale, 0, IMAX_S); + + iscale = (iscale + 2) >> 2; + iscale = basisu::clamp(iscale, 0, 255); + + //assert(basisu::iabs(new_endpoints8[3] - iscale) <= 1); + new_endpoints8[3] = static_cast(iscale); + } + + if (dst_cem == astc_helpers::CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A) + { + new_endpoints8[4] = lc.a; + new_endpoints8[5] = hc.a; + + if ((prev_cem != astc_helpers::CEM_LDR_RGB_BASE_SCALE) && + (prev_cem != astc_helpers::CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A)) + { + // ensure val4 < val5 for proper lerping on single plane (correlated alpha) + if (new_endpoints8[4] > new_endpoints8[5]) + std::swap(new_endpoints8[4], new_endpoints8[5]); + } + } + + // requant predicted 256 level endpoints to current endpoint quant level + return requantize_ise_endpoints(dst_cem, astc_helpers::BISE_256_LEVELS, new_endpoints8, dst_endpoint_ise_range, pDst_endpoints); + } + case astc_helpers::CEM_LDR_RGB_BASE_PLUS_OFFSET: + case astc_helpers::CEM_LDR_RGBA_BASE_PLUS_OFFSET: + { + bool endpoints_swapped = false; + + return pack_base_offset(dst_cem, dst_endpoint_ise_range, pDst_endpoints, prev_l, prev_h, use_blue_contraction, auto_disable_blue_contraction_if_clamped, + blue_contraction_clamped_flag, base_ofs_clamped_flag, endpoints_swapped); + } + default: + { + assert(0); + return false; + } + } + + return true; + } + + // Assumes ise 20 (256 levels) + void decode_endpoints_ise20(uint32_t cem_index, const uint8_t* pEndpoint_vals, color32& l, color32& h) + { + assert(astc_helpers::is_cem_ldr(cem_index)); + + int ldr_endpoints[4][2]; + astc_helpers::decode_endpoint(cem_index, 
ldr_endpoints, pEndpoint_vals); + + for (uint32_t c = 0; c < 4; c++) + { + assert((ldr_endpoints[c][0] >= 0) && (ldr_endpoints[c][0] <= 255)); + assert((ldr_endpoints[c][1] >= 0) && (ldr_endpoints[c][1] <= 255)); + + l[c] = (uint8_t)ldr_endpoints[c][0]; + h[c] = (uint8_t)ldr_endpoints[c][1]; + } + } + + void decode_endpoints(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index, color32& l, color32& h, float* pScale) + { + const uint32_t total_endpoint_vals = astc_helpers::get_num_cem_values(cem_index); + + const auto& endpoint_dequant_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(endpoint_ise_index).m_ISE_to_val; + + uint8_t dequantized_endpoints[astc_helpers::MAX_CEM_ENDPOINT_VALS]; + for (uint32_t i = 0; i < total_endpoint_vals; i++) + dequantized_endpoints[i] = endpoint_dequant_tab[pEndpoint_vals[i]]; + + decode_endpoints_ise20(cem_index, dequantized_endpoints, l, h); + + if ((pScale) && ((cem_index == astc_helpers::CEM_LDR_RGB_BASE_SCALE) || (cem_index == astc_helpers::CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A))) + { + *pScale = (float)dequantized_endpoints[3] * (1.0f / 256.0f); + } + } + + // Assumes ise 20 (256 levels) + void decode_endpoints_ise20(uint32_t cem_index, const uint8_t* pEndpoint_vals, color_rgba& l, color_rgba& h) + { + assert(astc_helpers::is_cem_ldr(cem_index)); + + int ldr_endpoints[4][2]; + astc_helpers::decode_endpoint(cem_index, ldr_endpoints, pEndpoint_vals); + + for (uint32_t c = 0; c < 4; c++) + { + assert((ldr_endpoints[c][0] >= 0) && (ldr_endpoints[c][0] <= 255)); + assert((ldr_endpoints[c][1] >= 0) && (ldr_endpoints[c][1] <= 255)); + + l[c] = (uint8_t)ldr_endpoints[c][0]; + h[c] = (uint8_t)ldr_endpoints[c][1]; + } + } + + void decode_endpoints(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index, color_rgba& l, color_rgba& h, float* pScale) + { + const uint32_t total_endpoint_vals = astc_helpers::get_num_cem_values(cem_index); + + const auto& endpoint_dequant_tab = 
astc_helpers::g_dequant_tables.get_endpoint_tab(endpoint_ise_index).m_ISE_to_val; + + uint8_t dequantized_endpoints[astc_helpers::MAX_CEM_ENDPOINT_VALS]; + for (uint32_t i = 0; i < total_endpoint_vals; i++) + dequantized_endpoints[i] = endpoint_dequant_tab[pEndpoint_vals[i]]; + + decode_endpoints_ise20(cem_index, dequantized_endpoints, l, h); + + if ((pScale) && ((cem_index == astc_helpers::CEM_LDR_RGB_BASE_SCALE) || (cem_index == astc_helpers::CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A))) + { + *pScale = (float)dequantized_endpoints[3] * (1.0f / 256.0f); + } + } + + // TODO: Duplicated in astc_hdr + void compute_upsample_matrix(basisu::vector2D& upsample_matrix, uint32_t block_width, uint32_t block_height, uint32_t grid_width, uint32_t grid_height) + { + assert((block_width >= 2) && (block_width <= astc_helpers::MAX_BLOCK_DIM)); + assert((block_height >= 2) && (block_height <= astc_helpers::MAX_BLOCK_DIM)); + assert((grid_width >= 2) && (grid_width <= block_width)); + assert((grid_height >= 2) && (grid_height <= block_height)); + + const uint32_t num_block_samples = block_width * block_height; + const uint32_t num_grid_samples = grid_width * grid_height; + + astc_helpers::weighted_sample samples[astc_helpers::MAX_BLOCK_DIM * astc_helpers::MAX_BLOCK_DIM]; + basisu::clear_obj(samples); + + astc_helpers::compute_upsample_weights(block_width, block_height, grid_width, grid_height, samples); + + // Compute upsample matrix: output num_block_samples (rows), input num_grid_samples (cols) + upsample_matrix.resize_rows_cols(num_block_samples, num_grid_samples); + + basisu::vector weights(num_grid_samples); + + // compute which source sample(s) contribute to it. 
+ for (uint32_t d = 0; d < num_block_samples; d++) + { + const astc_helpers::weighted_sample& ws = samples[d]; + + weights.set_all(0.0f); + + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 2; x++) + { + float w = ws.m_weights[y][x] * (1.0f / 16.0f); + if (!w) + continue; + + assert((ws.m_src_x + x) < grid_width); + assert((ws.m_src_y + y) < grid_height); + + assert(weights[(ws.m_src_x + x) + (ws.m_src_y + y) * grid_width] == 0.0f); + weights[(ws.m_src_x + x) + (ws.m_src_y + y) * grid_width] = w; + } // x + } // y + + for (uint32_t i = 0; i < num_grid_samples; i++) + upsample_matrix.at_row_col(d, i) = weights[i]; + + } // d + } + + // TODO: Only needed by ASTC LDR encoder + void compute_adjoint_downsample_matrix(basisu::vector& downsample_matrix, uint32_t block_width, uint32_t block_height, uint32_t grid_width, uint32_t grid_height) + { + assert((block_width >= 2) && (block_width <= astc_helpers::MAX_BLOCK_DIM)); + assert((block_height >= 2) && (block_height <= astc_helpers::MAX_BLOCK_DIM)); + assert((grid_width >= 2) && (grid_width <= block_width)); + assert((grid_height >= 2) && (grid_height <= block_height)); + + const uint32_t num_block_samples = block_width * block_height; + const uint32_t num_grid_samples = grid_width * grid_height; + + // Compute upsample matrix: output num_block_samples (rows), input num_grid_samples (cols) + basisu::vector2D upsample_matrix; + compute_upsample_matrix(upsample_matrix, block_width, block_height, grid_width, grid_height); + + basisu::vector Dinv(num_grid_samples); + for (uint32_t j = 0; j < num_grid_samples; j++) + { + float sum = 0.0f; + + for (uint32_t i = 0; i < num_block_samples; i++) + sum += upsample_matrix.at_row_col(i, j); + + if (sum > 0.0f) + Dinv[j] = 1.0f / sum; + } + + // Create downsample matrix: num_grid_samples rows, num_block_samples cols + downsample_matrix.resize(num_grid_samples * num_block_samples); + downsample_matrix.set_all(0.0f); + + for (uint32_t j = 0; j < num_grid_samples; ++j) + 
for (uint32_t i = 0; i < num_block_samples; ++i) + downsample_matrix[j * num_block_samples + i] = Dinv[j] * upsample_matrix.at_row_col(i, j); + } + + const astc_block_grid_data* find_astc_block_grid_data(uint32_t block_width, uint32_t block_height, uint32_t grid_width, uint32_t grid_height) + { + auto find_res(g_astc_block_grid_data_hash.find(astc_block_grid_config(block_width, block_height, grid_width, grid_height))); + assert(find_res != g_astc_block_grid_data_hash.end()); + + return &find_res->second; + } + + void init_astc_block_grid_data_hash() + { + if (g_astc_block_grid_data_hash.size()) + return; + + g_astc_block_grid_data_hash.clear(); + g_astc_block_grid_data_hash.reserve(384); + + // TODO: Iterate over all valid block sizes more efficiently + for (uint32_t block_h = 4; block_h <= 12; block_h++) + { + for (uint32_t block_w = 4; block_w <= 12; block_w++) + { + if (!astc_helpers::is_valid_block_size(block_w, block_h)) + continue; + + for (uint32_t grid_h = 2; grid_h <= block_h; grid_h++) + { + for (uint32_t grid_w = 2; grid_w <= block_w; grid_w++) + { + const int bw = block_w, bh = block_h; + const int gw = grid_w, gh = grid_h; + + const int num_texels = bw * bh; + const int num_weights = gw * gh; + + basisu::vector2D upsample_matrix; + compute_upsample_matrix(upsample_matrix, bw, bh, gw, gh); + + float accum = 0.0f; + for (int t = 0; t < num_texels; ++t) + { + float row_sum_sq = 0.0f; + const float* row = &(upsample_matrix.get_ptr())[t * num_weights]; + + for (int i = 0; i < num_weights; ++i) + { + float w = row[i]; + row_sum_sq += w * w; + } + + accum += row_sum_sq; + } + + // estimate of MSE weight quantization reduction due to bilinear weight grid upsampling + // TODO: Gamma is used during encoding now, not transcoding. 
+ const float weight_gamma = accum / (float)num_texels; + + astc_block_grid_data grid_data(weight_gamma); + grid_data.m_upsample_matrix = upsample_matrix; + + basisu::vector& downsample_matrix = grid_data.m_downsample_matrix; + compute_adjoint_downsample_matrix(downsample_matrix, bw, bh, gw, gh); + + auto res = g_astc_block_grid_data_hash.insert(astc_block_grid_config(bw, bh, gw, gh), grid_data); + assert(res.second); + BASISU_NOTE_UNUSED(res); + + } // grid_w + } // grid_h + + } // block_h + + } // block_w + } + +#include "basisu_idct.h" + +#if 0 + typedef void (*idct_1d_func_ptr)(const float* src, int src_stride, float* dst, int dst_stride); + + const idct_1d_func_ptr g_idct_1d_func_ptrs[11] = + { + idct_1d_2, + idct_1d_3, + idct_1d_4, + + idct_1d_5, + idct_1d_6, + idct_1d_7, + idct_1d_8, + + idct_1d_9, + idct_1d_10, + idct_1d_11, + idct_1d_12, + }; +#endif + + static inline void idct_2d(const float* pSrc, float* pDst, uint32_t num_rows, uint32_t num_cols) + { + assert((num_rows >= 2) && (num_rows <= 12)); + assert((num_cols >= 2) && (num_cols <= 12)); + + float temp[12 * 12]; + + // IDCT cols from src to temp + +#if 0 + // This works but uses slow WASM indirect calls + + const idct_1d_func_ptr pCol_xform = g_idct_1d_func_ptrs[num_rows - 2]; + for (uint32_t c = 0; c < num_cols; c++) + { + (*pCol_xform)(pSrc + c, num_cols, temp + c, num_cols); + } +#else + switch (num_rows) + { + case 2: + for (uint32_t c = 0; c < num_cols; c++) + idct_1d_2(pSrc + c, num_cols, temp + c, num_cols); + break; + case 3: + for (uint32_t c = 0; c < num_cols; c++) + idct_1d_3(pSrc + c, num_cols, temp + c, num_cols); + break; + case 4: + for (uint32_t c = 0; c < num_cols; c++) + idct_1d_4(pSrc + c, num_cols, temp + c, num_cols); + break; + case 5: + for (uint32_t c = 0; c < num_cols; c++) + idct_1d_5(pSrc + c, num_cols, temp + c, num_cols); + break; + case 6: + for (uint32_t c = 0; c < num_cols; c++) + idct_1d_6(pSrc + c, num_cols, temp + c, num_cols); + break; + case 7: + for (uint32_t c 
= 0; c < num_cols; c++) + idct_1d_7(pSrc + c, num_cols, temp + c, num_cols); + break; + case 8: + for (uint32_t c = 0; c < num_cols; c++) + idct_1d_8(pSrc + c, num_cols, temp + c, num_cols); + break; + case 9: + for (uint32_t c = 0; c < num_cols; c++) + idct_1d_9(pSrc + c, num_cols, temp + c, num_cols); + break; + case 10: + for (uint32_t c = 0; c < num_cols; c++) + idct_1d_10(pSrc + c, num_cols, temp + c, num_cols); + break; + case 11: + for (uint32_t c = 0; c < num_cols; c++) + idct_1d_11(pSrc + c, num_cols, temp + c, num_cols); + break; + case 12: + default: + for (uint32_t c = 0; c < num_cols; c++) + idct_1d_12(pSrc + c, num_cols, temp + c, num_cols); + break; + } +#endif + + // IDCT rows from temp to dst + +#if 0 + // This works but uses slow WASM indirect calls + const idct_1d_func_ptr pRow_xform = g_idct_1d_func_ptrs[num_cols - 2]; + + float* pTemp_row = temp; + float* pDst_row = pDst; + for (uint32_t r = 0; r < num_rows; r++) + { + (*pRow_xform)(pTemp_row, 1, pDst_row, 1); + pTemp_row += num_cols; + pDst_row += num_cols; + } +#else + float* pTemp_row = temp; + float* pDst_row = pDst; + switch (num_cols) + { + case 2: + for (uint32_t r = 0; r < num_rows; r++, pTemp_row += num_cols, pDst_row += num_cols) + idct_1d_2(pTemp_row, 1, pDst_row, 1); + break; + case 3: + for (uint32_t r = 0; r < num_rows; r++, pTemp_row += num_cols, pDst_row += num_cols) + idct_1d_3(pTemp_row, 1, pDst_row, 1); + break; + case 4: + for (uint32_t r = 0; r < num_rows; r++, pTemp_row += num_cols, pDst_row += num_cols) + idct_1d_4(pTemp_row, 1, pDst_row, 1); + break; + case 5: + for (uint32_t r = 0; r < num_rows; r++, pTemp_row += num_cols, pDst_row += num_cols) + idct_1d_5(pTemp_row, 1, pDst_row, 1); + break; + case 6: + for (uint32_t r = 0; r < num_rows; r++, pTemp_row += num_cols, pDst_row += num_cols) + idct_1d_6(pTemp_row, 1, pDst_row, 1); + break; + case 7: + for (uint32_t r = 0; r < num_rows; r++, pTemp_row += num_cols, pDst_row += num_cols) + idct_1d_7(pTemp_row, 1, pDst_row, 1); 
+ break; + case 8: + for (uint32_t r = 0; r < num_rows; r++, pTemp_row += num_cols, pDst_row += num_cols) + idct_1d_8(pTemp_row, 1, pDst_row, 1); + break; + case 9: + for (uint32_t r = 0; r < num_rows; r++, pTemp_row += num_cols, pDst_row += num_cols) + idct_1d_9(pTemp_row, 1, pDst_row, 1); + break; + case 10: + for (uint32_t r = 0; r < num_rows; r++, pTemp_row += num_cols, pDst_row += num_cols) + idct_1d_10(pTemp_row, 1, pDst_row, 1); + break; + case 11: + for (uint32_t r = 0; r < num_rows; r++, pTemp_row += num_cols, pDst_row += num_cols) + idct_1d_11(pTemp_row, 1, pDst_row, 1); + break; + case 12: + default: + for (uint32_t r = 0; r < num_rows; r++, pTemp_row += num_cols, pDst_row += num_cols) + idct_1d_12(pTemp_row, 1, pDst_row, 1); + break; + } +#endif + } + + bool dct2f::init(uint32_t rows, uint32_t cols) + { + if ((rows < 2u) || (rows > cMaxSize) || + (cols < 2u) || (cols > cMaxSize)) + { + assert(0); + return false; + } + + m_rows = rows; + m_cols = cols; + + m_c_col.assign(m_rows * m_rows, 0.0f); + m_c_row.assign(m_cols * m_cols, 0.0f); + m_a_col.assign(m_rows, 0.0f); + m_a_row.assign(m_cols, 0.0f); + + const float pi = 3.14159265358979323846f; + + // alpha scaling + const float inv_m = 1.0f / static_cast(m_rows); + m_a_col[0] = sqrtf(inv_m); + for (uint32_t u = 1; u < m_rows; ++u) + m_a_col[u] = sqrtf(2.0f * inv_m); + + const float inv_n = 1.0f / static_cast(m_cols); + m_a_row[0] = sqrtf(inv_n); + for (uint32_t v = 1; v < m_cols; ++v) + m_a_row[v] = sqrtf(2.0f * inv_n); + + // cos tables + for (uint32_t u = 0; u < m_rows; ++u) + { + for (uint32_t x = 0; x < m_rows; ++x) + { + float angle = (pi * static_cast((2 * x + 1) * u)) / (2.0f * static_cast(m_rows)); + m_c_col[u * m_rows + x] = cosf(angle); + } + } + + for (uint32_t v = 0; v < m_cols; ++v) + { + for (uint32_t y = 0; y < m_cols; ++y) + { + float angle = (pi * static_cast((2 * y + 1) * v)) / (2.0f * static_cast(m_cols)); + m_c_row[v * m_cols + y] = cosf(angle); + } + } + + return true; + } + + void 
dct2f::forward(const float* pSrc, float* pDst, fvec& work) const + { + forward(pSrc, m_cols, pDst, m_cols, work); + } + + void dct2f::inverse(const float* pSrc, float* pDst, fvec& work) const + { + inverse(pSrc, m_cols, pDst, m_cols, work); + } + + void dct2f::inverse_check(const float* pSrc, float* pDst, fvec& work) const + { + inverse_check(pSrc, m_cols, pDst, m_cols, work); + } + + void dct2f::forward(const float* pSrc, uint32_t src_stride, + float* pDst, uint32_t dst_stride, fvec& work) const + { + assert(m_rows && m_cols); + work.resize(m_rows * m_cols); + + const uint32_t m = m_rows, n = m_cols; + + float* pWork = &work[0]; + + // horizontal + for (uint32_t x = 0; x < m; ++x) + { + const float* pRowIn = pSrc + x * src_stride; + float* pRowT = pWork + x * n; + for (uint32_t v = 0; v < n; ++v) + { + const float* pCv = &m_c_row[v * n]; + float s = 0.0f; + for (uint32_t y = 0; y < n; ++y) + { + s += pRowIn[y] * pCv[y]; + } + pRowT[v] = s * m_a_row[v]; + } + } + + // vertical + for (uint32_t v = 0; v < n; ++v) + { + for (uint32_t u = 0; u < m; ++u) + { + const float* pCu = &m_c_col[u * m]; + float s = 0.0f; + for (uint32_t x = 0; x < m; ++x) + { + s += pWork[x * n + v] * pCu[x]; + } + pDst[u * dst_stride + v] = s * m_a_col[u]; + } + } + } + + // src_stride/dst_stride must be m_cols + void dct2f::inverse(const float* pSrc, uint32_t src_stride, + float* pDst, uint32_t dst_stride, fvec& work) const + { + BASISU_NOTE_UNUSED(src_stride); + BASISU_NOTE_UNUSED(dst_stride); + +#if 0 + assert(m_rows && m_cols); + work.resize(m_rows * m_cols); + + const uint32_t m = m_rows, n = m_cols; + float* pWork = &work[0]; + + // vertical + for (uint32_t v = 0; v < n; ++v) // cols + { + float sums[cMaxSize] = { 0 }; + + for (uint32_t u = 0; u < m; ++u) // rows + { + if (((const uint32_t*)pSrc)[u * src_stride + v] == 0) + continue; + + float yU = pSrc[u * src_stride + v]; // most coeffs will be 0 + //if (yU == 0.0f) + // continue; + + yU *= m_a_col[u]; + + for (uint32_t x = 0; x < m; 
++x) + { + const float cU = m_c_col[u * m + x]; + sums[x] += yU * cU; + } // x + + } // u + + for (uint32_t x = 0; x < m; ++x) + pWork[x * n + v] = sums[x]; + + } // v + + // horizontal + for (uint32_t x = 0; x < m; ++x) // rows + { + const float* pRowT = pWork + x * n; + float* pRowOut = pDst + x * dst_stride; + + for (uint32_t y = 0; y < n; ++y) // cols + { + float s = 0.0f; + for (uint32_t v = 0; v < n; ++v) // cols + { + const float cV = m_c_row[v * n + y]; + s += (pRowT[v] * m_a_row[v]) * cV; + } + pRowOut[y] = s; + } + } +#else + BASISU_NOTE_UNUSED(work); + assert(src_stride == m_cols); + assert(dst_stride == m_cols); + idct_2d(pSrc, pDst, m_rows, m_cols); +#endif + } + + void dct2f::inverse_check(const float* pSrc, uint32_t src_stride, + float* pDst, uint32_t dst_stride, fvec& work) const + { + assert(m_rows && m_cols); + work.resize(m_rows * m_cols); + + const uint32_t m = m_rows, n = m_cols; + float* pWork = &work[0]; + + // vertical + for (uint32_t v = 0; v < n; ++v) + { + for (uint32_t x = 0; x < m; ++x) + { + float s = 0.0f; + for (uint32_t u = 0; u < m; ++u) + { + const float yU = pSrc[u * src_stride + v]; + const float cU = m_c_col[u * m + x]; + s += (yU * m_a_col[u]) * cU; + } + pWork[x * n + v] = s; + } + } + + // horizontal + for (uint32_t x = 0; x < m; ++x) // rows + { + const float* pRowT = pWork + x * n; + float* pRowOut = pDst + x * dst_stride; + + for (uint32_t y = 0; y < n; ++y) // cols + { + float s = 0.0f; + for (uint32_t v = 0; v < n; ++v) // cols + { + const float cV = m_c_row[v * n + y]; + s += (pRowT[v] * m_a_row[v]) * cV; + } + pRowOut[y] = s; + } + } + } + + static int* generate_zigzag_order(int width, int height) + { + assert((width > 0) && (height > 0)); + + const int total = width * height; + int* pOrder = (int*)malloc(total * sizeof(int)); + if (!pOrder) + return nullptr; + + int idx = 0; + for (int s = 0; s < (width + height - 1); ++s) + { + // Start x at max(0, s - height + 1), end at min(s, width - 1) + const int x_start = (s < 
height) ? 0 : (s - height + 1); + const int x_end = (s < width) ? s : (width - 1); + + // Diagonal size + const int diag_size = x_end - x_start + 1; + int* pDiag = (int*)malloc(diag_size * sizeof(int)); + if (!pDiag) + { + free(pOrder); + return nullptr; + } + + int j = 0; + for (int x = x_start; x <= x_end; ++x) + { + int y = s - x; + assert(j < diag_size); + pDiag[j++] = x + y * width; + } + + // Reverse if s is odd (alternate direction) + if ((s & 1) == 1) + { + for (int k = diag_size - 1; k >= 0; --k) + { + assert(idx < total); + pOrder[idx++] = pDiag[k]; + } + } + else + { + for (int k = 0; k < diag_size; ++k) + { + assert(idx < total); + pOrder[idx++] = pDiag[k]; + } + } + + free(pDiag); + } + + return pOrder; + } + + static const int g_baseline_jpeg_y[8][8] = + { + // DC element modified so bilinear fetches near (0,0) grab a smaller quant table value, protecting most important LF coefficients + { 4, 11, 10, 16, 24, 40, 51, 61 }, + { 12, 12, 14, 19, 26, 58, 60, 55 }, + { 14, 13, 16, 24, 40, 57, 69, 56 }, + { 14, 17, 22, 29, 51, 87, 80, 62 }, + { 18, 22, 37, 56, 68,109,103, 77 }, + { 24, 35, 55, 64, 81,104,113, 92 }, + { 49, 64, 78, 87,103,121,120,101 }, + { 72, 92, 95, 98,112,100,103, 99 } + }; + + // centers at (0,0) + static inline float sample_jpeg_quant(const int Q8[8][8], float i, float j) + { + i = basisu::minimum(basisu::maximum(i, 0.0f), 7.0f); + j = basisu::minimum(basisu::maximum(j, 0.0f), 7.0f); + int i0 = (int)floorf(i), j0 = (int)floorf(j); + int i1 = basisu::minimum(i0 + 1, 7), j1 = basisu::minimum(j0 + 1, 7); + float ti = i - i0, tj = j - j0; + float a = (1 - ti) * Q8[j0][i0] + ti * Q8[j0][i1]; + float b = (1 - ti) * Q8[j1][i0] + ti * Q8[j1][i1]; + return (1 - tj) * a + tj * b; + } + + void grid_weight_dct::init(uint32_t block_width, uint32_t block_height) + { + m_block_width = block_width; + m_block_height = block_height; + + for (uint32_t grid_height = 2; grid_height <= block_height; grid_height++) + { + for (uint32_t grid_width = 2; 
grid_width <= block_width; grid_width++) + { + // Check if this is a valid ASTC weight grid dimension + if ((grid_width * grid_height) > astc_helpers::MAX_GRID_WEIGHTS) + continue; + + auto ins_res = m_grid_dim_key_vals.insert(grid_dim_key(grid_width, grid_height), grid_dim_value()); + auto& val = ins_res.first->second; + + val.m_dct.init(grid_height, grid_width); + + int* pZigZag = generate_zigzag_order(grid_width, grid_height); + + basisu::int_vec v(grid_width * grid_height); + memcpy(v.data(), pZigZag, sizeof(int) * grid_width * grid_height); + + free(pZigZag); + + val.m_zigzag.swap(v); + + } // w + } // h + } + + // This can used FP as it only impacts the final decoded weights (not future blocks) + bool grid_weight_dct::decode_block_weights( + float q, uint32_t plane_index, // plane of weights to decode and IDCT from stream + astc_helpers::log_astc_block& log_blk, // must be initialized except for the plane weights which are decoded + basist::bitwise_decoder* pDec, + const astc_block_grid_data* pGrid_data, // grid data for this grid size + block_stats* pS, + fvec& dct_work, + const dct_syms* pSyms) const + { + const uint32_t grid_width = log_blk.m_grid_width, grid_height = log_blk.m_grid_height; + const uint32_t total_grid_samples = grid_width * grid_height; + const uint32_t num_planes = log_blk.m_dual_plane ? 
2 : 1; + + //const auto& dequant_tab = astc_helpers::g_dequant_tables.get_weight_tab(log_blk.m_weight_ise_range).m_ISE_to_val; + const auto& quant_tab = astc_helpers::g_dequant_tables.get_weight_tab(log_blk.m_weight_ise_range).m_val_to_ise; + + auto grid_dim_vals_iter = m_grid_dim_key_vals.find(grid_dim_key(grid_width, grid_height)); + + if (grid_dim_vals_iter == m_grid_dim_key_vals.end()) + { + // Invalid grid dimension for this block size + assert(0); + return false; + } + + auto& grid_dim_vals = grid_dim_vals_iter->second; + + const float span_len = get_max_span_len(log_blk, plane_index); + + const float level_scale = compute_level_scale(q, span_len, pGrid_data->m_weight_gamma, grid_width, grid_height, log_blk.m_weight_ise_range); + + float scaled_weight_coding_scale = SCALED_WEIGHT_BASE_CODING_SCALE; + if (log_blk.m_weight_ise_range <= astc_helpers::BISE_8_LEVELS) + scaled_weight_coding_scale = 1.0f / 8.0f; + + float mean_weight = 0; + + if (pDec) + mean_weight = (float)pDec->decode_truncated_binary((uint32_t)(64.0f * scaled_weight_coding_scale) + 1) / (float)scaled_weight_coding_scale; + else if (pSyms) + mean_weight = (float)pSyms->m_dc_sym / (float)scaled_weight_coding_scale; + else + { + assert(0); + return false; + } + + if (pS) + { + pS->m_mean_weight = mean_weight; + pS->m_total_coded_acs = 0; + pS->m_max_ac_coeff = 0; + } + + float dct_weights[astc_helpers::MAX_BLOCK_PIXELS]; + + const auto& zigzag = grid_dim_vals.m_zigzag; + + basisu::clear_obj(dct_weights); + + sample_quant_table_state quant_state; + quant_state.init(q, m_block_width, m_block_height, level_scale); + + if (pDec) + { + for (uint32_t zig_idx = 1; zig_idx < total_grid_samples; zig_idx++) + { + uint32_t run_len = pDec->decode_rice(m_zero_run); + + if ((run_len + zig_idx) > total_grid_samples) + return false; + + zig_idx += run_len; + + if (zig_idx >= total_grid_samples) + break; + + int sign = pDec->get_bits(1); + + int coeff = pDec->decode_rice(m_coeff); + + if (sign) + coeff = -coeff; + 
+ int dct_idx = zigzag[zig_idx]; + + const uint32_t y = (uint32_t)dct_idx / grid_width; + const uint32_t x = (uint32_t)dct_idx % grid_width; + + //const int quant = dct_quant_tab[dct_idx]; + const int quant = sample_quant_table(quant_state, x, y); + //assert(quant == sample_quant_table(quant_state, x, y)); + + dct_weights[dct_idx] = dequant_deadzone(coeff, quant, DEADZONE_ALPHA, x, y); + + if (pS) + { + ++pS->m_total_coded_acs; + pS->m_max_ac_coeff = basisu::maximum(pS->m_max_ac_coeff, basisu::iabs(coeff)); + } + } + } + else + { + uint32_t zig_idx = 1; + uint32_t coeff_ofs = 0; + while (coeff_ofs < pSyms->m_coeffs.size()) + { + const uint32_t run_len = pSyms->m_coeffs[coeff_ofs].m_num_zeros; + const int coeff = pSyms->m_coeffs[coeff_ofs].m_coeff; + coeff_ofs++; + + if ((run_len + zig_idx) > total_grid_samples) + return false; + + zig_idx += run_len; + + if (zig_idx >= total_grid_samples) + break; + + assert(coeff != INT_MAX); + + int dct_idx = zigzag[zig_idx]; + + const uint32_t y = (uint32_t)dct_idx / grid_width; + const uint32_t x = (uint32_t)dct_idx % grid_width; + + //const int quant = dct_quant_tab[dct_idx]; + const int quant = sample_quant_table(quant_state, x, y); + //assert(quant == sample_quant_table(quant_state, x, y)); + + dct_weights[dct_idx] = dequant_deadzone(coeff, quant, DEADZONE_ALPHA, x, y); + + if (pS) + { + ++pS->m_total_coded_acs; + pS->m_max_ac_coeff = basisu::maximum(pS->m_max_ac_coeff, basisu::iabs(coeff)); + } + + zig_idx++; + } + } + + float idct_weights[astc_helpers::MAX_BLOCK_PIXELS]; + + grid_dim_vals.m_dct.inverse(dct_weights, idct_weights, dct_work); + +#if defined(_DEBUG) || defined(DEBUG) + // Sanity check IDCT vs. less optimized variant + // Also quant table sanity check vs. sample_quant_table(). 
+ { + float idct_weights_temp[astc_helpers::MAX_BLOCK_PIXELS]; + grid_dim_vals.m_dct.inverse_check(dct_weights, idct_weights_temp, dct_work); + + int dct_quant_tab[astc_helpers::MAX_BLOCK_PIXELS]; + compute_quant_table(q, grid_width, grid_height, level_scale, dct_quant_tab); + + for (uint32_t i = 0; i < grid_width * grid_height; i++) + { + assert(basisu::equal_tol(idct_weights[i], idct_weights_temp[i], .00125f)); + + assert(!i || (dct_quant_tab[i] == sample_quant_table(quant_state, i % grid_width, i / grid_width))); + } + } +#endif + + // Compute final grid weights + for (uint32_t y = 0; y < grid_height; y++) + for (uint32_t x = 0; x < grid_width; x++) + log_blk.m_weights[(x + y * grid_width) * num_planes + plane_index] = quant_tab[basisu::clamp(fast_roundf_int(mean_weight + idct_weights[x + y * grid_width]), 0, 64)]; + + return true; + } + + // results of calling scale_quant_steps() for each # of ASTC weight levels + static const float g_scale_quant_steps[12] = { 1.51333141f, 1.41198814f, 1.35588217f, 1.31743157f, 1.28835952f, 1.24573100f, 1.21481407f, 1.19067919f, 1.15431654f, 1.12734985f, 1.10601568f, 1.07348967f }; + + // Adaptive quantization + float grid_weight_dct::compute_level_scale(float q, float span_len, float weight_gamma, uint32_t grid_width, uint32_t grid_height, uint32_t weight_ise_range) const + { + BASISU_NOTE_UNUSED(weight_gamma); + BASISU_NOTE_UNUSED(grid_width); + BASISU_NOTE_UNUSED(grid_height); + + assert((weight_ise_range >= astc_helpers::BISE_2_LEVELS) && (weight_ise_range <= astc_helpers::BISE_32_LEVELS)); + + // Standard JPEG quality factor calcs + // TODO: Precompute this once + float level_scale; + q = basisu::clamp(q, 1.0f, 100.0f); + if (q < 50.0f) + level_scale = 5000.0f / q; + else + level_scale = 200.0f - 2.0f * q; + + level_scale *= (1.0f / 100.0f); // because JPEG's quant table is scaled by 100 + + //const float span_floor = 28.0f; + const float span_floor = 14.0f; + //const float adaptive_factor = 255.0f / maximum(span_len, 
span_floor); + // 64.0 = dynamic range adjustment (JPEG uses 255) + // divide by span len to adjustment adaptive low/high values per-block (JPEG always uses effective span=0-255) + // actually (64/255) * 255/max(span_len, span_floor) + float adaptive_factor = 64.0f / basisu::maximum(span_len, span_floor); + + // input signal scalar quantization noise will be distributed between multiple AC coefficients - compensate by adaptively adjusting the quant step size + float weight_quant_adaptive_factor = g_scale_quant_steps[weight_ise_range]; + adaptive_factor *= weight_quant_adaptive_factor; + + // sanity + assert(fabs(weight_quant_adaptive_factor - scale_quant_steps(astc_helpers::get_ise_levels(weight_ise_range))) < .000125f); + + // Adjust for ASTC weight grid bilinear upsampling using precomputed constants depending on the weight grid dims (usually .5-1.0, smaller grids=lower weights) + // This compensates for weight quant error being smoothed out due to bilinear. + // It's unclear if this is actually useful, and looks worse on smaller weight grids. + //level_scale *= adaptive_factor / sqrtf(weight_gamma); // weight_gamma is power domain, not amplitude + + // (Adaptive quant) + level_scale *= adaptive_factor; + + // The higher the level_scale, the more quantized DCT coefficients will be and vice versa. + + return level_scale; + } + + int grid_weight_dct::sample_quant_table(sample_quant_table_state& state, uint32_t x, uint32_t y) const + { + assert(x || y); + + if (state.m_q >= 100.0f) + return 1; + + float ny = float(y); + float ry = ny * state.m_sy; + + float nx = float(x); + float rx = nx * state.m_sx; + + assert(x || y); + + // sample from the JPEG baseline luma 8x8 DCT quant matrix + // this is an approximation (we could do an actual desired radians per spatial sample search vs. 
each of the 8x8 basis vectors to find the best, most conservative mapping), + // but for 4x4 and 6x6 block sizes it's reasonable enough and simple/fast + // at 4x4, the lowest frequencies are slightly more heavily quantized than we would want (but the quant table entries near DC are so similar it's doubtful it matters much if at all) + //float base = sample_jpeg_quant(g_baseline_jpeg_y, rx, ry); + + float base; + { + float i = rx, j = ry; + assert((i >= 0.0f) && (j >= 0.0f)); + + i = basisu::minimum(i, 7.0f); + j = basisu::minimum(j, 7.0f); + + int i0 = (int)(i), j0 = (int)(j); + int i1 = basisu::minimum(i0 + 1, 7), j1 = basisu::minimum(j0 + 1, 7); + + float ti = i - i0, tj = j - j0; + float a = (1 - ti) * g_baseline_jpeg_y[j0][i0] + ti * g_baseline_jpeg_y[j0][i1]; + float b = (1 - ti) * g_baseline_jpeg_y[j1][i0] + ti * g_baseline_jpeg_y[j1][i1]; + + base = (1 - tj) * a + tj * b; + } + + int quant_scale = (int)(base * state.m_level_scale + 0.5f); + + quant_scale = basisu::maximum(1, quant_scale); + + return quant_scale; + } + + void grid_weight_dct::compute_quant_table(float q, + uint32_t grid_width, uint32_t grid_height, + float level_scale, int* dct_quant_tab) const + { + assert(q > 0.0f); + + dct_quant_tab[0] = 1; + + if (q >= 100.0f) + { + for (uint32_t y = 0; y < grid_height; y++) + for (uint32_t x = 0; x < grid_width; x++) + if (x || y) + dct_quant_tab[x + y * grid_width] = 1; + return; + } + + const int Bx = m_block_width, By = m_block_height; + + const float sx = (float)8.0f / (float)Bx; + const float sy = (float)8.0f / (float)By; + + for (uint32_t y = 0; y < grid_height; y++) + { + float ny = float(y); + float ry = ny * sy; + + for (uint32_t x = y ? 0 : 1; x < grid_width; x++) + { + int quant_scale = 0; + + assert(x || y); + + float nx = float(x); + float rx = nx * sx; + + // sample from the JPEG baseline luma 8x8 DCT quant matrix + // this is an approximation (we could do an actual desired radians per spatial sample search vs. 
each of the 8x8 basis vectors to find the best, most conservative mapping), + // but for 4x4 and 6x6 block sizes it's reasonable enough and simple/fast + // at 4x4, the lowest frequencies are slightly more heavily quantized than we would want (but the quant table entries near DC are so similar it's doubtful it matters much if at all) + float base = sample_jpeg_quant(g_baseline_jpeg_y, rx, ry); + + //quant_scale = (int)std::floor(base * level_scale + 0.5f); + quant_scale = (int)(base * level_scale + 0.5f); + assert(quant_scale == (int)std::floor(base * level_scale + 0.5f)); + + quant_scale = basisu::maximum(1, quant_scale); + + dct_quant_tab[x + y * grid_width] = quant_scale; + } // x + } // y + } + + // Needed by AQ + float grid_weight_dct::get_max_span_len(const astc_helpers::log_astc_block& log_blk, uint32_t plane_index) const + { + float span_len = 0.0f; + + if (log_blk.m_dual_plane) + { + color32 l, h; + decode_endpoints(log_blk.m_color_endpoint_modes[0], log_blk.m_endpoints, log_blk.m_endpoint_ise_range, l, h); + + for (uint32_t c = 0; c < 4; c++) + { + if (plane_index == 1) + { + if (c == log_blk.m_color_component_selector) + { + span_len += basisu::squaref((float)h[c] - (float)l[c]); + } + } + else + { + if (c != log_blk.m_color_component_selector) + { + span_len += basisu::squaref((float)h[c] - (float)l[c]); + } + } + } + + span_len = sqrtf(span_len); + } + else + { + for (uint32_t i = 0; i < log_blk.m_num_partitions; i++) + { + color32 l, h; + decode_endpoints(log_blk.m_color_endpoint_modes[0], log_blk.m_endpoints + astc_helpers::get_num_cem_values(log_blk.m_color_endpoint_modes[0]) * i, log_blk.m_endpoint_ise_range, l, h); + + float part_span_len = sqrtf( + basisu::squaref((float)h.r - (float)l.r) + basisu::squaref((float)h.g - (float)l.g) + basisu::squaref((float)h.b - (float)l.b) + basisu::squaref((float)h.a - (float)l.a) + ); + + span_len = basisu::maximum(part_span_len, span_len); + } + } + + return span_len; + } + +#include "basisu_astc_cfgs.inl" + + 
void create_encoder_trial_modes_table(uint32_t block_width, uint32_t block_height, + basisu::vector& encoder_trial_modes, grouped_trial_modes& grouped_encoder_trial_modes, + bool print_debug_info, bool print_modes) + { + //interval_timer itm; + //itm.start(); + + uint32_t mode_index = 0; + uint32_t max_grid_width = 0, max_grid_height = 0, max_grid_samples = 0; + + //encoder_trial_modes.reserve(BU_TOTAL_ASTC_CFGS); + encoder_trial_modes.reserve(3072); + encoder_trial_modes.resize(0); + + grouped_encoder_trial_modes.clear(); + + for (uint32_t cfg_index = 0; cfg_index < BU_TOTAL_ASTC_CFGS; cfg_index++) + { + assert((cfg_index * 3 + 2) < std::size(s_astc_cfg_table)); + uint32_t packed_mode = s_astc_cfg_table[cfg_index * 3] | (s_astc_cfg_table[cfg_index * 3 + 1] << 8) | (s_astc_cfg_table[cfg_index * 3 + 2] << 16); + + uint32_t endpoint_ise_range, weight_ise_range, ccs_index, num_subsets, unique_cem_index, grid_wh; + +#define BU_UNPACK_FIELD(val, bits) do { val = packed_mode & ((1u << (bits)) - 1u); packed_mode >>= (bits); } while(0) + BU_UNPACK_FIELD(endpoint_ise_range, CFG_PACK_EISE_BITS); + BU_UNPACK_FIELD(weight_ise_range, CFG_PACK_WISE_BITS); + BU_UNPACK_FIELD(ccs_index, CFG_PACK_CCS_BITS); + BU_UNPACK_FIELD(num_subsets, CFG_PACK_SUBSETS_BITS); + BU_UNPACK_FIELD(unique_cem_index, CFG_PACK_CEM_BITS); + BU_UNPACK_FIELD(grid_wh, CFG_PACK_GRID_BITS); +#undef BU_UNPACK_FIELD + + assert(!packed_mode); + + const uint32_t grid_width = (grid_wh / 11) + 2; + + // modes are sorted by grid widths, which is at/near the MSB of the packed values, rest must be >= + if (grid_width > block_width) + break; + + const uint32_t grid_height = (grid_wh % 11) + 2; + if (grid_height > block_height) + continue; + + const uint32_t cem_index = s_unique_ldr_index_to_astc_cem[unique_cem_index]; + +#if defined(_DEBUG) || defined(DEBUG) + { + // Ensure configuration is actually valid. 
+ astc_helpers::log_astc_block log_block; + log_block.clear(); + log_block.m_grid_width = (uint8_t)grid_width; + log_block.m_grid_height = (uint8_t)grid_height; + log_block.m_num_partitions = (uint8_t)(num_subsets + 1); + log_block.m_dual_plane = (ccs_index != 0); + log_block.m_color_component_selector = (uint8_t)(ccs_index ? (ccs_index - 1) : 0); + log_block.m_num_partitions = (uint8_t)(num_subsets + 1); + log_block.m_endpoint_ise_range = (uint8_t)(endpoint_ise_range + astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE); + log_block.m_weight_ise_range = (uint8_t)(weight_ise_range + astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE); + + for (uint32_t i = 0; i < log_block.m_num_partitions; i++) + log_block.m_color_endpoint_modes[i] = (uint8_t)cem_index; + + astc_helpers::astc_block phys_block; + bool pack_success = astc_helpers::pack_astc_block(phys_block, log_block, nullptr, nullptr, astc_helpers::cValidateSkipFinalEndpointWeightPacking); + assert(pack_success); + } +#endif + + const uint32_t tm_index = encoder_trial_modes.size_u32(); + + trial_mode& tm = *encoder_trial_modes.enlarge(1); + + tm.m_ccs_index = (int)ccs_index - 1; + tm.m_cem = cem_index; + tm.m_endpoint_ise_range = endpoint_ise_range + astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE; + tm.m_weight_ise_range = weight_ise_range; + tm.m_grid_width = grid_width; + tm.m_grid_height = grid_height; + tm.m_num_parts = num_subsets + 1; + + grouped_encoder_trial_modes.add(block_width, block_height, tm, tm_index); + + if (print_modes) + { + max_grid_width = basisu::maximum(max_grid_width, grid_width); + max_grid_height = basisu::maximum(max_grid_height, grid_height); + max_grid_samples = basisu::maximum(max_grid_samples, grid_width * grid_height); + + basisu::debug_printf("%u: CEM: %u DP: %u, CCS: %i, SUBSETS: %u, GRID: %ux%u, ENDPOINTS: %u, WEIGHTS: %u\n", + mode_index, + tm.m_cem, tm.m_ccs_index >= 0, tm.m_ccs_index, tm.m_num_parts, + tm.m_grid_width, tm.m_grid_height, + astc_helpers::get_ise_levels(tm.m_endpoint_ise_range), 
+ astc_helpers::get_ise_levels(tm.m_weight_ise_range)); + } + + mode_index++; + } // cfg_index + + if (print_debug_info) + { + //fmt_debug_printf("create_encoder_trial_modes_table() time: {} secs\n", itm.get_elapsed_secs()); + basisu::debug_printf("create_encoder_trial_modes_table() - ASTC %ux%u modes\n", block_width, block_height); + basisu::debug_printf("Total used trial mode groups: %u\n", grouped_encoder_trial_modes.count_used_groups()); + basisu::debug_printf("Total ASTC configurations iterated: %u\n", mode_index); + if (print_modes) + basisu::fmt_debug_printf("Max grid dimensions: {}x{}, max grid samples: {}\n", max_grid_width, max_grid_height, max_grid_samples); + } + } + + // Cached encoder trial modes for each block size, to avoid having to compute this for every texture/mipmap level. + basisu::vector g_encoder_trial_modes[astc_helpers::cTOTAL_BLOCK_SIZES]; + grouped_trial_modes g_grouped_encoder_trial_modes[astc_helpers::cTOTAL_BLOCK_SIZES]; + + grid_weight_dct g_grid_weight_dcts[astc_helpers::cTOTAL_BLOCK_SIZES]; + + // These tables could be initialized per transcoded texture, but that would result in per-texture overhead. + void init_transcoding_tables() + { + if (g_encoder_trial_modes[0].size()) + return; + + // We don't know what ASTC block sizes they're going to transcode, to prepare for all of them. 
+ for (uint32_t i = 0; i < astc_helpers::cTOTAL_BLOCK_SIZES; i++) + { + const uint32_t block_width = astc_helpers::g_astc_block_sizes[i][0]; + const uint32_t block_height = astc_helpers::g_astc_block_sizes[i][1]; + + auto& encoder_trial_modes = g_encoder_trial_modes[i]; + auto& grouped_encoder_trial_modes = g_grouped_encoder_trial_modes[i]; + + encoder_trial_modes.reserve(3072); + create_encoder_trial_modes_table(block_width, block_height, encoder_trial_modes, grouped_encoder_trial_modes, false, false); + + g_grid_weight_dcts[i].init(block_width, block_height); + } // i + } + + const uint16_t g_total_unique_patterns[astc_helpers::NUM_ASTC_BLOCK_SIZES][2] = + { + { 437, 329 }, { 559, 405 }, { 659, 486 }, { 720, 534 }, + { 521, 333 }, { 584, 377 }, { 640, 410 }, { 672, 436 }, + { 710, 468 }, { 701, 476 }, { 759, 528 }, { 799, 568 }, + { 818, 597 }, { 838, 626 } + }; + + inline uint32_t get_total_unique_patterns(uint32_t astc_block_size_index, uint32_t num_parts) + { + assert(astc_block_size_index < astc_helpers::NUM_ASTC_BLOCK_SIZES); + assert((num_parts >= 2) && (num_parts <= 3)); + + return g_total_unique_patterns[astc_block_size_index][num_parts - 2]; + } + + const uint16_t g_unique_to_seed_4x4_p2[] = { 
1,2,3,4,5,7,8,9,10,11,13,14,15,16,17,18,20,21,23,24,25,26,27,28,29,30,33,36,37,39,42,43,44,45,47,48,49,50,51,53,54,55,56,58,59,61,62,63,65,66,68,69,70,71,72,73,74,75,76,78,83,87,89,90,91,94,95,98,99,100,101,107,108,109,110,111,113,114,115,116,119,121,122,124,125,128,129,130,131,134,135,137,138,139,142,143,144,146,147,149,150,151,156,158,159,161,165,167,168,169,170,171,172,174,175,177,181,183,184,191,194,195,196,198,199,203,204,206,207,208,210,211,213,214,215,216,218,220,222,226,227,230,231,232,235,236,239,245,246,247,248,249,250,252,253,254,255,257,258,260,262,264,270,271,273,277,278,279,280,281,284,291,293,299,302,304,305,306,307,309,314,319,324,325,326,327,329,330,335,337,339,341,343,344,347,348,351,352,354,355,359,362,368,370,373,374,375,376,380,386,387,388,389,394,395,399,404,409,411,412,418,419,422,423,426,430,432,438,441,443,445,447,453,455,463,471,474,475,476,478,479,484,487,488,489,490,491,495,496,498,500,504,510,511,513,517,518,523,524,526,527,529,530,531,534,539,542,546,547,549,553,558,567,578,581,583,586,587,591,593,594,595,598,600,601,602,605,607,611,612,614,615,619,622,625,627,631,633,634,638,639,643,647,649,655,658,661,662,663,664,666,672,673,674,681,683,684,686,690,693,694,695,696,700,703,705,707,713,716,719,720,724,726,727,730,731,732,736,742,751,754,756,762,764,766,769,770,773,774,778,780,789,791,796,798,799,801,802,804,807,810,811,812,818,819,821,826,828,831,833,834,836,839,840,842,847,849,852,868,872,873,877,881,886,887,888,890,895,897,898,899,902,903,906,911,914,915,919,923,924,930,934,937,938,943,945,947,948,950,951,954,958,959,963,964,966,967,971,976,983,987,988,993,994,995,998,999,1006,1007,1009,1013,1014,1015,1016,1019,1022,1023 } ; + const uint16_t g_unique_to_seed_5x4_p2[] = { 
1,2,3,4,5,7,8,9,10,11,13,14,15,16,17,18,20,21,23,24,25,26,27,28,29,30,31,33,36,37,39,42,43,44,45,47,48,49,50,51,53,54,55,56,58,59,61,62,63,65,66,67,68,69,70,71,72,73,74,75,76,78,83,87,88,89,90,91,94,95,97,98,99,100,101,107,108,109,110,111,113,114,115,116,119,121,122,124,125,128,129,130,131,132,134,135,137,138,139,142,143,144,145,146,147,149,150,151,153,154,156,157,158,159,161,165,167,168,169,170,171,172,174,175,177,181,183,184,185,188,190,191,194,195,196,198,199,200,203,204,206,207,208,210,211,213,214,215,216,217,218,220,222,225,226,227,229,230,231,232,235,236,239,245,246,247,248,249,250,252,253,254,255,257,258,260,261,262,264,265,267,270,271,273,275,277,278,279,280,281,282,284,287,291,293,295,296,299,300,302,304,305,306,307,309,314,317,319,323,324,325,326,327,329,330,332,335,337,339,341,342,343,344,347,348,349,350,351,352,354,355,359,361,362,365,368,370,373,374,375,376,380,381,386,387,388,389,391,394,395,399,404,405,407,409,410,411,412,418,419,420,422,423,426,430,432,438,439,441,443,445,447,449,453,454,455,462,463,465,471,473,474,475,476,478,479,482,484,486,487,488,489,490,491,495,496,498,500,501,503,504,505,508,510,511,513,516,517,518,519,521,523,524,526,527,529,530,531,533,534,538,539,542,546,547,549,550,551,553,554,558,563,567,569,572,575,578,579,581,583,586,587,591,593,594,595,598,600,601,602,605,606,607,608,611,612,614,615,616,619,622,623,625,627,631,633,634,636,638,639,643,645,647,649,652,655,658,661,662,663,664,665,666,668,672,673,674,675,681,683,684,686,687,690,692,693,694,695,696,697,700,702,703,705,707,709,711,713,716,719,720,724,725,726,727,730,731,732,736,739,742,748,751,754,756,758,762,763,764,766,768,769,770,772,773,774,776,778,780,782,786,789,791,792,796,798,799,801,802,804,807,810,811,812,814,818,819,821,823,826,828,830,831,833,834,835,836,839,840,842,845,847,849,852,858,861,866,868,870,871,872,873,876,877,878,881,886,887,888,890,891,895,897,898,899,901,902,903,906,909,911,914,915,919,923,924,927,929,930,933,934,935,936,937,938,941,942,943,945,947,9
48,950,951,954,955,958,959,963,964,966,967,970,971,975,976,980,983,986,987,988,993,994,995,997,998,999,1001,1006,1007,1009,1010,1013,1014,1015,1016,1019,1020,1022,1023 }; + const uint16_t g_unique_to_seed_5x5_p2[] = { 1,2,3,4,5,7,8,9,10,11,13,14,15,16,17,18,20,21,23,24,25,26,27,28,29,30,31,33,34,36,37,39,42,43,44,45,47,48,49,50,51,53,54,55,56,58,59,60,61,62,63,65,66,67,68,69,70,71,72,73,74,75,76,77,78,81,82,83,87,88,89,90,91,92,94,95,97,98,99,100,101,105,107,108,109,110,111,113,114,115,116,119,120,121,122,124,125,128,129,130,131,132,134,135,137,138,139,142,143,144,145,146,147,148,149,150,151,153,154,155,156,157,158,159,161,165,167,168,169,170,171,172,174,175,176,177,181,182,183,184,185,188,190,191,193,194,195,196,198,199,200,203,204,206,207,208,209,210,211,213,214,215,216,217,218,220,222,224,225,226,227,229,230,231,232,235,236,239,240,242,245,246,247,248,249,250,252,253,254,255,256,257,258,260,261,262,264,265,267,270,271,273,275,276,277,278,279,280,281,282,283,284,286,287,289,291,293,294,295,296,298,299,300,302,303,304,305,306,307,309,313,314,317,318,319,323,324,325,326,327,329,330,331,332,335,337,338,339,341,342,343,344,347,348,349,350,351,352,354,355,356,359,361,362,365,368,370,373,374,375,376,377,378,380,381,386,387,388,389,390,391,392,393,394,395,399,403,404,405,407,409,410,411,412,415,418,419,420,421,422,423,424,426,430,432,433,437,438,439,440,441,443,444,445,446,447,449,453,454,455,458,462,463,465,466,469,470,471,473,474,475,476,478,479,481,482,484,486,487,488,489,490,491,492,495,496,498,500,501,502,503,504,505,506,508,509,510,511,513,516,517,518,519,521,523,524,526,527,529,530,531,533,534,538,539,542,546,547,548,549,550,551,553,554,558,559,561,562,563,567,569,572,575,578,579,580,581,583,585,586,587,590,591,593,594,595,596,598,599,600,601,602,605,606,607,608,609,610,611,612,614,615,616,617,619,620,621,622,623,625,627,631,633,634,636,638,639,643,645,646,647,648,649,652,654,655,658,661,662,663,664,665,666,668,672,673,674,675,681,683,684,686,687,688,690,692,693,6
94,695,696,697,699,700,701,702,703,705,707,709,711,713,714,716,719,720,722,724,725,726,727,730,731,732,736,738,739,742,748,751,753,754,756,758,760,762,763,764,766,768,769,770,772,773,774,776,778,780,782,783,786,789,791,792,796,798,799,801,802,804,806,807,808,810,811,812,813,814,818,819,821,823,826,828,830,831,833,834,835,836,839,840,841,842,845,847,849,851,852,858,861,866,868,869,870,871,872,873,874,876,877,878,879,881,886,887,888,890,891,893,894,895,897,898,899,901,902,903,906,909,910,911,912,914,915,917,919,920,921,923,924,927,929,930,933,934,935,936,937,938,941,942,943,945,947,948,950,951,954,955,958,959,961,962,963,964,966,967,968,970,971,975,976,977,980,983,986,987,988,993,994,995,996,997,998,999,1001,1006,1007,1009,1010,1013,1014,1015,1016,1018,1019,1020,1022,1023 }; + const uint16_t g_unique_to_seed_6x5_p2[] = { 1,2,3,4,5,7,8,9,10,11,13,14,15,16,17,18,20,21,23,24,25,26,27,28,29,30,31,33,34,36,37,39,42,43,44,45,47,48,49,50,51,52,53,54,55,56,58,59,60,61,62,63,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,86,87,88,89,90,91,92,94,95,97,98,99,100,101,105,107,108,109,110,111,112,113,114,115,116,117,119,120,121,122,123,124,125,128,129,130,131,132,134,135,136,137,138,139,142,143,144,145,146,147,148,149,150,151,153,154,155,156,157,158,159,161,165,167,168,169,170,171,172,174,175,176,177,178,181,182,183,184,185,188,190,191,193,194,195,196,198,199,200,201,203,204,206,207,208,209,210,211,212,213,214,215,216,217,218,220,222,223,224,225,226,227,229,230,231,232,235,236,239,240,242,243,245,246,247,248,249,250,252,253,254,255,256,257,258,260,261,262,264,265,266,267,270,271,273,275,276,277,278,279,280,281,282,283,284,286,287,289,291,293,294,295,296,298,299,300,302,303,304,305,306,307,309,310,313,314,317,318,319,323,324,325,326,327,329,330,331,332,335,337,338,339,341,342,343,344,347,348,349,350,351,352,354,355,356,357,359,360,361,362,363,365,367,368,370,371,373,374,375,376,377,378,380,381,383,386,387,388,389,390,391,392,393,394,395,399,402,403,404,405,407,409,410,4
11,412,415,418,419,420,421,422,423,424,426,430,432,433,436,437,438,439,440,441,443,444,445,446,447,449,453,454,455,458,461,462,463,465,466,469,470,471,473,474,475,476,478,479,481,482,484,486,487,488,489,490,491,492,493,495,496,498,499,500,501,502,503,504,505,506,508,509,510,511,513,516,517,518,519,521,523,524,526,527,529,530,531,532,533,534,536,538,539,542,543,545,546,547,548,549,550,551,553,554,558,559,561,562,563,564,567,569,570,572,575,578,579,580,581,583,584,585,586,587,590,591,593,594,595,596,597,598,599,600,601,602,605,606,607,608,609,610,611,612,614,615,616,617,619,620,621,622,623,624,625,627,628,629,631,633,634,636,638,639,643,645,646,647,648,649,651,652,654,655,657,658,661,662,663,664,665,666,668,672,673,674,675,681,683,684,686,687,688,690,692,693,694,695,696,697,699,700,701,702,703,705,706,707,709,711,713,714,716,719,720,722,723,724,725,726,727,730,731,732,736,738,739,742,745,747,748,751,753,754,756,758,760,762,763,764,766,768,769,770,772,773,774,776,778,780,782,783,784,786,788,789,791,792,795,796,798,799,801,802,804,806,807,808,810,811,812,813,814,818,819,820,821,823,826,828,830,831,833,834,835,836,839,840,841,842,845,847,849,851,852,856,858,861,866,868,869,870,871,872,873,874,875,876,877,878,879,881,883,886,887,888,890,891,893,894,895,896,897,898,899,901,902,903,906,908,909,910,911,912,914,915,917,919,920,921,923,924,927,929,930,932,933,934,935,936,937,938,941,942,943,945,947,948,950,951,954,955,956,958,959,960,961,962,963,964,966,967,968,970,971,972,975,976,977,979,980,982,983,986,987,988,993,994,995,996,997,998,999,1001,1004,1006,1007,1009,1010,1013,1014,1015,1016,1017,1018,1019,1020,1022,1023 }; + const uint16_t g_unique_to_seed_6x6_p2[] = { 
1,2,3,4,5,7,8,9,10,11,14,15,16,17,18,19,21,23,24,25,26,27,28,29,31,33,36,37,39,42,43,44,45,46,47,48,49,50,51,53,54,55,56,58,59,60,61,62,63,65,66,67,68,69,70,71,73,74,75,76,77,78,79,83,86,87,88,89,90,91,94,95,98,99,100,101,103,107,108,109,110,112,114,115,116,119,121,122,125,128,129,130,131,132,134,135,136,137,138,139,142,144,146,147,148,149,151,153,154,156,158,159,167,168,169,170,171,172,174,175,177,178,181,183,190,191,193,194,195,196,198,199,203,206,207,210,211,213,214,215,216,217,218,220,222,223,225,226,227,229,230,231,232,235,236,237,240,247,249,250,252,254,255,257,258,260,262,264,266,267,270,271,272,273,277,278,279,281,283,284,286,289,291,292,293,295,298,299,302,303,305,306,307,309,314,316,318,319,323,324,326,327,329,335,339,341,343,347,348,351,352,353,354,355,361,362,363,366,367,368,370,373,374,376,380,386,387,388,389,390,392,393,394,395,402,403,404,407,409,411,414,415,418,419,422,423,426,430,432,433,436,437,438,439,440,441,445,447,450,452,453,455,458,461,463,470,471,474,475,476,478,480,482,486,487,488,490,495,496,498,499,500,504,506,508,510,513,517,518,519,521,523,524,526,527,529,530,531,533,534,538,539,543,545,546,547,549,550,551,553,554,558,562,567,569,572,578,581,583,585,586,587,591,593,594,595,596,598,600,601,602,606,607,608,610,611,612,614,615,616,619,621,622,623,625,627,631,633,634,638,639,643,645,646,647,648,649,652,655,658,661,662,663,664,665,670,672,673,674,675,681,683,684,685,686,687,688,690,691,693,694,695,696,697,700,702,703,705,713,714,716,719,720,722,724,726,727,730,731,732,736,740,742,751,753,754,756,758,760,762,763,764,766,769,770,774,778,780,786,789,791,794,797,798,799,801,802,804,805,807,808,810,811,812,817,819,821,822,826,828,831,834,835,836,839,842,847,852,854,858,862,868,869,870,872,874,876,877,878,886,887,890,891,893,895,897,898,899,902,903,906,909,910,911,912,914,919,929,930,934,936,937,938,942,943,945,947,948,950,951,954,955,958,959,963,964,966,967,968,976,980,983,987,988,993,994,995,998,999,1001,1006,1007,1008,1009,1013,1014,1015,1016,1
019,1022,1023 }; + const uint16_t g_unique_to_seed_8x5_p2[] = { 1,2,3,4,5,7,8,9,10,11,14,15,16,17,18,19,20,21,23,24,25,26,27,28,29,30,31,32,33,36,37,39,42,43,45,46,47,48,49,50,51,53,54,55,56,58,59,61,62,63,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,83,86,87,88,89,90,91,94,95,98,99,100,101,103,107,108,109,110,112,114,115,116,119,121,122,123,125,128,129,130,131,132,134,135,136,137,138,139,142,143,144,146,147,148,149,150,151,154,156,158,159,165,167,168,169,170,171,172,174,175,177,178,181,183,184,191,193,194,195,196,198,199,203,204,206,207,209,210,211,213,214,215,216,217,218,220,222,225,226,227,229,230,231,232,235,236,237,240,247,248,249,250,252,253,254,255,257,258,260,261,262,264,266,267,270,271,272,273,277,278,279,281,284,286,287,289,291,292,293,295,296,298,299,300,302,303,304,305,306,307,309,314,316,317,318,319,323,324,325,326,327,329,330,335,337,339,341,342,343,344,347,348,351,352,353,354,355,359,361,362,363,366,367,368,370,371,374,375,376,380,381,386,387,388,389,390,391,392,393,394,395,399,402,403,404,405,407,409,410,411,412,415,418,419,422,423,424,426,430,432,433,436,437,438,439,440,441,444,445,446,447,450,451,452,453,454,455,458,461,462,463,465,470,471,473,474,475,476,478,479,482,484,486,487,488,490,491,495,496,498,499,500,501,502,504,505,506,508,510,511,513,517,518,519,521,523,524,526,527,530,531,533,534,535,538,539,543,545,546,547,550,551,554,558,559,562,567,569,572,578,579,581,583,585,586,587,591,593,594,595,598,600,601,602,606,607,608,610,611,612,614,615,618,619,621,622,623,625,627,631,633,636,638,639,643,645,646,647,648,649,650,651,652,655,658,659,661,662,663,664,665,666,668,672,673,674,675,683,684,685,686,687,688,690,691,692,693,694,695,696,697,700,701,702,703,705,707,711,713,716,719,720,722,724,725,726,727,730,731,732,736,739,740,742,748,751,753,754,756,758,760,762,763,764,766,768,770,774,775,778,780,786,789,791,794,796,798,799,801,802,804,805,807,808,810,811,812,813,817,819,821,825,826,831,834,836,839,841,842,845,847,849,851,852,854,856,862,868,869,870,
871,872,874,876,877,879,881,886,887,890,891,893,895,897,898,899,902,903,906,909,910,911,914,915,918,919,921,923,924,927,929,930,934,936,937,938,942,943,945,947,948,951,954,955,958,959,962,963,964,966,967,968,970,971,976,977,979,983,987,988,989,993,994,995,997,998,999,1001,1006,1007,1008,1009,1013,1015,1016,1018,1019,1022,1023 }; + const uint16_t g_unique_to_seed_8x6_p2[] = { 1,2,3,4,5,7,8,9,10,11,13,14,15,16,17,18,19,20,21,23,24,25,26,27,28,29,30,31,32,33,36,37,39,42,43,44,45,46,47,48,49,50,51,53,54,55,56,58,59,60,61,62,63,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,83,86,87,88,89,90,91,94,95,98,99,100,101,103,107,108,109,110,112,113,114,115,116,119,121,122,123,125,128,129,130,131,132,134,135,136,137,138,139,142,143,144,146,147,148,149,150,151,153,154,156,158,159,165,167,168,169,170,171,172,174,175,177,178,181,183,184,190,191,193,194,195,196,198,199,200,203,204,206,207,208,209,210,211,213,214,215,216,217,218,220,222,223,225,226,227,229,230,231,232,235,236,237,240,245,246,247,248,249,250,252,253,254,255,257,258,260,261,262,264,266,267,270,271,272,273,277,278,279,281,283,284,286,287,289,291,292,293,294,295,296,298,299,300,302,303,304,305,306,307,309,314,316,317,318,319,323,324,325,326,327,329,330,335,337,339,341,342,343,344,347,348,349,350,351,352,353,354,355,359,361,362,363,365,366,367,368,370,371,373,374,375,376,380,381,386,387,388,389,390,391,392,393,394,395,399,402,403,404,405,407,409,410,411,412,414,415,418,419,422,423,424,426,430,432,433,436,437,438,439,440,441,443,444,445,446,447,450,451,452,453,454,455,458,461,462,463,465,470,471,473,474,475,476,478,479,480,482,484,486,487,488,489,490,491,495,496,498,499,500,501,502,504,505,506,508,510,511,513,517,518,519,521,523,524,526,527,529,530,531,533,534,535,538,539,543,545,546,547,549,550,551,553,554,558,559,562,567,569,572,575,578,579,581,583,585,586,587,591,593,594,595,596,598,600,601,602,606,607,608,610,611,612,614,615,616,618,619,621,622,623,625,627,631,633,634,636,638,639,643,645,646,647,648,649,650,651,652,655,6
58,659,661,662,663,664,665,666,668,670,672,673,674,675,681,683,684,685,686,687,688,690,691,692,693,694,695,696,697,700,701,702,703,705,707,711,713,714,716,719,720,722,724,725,726,727,730,731,732,736,739,740,742,748,751,753,754,756,758,760,762,763,764,766,768,769,770,773,774,775,776,778,780,786,789,791,792,794,796,797,798,799,801,802,804,805,807,808,810,811,812,813,817,818,819,821,822,825,826,828,830,831,833,834,835,836,839,841,842,845,847,849,851,852,854,856,858,861,862,868,869,870,871,872,873,874,876,877,878,879,881,886,887,888,890,891,893,895,897,898,899,902,903,906,909,910,911,912,914,915,918,919,921,923,924,927,929,930,933,934,935,936,937,938,940,942,943,945,947,948,950,951,954,955,958,959,962,963,964,966,967,968,970,971,976,977,979,980,983,986,987,988,989,993,994,995,997,998,999,1001,1006,1007,1008,1009,1013,1014,1015,1016,1018,1019,1021,1022,1023 }; + const uint16_t g_unique_to_seed_10x5_p2[] = { 1,2,3,4,5,7,8,9,10,11,13,14,15,16,17,18,19,20,21,23,24,25,26,27,28,29,30,31,32,33,36,37,39,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,58,59,61,62,63,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,83,84,85,86,87,88,89,90,91,94,95,97,98,99,100,101,103,107,108,109,110,112,113,114,115,116,119,121,122,123,125,128,129,130,131,132,134,135,136,137,138,139,142,143,144,145,146,147,148,149,150,151,153,154,155,156,157,158,159,165,167,168,169,170,171,172,174,175,177,178,181,183,184,185,188,191,193,194,195,196,198,199,200,203,204,206,207,208,209,210,211,213,214,215,216,217,218,219,220,222,223,225,226,227,229,230,231,232,235,236,237,238,239,240,247,248,249,250,252,253,254,255,257,258,260,261,262,263,264,265,266,267,270,271,272,273,275,277,278,279,281,282,283,284,286,287,289,291,292,293,294,295,296,298,299,300,302,303,304,305,306,307,309,314,316,317,318,319,323,324,325,326,327,329,330,332,335,337,339,341,342,343,344,347,348,349,350,351,352,353,354,355,358,359,361,362,363,365,366,367,368,370,371,374,375,376,380,381,386,387,388,389,390,391,392,393,394,395,399,402,403,404,405,407,409,41
0,411,412,415,418,419,422,423,424,426,429,430,432,433,436,437,438,439,440,441,443,444,445,446,447,449,450,451,452,453,454,455,458,461,462,463,465,470,471,473,474,475,476,478,479,481,482,484,486,487,488,489,490,491,492,495,496,498,499,500,501,502,503,504,505,506,508,510,511,513,517,518,519,521,523,524,526,527,529,530,531,533,534,535,538,539,542,543,545,546,547,548,550,551,554,558,559,562,567,569,570,571,572,575,578,579,580,581,583,585,586,587,591,593,594,595,597,598,600,601,602,606,607,608,609,610,611,612,614,615,616,617,618,619,621,622,623,624,625,627,631,633,634,636,638,639,643,645,646,647,648,649,650,651,652,655,657,658,659,661,662,663,664,665,666,668,672,673,674,675,681,683,684,685,686,687,688,690,691,692,693,694,695,696,697,699,700,701,702,703,705,706,707,711,713,716,719,720,722,723,724,725,726,727,730,731,732,736,739,740,742,748,751,753,754,755,756,757,758,760,762,763,764,766,768,770,773,774,775,776,778,780,786,789,791,794,795,796,798,799,801,802,804,805,807,808,810,811,812,813,814,817,818,819,821,822,823,825,826,828,830,831,834,835,836,839,841,842,845,847,849,851,852,854,856,862,868,869,870,871,872,873,874,876,877,878,879,881,883,886,887,888,890,891,893,895,897,898,899,901,902,903,906,909,910,911,914,915,918,919,920,921,923,924,927,929,930,932,933,934,935,936,937,938,941,942,943,945,947,948,950,951,954,955,956,958,959,961,962,963,964,966,967,968,969,970,971,975,976,977,979,980,983,986,987,988,989,993,994,995,996,997,998,999,1001,1006,1007,1008,1009,1013,1014,1015,1016,1017,1018,1019,1020,1022,1023 }; + const uint16_t g_unique_to_seed_10x6_p2[] = { 
1,2,3,4,5,7,8,9,10,11,13,14,15,16,17,18,19,20,21,23,24,25,26,27,28,29,30,31,32,33,36,37,39,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,58,59,60,61,62,63,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,83,84,85,86,87,88,89,90,91,94,95,97,98,99,100,101,103,107,108,109,110,112,113,114,115,116,119,121,122,123,124,125,128,129,130,131,132,134,135,136,137,138,139,142,143,144,145,146,147,148,149,150,151,153,154,155,156,157,158,159,165,167,168,169,170,171,172,174,175,177,178,181,183,184,185,188,190,191,193,194,195,196,198,199,200,203,204,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,225,226,227,229,230,231,232,235,236,237,238,239,240,245,246,247,248,249,250,252,253,254,255,257,258,260,261,262,263,264,265,266,267,270,271,272,273,274,275,277,278,279,281,282,283,284,286,287,289,291,292,293,294,295,296,298,299,300,302,303,304,305,306,307,309,310,314,316,317,318,319,323,324,325,326,327,329,330,332,335,337,338,339,341,342,343,344,347,348,349,350,351,352,353,354,355,358,359,361,362,363,365,366,367,368,370,371,373,374,375,376,380,381,386,387,388,389,390,391,392,393,394,395,399,402,403,404,405,407,409,410,411,412,414,415,418,419,422,423,424,426,429,430,432,433,436,437,438,439,440,441,443,444,445,446,447,449,450,451,452,453,454,455,458,461,462,463,465,470,471,473,474,475,476,478,479,480,481,482,484,486,487,488,489,490,491,492,495,496,498,499,500,501,502,503,504,505,506,508,509,510,511,513,516,517,518,519,521,523,524,526,527,529,530,531,533,534,535,538,539,542,543,545,546,547,548,549,550,551,553,554,558,559,562,567,569,570,571,572,575,578,579,580,581,583,585,586,587,590,591,593,594,595,596,597,598,600,601,602,606,607,608,609,610,611,612,614,615,616,617,618,619,621,622,623,624,625,627,631,633,634,636,638,639,643,645,646,647,648,649,650,651,652,655,657,658,659,661,662,663,664,665,666,668,670,672,673,674,675,681,683,684,685,686,687,688,690,691,692,693,694,695,696,697,699,700,701,702,703,705,706,707,709,711,713,714,716,719,720,722,723,724,725,726,727,730,731,732,736
,739,740,742,748,751,753,754,755,756,757,758,760,762,763,764,766,768,769,770,772,773,774,775,776,778,780,782,783,786,788,789,791,792,794,795,796,797,798,799,801,802,804,805,807,808,810,811,812,813,814,817,818,819,821,822,823,825,826,828,830,831,833,834,835,836,839,841,842,845,847,849,851,852,854,856,858,861,862,868,869,870,871,872,873,874,876,877,878,879,881,883,886,887,888,890,891,893,895,897,898,899,901,902,903,906,908,909,910,911,912,914,915,918,919,920,921,923,924,927,929,930,932,933,934,935,936,937,938,940,941,942,943,945,947,948,949,950,951,954,955,956,958,959,961,962,963,964,966,967,968,969,970,971,975,976,977,979,980,983,986,987,988,989,993,994,995,996,997,998,999,1001,1004,1006,1007,1008,1009,1013,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023 }; + const uint16_t g_unique_to_seed_8x8_p2[] = { 1,2,3,4,5,7,8,9,10,11,13,14,15,16,17,18,19,20,21,23,24,25,26,27,28,29,30,31,32,33,36,37,39,42,43,44,45,46,47,48,49,50,51,53,54,55,56,58,59,60,61,62,63,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,83,86,87,88,89,90,91,94,95,97,98,99,100,101,102,103,107,108,109,110,111,112,113,114,115,116,118,119,120,121,122,123,124,125,128,129,130,131,132,134,135,136,137,138,139,142,143,144,146,147,148,149,150,151,152,153,154,155,156,158,159,161,165,167,168,169,170,171,172,174,175,177,178,181,182,183,184,188,190,191,193,194,195,196,198,199,200,201,203,204,206,207,208,209,210,211,213,214,215,216,217,218,220,222,223,224,225,226,227,229,230,231,232,235,236,237,239,240,242,245,246,247,248,249,250,252,253,254,255,257,258,260,261,262,264,265,266,267,270,271,272,273,276,277,278,279,280,281,282,283,284,286,287,289,291,292,293,294,295,296,298,299,300,302,303,304,305,306,307,309,314,316,317,318,319,323,324,325,326,327,329,330,331,332,333,335,337,338,339,341,342,343,344,347,348,349,350,351,352,353,354,355,359,361,362,363,365,366,367,368,370,371,373,374,375,376,380,381,386,387,388,389,390,391,392,393,394,395,399,400,402,403,404,405,407,409,410,411,412,414,415,418,419,420,422,423,424,426,430,432,
433,436,437,438,439,440,441,443,444,445,446,447,450,451,452,453,454,455,458,461,462,463,465,466,470,471,473,474,475,476,478,479,480,481,482,484,486,487,488,489,490,491,495,496,498,499,500,501,502,503,504,505,506,508,510,511,513,516,517,518,519,521,523,524,526,527,529,530,531,533,534,535,538,539,542,543,545,546,547,548,549,550,551,553,554,558,559,562,563,567,569,572,575,578,579,580,581,583,585,586,587,590,591,593,594,595,596,598,599,600,601,602,605,606,607,608,609,610,611,612,614,615,616,618,619,620,621,622,623,625,627,631,633,634,636,638,639,643,645,646,647,648,649,650,651,652,655,658,659,661,662,663,664,665,666,668,670,672,673,674,675,681,683,684,685,686,687,688,690,691,692,693,694,695,696,697,700,701,702,703,705,707,709,711,713,714,716,719,720,722,724,725,726,727,730,731,732,736,739,740,742,743,748,751,753,754,755,756,757,758,760,762,763,764,766,768,769,770,772,773,774,775,776,778,780,782,783,786,789,791,792,793,794,796,797,798,799,801,802,804,805,806,807,808,810,811,812,813,814,817,818,819,821,822,825,826,828,830,831,833,834,835,836,839,840,841,842,845,847,849,851,852,854,856,858,861,862,866,868,869,870,871,872,873,874,876,877,878,879,881,886,887,888,890,891,893,894,895,897,898,899,901,902,903,906,908,909,910,911,912,914,915,916,917,918,919,921,923,924,927,929,930,933,934,935,936,937,938,940,942,943,945,947,948,949,950,951,954,955,956,958,959,962,963,964,966,967,968,969,970,971,975,976,977,979,980,983,986,987,988,989,993,994,995,997,998,999,1001,1006,1007,1008,1009,1010,1013,1014,1015,1016,1018,1019,1020,1021,1022,1023 }; + const uint16_t g_unique_to_seed_10x8_p2[] = { 
1,2,3,4,5,7,8,9,10,11,13,14,15,16,17,18,19,20,21,23,24,25,26,27,28,29,30,31,32,33,34,36,37,39,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,58,59,60,61,62,63,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,83,84,85,86,87,88,89,90,91,92,94,95,97,98,99,100,101,102,103,105,107,108,109,110,111,112,113,114,115,116,118,119,120,121,122,123,124,125,128,129,130,131,132,134,135,136,137,138,139,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,161,165,167,168,169,170,171,172,174,175,177,178,179,180,181,182,183,184,185,188,190,191,193,194,195,196,198,199,200,201,203,204,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,229,230,231,232,235,236,237,238,239,240,242,245,246,247,248,249,250,252,253,254,255,257,258,260,261,262,263,264,265,266,267,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,286,287,289,291,292,293,294,295,296,298,299,300,302,303,304,305,306,307,309,310,314,316,317,318,319,321,323,324,325,326,327,329,330,331,332,333,335,337,338,339,341,342,343,344,347,348,349,350,351,352,353,354,355,358,359,361,362,363,365,366,367,368,370,371,373,374,375,376,378,380,381,385,386,387,388,389,390,391,392,393,394,395,399,400,402,403,404,405,407,409,410,411,412,414,415,418,419,420,422,423,424,426,429,430,432,433,436,437,438,439,440,441,443,444,445,446,447,449,450,451,452,453,454,455,458,461,462,463,465,466,469,470,471,473,474,475,476,478,479,480,481,482,484,486,487,488,489,490,491,492,495,496,498,499,500,501,502,503,504,505,506,508,509,510,511,513,516,517,518,519,520,521,523,524,526,527,529,530,531,533,534,535,538,539,542,543,545,546,547,548,549,550,551,553,554,558,559,562,563,567,569,570,571,572,575,578,579,580,581,583,584,585,586,587,590,591,593,594,595,596,597,598,599,600,601,602,605,606,607,608,609,610,611,612,614,615,616,617,618,619,620,621,622,623,624,625,627,628,629,631,633,634,636,638,639,643,645,646,647,648,649,650,651,652,655,657,658,659,661,662,663,664,665,666,668,670,672,673,674,675,681,683,684,685,686,687,6
88,690,691,692,693,694,695,696,697,699,700,701,702,703,705,706,707,709,711,713,714,716,719,720,722,723,724,725,726,727,730,731,732,736,739,740,742,743,745,747,748,751,753,754,755,756,757,758,760,762,763,764,766,768,769,770,772,773,774,775,776,778,780,781,782,783,786,788,789,791,792,793,794,795,796,797,798,799,801,802,804,805,806,807,808,810,811,812,813,814,817,818,819,821,822,823,825,826,828,830,831,833,834,835,836,839,840,841,842,845,847,849,851,852,854,856,858,861,862,866,868,869,870,871,872,873,874,876,877,878,879,880,881,883,886,887,888,890,891,893,894,895,897,898,899,901,902,903,906,908,909,910,911,912,914,915,916,917,918,919,920,921,923,924,927,929,930,932,933,934,935,936,937,938,940,941,942,943,945,947,948,949,950,951,954,955,956,958,959,960,961,962,963,964,966,967,968,969,970,971,975,976,977,979,980,982,983,986,987,988,989,993,994,995,996,997,998,999,1001,1004,1006,1007,1008,1009,1010,1013,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023 }; + const uint16_t g_unique_to_seed_10x10_p2[] = { 
1,2,3,4,5,7,8,9,10,11,13,14,15,16,17,18,19,20,21,23,24,25,26,27,28,29,30,31,32,33,34,36,37,39,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,58,59,60,61,62,63,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,81,82,83,84,85,86,87,88,89,90,91,92,94,95,97,98,99,100,101,102,103,105,107,108,109,110,111,112,113,114,115,116,118,119,120,121,122,123,124,125,128,129,130,131,132,134,135,136,137,138,139,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,161,165,167,168,169,170,171,172,174,175,176,177,178,179,180,181,182,183,184,185,188,190,191,193,194,195,196,198,199,200,201,202,203,204,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,229,230,231,232,235,236,237,238,239,240,242,245,246,247,248,249,250,252,253,254,255,256,257,258,260,261,262,263,264,265,266,267,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,286,287,289,291,292,293,294,295,296,298,299,300,302,303,304,305,306,307,309,310,313,314,316,317,318,319,321,323,324,325,326,327,329,330,331,332,333,335,337,338,339,341,342,343,344,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,365,366,367,368,370,371,373,374,375,376,377,378,380,381,385,386,387,388,389,390,391,392,393,394,395,399,400,402,403,404,405,407,409,410,411,412,414,415,417,418,419,420,421,422,423,424,425,426,427,428,429,430,432,433,436,437,438,439,440,441,443,444,445,446,447,449,450,451,452,453,454,455,458,459,461,462,463,465,466,469,470,471,473,474,475,476,478,479,480,481,482,484,486,487,488,489,490,491,492,493,494,495,496,498,499,500,501,502,503,504,505,506,508,509,510,511,513,514,516,517,518,519,520,521,522,523,524,526,527,528,529,530,531,532,533,534,535,536,538,539,542,543,545,546,547,548,549,550,551,553,554,558,559,561,562,563,566,567,569,570,571,572,575,578,579,580,581,583,584,585,586,587,590,591,593,594,595,596,597,598,599,600,601,602,605,606,607,608,609,610,611,612,614,615,616,617,618,619,620,621,622,623,624,625,627,628,629,631,633,634,636,638,639,643,645,646,647,648,649,650
,651,652,654,655,657,658,659,661,662,663,664,665,666,667,668,670,672,673,674,675,681,683,684,685,686,687,688,690,691,692,693,694,695,696,697,699,700,701,702,703,705,706,707,709,711,713,714,716,719,720,722,723,724,725,726,727,730,731,732,736,738,739,740,742,743,745,747,748,751,753,754,755,756,757,758,760,762,763,764,766,768,769,770,771,772,773,774,775,776,778,780,781,782,783,784,786,788,789,791,792,793,794,795,796,797,798,799,801,802,803,804,805,806,807,808,810,811,812,813,814,817,818,819,820,821,822,823,825,826,828,829,830,831,833,834,835,836,839,840,841,842,845,846,847,848,849,851,852,854,855,856,858,861,862,866,867,868,869,870,871,872,873,874,876,877,878,879,880,881,883,886,887,888,890,891,893,894,895,896,897,898,899,901,902,903,904,906,908,909,910,911,912,914,915,916,917,918,919,920,921,923,924,927,929,930,932,933,934,935,936,937,938,940,941,942,943,945,947,948,949,950,951,954,955,956,958,959,960,961,962,963,964,966,967,968,969,970,971,972,975,976,977,979,980,982,983,986,987,988,989,993,994,995,996,997,998,999,1001,1004,1006,1007,1008,1009,1010,1013,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023 }; + const uint16_t g_unique_to_seed_12x10_p2[] = { 
1,2,3,4,5,7,8,9,10,11,13,14,15,16,17,18,19,20,21,23,24,25,26,27,28,29,30,31,32,33,34,36,37,39,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,94,95,97,98,99,100,101,102,103,104,105,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,128,129,130,131,132,134,135,136,137,138,139,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,161,165,167,168,169,170,171,172,174,175,176,177,178,179,180,181,182,183,184,185,188,190,191,193,194,195,196,198,199,200,201,202,203,204,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,229,230,231,232,235,236,237,238,239,240,242,243,244,245,246,247,248,249,250,252,253,254,255,256,257,258,260,261,262,263,264,265,266,267,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,286,287,289,291,292,293,294,295,296,298,299,300,302,303,304,305,306,307,309,310,313,314,316,317,318,319,321,323,324,325,326,327,329,330,331,332,333,335,337,338,339,341,342,343,344,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,365,366,367,368,370,371,373,374,375,376,377,378,380,381,383,385,386,387,388,389,390,391,392,393,394,395,399,400,402,403,404,405,407,409,410,411,412,414,415,417,418,419,420,421,422,423,424,425,426,427,428,429,430,432,433,436,437,438,439,440,441,443,444,445,446,447,449,450,451,452,453,454,455,458,459,461,462,463,465,466,469,470,471,473,474,475,476,478,479,480,481,482,484,486,487,488,489,490,491,492,493,494,495,496,498,499,500,501,502,503,504,505,506,508,509,510,511,513,514,515,516,517,518,519,520,521,522,523,524,526,527,528,529,530,531,532,533,534,535,536,537,538,539,542,543,545,546,547,548,549,550,551,553,554,558,559,561,562,563,564,566,567,569,570,571,572,575,578,579,580,581,583,584,585,586,587,589,590,591,593,594,595,596,597,598,599,600,601,602,605,606,607,608,609,610,611,612,614,615,616,617,618,619,620,621,622,623,624,625,627,628,629,631,6
33,634,636,638,639,643,645,646,647,648,649,650,651,652,654,655,657,658,659,661,662,663,664,665,666,667,668,670,671,672,673,674,675,681,683,684,685,686,687,688,690,691,692,693,694,695,696,697,699,700,701,702,703,705,706,707,709,711,713,714,716,719,720,722,723,724,725,726,727,730,731,732,736,738,739,740,742,743,745,747,748,751,753,754,755,756,757,758,760,762,763,764,766,768,769,770,771,772,773,774,775,776,778,780,781,782,783,784,785,786,787,788,789,791,792,793,794,795,796,797,798,799,801,802,803,804,805,806,807,808,810,811,812,813,814,817,818,819,820,821,822,823,825,826,828,829,830,831,833,834,835,836,839,840,841,842,845,846,847,848,849,851,852,854,855,856,857,858,861,862,866,867,868,869,870,871,872,873,874,875,876,877,878,879,880,881,883,886,887,888,890,891,893,894,895,896,897,898,899,901,902,903,904,906,908,909,910,911,912,913,914,915,916,917,918,919,920,921,923,924,927,929,930,932,933,934,935,936,937,938,940,941,942,943,944,945,947,948,949,950,951,954,955,956,958,959,960,961,962,963,964,966,967,968,969,970,971,972,975,976,977,979,980,982,983,986,987,988,989,993,994,995,996,997,998,999,1001,1004,1006,1007,1008,1009,1010,1013,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023 }; + const uint16_t g_unique_to_seed_12x12_p2[] = { 
1,2,3,4,5,7,8,9,10,11,13,14,15,16,17,18,19,20,21,23,24,25,26,27,28,29,30,31,32,33,34,36,37,39,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,94,95,97,98,99,100,101,102,103,104,105,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,128,129,130,131,132,134,135,136,137,138,139,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,161,165,167,168,169,170,171,172,174,175,176,177,178,179,180,181,182,183,184,185,186,188,190,191,193,194,195,196,198,199,200,201,202,203,204,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,229,230,231,232,235,236,237,238,239,240,242,243,244,245,246,247,248,249,250,252,253,254,255,256,257,258,260,261,262,263,264,265,266,267,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,286,287,289,290,291,292,293,294,295,296,298,299,300,302,303,304,305,306,307,309,310,313,314,316,317,318,319,321,323,324,325,326,327,329,330,331,332,333,335,337,338,339,340,341,342,343,344,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,365,366,367,368,370,371,373,374,375,376,377,378,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,399,400,402,403,404,405,407,409,410,411,412,414,415,417,418,419,420,421,422,423,424,425,426,427,428,429,430,432,433,436,437,438,439,440,441,443,444,445,446,447,449,450,451,452,453,454,455,456,458,459,461,462,463,465,466,469,470,471,473,474,475,476,478,479,480,481,482,484,486,487,488,489,490,491,492,493,494,495,496,498,499,500,501,502,503,504,505,506,508,509,510,511,513,514,515,516,517,518,519,520,521,522,523,524,526,527,528,529,530,531,532,533,534,535,536,537,538,539,542,543,545,546,547,548,549,550,551,553,554,557,558,559,561,562,563,564,566,567,569,570,571,572,575,576,578,579,580,581,583,584,585,586,587,589,590,591,593,594,595,596,597,598,599,600,601,602,605,606,607,608,609,610,611,612,614,615,616,617,618,619,6
20,621,622,623,624,625,627,628,629,631,633,634,636,638,639,640,643,644,645,646,647,648,649,650,651,652,654,655,657,658,659,660,661,662,663,664,665,666,667,668,670,671,672,673,674,675,681,683,684,685,686,687,688,690,691,692,693,694,695,696,697,699,700,701,702,703,705,706,707,709,711,713,714,716,717,719,720,721,722,723,724,725,726,727,730,731,732,736,738,739,740,742,743,745,747,748,751,753,754,755,756,757,758,760,762,763,764,766,768,769,770,771,772,773,774,775,776,778,780,781,782,783,784,785,786,787,788,789,791,792,793,794,795,796,797,798,799,800,801,802,803,804,805,806,807,808,810,811,812,813,814,815,817,818,819,820,821,822,823,825,826,828,829,830,831,833,834,835,836,837,839,840,841,842,844,845,846,847,848,849,851,852,854,855,856,857,858,861,862,863,866,867,868,869,870,871,872,873,874,875,876,877,878,879,880,881,883,886,887,888,890,891,893,894,895,896,897,898,899,901,902,903,904,906,908,909,910,911,912,913,914,915,916,917,918,919,920,921,923,924,927,929,930,932,933,934,935,936,937,938,940,941,942,943,944,945,947,948,949,950,951,954,955,956,958,959,960,961,962,963,964,966,967,968,969,970,971,972,975,976,977,979,980,982,983,986,987,988,989,993,994,995,996,997,998,999,1001,1004,1006,1007,1008,1009,1010,1013,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023 }; + + const uint16_t g_unique_to_seed_4x4_p3[] = { 
0,3,8,11,14,15,17,26,29,30,31,32,33,36,38,43,44,47,49,51,55,56,57,59,67,70,74,76,79,81,82,88,89,90,100,104,108,110,111,117,122,126,127,132,133,134,135,139,147,150,151,152,156,157,163,166,167,168,171,175,176,179,181,182,183,186,189,192,199,203,205,207,210,214,216,230,236,247,249,250,252,254,260,262,263,266,272,273,276,291,292,294,297,302,309,310,313,314,318,319,324,327,328,330,331,335,337,346,355,356,357,358,363,365,368,378,381,384,386,388,390,391,392,397,398,401,410,411,417,419,427,431,437,439,440,446,451,455,457,458,459,460,462,464,467,468,471,472,474,475,477,479,483,487,488,493,495,496,497,502,503,504,511,512,516,518,519,523,525,530,532,538,543,544,546,547,549,550,551,553,554,562,567,568,570,571,578,579,581,582,588,589,590,593,594,600,601,606,611,613,623,624,625,630,637,638,645,646,648,650,651,658,659,662,666,669,670,678,683,686,688,691,694,696,699,700,701,703,704,707,713,715,717,719,722,724,725,727,730,731,735,738,739,745,750,751,758,759,760,766,775,776,779,783,784,785,786,787,788,798,799,802,804,805,807,808,809,812,821,822,823,825,827,831,835,837,838,842,844,845,846,848,853,854,858,859,860,866,884,888,892,894,898,902,906,907,915,918,922,923,925,927,931,932,937,938,940,943,945,953,955,958,959,963,971,974,977,979,989,990,998,1005,1006,1007,1011,1012,1015,1020,1023 }; + const uint16_t g_unique_to_seed_5x4_p3[] = { 
0,3,7,8,11,12,14,15,17,18,26,29,30,31,32,33,34,36,38,39,43,44,47,49,51,55,56,57,59,62,63,67,70,74,76,79,81,82,88,89,90,91,100,103,104,108,110,111,117,122,123,126,127,132,133,134,135,136,139,144,147,150,151,152,156,157,158,163,166,167,168,171,173,175,176,179,181,182,183,186,189,192,199,203,205,207,210,214,216,222,230,236,246,247,249,250,252,254,259,260,262,263,266,269,272,273,274,275,276,291,292,293,294,297,302,306,309,310,311,313,314,315,318,319,324,327,328,330,331,335,337,346,355,356,357,358,359,363,365,368,377,378,381,384,386,388,390,391,392,394,397,398,401,407,410,411,417,419,427,430,431,437,439,440,446,451,455,457,458,459,460,462,464,467,468,470,471,472,474,475,477,478,479,483,485,487,488,493,495,496,497,501,502,503,504,506,508,510,511,512,515,516,518,519,521,523,524,525,530,532,538,541,543,544,546,547,549,550,551,552,553,554,562,567,568,570,571,577,578,579,581,582,588,589,590,593,594,595,600,601,603,606,609,611,613,623,624,625,630,632,637,638,639,645,646,648,650,651,654,658,659,662,666,669,670,678,679,683,685,686,688,691,694,696,699,700,701,703,704,707,713,715,717,719,722,724,725,727,730,731,732,735,738,739,742,745,746,749,750,751,758,759,760,766,769,773,775,776,779,783,784,785,786,787,788,791,793,798,799,802,804,805,806,807,808,809,812,813,821,822,823,825,827,831,835,837,838,839,842,844,845,846,848,853,854,858,859,860,866,873,876,877,884,887,888,892,894,898,902,906,907,914,915,918,919,922,923,925,927,931,932,937,938,940,943,944,945,951,953,955,958,959,963,971,972,974,977,979,982,983,989,990,991,998,999,1005,1006,1007,1010,1011,1012,1015,1020,1022,1023 }; + const uint16_t g_unique_to_seed_5x5_p3[] = { 
0,3,7,8,10,11,12,14,15,17,18,26,27,29,30,31,32,33,34,36,38,39,43,44,47,48,49,50,51,55,56,57,59,60,61,62,63,67,70,72,74,76,79,81,82,88,89,90,91,94,100,103,104,106,108,110,111,115,117,122,123,126,127,128,130,132,133,134,135,136,139,144,147,150,151,152,156,157,158,162,163,166,167,168,169,171,173,175,176,179,181,182,183,186,189,192,199,203,205,207,209,210,214,216,220,222,227,230,235,236,246,247,249,250,252,254,257,259,260,262,263,266,269,272,273,274,275,276,279,282,291,292,293,294,295,297,302,306,309,310,311,313,314,315,318,319,324,326,327,328,330,331,335,337,342,345,346,353,355,356,357,358,359,363,364,365,368,371,374,377,378,381,384,386,387,388,390,391,392,394,397,398,399,401,407,410,411,417,419,427,430,431,437,438,439,440,443,446,451,455,456,457,458,459,460,462,463,464,466,467,468,470,471,472,474,475,477,478,479,480,482,483,485,487,488,493,495,496,497,501,502,503,504,506,508,510,511,512,515,516,518,519,521,522,523,524,525,530,532,538,539,541,543,544,546,547,549,550,551,552,553,554,555,562,567,568,570,571,577,578,579,581,582,586,588,589,590,593,594,595,600,601,602,603,606,609,610,611,613,618,623,624,625,626,630,632,637,638,639,645,646,648,650,651,654,658,659,662,666,667,668,669,670,671,678,679,683,685,686,687,688,691,694,696,698,699,700,701,703,704,707,708,713,715,717,719,722,724,725,727,730,731,732,734,735,738,739,742,745,746,747,748,749,750,751,753,758,759,760,764,766,767,769,771,773,775,776,779,780,781,783,784,785,786,787,788,791,793,794,798,799,800,802,804,805,806,807,808,809,811,812,813,821,822,823,825,827,831,835,837,838,839,840,842,843,844,845,846,847,848,850,852,853,854,858,859,860,866,869,873,874,876,877,881,884,886,887,888,892,894,897,898,902,905,906,907,914,915,918,919,920,922,923,925,927,931,932,937,938,940,943,944,945,951,953,954,955,958,959,963,971,972,973,974,977,978,979,982,983,989,990,991,992,998,999,1004,1005,1006,1007,1010,1011,1012,1015,1020,1022,1023 }; + const uint16_t g_unique_to_seed_6x5_p3[] = { 
0,3,7,8,10,11,12,14,15,17,18,21,23,26,27,29,30,31,32,33,34,35,36,38,39,42,43,44,47,48,49,50,51,55,56,57,59,60,61,62,63,67,70,72,74,76,79,81,82,88,89,90,91,94,100,102,103,104,106,108,110,111,114,115,117,120,122,123,126,127,128,130,132,133,134,135,136,139,140,144,147,150,151,152,153,156,157,158,162,163,166,167,168,169,171,173,175,176,179,181,182,183,186,189,192,198,199,200,203,205,207,209,210,214,216,220,222,227,230,231,235,236,245,246,247,249,250,252,254,257,259,260,262,263,266,269,272,273,274,275,276,279,281,282,288,291,292,293,294,295,297,300,302,306,309,310,311,313,314,315,318,319,324,326,327,328,330,331,335,337,342,345,346,348,353,355,356,357,358,359,363,364,365,368,371,372,374,377,378,379,381,384,386,387,388,390,391,392,394,395,397,398,399,401,407,410,411,412,413,417,419,427,430,431,437,438,439,440,443,446,450,451,455,456,457,458,459,460,461,462,463,464,465,466,467,468,470,471,472,474,475,477,478,479,480,482,483,485,487,488,493,495,496,497,500,501,502,503,504,505,506,508,510,511,512,513,515,516,518,519,521,522,523,524,525,527,530,532,538,539,541,543,544,546,547,549,550,551,552,553,554,555,557,558,562,566,567,568,570,571,577,578,579,580,581,582,584,586,588,589,590,593,594,595,600,601,602,603,606,609,610,611,613,614,618,623,624,625,626,630,632,637,638,639,644,645,646,648,650,651,654,658,659,662,666,667,668,669,670,671,678,679,683,685,686,687,688,689,691,694,696,698,699,700,701,703,704,707,708,711,713,715,717,719,722,724,725,727,730,731,732,734,735,738,739,742,743,745,746,747,748,749,750,751,753,758,759,760,764,766,767,769,771,773,775,776,779,780,781,783,784,785,786,787,788,791,793,794,798,799,800,802,804,805,806,807,808,809,810,811,812,813,821,822,823,824,825,827,831,835,836,837,838,839,840,841,842,843,844,845,846,847,848,850,852,853,854,858,859,860,866,869,873,874,876,877,881,884,886,887,888,892,894,897,898,900,902,905,906,907,914,915,918,919,920,922,923,925,927,931,932,937,938,940,943,944,945,951,953,954,955,957,958,959,963,967,971,972,973,974,977,978,979,982,98
3,986,989,990,991,992,998,999,1003,1004,1005,1006,1007,1010,1011,1012,1015,1020,1022,1023 }; + + const uint16_t g_unique_to_seed_6x6_p3[] = { 0,8,11,14,15,17,18,19,26,31,34,35,36,38,44,47,48,49,51,56,59,61,70,74,76,82,88,90,96,100,103,104,108,110,111,117,122,123,126,127,132,133,135,139,147,150,151,152,156,157,163,166,168,171,175,176,179,181,182,183,186,189,192,199,203,205,207,210,214,216,222,247,249,250,252,254,260,261,262,263,266,272,273,275,276,288,291,292,293,294,297,302,309,310,313,314,318,327,328,331,335,337,346,356,357,358,363,365,368,378,381,384,386,390,391,392,396,397,398,399,401,410,411,419,427,430,431,437,439,440,451,455,457,458,459,460,462,468,470,471,472,474,475,477,479,482,483,488,493,495,496,502,503,504,507,510,511,512,515,516,518,519,522,523,525,526,527,538,543,544,546,547,549,550,552,553,554,562,570,578,579,581,582,588,589,590,593,595,600,606,611,613,618,623,625,632,637,638,645,646,650,651,658,659,662,666,667,669,670,678,679,685,686,687,688,691,694,696,698,699,700,701,703,704,707,713,714,715,717,719,722,724,727,730,731,734,738,739,743,747,748,750,751,753,758,760,764,766,769,775,776,783,784,785,787,791,793,798,799,802,804,805,806,807,808,809,810,813,822,823,825,831,835,837,838,839,840,842,845,846,848,853,854,858,859,860,866,874,882,884,887,888,892,894,898,902,907,914,915,918,919,922,923,925,927,931,932,937,938,940,943,944,945,953,955,958,959,963,966,971,974,979,990,991,998,999,1007,1010,1011,1012,1015,1020,1023 }; + const uint16_t g_unique_to_seed_8x5_p3[] = { 
0,3,8,11,14,15,17,18,19,23,26,27,29,31,33,34,35,36,38,43,44,47,48,49,51,55,56,59,61,67,70,76,79,81,82,88,89,90,96,100,103,104,108,110,111,117,122,123,126,127,132,133,134,135,139,147,150,151,152,156,157,163,166,167,168,171,173,175,176,179,181,182,183,186,189,192,199,203,205,207,210,214,216,227,230,247,249,250,254,260,261,262,263,266,272,273,275,276,279,288,291,292,293,294,297,302,307,309,310,313,314,315,318,319,327,328,331,335,337,346,355,356,357,358,359,363,365,377,378,381,384,386,390,391,392,394,396,397,398,399,401,407,410,411,419,424,427,430,431,437,439,440,450,451,455,457,458,459,460,462,464,467,468,470,471,472,474,475,477,478,479,482,483,487,488,493,495,496,502,503,504,507,508,511,512,515,516,518,519,522,523,524,526,527,538,543,544,547,549,550,552,553,554,557,562,568,570,578,579,581,582,588,589,590,593,595,600,602,603,606,609,611,613,614,624,625,632,637,638,639,645,646,650,651,658,659,662,666,667,669,670,678,679,685,686,687,688,689,691,694,696,699,700,701,703,704,707,712,713,715,717,719,722,724,727,730,731,734,738,739,743,745,747,750,751,758,759,760,763,764,766,769,771,775,776,779,781,783,784,785,787,791,793,798,799,802,804,805,806,807,809,810,812,813,822,823,825,831,835,837,838,840,842,844,845,846,848,853,854,858,859,860,866,873,876,882,884,887,888,892,894,895,898,902,906,907,914,915,918,919,922,923,925,927,931,932,937,938,940,943,944,945,947,951,953,955,958,959,963,966,971,974,977,979,983,989,990,991,998,999,1005,1007,1010,1011,1012,1015,1023 }; + const uint16_t g_unique_to_seed_8x6_p3[] = { 
0,3,8,11,14,15,17,18,19,23,26,27,29,31,33,34,35,36,38,43,44,47,48,49,51,55,56,59,61,67,70,74,76,79,81,82,88,89,90,96,100,103,104,108,110,111,117,122,123,126,127,131,132,133,134,135,139,147,150,151,152,156,157,163,166,167,168,171,173,175,176,179,181,182,183,186,189,192,199,203,205,207,210,214,216,222,227,230,236,247,249,250,252,254,260,261,262,263,266,272,273,275,276,279,288,291,292,293,294,297,302,307,309,310,313,314,315,318,319,324,327,328,331,335,337,338,346,355,356,357,358,359,363,365,368,377,378,381,384,386,390,391,392,394,396,397,398,399,401,407,410,411,419,424,427,430,431,437,439,440,450,451,455,457,458,459,460,462,464,467,468,470,471,472,474,475,477,478,479,482,483,485,487,488,493,495,496,502,503,504,507,508,510,511,512,515,516,518,519,522,523,524,525,526,527,538,541,543,544,546,547,549,550,552,553,554,557,562,566,567,568,570,578,579,581,582,588,589,590,593,595,600,601,602,603,606,609,611,613,614,618,623,624,625,632,637,638,639,645,646,650,651,658,659,662,666,667,669,670,678,679,685,686,687,688,689,691,694,696,698,699,700,701,703,704,707,708,712,713,714,715,717,719,722,724,725,727,730,731,732,734,738,739,743,745,747,748,750,751,753,758,759,760,763,764,766,769,771,775,776,779,781,783,784,785,786,787,791,793,798,799,802,804,805,806,807,808,809,810,812,813,822,823,825,831,835,837,838,839,840,842,844,845,846,848,850,853,854,858,859,860,866,873,874,876,882,884,887,888,892,894,895,898,900,902,906,907,914,915,918,919,922,923,925,927,931,932,937,938,940,943,944,945,947,951,953,955,958,959,963,966,971,974,977,979,983,989,990,991,998,999,1005,1007,1010,1011,1012,1015,1020,1022,1023 }; + const uint16_t g_unique_to_seed_10x5_p3[] = { 
0,3,7,8,11,14,15,17,18,19,23,26,27,29,31,33,34,35,36,38,43,44,47,48,49,51,55,56,59,61,62,67,70,72,76,79,81,82,88,89,90,91,95,96,100,103,104,108,110,111,114,117,122,123,126,127,131,132,133,134,135,139,140,147,150,151,152,156,157,158,163,166,167,168,171,173,175,176,179,181,182,183,186,189,192,199,203,205,207,210,213,214,216,227,230,245,247,249,250,254,259,260,261,262,263,266,269,272,273,274,275,276,279,281,288,291,292,293,294,295,297,302,307,309,310,313,314,315,318,319,327,328,331,335,337,346,355,356,357,358,359,363,365,377,378,381,384,386,390,391,392,394,396,397,398,399,401,407,410,411,412,413,419,424,427,430,431,437,439,440,450,451,455,457,458,459,460,462,464,467,468,470,471,472,474,475,477,478,479,482,483,487,488,493,495,496,500,501,502,503,504,506,507,508,510,511,512,515,516,518,519,521,522,523,524,526,527,530,538,541,543,544,547,549,550,552,553,554,555,557,562,565,568,570,577,578,579,581,582,588,589,590,593,595,600,601,602,603,606,609,611,613,614,618,624,625,632,637,638,639,645,646,650,651,654,658,659,662,666,667,669,670,678,679,685,686,687,688,689,691,694,695,696,698,699,700,701,703,704,707,712,713,715,717,719,722,724,725,727,730,731,732,734,738,739,742,743,745,747,749,750,751,758,759,760,763,764,765,766,769,771,773,775,776,779,781,783,784,785,786,787,791,793,798,799,802,804,805,806,807,809,810,812,813,821,822,823,825,827,831,835,836,837,838,839,840,841,842,844,845,846,848,853,854,858,859,860,866,869,873,876,877,882,884,887,888,891,892,894,895,898,900,902,905,906,907,909,914,915,918,919,922,923,925,927,931,932,937,938,939,940,943,944,945,947,951,953,954,955,957,958,959,961,963,966,967,971,974,975,977,978,979,983,989,990,991,993,998,999,1005,1007,1010,1011,1012,1015,1023 }; + + const uint16_t g_unique_to_seed_10x6_p3[] = { 
0,3,7,8,11,12,14,15,17,18,19,23,26,27,29,31,33,34,35,36,38,43,44,47,48,49,51,55,56,59,61,62,67,70,72,74,76,79,81,82,88,89,90,91,95,96,100,103,104,108,110,111,114,117,122,123,126,127,131,132,133,134,135,139,140,147,150,151,152,156,157,158,163,166,167,168,171,173,175,176,179,181,182,183,186,189,192,199,203,205,207,210,213,214,216,222,227,230,236,245,246,247,249,250,252,254,259,260,261,262,263,266,269,272,273,274,275,276,279,281,288,291,292,293,294,295,297,302,306,307,309,310,311,313,314,315,318,319,324,327,328,330,331,335,337,338,346,355,356,357,358,359,363,364,365,368,377,378,381,384,386,390,391,392,394,396,397,398,399,401,407,410,411,412,413,419,424,427,430,431,437,439,440,450,451,455,457,458,459,460,462,464,467,468,470,471,472,474,475,477,478,479,482,483,485,487,488,493,495,496,500,501,502,503,504,506,507,508,510,511,512,515,516,518,519,521,522,523,524,525,526,527,530,538,539,541,543,544,546,547,549,550,552,553,554,555,557,562,565,566,567,568,570,577,578,579,581,582,588,589,590,593,595,600,601,602,603,606,609,611,613,614,618,623,624,625,632,637,638,639,645,646,648,650,651,654,658,659,662,666,667,669,670,678,679,685,686,687,688,689,691,694,695,696,698,699,700,701,703,704,707,708,712,713,714,715,717,719,722,724,725,727,730,731,732,734,735,738,739,742,743,745,747,748,749,750,751,753,758,759,760,763,764,765,766,769,771,773,775,776,779,781,783,784,785,786,787,791,793,798,799,802,804,805,806,807,808,809,810,812,813,821,822,823,825,827,831,835,836,837,838,839,840,841,842,844,845,846,848,850,853,854,858,859,860,866,869,873,874,876,877,882,884,887,888,891,892,894,895,898,900,902,905,906,907,909,914,915,918,919,922,923,925,927,931,932,937,938,939,940,943,944,945,947,951,953,954,955,957,958,959,961,963,966,967,971,974,975,977,978,979,982,983,989,990,991,993,998,999,1005,1007,1010,1011,1012,1015,1020,1022,1023 }; + const uint16_t g_unique_to_seed_8x8_p3[] = { 
0,3,7,8,11,12,14,15,17,18,19,23,26,27,29,30,31,32,33,34,35,36,38,39,43,44,47,48,49,50,51,55,56,57,59,60,61,63,67,70,72,74,76,79,81,82,88,89,90,96,100,103,104,106,108,110,111,117,122,123,126,127,131,132,133,134,135,136,139,144,147,150,151,152,156,157,158,163,166,167,168,171,173,175,176,178,179,181,182,183,186,189,192,199,203,205,207,210,214,216,222,227,230,235,236,246,247,249,250,252,254,260,261,262,263,266,269,272,273,275,276,279,288,291,292,293,294,295,297,302,306,307,309,310,311,313,314,315,318,319,324,327,328,330,331,335,337,338,342,345,346,355,356,357,358,359,363,365,368,371,377,378,381,384,386,388,390,391,392,394,396,397,398,399,401,407,410,411,417,419,424,427,430,431,437,439,440,446,450,451,455,457,458,459,460,462,464,466,467,468,470,471,472,474,475,477,478,479,480,482,483,485,487,488,493,495,496,497,502,503,504,507,508,510,511,512,515,516,518,519,521,522,523,524,525,526,527,530,532,538,539,541,543,544,546,547,549,550,551,552,553,554,557,562,566,567,568,570,571,577,578,579,581,582,586,588,589,590,592,593,594,595,600,601,602,603,606,609,610,611,613,614,618,623,624,625,630,632,637,638,639,645,646,648,650,651,658,659,662,666,667,669,670,671,678,679,683,685,686,687,688,689,691,694,696,698,699,700,701,703,704,707,708,712,713,714,715,717,719,722,724,725,727,730,731,732,734,735,738,739,743,745,746,747,748,750,751,753,758,759,760,763,764,766,767,769,771,773,775,776,779,780,781,783,784,785,786,787,788,791,793,794,798,799,802,804,805,806,807,808,809,810,811,812,813,821,822,823,825,827,831,835,837,838,839,840,842,844,845,846,847,848,850,852,853,854,858,859,860,866,873,874,876,877,882,884,886,887,888,892,894,895,897,898,900,902,906,907,914,915,918,919,920,922,923,925,927,931,932,937,938,940,943,944,945,947,951,953,954,955,958,959,963,966,971,972,974,977,979,982,983,989,990,991,998,999,1005,1006,1007,1010,1011,1012,1015,1020,1022,1023 }; + const uint16_t g_unique_to_seed_10x8_p3[] = { 
0,3,7,8,11,12,14,15,17,18,19,23,26,27,29,30,31,32,33,34,35,36,38,39,43,44,47,48,49,50,51,55,56,57,59,60,61,62,63,67,70,72,74,76,79,81,82,88,89,90,91,94,95,96,100,103,104,106,108,110,111,114,115,117,122,123,126,127,131,132,133,134,135,136,139,140,144,147,150,151,152,153,156,157,158,163,166,167,168,171,173,175,176,178,179,181,182,183,186,189,192,198,199,203,205,207,210,213,214,216,220,222,227,230,235,236,245,246,247,249,250,252,254,259,260,261,262,263,266,269,272,273,274,275,276,279,281,288,291,292,293,294,295,297,302,306,307,309,310,311,313,314,315,318,319,324,327,328,330,331,335,337,338,342,345,346,355,356,357,358,359,363,364,365,368,371,374,377,378,379,381,384,386,387,388,390,391,392,394,395,396,397,398,399,401,407,410,411,412,413,417,419,424,427,430,431,437,438,439,440,443,446,450,451,455,457,458,459,460,462,464,466,467,468,470,471,472,474,475,477,478,479,480,482,483,485,487,488,493,495,496,497,500,501,502,503,504,505,506,507,508,510,511,512,515,516,518,519,521,522,523,524,525,526,527,530,532,538,539,541,543,544,546,547,549,550,551,552,553,554,555,557,562,565,566,567,568,570,571,577,578,579,581,582,586,588,589,590,592,593,594,595,600,601,602,603,606,609,610,611,613,614,618,623,624,625,630,632,637,638,639,644,645,646,648,650,651,654,658,659,662,666,667,669,670,671,678,679,683,685,686,687,688,689,691,694,695,696,698,699,700,701,703,704,707,708,712,713,714,715,717,719,722,724,725,727,730,731,732,734,735,738,739,742,743,745,746,747,748,749,750,751,753,758,759,760,763,764,765,766,767,769,771,773,775,776,779,780,781,783,784,785,786,787,788,791,793,794,798,799,800,802,804,805,806,807,808,809,810,811,812,813,821,822,823,825,827,831,835,836,837,838,839,840,841,842,844,845,846,847,848,850,852,853,854,858,859,860,866,869,873,874,876,877,882,884,886,887,888,891,892,894,895,897,898,900,902,905,906,907,909,914,915,918,919,920,922,923,925,927,931,932,937,938,939,940,943,944,945,947,951,953,954,955,957,958,959,961,963,966,967,971,972,973,974,975,977,978,979,982,983,986,989,990,99
1,993,998,999,1005,1006,1007,1010,1011,1012,1015,1020,1022,1023}; + const uint16_t g_unique_to_seed_10x10_p3[] = { 0,3,7,8,10,11,12,14,15,17,18,19,23,26,27,29,30,31,32,33,34,35,36,38,39,40,43,44,47,48,49,50,51,55,56,57,59,60,61,62,63,67,70,72,74,75,76,79,81,82,88,89,90,91,94,95,96,100,103,104,106,108,110,111,114,115,117,120,122,123,126,127,128,130,131,132,133,134,135,136,139,140,144,147,150,151,152,153,156,157,158,162,163,166,167,168,169,171,173,175,176,178,179,181,182,183,186,189,192,198,199,200,203,205,207,209,210,213,214,216,218,220,222,227,230,235,236,238,242,245,246,247,249,250,252,254,257,259,260,261,262,263,266,269,272,273,274,275,276,279,281,282,288,291,292,293,294,295,297,302,306,307,308,309,310,311,313,314,315,318,319,324,326,327,328,330,331,335,337,338,342,345,346,347,350,353,355,356,357,358,359,363,364,365,368,371,372,374,377,378,379,381,384,386,387,388,390,391,392,394,395,396,397,398,399,401,407,408,410,411,412,413,417,419,424,427,430,431,435,437,438,439,440,443,446,450,451,455,456,457,458,459,460,462,463,464,466,467,468,470,471,472,474,475,477,478,479,480,482,483,485,487,488,493,495,496,497,500,501,502,503,504,505,506,507,508,510,511,512,513,515,516,518,519,521,522,523,524,525,526,527,530,532,538,539,541,543,544,546,547,549,550,551,552,553,554,555,557,562,565,566,567,568,570,571,577,578,579,580,581,582,586,588,589,590,592,593,594,595,600,601,602,603,606,609,610,611,613,614,618,623,624,625,626,630,632,634,637,638,639,644,645,646,648,650,651,654,658,659,662,666,667,668,669,670,671,678,679,683,685,686,687,688,689,691,694,695,696,698,699,700,701,703,704,707,708,712,713,714,715,717,719,722,724,725,727,730,731,732,734,735,738,739,742,743,745,746,747,748,749,750,751,753,758,759,760,763,764,765,766,767,769,771,773,775,776,779,780,781,783,784,785,786,787,788,789,790,791,793,794,798,799,800,802,804,805,806,807,808,809,810,811,812,813,821,822,823,825,827,831,835,836,837,838,839,840,841,842,843,844,845,846,847,848,850,852,853,854,858,859,860,866,869,873,874,876,87
7,881,882,884,886,887,888,891,892,894,895,897,898,900,902,905,906,907,909,914,915,918,919,920,922,923,925,927,931,932,937,938,939,940,943,944,945,947,951,952,953,954,955,957,958,959,961,963,966,967,971,972,973,974,975,977,978,979,980,982,983,986,989,990,991,992,993,998,999,1003,1004,1005,1006,1007,1010,1011,1012,1014,1015,1020,1022,1023 }; + + const uint16_t g_unique_to_seed_12x10_p3[] = { 0,3,7,8,10,11,12,14,15,16,17,18,19,21,23,26,27,29,30,31,32,33,34,35,36,38,39,40,42,43,44,45,47,48,49,50,51,55,56,57,59,60,61,62,63,67,70,72,74,75,76,79,81,82,88,89,90,91,94,95,96,100,102,103,104,106,108,110,111,114,115,117,120,122,123,126,127,128,129,130,131,132,133,134,135,136,139,140,144,147,150,151,152,153,156,157,158,161,162,163,166,167,168,169,171,173,175,176,178,179,181,182,183,185,186,189,192,195,198,199,200,203,205,207,209,210,213,214,216,218,220,222,227,230,231,235,236,238,242,245,246,247,249,250,251,252,254,257,259,260,261,262,263,266,269,272,273,274,275,276,279,281,282,283,288,291,292,293,294,295,297,300,302,306,307,308,309,310,311,313,314,315,318,319,324,326,327,328,330,331,335,337,338,342,345,346,347,348,350,353,355,356,357,358,359,363,364,365,368,371,372,374,377,378,379,381,384,386,387,388,390,391,392,394,395,396,397,398,399,401,407,408,410,411,412,413,415,417,419,424,427,430,431,435,437,438,439,440,443,446,450,451,455,456,457,458,459,460,461,462,463,464,465,466,467,468,470,471,472,474,475,477,478,479,480,482,483,485,487,488,493,495,496,497,500,501,502,503,504,505,506,507,508,510,511,512,513,515,516,518,519,521,522,523,524,525,526,527,530,532,538,539,541,543,544,546,547,549,550,551,552,553,554,555,557,558,562,563,565,566,567,568,570,571,577,578,579,580,581,582,584,586,588,589,590,592,593,594,595,600,601,602,603,604,606,609,610,611,613,614,618,623,624,625,626,630,632,634,637,638,639,643,644,645,646,648,650,651,654,658,659,662,666,667,668,669,670,671,673,678,679,683,685,686,687,688,689,691,694,695,696,698,699,700,701,703,704,707,708,711,712,713,714,715,717,719,722,724,
725,727,730,731,732,734,735,738,739,742,743,745,746,747,748,749,750,751,753,758,759,760,763,764,765,766,767,769,771,773,775,776,779,780,781,783,784,785,786,787,788,789,790,791,793,794,798,799,800,802,804,805,806,807,808,809,810,811,812,813,821,822,823,824,825,827,828,831,835,836,837,838,839,840,841,842,843,844,845,846,847,848,850,852,853,854,858,859,860,866,869,873,874,876,877,881,882,884,886,887,888,891,892,894,895,897,898,900,902,905,906,907,909,914,915,918,919,920,922,923,925,927,931,932,937,938,939,940,943,944,945,947,951,952,953,954,955,957,958,959,961,963,966,967,971,972,973,974,975,977,978,979,980,982,983,986,989,990,991,992,993,995,998,999,1002,1003,1004,1005,1006,1007,1010,1011,1012,1014,1015,1020,1021,1022,1023 }; + const uint16_t g_unique_to_seed_12x12_p3[] = { 0,3,4,7,8,10,11,12,14,15,16,17,18,19,21,23,26,27,29,30,31,32,33,34,35,36,38,39,40,42,43,44,45,47,48,49,50,51,53,55,56,57,58,59,60,61,62,63,67,70,72,74,75,76,79,81,82,83,88,89,90,91,94,95,96,100,102,103,104,106,108,110,111,114,115,117,120,122,123,126,127,128,129,130,131,132,133,134,135,136,138,139,140,144,147,150,151,152,153,156,157,158,159,160,161,162,163,166,167,168,169,171,173,175,176,177,178,179,181,182,183,185,186,189,192,195,196,198,199,200,203,205,207,208,209,210,213,214,216,218,220,222,227,230,231,235,236,238,242,245,246,247,249,250,251,252,254,257,259,260,261,262,263,266,269,272,273,274,275,276,279,281,282,283,288,291,292,293,294,295,297,300,302,306,307,308,309,310,311,313,314,315,318,319,324,326,327,328,330,331,335,337,338,342,345,346,347,348,350,353,355,356,357,358,359,363,364,365,368,371,372,374,377,378,379,381,384,386,387,388,390,391,392,394,395,396,397,398,399,401,407,408,410,411,412,413,415,417,419,424,426,427,430,431,432,435,437,438,439,440,443,444,446,450,451,455,456,457,458,459,460,461,462,463,464,465,466,467,468,470,471,472,474,475,477,478,479,480,482,483,485,487,488,493,495,496,497,500,501,502,503,504,505,506,507,508,510,511,512,513,515,516,518,519,521,522,523,524,525,526,527,530
,532,535,538,539,540,541,543,544,546,547,549,550,551,552,553,554,555,557,558,562,563,565,566,567,568,569,570,571,577,578,579,580,581,582,584,586,588,589,590,592,593,594,595,600,601,602,603,604,606,609,610,611,613,614,618,623,624,625,626,628,630,631,632,634,636,637,638,639,640,643,644,645,646,648,650,651,654,658,659,662,666,667,668,669,670,671,673,678,679,683,685,686,687,688,689,691,694,695,696,698,699,700,701,703,704,707,708,711,712,713,714,715,717,719,722,724,725,727,730,731,732,734,735,738,739,742,743,745,746,747,748,749,750,751,753,758,759,760,763,764,765,766,767,768,769,771,773,774,775,776,778,779,780,781,783,784,785,786,787,788,789,790,791,793,794,798,799,800,802,804,805,806,807,808,809,810,811,812,813,821,822,823,824,825,827,828,831,835,836,837,838,839,840,841,842,843,844,845,846,847,848,850,852,853,854,858,859,860,863,866,869,873,874,876,877,881,882,884,886,887,888,891,892,894,895,897,898,900,902,905,906,907,909,911,912,914,915,918,919,920,922,923,925,927,929,930,931,932,937,938,939,940,943,944,945,947,951,952,953,954,955,957,958,959,961,963,966,967,971,972,973,974,975,977,978,979,980,982,983,986,989,990,991,992,993,995,998,999,1000,1002,1003,1004,1005,1006,1007,1010,1011,1012,1014,1015,1020,1021,1022,1023 }; + + static const uint16_t* g_unique_index_to_astc_part_seed[2][astc_helpers::NUM_ASTC_BLOCK_SIZES] = // [num_parts][astc_block_size_index] + { + { + g_unique_to_seed_4x4_p2, g_unique_to_seed_5x4_p2, g_unique_to_seed_5x5_p2, g_unique_to_seed_6x5_p2, + g_unique_to_seed_6x6_p2, g_unique_to_seed_8x5_p2, g_unique_to_seed_8x6_p2, g_unique_to_seed_10x5_p2, + g_unique_to_seed_10x6_p2, g_unique_to_seed_8x8_p2, g_unique_to_seed_10x8_p2, g_unique_to_seed_10x10_p2, + g_unique_to_seed_12x10_p2, g_unique_to_seed_12x12_p2 + }, + { + g_unique_to_seed_4x4_p3, g_unique_to_seed_5x4_p3, g_unique_to_seed_5x5_p3, g_unique_to_seed_6x5_p3, + g_unique_to_seed_6x6_p3, g_unique_to_seed_8x5_p3, g_unique_to_seed_8x6_p3, g_unique_to_seed_10x5_p3, + g_unique_to_seed_10x6_p3, 
g_unique_to_seed_8x8_p3, g_unique_to_seed_10x8_p3, g_unique_to_seed_10x10_p3, + g_unique_to_seed_12x10_p3, g_unique_to_seed_12x12_p3 + } + }; + + /* Returns the entry of g_unique_index_to_astc_part_seed selected by (num_parts - 2, astc_block_size_index, unique_pat_index). Arguments are validated with assert only: the block size index must be below NUM_ASTC_BLOCK_SIZES, num_parts must be 2 or 3, and unique_pat_index must be below get_total_unique_patterns() for that block size and partition count. */ static inline uint16_t unique_pat_index_to_part_seed(uint32_t astc_block_size_index, uint32_t num_parts, uint32_t unique_pat_index) + { + assert(astc_block_size_index < astc_helpers::NUM_ASTC_BLOCK_SIZES); + assert((num_parts >= 2) && (num_parts <= 3)); + assert(unique_pat_index < get_total_unique_patterns(astc_block_size_index, num_parts)); + + return g_unique_index_to_astc_part_seed[num_parts - 2][astc_block_size_index][unique_pat_index]; + } + + /* Decompresses comp_size bytes at pComp_data into uncomp_data and returns true on success. A comp_size of zero empties uncomp_data and succeeds. With BASISD_SUPPORT_KTX2_ZSTD enabled, the output size is taken from ZSTD_getFrameContentSize; an unknown or error size, a size above INT32_MAX, a failed try_resize, or a ZSTD_decompress error each log via BASISU_DEVEL_ERROR and return false. With it disabled, the function logs an error and returns false. */ static bool zstd_decompress(const void *pComp_data, size_t comp_size, basisu::uint8_vec &uncomp_data) + { + if (!comp_size) + { + uncomp_data.resize(0); + return true; + } + +#if BASISD_SUPPORT_KTX2_ZSTD + const uint64_t decomp_size = ZSTD_getFrameContentSize(pComp_data, comp_size); + + if ((decomp_size == ZSTD_CONTENTSIZE_UNKNOWN) || (decomp_size == ZSTD_CONTENTSIZE_ERROR)) + { + BASISU_DEVEL_ERROR("zstd_decompress: ZSTD_getFrameContentSize failed\n"); + return false; + } + + // sanity check, not UINT32_MAX purposely, even INT_MAX is too high + if (decomp_size > (uint64_t)INT32_MAX) + { + BASISU_DEVEL_ERROR("zstd_decompress: decompressed size too large\n"); + return false; + } + + if (!uncomp_data.try_resize((size_t)decomp_size)) + { + BASISU_DEVEL_ERROR("zstd_decompress: Out of memory\n"); + return false; + } + + /* The frame declares zero decompressed bytes, so there is nothing to decode. */ if (!decomp_size) + return true; + + const size_t actual_uncomp_size = ZSTD_decompress(uncomp_data.data(), uncomp_data.size(), pComp_data, comp_size); + if (ZSTD_isError(actual_uncomp_size)) + { + BASISU_DEVEL_ERROR("zstd_decompress: Zstd decompression failed, file is invalid or corrupted\n"); + return false; + } + + assert(actual_uncomp_size == decomp_size); + /* Size the output to the byte count ZSTD_decompress reported. */ uncomp_data.resize(actual_uncomp_size); + + return true; +#else + BASISU_NOTE_UNUSED(pComp_data); + BASISU_DEVEL_ERROR("zstd_decompress: file uses ZStd compression, but ZStd support disabled (see 
BASISD_SUPPORT_KTX2_ZSTD) \n"); + return false; +#endif + } + + static bool zstd_decompress_and_advance(const uint8_t* &pComp_data, size_t comp_size, basisu::uint8_vec& uncomp_data, simplified_bitwise_decoder& dec) + { + if (!zstd_decompress(pComp_data, comp_size, uncomp_data)) + return false; + pComp_data += comp_size; + dec.init(uncomp_data.data(), uncomp_data.size()); + return true; + } + + bool xuastc_ldr_decompress_image_full_zstd( + const uint8_t* pComp_data_all, size_t comp_data_size_all, + uint32_t& astc_block_width, uint32_t& astc_block_height, + uint32_t& actual_width, uint32_t& actual_height, bool& has_alpha, bool& uses_srgb_astc_decode_mode, + bool debug_output, + xuastc_decomp_image_init_callback_ptr pInit_callback, void* pInit_callback_data, + xuastc_decomp_image_block_callback_ptr pBlock_callback, void* pBlock_callback_data) + { + if (comp_data_size_all < sizeof(xuastc_ldr_full_zstd_header)) + { + BASISU_DEVEL_ERROR("Compressed file is too small\n"); + return false; + } + + const xuastc_ldr_full_zstd_header* pHdr = (const xuastc_ldr_full_zstd_header*)pComp_data_all; + + if ((!pHdr->m_raw_bits_len) || (!pHdr->m_mode_bytes_len)) + { + BASISU_DEVEL_ERROR("Compressed file is too small\n"); + return false; + } + + const uint64_t total_comp_size = (uint64_t)((uint32_t)pHdr->m_raw_bits_len) + + pHdr->m_mode_bytes_len + pHdr->m_solid_dpcm_bytes_len + pHdr->m_endpoint_dpcm_reuse_indices_len + pHdr->m_use_bc_bits_len + + pHdr->m_endpoint_dpcm_3bit_len + pHdr->m_endpoint_dpcm_4bit_len + pHdr->m_endpoint_dpcm_5bit_len + pHdr->m_endpoint_dpcm_6bit_len + pHdr->m_endpoint_dpcm_7bit_len + pHdr->m_endpoint_dpcm_8bit_len + + pHdr->m_mean0_bits_len + pHdr->m_mean1_bytes_len + + pHdr->m_run_bytes_len + pHdr->m_coeff_bytes_len + pHdr->m_sign_bits_len + + pHdr->m_weight2_bits_len + pHdr->m_weight3_bits_len + pHdr->m_weight4_bits_len + pHdr->m_weight8_bytes_len; + + if (comp_data_size_all < (sizeof(xuastc_ldr_full_zstd_header) + total_comp_size)) + { + 
BASISU_DEVEL_ERROR("Compressed file is too small\n"); + return false; + } + + bitwise_decoder raw_bits; + simplified_bitwise_decoder comp_mode_dec, solid_dpcm_dec, endpoint_dpcm_reuse_indices_dec, use_bc_bits_dec; + simplified_bitwise_decoder endpoint_dpcm_3bit_dec, endpoint_dpcm_4bit_dec, endpoint_dpcm_5bit_dec, endpoint_dpcm_6bit_dec, endpoint_dpcm_7bit_dec, endpoint_dpcm_8bit_dec; + + basisu::uint8_vec uncomp_mode_bytes, uncomp_solid_dpcm_bytes, uncomp_endpoint_dpcm_reuse_indices, uncomp_use_bc_bits; + basisu::uint8_vec uncomp_endpoint_dpcm_3bit, uncomp_endpoint_dpcm_4bit, uncomp_endpoint_dpcm_5bit, uncomp_endpoint_dpcm_6bit, uncomp_endpoint_dpcm_7bit, uncomp_endpoint_dpcm_8bit; + + basisu::uint8_vec uncomp_mean0_bits, uncomp_mean1_bytes, uncomp_run_bytes, uncomp_coeff_bytes, uncomp_weight2_bytes, uncomp_weight3_bytes, uncomp_weight4_bytes, uncomp_weight8_bytes; + simplified_bitwise_decoder mean0_bits, mean1_bytes, run_bytes, coeff_bytes, sign_bits, weight2_bits, weight3_bits, weight4_bits, weight8_bytes; + + const uint8_t* pCur_buf = pComp_data_all + sizeof(xuastc_ldr_full_zstd_header); + + // raw bits + { + raw_bits.init(pCur_buf, pHdr->m_raw_bits_len); + pCur_buf += pHdr->m_raw_bits_len; + } + + if (!zstd_decompress_and_advance(pCur_buf, pHdr->m_mode_bytes_len, uncomp_mode_bytes, comp_mode_dec)) + return false; + if (!zstd_decompress_and_advance(pCur_buf, pHdr->m_solid_dpcm_bytes_len, uncomp_solid_dpcm_bytes, solid_dpcm_dec)) + return false; + if (!zstd_decompress_and_advance(pCur_buf, pHdr->m_endpoint_dpcm_reuse_indices_len, uncomp_endpoint_dpcm_reuse_indices, endpoint_dpcm_reuse_indices_dec)) + return false; + if (!zstd_decompress_and_advance(pCur_buf, pHdr->m_use_bc_bits_len, uncomp_use_bc_bits, use_bc_bits_dec)) + return false; + if (!zstd_decompress_and_advance(pCur_buf, pHdr->m_endpoint_dpcm_3bit_len, uncomp_endpoint_dpcm_3bit, endpoint_dpcm_3bit_dec)) + return false; + if (!zstd_decompress_and_advance(pCur_buf, pHdr->m_endpoint_dpcm_4bit_len, 
uncomp_endpoint_dpcm_4bit, endpoint_dpcm_4bit_dec)) + return false; + if (!zstd_decompress_and_advance(pCur_buf, pHdr->m_endpoint_dpcm_5bit_len, uncomp_endpoint_dpcm_5bit, endpoint_dpcm_5bit_dec)) + return false; + if (!zstd_decompress_and_advance(pCur_buf, pHdr->m_endpoint_dpcm_6bit_len, uncomp_endpoint_dpcm_6bit, endpoint_dpcm_6bit_dec)) + return false; + if (!zstd_decompress_and_advance(pCur_buf, pHdr->m_endpoint_dpcm_7bit_len, uncomp_endpoint_dpcm_7bit, endpoint_dpcm_7bit_dec)) + return false; + if (!zstd_decompress_and_advance(pCur_buf, pHdr->m_endpoint_dpcm_8bit_len, uncomp_endpoint_dpcm_8bit, endpoint_dpcm_8bit_dec)) + return false; + + if (!zstd_decompress_and_advance(pCur_buf, pHdr->m_mean0_bits_len, uncomp_mean0_bits, mean0_bits)) + return false; + if (!zstd_decompress_and_advance(pCur_buf, pHdr->m_mean1_bytes_len, uncomp_mean1_bytes, mean1_bytes)) + return false; + if (!zstd_decompress_and_advance(pCur_buf, pHdr->m_run_bytes_len, uncomp_run_bytes, run_bytes)) + return false; + if (!zstd_decompress_and_advance(pCur_buf, pHdr->m_coeff_bytes_len, uncomp_coeff_bytes, coeff_bytes)) + return false; + + // sign + { + sign_bits.init(pCur_buf, pHdr->m_sign_bits_len); + pCur_buf += pHdr->m_sign_bits_len; + } + + if (!zstd_decompress_and_advance(pCur_buf, pHdr->m_weight2_bits_len, uncomp_weight2_bytes, weight2_bits)) + return false; + if (!zstd_decompress_and_advance(pCur_buf, pHdr->m_weight3_bits_len, uncomp_weight3_bytes, weight3_bits)) + return false; + if (!zstd_decompress_and_advance(pCur_buf, pHdr->m_weight4_bits_len, uncomp_weight4_bytes, weight4_bits)) + return false; + if (!zstd_decompress_and_advance(pCur_buf, pHdr->m_weight8_bytes_len, uncomp_weight8_bytes, weight8_bytes)) + return false; + + // sanity check + const uint64_t total_read_size = pCur_buf - pComp_data_all; + if (total_read_size > comp_data_size_all) + { + BASISU_DEVEL_ERROR("Compressed file is too small\n"); + return false; + } + + const uint32_t header_val = 
raw_bits.get_bits(FULL_ZSTD_HEADER_MARKER_BITS); + if (header_val != FULL_ZSTD_HEADER_MARKER) + { + BASISU_DEVEL_ERROR("Invalid marker\n"); + return false; + } + + const uint32_t astc_block_size_index = raw_bits.get_bits(4); + if (astc_block_size_index >= astc_helpers::NUM_ASTC_BLOCK_SIZES) + { + BASISU_DEVEL_ERROR("Invalid block dimension index\n"); + return false; + } + + astc_block_width = astc_helpers::g_astc_block_sizes[astc_block_size_index][0]; + astc_block_height = astc_helpers::g_astc_block_sizes[astc_block_size_index][1]; + + uses_srgb_astc_decode_mode = raw_bits.get_bits(1); + + actual_width = raw_bits.get_bits(16); + actual_height = raw_bits.get_bits(16); + has_alpha = raw_bits.get_bits(1); + + const bool use_dct = (raw_bits.get_bits(1) != 0); + + int int_q = 0; + if (use_dct) + int_q = raw_bits.get_bits(8); + + const float dct_q = (float)int_q / 2.0f; + if ((use_dct) && ((dct_q <= 0.0f) || (dct_q > 100.0f))) + { + BASISU_DEVEL_ERROR("astc_ldr_t::xuastc_ldr_decompress_image_full_zstd::Invalid DCT global quality factor\n"); + return false; + } + + if (debug_output) + { + basisu::fmt_debug_printf("astc_ldr_t::xuastc_ldr_decompress_image_full_zstd: block dim: {}x{}, image dim: {}x{}, sRGB decode profile: {}, has_alpha: {}, dct: {} dct_q: {}\n", + astc_block_width, astc_block_height, + actual_width, actual_height, + uses_srgb_astc_decode_mode, has_alpha, + use_dct, dct_q); + } + + const uint32_t num_blocks_x = (actual_width + astc_block_width - 1) / astc_block_width; + const uint32_t num_blocks_y = (actual_height + astc_block_height - 1) / astc_block_height; + + if (pInit_callback) + { + if (!(*pInit_callback)(num_blocks_x, num_blocks_y, astc_block_width, astc_block_height, uses_srgb_astc_decode_mode, dct_q, has_alpha, pInit_callback_data)) + return false; + } + + fvec dct_work; + + assert((size_t)astc_block_size_index < std::size(g_encoder_trial_modes)); + const auto& encoder_trial_modes = g_encoder_trial_modes[astc_block_size_index]; + + const 
grid_weight_dct& grid_dct = g_grid_weight_dcts[astc_block_size_index]; + + basisu::vector2D log_blocks; + if (!log_blocks.try_resize(num_blocks_x, 8)) + { + BASISU_DEVEL_ERROR("astc_ldr_t::xuastc_ldr_decompress_image_full_zstd: out of memory\n"); + return false; + } + + memset(log_blocks.get_ptr(), 0, log_blocks.size_in_bytes()); + + basisu::vector2D prev_block_states; + if (!prev_block_states.try_resize(num_blocks_x, 2)) + { + BASISU_DEVEL_ERROR("astc_ldr_t::xuastc_ldr_decompress_image_full_zstd: out of memory\n"); + return false; + } + + uint32_t cur_run_len = 0; + + int part2_hash[PART_HASH_SIZE]; + std::fill(part2_hash, part2_hash + PART_HASH_SIZE, -1); + + int part3_hash[PART_HASH_SIZE]; + std::fill(part3_hash, part3_hash + PART_HASH_SIZE, -1); + + int tm_hash[TM_HASH_SIZE]; + std::fill(tm_hash, tm_hash + TM_HASH_SIZE, -1); + + dct_syms syms; + + for (uint32_t by = 0; by < num_blocks_y; by++) + { + for (uint32_t bx = 0; bx < num_blocks_x; bx++) + { + prev_block_state_full_zstd& new_prev_state = prev_block_states(bx, by & 1); + + const prev_block_state_full_zstd* pLeft_state = bx ? &prev_block_states(bx - 1, by & 1) : nullptr; + const prev_block_state_full_zstd* pUpper_state = by ? &prev_block_states(bx, (by - 1) & 1) : nullptr; + + astc_helpers::log_astc_block& log_blk = log_blocks(bx, by & 7); + + if (cur_run_len) + { + const prev_block_state_full_zstd* pPrev_block_state = pLeft_state ? pLeft_state : pUpper_state; + const astc_helpers::log_astc_block& prev_log_blk = bx ? log_blocks(bx - 1, by & 7) : log_blocks(bx, (by - 1) & 7); + + memcpy((void*)&log_blk, (const void*)&prev_log_blk, sizeof(log_blk)); + + if (pBlock_callback) + { + if (!(*pBlock_callback)(bx, by, prev_log_blk, pBlock_callback_data)) + return false; + } + + new_prev_state.m_tm_index = pPrev_block_state->m_tm_index; + //new_prev_state.m_base_cem_index = pPrev_block_state->m_base_cem_index; + + cur_run_len--; + continue; + } + + const prev_block_state_full_zstd* pDiag_state = (bx && by) ? 
&prev_block_states(bx - 1, (by - 1) & 1) : nullptr; + + // TODO: End check + const uint32_t mode_byte = comp_mode_dec.get_bits8(); + + if ((mode_byte & 3) == (uint32_t)xuastc_zstd_mode::cMODE_RUN) + { + // run + cur_run_len = 1 + (mode_byte >> 2); + + if (!bx && !by) + { + BASISU_DEVEL_ERROR("astc_ldr_t::xuastc_ldr_decompress_image_full_zstd::Invalid run command\n"); + return false; + } + + const uint32_t max_possible_run_len = num_blocks_x - bx; + if (cur_run_len > max_possible_run_len) + { + BASISU_DEVEL_ERROR("astc_ldr_t::xuastc_ldr_decompress_image_full_zstd::Invalid run len\n"); + return false; + } + + const prev_block_state_full_zstd* pPrev_block_state = pLeft_state ? pLeft_state : pUpper_state; + const astc_helpers::log_astc_block& prev_log_blk = bx ? log_blocks(bx - 1, by & 7) : log_blocks(bx, (by - 1) & 7); + + memcpy((void*)&log_blk, (const void*)&prev_log_blk, sizeof(log_blk)); + + if (pBlock_callback) + { + if (!(*pBlock_callback)(bx, by, prev_log_blk, pBlock_callback_data)) + return false; + } + + new_prev_state.m_tm_index = pPrev_block_state->m_tm_index; + + cur_run_len--; + + continue; + } + else if ((mode_byte & 15) == (uint32_t)xuastc_zstd_mode::cMODE_SOLID) + { + // solid + const astc_helpers::log_astc_block* pPrev_log_blk = bx ? &log_blocks(bx - 1, by & 7) : (by ? &log_blocks(bx, (by - 1) & 7) : nullptr); + + uint32_t prev_solid_color[4] = { 0 }; + + if (pPrev_log_blk) + { + if (pPrev_log_blk->m_solid_color_flag_ldr) + { + prev_solid_color[0] = pPrev_log_blk->m_solid_color[0] >> 8; + prev_solid_color[1] = pPrev_log_blk->m_solid_color[1] >> 8; + prev_solid_color[2] = pPrev_log_blk->m_solid_color[2] >> 8; + prev_solid_color[3] = pPrev_log_blk->m_solid_color[3] >> 8; + } + else + { + // Decode previous block's first CEM, use the halfway point as the predictor. 
+ color_rgba prev_l, prev_h; + decode_endpoints(pPrev_log_blk->m_color_endpoint_modes[0], pPrev_log_blk->m_endpoints, pPrev_log_blk->m_endpoint_ise_range, prev_l, prev_h); + + prev_solid_color[0] = (prev_l[0] + prev_h[0] + 1) >> 1; + prev_solid_color[1] = (prev_l[1] + prev_h[1] + 1) >> 1; + prev_solid_color[2] = (prev_l[2] + prev_h[2] + 1) >> 1; + prev_solid_color[3] = (prev_l[3] + prev_h[3] + 1) >> 1; + } + } + + uint32_t delta_r = solid_dpcm_dec.get_bits8(); + uint32_t delta_g = solid_dpcm_dec.get_bits8(); + uint32_t delta_b = solid_dpcm_dec.get_bits8(); + uint32_t delta_a = has_alpha ? solid_dpcm_dec.get_bits8() : 0; + + uint32_t r = (prev_solid_color[0] + delta_r) & 0xFF; + uint32_t g = (prev_solid_color[1] + delta_g) & 0xFF; + uint32_t b = (prev_solid_color[2] + delta_b) & 0xFF; + uint32_t a = 255; + if (has_alpha) + a = (prev_solid_color[3] + delta_a) & 0xFF; + + log_blk.clear(); + log_blk.m_solid_color_flag_ldr = true; + log_blk.m_solid_color[0] = (uint16_t)(r | (r << 8)); + log_blk.m_solid_color[1] = (uint16_t)(g | (g << 8)); + log_blk.m_solid_color[2] = (uint16_t)(b | (b << 8)); + log_blk.m_solid_color[3] = (uint16_t)(a | (a << 8)); + + if (pBlock_callback) + { + if (!(*pBlock_callback)(bx, by, log_blk, pBlock_callback_data)) + return false; + } + + new_prev_state.m_tm_index = -1; + + continue; + } + + new_prev_state.clear(); + + //log_blk.clear(); + memset((void*)&log_blk, 0, offsetof(astc_helpers::log_astc_block, m_weights)); + + uint32_t tm_index = 0; + uint32_t actual_cem = 0; + + if ((mode_byte & 1) == 0) + { + // raw + uint32_t config_reuse_index = (mode_byte >> 1) & 3; + + if (config_reuse_index < 3) + { + // 0 = left, 1 = upper, 2 = left-upper + int cfg_dx = 0, cfg_dy = 0; + const prev_block_state_full_zstd* pCfg_state = nullptr; + + switch (config_reuse_index) + { + case 0: cfg_dx = -1; pCfg_state = pLeft_state; break; + case 1: cfg_dx = 0; cfg_dy = -1; pCfg_state = pUpper_state; break; + case 2: cfg_dx = -1; cfg_dy = -1; pCfg_state = pDiag_state; 
break; + default: assert(0); break; + } + + if ((((cfg_dx + (int)bx) < 0) || + ((cfg_dy + (int)by) < 0)) || + (!pCfg_state)) + { + BASISU_DEVEL_ERROR("astc_ldr_t::xuastc_ldr_decompress_image_full_zstd::Invalid config reuse\n"); + return false; + } + + astc_helpers::log_astc_block& cfg_log_blk = log_blocks((int)bx + cfg_dx, ((int)by + cfg_dy) & 7); + + tm_index = pCfg_state->m_tm_index; + + if (pCfg_state->m_tm_index < 0) + { + BASISU_DEVEL_ERROR("astc_ldr_t::xuastc_ldr_decompress_image_full_zstd::Invalid config reuse\n"); + return false; + } + + log_blk.m_partition_id = cfg_log_blk.m_partition_id; + actual_cem = cfg_log_blk.m_color_endpoint_modes[0]; + + new_prev_state.m_tm_index = tm_index; + //new_prev_state.m_base_cem_index = pCfg_state->m_base_cem_index; // base cem not including base+ofs, not actual + } + else + { + if (mode_byte & XUASTC_LDR_MODE_BYTE_TM_HASH_HIT_FLAG) + { + uint32_t tm_hash_index = raw_bits.get_bits(TM_HASH_BITS); + tm_index = tm_hash[tm_hash_index]; + } + else + { + tm_index = raw_bits.decode_truncated_binary(encoder_trial_modes.size_u32()); + + tm_hash[tm_hash_index(tm_index)] = tm_index; + } + + if (tm_index >= encoder_trial_modes.size()) + { + BASISU_DEVEL_ERROR("astc_ldr_t::xuastc_ldr_decompress_image_full_zstd::Invalid tm_index\n"); + return false; + } + + new_prev_state.m_tm_index = tm_index; + + const trial_mode& tm = encoder_trial_modes[tm_index]; + + actual_cem = tm.m_cem; + + if ((tm.m_cem == astc_helpers::CEM_LDR_RGB_DIRECT) || (tm.m_cem == astc_helpers::CEM_LDR_RGBA_DIRECT)) + { + // Decode is_base_ofs bit + bool is_base_ofs = (mode_byte & XUASTC_LDR_MODE_BYTE_IS_BASE_OFS_FLAG) != 0; + + if (is_base_ofs) + { + if (actual_cem == astc_helpers::CEM_LDR_RGB_DIRECT) + actual_cem = astc_helpers::CEM_LDR_RGB_BASE_PLUS_OFFSET; + else if (actual_cem == astc_helpers::CEM_LDR_RGBA_DIRECT) + actual_cem = astc_helpers::CEM_LDR_RGBA_BASE_PLUS_OFFSET; + } + } + + if (tm.m_num_parts > 1) + { + const uint32_t total_unique_indices = 
get_total_unique_patterns(astc_block_size_index, tm.m_num_parts); + int* pPart_hash = (tm.m_num_parts == 2) ? part2_hash : part3_hash; + + const bool hash_hit_flag = (mode_byte & XUASTC_LDR_MODE_BYTE_PART_HASH_HIT) != 0; + + uint32_t unique_pat_index; + if (hash_hit_flag) + { + uint32_t h = raw_bits.get_bits(basist::astc_ldr_t::PART_HASH_BITS); + + unique_pat_index = pPart_hash[h]; + } + else + { + unique_pat_index = raw_bits.decode_truncated_binary(total_unique_indices); + + pPart_hash[basist::astc_ldr_t::part_hash_index(unique_pat_index)] = unique_pat_index; + } + + if (unique_pat_index >= get_total_unique_patterns(astc_block_size_index, tm.m_num_parts)) + { + BASISU_DEVEL_ERROR("astc_ldr_t::xuastc_ldr_decompress_image_full_zstd: invalid unique_pat_index, decompression failed (file corrupt)\n"); + return false; + } + + log_blk.m_partition_id = unique_pat_index_to_part_seed(astc_block_size_index, tm.m_num_parts, unique_pat_index); + } + + + } // if (config_reuse_index < 3) + + if (tm_index >= encoder_trial_modes.size()) + { + BASISU_DEVEL_ERROR("astc_ldr_t::xuastc_ldr_decompress_image_full_zstd: invalid tm_index, decompression failed (file corrupt)\n"); + return false; + } + + const trial_mode& tm = encoder_trial_modes[tm_index]; + + const bool actual_cem_supports_bc = astc_helpers::cem_supports_bc(actual_cem); + + const uint32_t total_endpoint_vals = astc_helpers::get_num_cem_values(actual_cem); + + for (uint32_t part_iter = 0; part_iter < tm.m_num_parts; part_iter++) + log_blk.m_color_endpoint_modes[part_iter] = (uint8_t)actual_cem; + + log_blk.m_num_partitions = (uint8_t)tm.m_num_parts; + log_blk.m_dual_plane = (tm.m_ccs_index >= 0); + if (log_blk.m_dual_plane) + log_blk.m_color_component_selector = (uint8_t)tm.m_ccs_index; + + log_blk.m_weight_ise_range = (uint8_t)tm.m_weight_ise_range; + log_blk.m_endpoint_ise_range = (uint8_t)tm.m_endpoint_ise_range; + log_blk.m_grid_width = (uint8_t)tm.m_grid_width; + log_blk.m_grid_height = (uint8_t)tm.m_grid_height; + + 
const bool used_dpcm_endpoints_flag = (mode_byte & XUASTC_LDR_MODE_BYTE_DPCM_ENDPOINTS_FLAG) != 0; + + if (used_dpcm_endpoints_flag) + { + const int num_endpoint_levels = astc_helpers::get_ise_levels(log_blk.m_endpoint_ise_range); + const auto& endpoint_rank_to_ise = astc_helpers::g_dequant_tables.get_endpoint_tab(log_blk.m_endpoint_ise_range).m_rank_to_ISE; + const auto& endpoint_ise_to_rank = astc_helpers::g_dequant_tables.get_endpoint_tab(log_blk.m_endpoint_ise_range).m_ISE_to_rank; + + uint32_t reuse_delta_index = endpoint_dpcm_reuse_indices_dec.get_bits8(); + if (reuse_delta_index >= NUM_REUSE_XY_DELTAS) + { + BASISU_DEVEL_ERROR("astc_ldr_t::xuastc_ldr_decompress_image_full_zstd::Invalid reuse delta\n"); + return false; + } + + const int reuse_bx = (int)bx + basist::astc_6x6_hdr::g_reuse_xy_deltas[reuse_delta_index].m_x; + const int reuse_by = (int)by + basist::astc_6x6_hdr::g_reuse_xy_deltas[reuse_delta_index].m_y; + + if ((reuse_bx < 0) || (reuse_by < 0) || (reuse_bx >= (int)num_blocks_x) || (reuse_by >= (int)num_blocks_y)) + { + BASISU_DEVEL_ERROR("astc_ldr_t::xuastc_ldr_decompress_image_full_zstd::Invalid reuse delta\n"); + return false; + } + + const astc_helpers::log_astc_block* pEndpoint_pred_log_blk = &log_blocks(reuse_bx, reuse_by & 7); + if (pEndpoint_pred_log_blk->m_solid_color_flag_ldr) + { + BASISU_DEVEL_ERROR("astc_ldr_t::xuastc_ldr_decompress_image_full_zstd::Invalid reuse delta\n"); + return false; + } + + bool endpoints_use_bc[astc_helpers::MAX_PARTITIONS] = { }; + + if (actual_cem_supports_bc) + { + for (uint32_t part_iter = 0; part_iter < log_blk.m_num_partitions; part_iter++) + endpoints_use_bc[part_iter] = (use_bc_bits_dec.get_bits1() != 0); + } + + uint8_t predicted_endpoints[astc_helpers::MAX_PARTITIONS][astc_helpers::MAX_CEM_ENDPOINT_VALS] = { }; + + for (uint32_t part_iter = 0; part_iter < log_blk.m_num_partitions; part_iter++) + { + const bool always_repack_flag = false; + bool blue_contraction_clamped_flag = false, 
base_ofs_clamped_flag = false; + + // Mini-CEM encoder, to cross CEM domains. + bool conv_status = convert_endpoints_across_cems( + pEndpoint_pred_log_blk->m_color_endpoint_modes[0], pEndpoint_pred_log_blk->m_endpoint_ise_range, pEndpoint_pred_log_blk->m_endpoints, + log_blk.m_color_endpoint_modes[0], log_blk.m_endpoint_ise_range, predicted_endpoints[part_iter], + always_repack_flag, + endpoints_use_bc[part_iter], false, + blue_contraction_clamped_flag, base_ofs_clamped_flag); + + if (!conv_status) + { + BASISU_DEVEL_ERROR("astc_ldr_t::xuastc_ldr_decompress_image_full_zstd::Failed predicting endpoints\n"); + return false; + } + } + + if (num_endpoint_levels <= 8) + { + for (uint32_t part_iter = 0; part_iter < tm.m_num_parts; part_iter++) + { + for (uint32_t val_iter = 0; val_iter < total_endpoint_vals; val_iter++) + { + const uint32_t endpoint_idx = part_iter * total_endpoint_vals + val_iter; + + int delta = endpoint_dpcm_3bit_dec.get_bits4(); + + int e_val = (delta + endpoint_ise_to_rank[predicted_endpoints[part_iter][val_iter]]) % num_endpoint_levels; + log_blk.m_endpoints[endpoint_idx] = endpoint_rank_to_ise[e_val]; + } + } + } + else if (num_endpoint_levels <= 16) + { + for (uint32_t part_iter = 0; part_iter < tm.m_num_parts; part_iter++) + { + for (uint32_t val_iter = 0; val_iter < total_endpoint_vals; val_iter++) + { + const uint32_t endpoint_idx = part_iter * total_endpoint_vals + val_iter; + + int delta = endpoint_dpcm_4bit_dec.get_bits4(); + + int e_val = (delta + endpoint_ise_to_rank[predicted_endpoints[part_iter][val_iter]]) % num_endpoint_levels; + log_blk.m_endpoints[endpoint_idx] = endpoint_rank_to_ise[e_val]; + } + } + } + else if (num_endpoint_levels <= 32) + { + for (uint32_t part_iter = 0; part_iter < tm.m_num_parts; part_iter++) + { + for (uint32_t val_iter = 0; val_iter < total_endpoint_vals; val_iter++) + { + const uint32_t endpoint_idx = part_iter * total_endpoint_vals + val_iter; + + int delta = endpoint_dpcm_5bit_dec.get_bits8(); + + int 
e_val = (delta + endpoint_ise_to_rank[predicted_endpoints[part_iter][val_iter]]) % num_endpoint_levels; + log_blk.m_endpoints[endpoint_idx] = endpoint_rank_to_ise[e_val]; + } + } + } + else if (num_endpoint_levels <= 64) + { + for (uint32_t part_iter = 0; part_iter < tm.m_num_parts; part_iter++) + { + for (uint32_t val_iter = 0; val_iter < total_endpoint_vals; val_iter++) + { + const uint32_t endpoint_idx = part_iter * total_endpoint_vals + val_iter; + + int delta = endpoint_dpcm_6bit_dec.get_bits8(); + + int e_val = (delta + endpoint_ise_to_rank[predicted_endpoints[part_iter][val_iter]]) % num_endpoint_levels; + log_blk.m_endpoints[endpoint_idx] = endpoint_rank_to_ise[e_val]; + } + } + } + else if (num_endpoint_levels <= 128) + { + for (uint32_t part_iter = 0; part_iter < tm.m_num_parts; part_iter++) + { + for (uint32_t val_iter = 0; val_iter < total_endpoint_vals; val_iter++) + { + const uint32_t endpoint_idx = part_iter * total_endpoint_vals + val_iter; + + int delta = endpoint_dpcm_7bit_dec.get_bits8(); + + int e_val = (delta + endpoint_ise_to_rank[predicted_endpoints[part_iter][val_iter]]) % num_endpoint_levels; + log_blk.m_endpoints[endpoint_idx] = endpoint_rank_to_ise[e_val]; + } + } + } + else + { + for (uint32_t part_iter = 0; part_iter < tm.m_num_parts; part_iter++) + { + for (uint32_t val_iter = 0; val_iter < total_endpoint_vals; val_iter++) + { + const uint32_t endpoint_idx = part_iter * total_endpoint_vals + val_iter; + + int delta = endpoint_dpcm_8bit_dec.get_bits8(); + + int e_val = (delta + endpoint_ise_to_rank[predicted_endpoints[part_iter][val_iter]]) % num_endpoint_levels; + log_blk.m_endpoints[endpoint_idx] = endpoint_rank_to_ise[e_val]; + } + } + } + } + else + { + if (!decode_values(raw_bits, tm.m_num_parts * total_endpoint_vals, log_blk.m_endpoint_ise_range, log_blk.m_endpoints)) + { + BASISU_DEVEL_ERROR("astc_ldr_t::xuastc_ldr_decompress_image_full_zstd: decode_values() failed\n"); + return false; + } + } + } + else if ((mode_byte & 15) >= 
(uint32_t)xuastc_zstd_mode::cMODE_REUSE_CFG_ENDPOINTS_LEFT) + { + // reuse full cfg+endpoints+part id + const uint32_t reuse_index = ((mode_byte >> 2) & 3) - 1; + + int cfg_dx = 0, cfg_dy = 0; + const prev_block_state_full_zstd* pCfg_state = nullptr; + + switch (reuse_index) + { + case 0: cfg_dx = -1; pCfg_state = pLeft_state; break; + case 1: cfg_dx = 0; cfg_dy = -1; pCfg_state = pUpper_state; break; + case 2: cfg_dx = -1; cfg_dy = -1; pCfg_state = pDiag_state; break; + default: assert(0); break; + } + + if ((((cfg_dx + (int)bx) < 0) || + ((cfg_dy + (int)by) < 0)) || + (!pCfg_state)) + { + BASISU_DEVEL_ERROR("astc_ldr_t::xuastc_ldr_decompress_image_full_zstd::Invalid config reuse\n"); + return false; + } + + const astc_helpers::log_astc_block& cfg_log_blk = log_blocks((int)bx + cfg_dx, ((int)by + cfg_dy) & 7); + + if (pCfg_state->m_tm_index < 0) + { + BASISU_DEVEL_ERROR("astc_ldr_t::xuastc_ldr_decompress_image_full_zstd::Invalid config reuse\n"); + return false; + } + + tm_index = pCfg_state->m_tm_index; + actual_cem = cfg_log_blk.m_color_endpoint_modes[0]; + + for (uint32_t i = 0; i < cfg_log_blk.m_num_partitions; i++) + log_blk.m_color_endpoint_modes[i] = (uint8_t)actual_cem; + + log_blk.m_dual_plane = cfg_log_blk.m_dual_plane; + log_blk.m_color_component_selector = cfg_log_blk.m_color_component_selector; + log_blk.m_num_partitions = cfg_log_blk.m_num_partitions; + log_blk.m_partition_id = cfg_log_blk.m_partition_id; + log_blk.m_endpoint_ise_range = cfg_log_blk.m_endpoint_ise_range; + log_blk.m_weight_ise_range = cfg_log_blk.m_weight_ise_range; + log_blk.m_grid_width = cfg_log_blk.m_grid_width; + log_blk.m_grid_height = cfg_log_blk.m_grid_height; + + const uint32_t total_endpoint_vals = astc_helpers::get_num_cem_values(actual_cem) * log_blk.m_num_partitions; + memcpy(log_blk.m_endpoints, cfg_log_blk.m_endpoints, total_endpoint_vals); + + new_prev_state.m_tm_index = tm_index; + } + else + { + // shouldn't actually get here + 
BASISU_DEVEL_ERROR("astc_ldr_t::xuastc_ldr_decompress_image_full_zstd: decompression failed\n"); + return false; + } + + // Decode weights + + if (tm_index >= encoder_trial_modes.size()) + { + BASISU_DEVEL_ERROR("astc_ldr_t::xuastc_ldr_decompress_image_full_zstd: invalid tm_index, decompression failed (file corrupt)\n"); + return false; + } + + const trial_mode& tm = encoder_trial_modes[tm_index]; + + const uint32_t total_planes = (tm.m_ccs_index >= 0) ? 2 : 1; + const uint32_t total_weights = tm.m_grid_width * tm.m_grid_height; + + bool block_used_dct = false; + if (use_dct) + block_used_dct = ((mode_byte & XUASTC_LDR_MODE_BYTE_USE_DCT) != 0); + + if (block_used_dct) + { + const astc_block_grid_data* pGrid_data = find_astc_block_grid_data(astc_block_width, astc_block_height, log_blk.m_grid_width, log_blk.m_grid_height); + + const uint32_t num_dc_levels = grid_weight_dct::get_num_weight_dc_levels(log_blk.m_weight_ise_range); + syms.m_num_dc_levels = num_dc_levels; + + for (uint32_t plane_iter = 0; plane_iter < total_planes; plane_iter++) + { + syms.m_coeffs.resize(0); + + if (num_dc_levels == DCT_MEAN_LEVELS1) + syms.m_dc_sym = mean1_bytes.get_bits8(); + else + syms.m_dc_sym = mean0_bits.get_bits4(); + + uint32_t cur_zig_ofs = 1; + + while (cur_zig_ofs < total_weights) + { + uint32_t run_len = run_bytes.get_bits8(); + if (run_len == DCT_RUN_LEN_EOB_SYM_INDEX) + break; + + cur_zig_ofs += run_len; + + if (cur_zig_ofs >= total_weights) + { + BASISU_DEVEL_ERROR("astc_ldr_t::xuastc_ldr_decompress_image_full_zstd::DCT decode error\n"); + return false; + } + + int sign = sign_bits.get_bits1(); + int coeff = coeff_bytes.get_bits8() + 1; + + if (sign) + coeff = -coeff; + + syms.m_coeffs.push_back(dct_syms::coeff(basisu::safe_cast_uint16(run_len), basisu::safe_cast_int16(coeff))); + cur_zig_ofs++; + } + + // weight grid IDCT + if (!grid_dct.decode_block_weights(dct_q, plane_iter, log_blk, nullptr, pGrid_data, nullptr, dct_work, &syms)) + { + 
BASISU_DEVEL_ERROR("astc_ldr_t::xuastc_ldr_decompress_image_full_zstd::DCT decode failed\n"); + return false; + } + + } // plane_iter + } + else + { + // Weight grid DPCM (no dependency on other blocks, or between planes, for determinism even when IDCT is used) + const uint32_t num_weight_levels = astc_helpers::get_ise_levels(log_blk.m_weight_ise_range); + const auto& weight_rank_to_ise = astc_helpers::g_dequant_tables.get_weight_tab(log_blk.m_weight_ise_range).m_rank_to_ISE; + + for (uint32_t plane_iter = 0; plane_iter < total_planes; plane_iter++) + { + int prev_w = num_weight_levels / 2; + + if (num_weight_levels < 4) + { + for (uint32_t weight_iter = 0; weight_iter < total_weights; weight_iter++) + { + uint32_t r = weight2_bits.get_bits2(); + + uint32_t w = (prev_w + r) % num_weight_levels; + + prev_w = w; + + log_blk.m_weights[plane_iter + weight_iter * total_planes] = (uint8_t)weight_rank_to_ise[w]; + + } // weight_iter + } + else if (num_weight_levels == 4) + { + for (uint32_t weight_iter = 0; weight_iter < total_weights; weight_iter++) + { + uint32_t r = weight2_bits.get_bits2(); + + uint32_t w = (prev_w + r) & 3; + + prev_w = w; + + log_blk.m_weights[plane_iter + weight_iter * total_planes] = (uint8_t)weight_rank_to_ise[w]; + + } // weight_iter + } + else if (num_weight_levels < 8) + { + for (uint32_t weight_iter = 0; weight_iter < total_weights; weight_iter++) + { + uint32_t r = weight3_bits.get_bits4(); + + uint32_t w = (prev_w + r) % num_weight_levels; + + prev_w = w; + + log_blk.m_weights[plane_iter + weight_iter * total_planes] = (uint8_t)weight_rank_to_ise[w]; + + } // weight_iter + } + else if (num_weight_levels == 8) + { + for (uint32_t weight_iter = 0; weight_iter < total_weights; weight_iter++) + { + uint32_t r = weight3_bits.get_bits4(); + + uint32_t w = (prev_w + r) & 7; + + prev_w = w; + + log_blk.m_weights[plane_iter + weight_iter * total_planes] = (uint8_t)weight_rank_to_ise[w]; + + } // weight_iter + } + else if (num_weight_levels < 16) + { 
+ for (uint32_t weight_iter = 0; weight_iter < total_weights; weight_iter++) + { + uint32_t r = weight4_bits.get_bits4(); + + uint32_t w = (prev_w + r) % num_weight_levels; + + prev_w = w; + + log_blk.m_weights[plane_iter + weight_iter * total_planes] = (uint8_t)weight_rank_to_ise[w]; + + } // weight_iter + } + else if (num_weight_levels == 16) + { + for (uint32_t weight_iter = 0; weight_iter < total_weights; weight_iter++) + { + uint32_t r = weight4_bits.get_bits4(); + + uint32_t w = (prev_w + r) & 15; + + prev_w = w; + + log_blk.m_weights[plane_iter + weight_iter * total_planes] = (uint8_t)weight_rank_to_ise[w]; + + } // weight_iter + } + else + { + for (uint32_t weight_iter = 0; weight_iter < total_weights; weight_iter++) + { + uint32_t r = weight8_bytes.get_bits8(); + + uint32_t w = (prev_w + r) % num_weight_levels; + + prev_w = w; + + log_blk.m_weights[plane_iter + weight_iter * total_planes] = (uint8_t)weight_rank_to_ise[w]; + + } // weight_iter + } + + } // plane_iter + + } // if (block_used_dct) + + if (pBlock_callback) + { + if (!(*pBlock_callback)(bx, by, log_blk, pBlock_callback_data)) + return false; + } + + } // bx + + } // by + + assert(!cur_run_len); + + const uint32_t final_sync_marker = raw_bits.get_bits(FINAL_SYNC_MARKER_BITS); + if (final_sync_marker != FINAL_SYNC_MARKER) + { + BASISU_DEVEL_ERROR("astc_ldr_t::xuastc_ldr_decompress_image_full_zstd::Final sync check failed (1)\n"); + return false; + } + + if (comp_mode_dec.m_pBuf != comp_mode_dec.m_pBuf_end) + { + BASISU_DEVEL_ERROR("astc_ldr_t::xuastc_ldr_decompress_image_full_zstd::Final sync check failed (2)\n"); + return false; + } + + return true; + } + + bool xuastc_ldr_decompress_image( + const uint8_t* pComp_data_all, size_t comp_data_size_all, + uint32_t& astc_block_width, uint32_t& astc_block_height, + uint32_t& actual_width, uint32_t& actual_height, bool& has_alpha, bool& uses_srgb_astc_decode_mode, + bool debug_output, + xuastc_decomp_image_init_callback_ptr pInit_callback, void* 
pInit_callback_data, + xuastc_decomp_image_block_callback_ptr pBlock_callback, void* pBlock_callback_data) + { + if (debug_output) + basisu::debug_printf("\n------------------- astc_ldr_t::decompress_image\n"); + + assert(g_initialized); + + astc_block_width = 0; + astc_block_height = 0; + actual_width = 0; + actual_height = 0; + has_alpha = false; + uses_srgb_astc_decode_mode = false; + + if (!g_initialized) + { + BASISU_DEVEL_ERROR("Not initialized"); + //dec_blocks.clear(); + return false; + } + + if (comp_data_size_all < 1) + { + BASISU_DEVEL_ERROR("Compressed file is too small\n"); + return false; + } + + const uint8_t first_comp_byte = pComp_data_all[0]; + + if (first_comp_byte == (uint8_t)xuastc_ldr_syntax::cFullZStd) + { + return xuastc_ldr_decompress_image_full_zstd( + pComp_data_all, comp_data_size_all, + astc_block_width, astc_block_height, + actual_width, actual_height, has_alpha, uses_srgb_astc_decode_mode, + debug_output, + pInit_callback, pInit_callback_data, + pBlock_callback, pBlock_callback_data); + } + + // Either full arith or hybrid arith+zstd now + + const xuastc_ldr_arith_header* pHdr = nullptr; + + const uint8_t* pComp_data = pComp_data_all + 1; + size_t comp_data_size = comp_data_size_all - 1; + + basisu::uint8_vec uncomp_mean0_bits, uncomp_mean1_bytes, uncomp_run_bytes, uncomp_coeff_bytes, uncomp_weight2_bytes, uncomp_weight3_bytes, uncomp_weight4_bytes, uncomp_weight8_bytes; + simplified_bitwise_decoder mean0_bits, mean1_bytes, run_bytes, coeff_bytes, sign_bits, weight2_bits, weight3_bits, weight4_bits, weight8_bytes; + bool use_fast_decoding = false; + + if (first_comp_byte == (uint8_t)xuastc_ldr_syntax::cHybridArithZStd) + { + if (comp_data_size_all < sizeof(xuastc_ldr_arith_header)) + { + BASISU_DEVEL_ERROR("Compressed file is too small\n"); + return false; + } + + pHdr = (const xuastc_ldr_arith_header*)pComp_data_all; + + if (pHdr->m_arith_bytes_len < arith::ArithMinExpectedDataBufSize) + { + BASISU_DEVEL_ERROR("Invalid header\n"); + 
return false; + } + + const uint64_t total_comp_size = (uint64_t)((uint32_t)pHdr->m_arith_bytes_len) + + pHdr->m_mean0_bits_len + pHdr->m_mean1_bytes_len + + pHdr->m_run_bytes_len + pHdr->m_coeff_bytes_len + pHdr->m_sign_bits_len + + pHdr->m_weight2_bits_len + pHdr->m_weight3_bits_len + pHdr->m_weight4_bits_len + pHdr->m_weight8_bytes_len; + + if ((sizeof(xuastc_ldr_arith_header) + total_comp_size) > comp_data_size_all) + { + BASISU_DEVEL_ERROR("Compressed file is too small\n"); + return false; + } + + pComp_data = pComp_data_all + sizeof(xuastc_ldr_arith_header); + comp_data_size = pHdr->m_arith_bytes_len; + + const uint8_t* pCur_buf = (const uint8_t*)pComp_data + comp_data_size; + + // mean0 + { + bool status = zstd_decompress(pCur_buf, pHdr->m_mean0_bits_len, uncomp_mean0_bits); + if (!status) + return false; + pCur_buf += pHdr->m_mean0_bits_len; + mean0_bits.init(uncomp_mean0_bits); + } + + // mean1 + { + bool status = zstd_decompress(pCur_buf, pHdr->m_mean1_bytes_len, uncomp_mean1_bytes); + if (!status) + return false; + pCur_buf += pHdr->m_mean1_bytes_len; + mean1_bytes.init(uncomp_mean1_bytes); + } + + // run + { + bool status = zstd_decompress(pCur_buf, pHdr->m_run_bytes_len, uncomp_run_bytes); + if (!status) + return false; + pCur_buf += pHdr->m_run_bytes_len; + run_bytes.init(uncomp_run_bytes); + } + + // coeff + { + bool status = zstd_decompress(pCur_buf, pHdr->m_coeff_bytes_len, uncomp_coeff_bytes); + if (!status) + return false; + pCur_buf += pHdr->m_coeff_bytes_len; + coeff_bytes.init(uncomp_coeff_bytes); + } + + // sign + { + sign_bits.init(pCur_buf, pHdr->m_sign_bits_len); + pCur_buf += pHdr->m_sign_bits_len; + } + + // weight2 + { + bool status = zstd_decompress(pCur_buf, pHdr->m_weight2_bits_len, uncomp_weight2_bytes); + if (!status) + return false; + pCur_buf += pHdr->m_weight2_bits_len; + weight2_bits.init(uncomp_weight2_bytes); + } + + // weight3 + { + bool status = zstd_decompress(pCur_buf, pHdr->m_weight3_bits_len, uncomp_weight3_bytes); + if 
(!status) + return false; + pCur_buf += pHdr->m_weight3_bits_len; + weight3_bits.init(uncomp_weight3_bytes); + } + + // weight4 + { + bool status = zstd_decompress(pCur_buf, pHdr->m_weight4_bits_len, uncomp_weight4_bytes); + if (!status) + return false; + pCur_buf += pHdr->m_weight4_bits_len; + weight4_bits.init(uncomp_weight4_bytes); + } + + // weight8 + { + bool status = zstd_decompress(pCur_buf, pHdr->m_weight8_bytes_len, uncomp_weight8_bytes); + if (!status) + return false; + pCur_buf += pHdr->m_weight8_bytes_len; + weight8_bytes.init(uncomp_weight8_bytes); + } + + // sanity check + const uint64_t total_read_size = pCur_buf - pComp_data_all; + if (total_read_size > comp_data_size_all) + { + BASISU_DEVEL_ERROR("Compressed file is too small\n"); + return false; + } + + use_fast_decoding = true; + } + + if (comp_data_size < arith::ArithMinExpectedDataBufSize) + { + BASISU_DEVEL_ERROR("Compressed file is too small\n"); + return false; + } + + //interval_timer itm; + //itm.start(); + + arith::arith_dec dec; + if (!dec.init(pComp_data, comp_data_size)) + { + BASISU_DEVEL_ERROR("astc_ldr_t::decompress_image::Invalid compressed data\n"); + return false; + } + + const uint32_t header_val = dec.get_bits(ARITH_HEADER_MARKER_BITS); + if (header_val != ARITH_HEADER_MARKER) + { + BASISU_DEVEL_ERROR("astc_ldr_t::decompress_image::Unexpected header marker\n"); + return false; + } + + const uint32_t astc_block_size_index = dec.get_bits(4); + if (astc_block_size_index >= astc_helpers::NUM_ASTC_BLOCK_SIZES) + { + BASISU_DEVEL_ERROR("Invalid block dimension index\n"); + return false; + } + + const uint32_t block_width = astc_helpers::g_astc_block_sizes[astc_block_size_index][0]; + const uint32_t block_height = astc_helpers::g_astc_block_sizes[astc_block_size_index][1]; + + // sanity checks + assert((int)astc_block_size_index == astc_helpers::find_astc_block_size_index(block_width, block_height)); + assert(astc_helpers::is_valid_block_size(block_width, block_height)); + + 
astc_block_width = block_width; + astc_block_height = block_height; + + //const uint32_t total_block_pixels = block_width * block_height; + + uses_srgb_astc_decode_mode = dec.get_bit(); + + //const astc_helpers::decode_mode dec_mode = uses_srgb_astc_decode_mode ? astc_helpers::cDecodeModeSRGB8 : astc_helpers::cDecodeModeLDR8; + + const uint32_t width = dec.get_bits(16); + const uint32_t height = dec.get_bits(16); + + if ((width < 1) || (height < 1)) + { + BASISU_DEVEL_ERROR("astc_ldr_t::decompress_image::Invalid image dimension\n"); + return false; + } + + actual_width = width; + actual_height = height; + + has_alpha = dec.get_bit(); + + const bool use_dct = (dec.get_bits(1) != 0); + + int int_q = 0; + if (use_dct) + int_q = dec.get_bits(8); + + const float dct_q = (float)int_q / 2.0f; + if ((use_dct) && ((dct_q <= 0.0f) || (dct_q > 100.0f))) + { + BASISU_DEVEL_ERROR("astc_ldr_t::decompress_image::Invalid DCT global quality factor\n"); + return false; + } + + if (debug_output) + { + basisu::fmt_debug_printf("astc_ldr_t::decompress_image: block dim: {}x{}, image dim: {}x{}, sRGB decode profile: {}, has_alpha: {}, dct: {} dct_q: {}\n", + block_width, block_height, + width, height, + uses_srgb_astc_decode_mode, has_alpha, + use_dct, dct_q); + } + + const uint32_t num_blocks_x = (width + block_width - 1) / block_width; + const uint32_t num_blocks_y = (height + block_height - 1) / block_height; + + if (pInit_callback) + { + if (!(*pInit_callback)(num_blocks_x, num_blocks_y, block_width, block_height, uses_srgb_astc_decode_mode, dct_q, has_alpha, pInit_callback_data)) + return false; + } + + assert((size_t)astc_block_size_index < std::size(g_encoder_trial_modes)); + const auto& encoder_trial_modes = g_encoder_trial_modes[astc_block_size_index]; + + assert((size_t)astc_block_size_index < std::size(g_grouped_encoder_trial_modes)); + const auto& grouped_encoder_trial_modes = g_grouped_encoder_trial_modes[astc_block_size_index]; + + arith::arith_data_model 
mode_model((uint32_t)xuastc_mode::cMODE_TOTAL); + + arith::arith_data_model solid_color_dpcm_model[4]; + for (uint32_t i = 0; i < 4; i++) + solid_color_dpcm_model[i].init(256, true); + + arith::arith_data_model raw_endpoint_models[astc_helpers::TOTAL_ENDPOINT_ISE_RANGES]; + for (uint32_t i = 0; i < astc_helpers::TOTAL_ENDPOINT_ISE_RANGES; i++) + raw_endpoint_models[i].init(astc_helpers::get_ise_levels(astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE + i)); + + arith::arith_data_model dpcm_endpoint_models[astc_helpers::TOTAL_ENDPOINT_ISE_RANGES]; + for (uint32_t i = 0; i < astc_helpers::TOTAL_ENDPOINT_ISE_RANGES; i++) + dpcm_endpoint_models[i].init(astc_helpers::get_ise_levels(astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE + i)); + + arith::arith_bit_model is_base_ofs_model; + arith::arith_bit_model use_dct_model[4]; + arith::arith_bit_model use_dpcm_endpoints_model; + + arith::arith_data_model cem_index_model[8]; + for (uint32_t i = 0; i < 8; i++) + cem_index_model[i].init(OTM_NUM_CEMS); + + arith::arith_data_model subset_index_model[OTM_NUM_SUBSETS]; + for (uint32_t i = 0; i < OTM_NUM_SUBSETS; i++) + subset_index_model[i].init(OTM_NUM_SUBSETS); + + arith::arith_data_model ccs_index_model[OTM_NUM_CCS]; + for (uint32_t i = 0; i < OTM_NUM_CCS; i++) + ccs_index_model[i].init(OTM_NUM_CCS); + + arith::arith_data_model grid_size_model[OTM_NUM_GRID_SIZES]; + for (uint32_t i = 0; i < OTM_NUM_GRID_SIZES; i++) + grid_size_model[i].init(OTM_NUM_GRID_SIZES); + + arith::arith_data_model grid_aniso_model[OTM_NUM_GRID_ANISOS]; + for (uint32_t i = 0; i < OTM_NUM_GRID_ANISOS; i++) + grid_aniso_model[i].init(OTM_NUM_GRID_ANISOS); + + arith::arith_data_model dct_run_len_model; // [0,63] or 64=EOB + arith::arith_data_model dct_coeff_mag; // [1,255] (blocks with larger mags go DPCM) + arith::arith_data_model weight_mean_models[2]; + arith::arith_data_model raw_weight_models[astc_helpers::TOTAL_WEIGHT_ISE_RANGES]; + + if (!use_fast_decoding) + { + // Models used for weight decompression in 
pure arithmetic mode. + dct_run_len_model.init(65); + dct_coeff_mag.init(255); + + weight_mean_models[0].init(DCT_MEAN_LEVELS0); + weight_mean_models[1].init(DCT_MEAN_LEVELS1); + + for (uint32_t i = 0; i < astc_helpers::TOTAL_WEIGHT_ISE_RANGES; i++) + raw_weight_models[i].init(astc_helpers::get_ise_levels(astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE + i)); + } + + const grid_weight_dct& grid_dct = g_grid_weight_dcts[astc_block_size_index]; + + basisu::vector2D log_blocks; + if (!log_blocks.try_resize(num_blocks_x, 8)) + { + BASISU_DEVEL_ERROR("astc_ldr_t::decompress_image: out of memory\n"); + return false; + } + + memset(log_blocks.get_ptr(), 0, log_blocks.size_in_bytes()); + + basisu::vector2D prev_block_states; + if (!prev_block_states.try_resize(num_blocks_x, 2)) + { + BASISU_DEVEL_ERROR("astc_ldr_t::decompress_image: out of memory\n"); + return false; + } + + arith::arith_data_model submode_models[OTM_NUM_CEMS][OTM_NUM_SUBSETS][OTM_NUM_CCS][OTM_NUM_GRID_SIZES][OTM_NUM_GRID_ANISOS]; + + arith::arith_bit_model endpoints_use_bc_models[4]; + + arith::arith_data_model endpoint_reuse_delta_model(basist::astc_6x6_hdr::NUM_REUSE_XY_DELTAS); + + arith::arith_data_model config_reuse_model[4]; + for (uint32_t i = 0; i < 4; i++) + config_reuse_model[i].init(4); + + arith::arith_gamma_contexts m_run_len_contexts; + uint32_t cur_run_len = 0; + + int part2_hash[PART_HASH_SIZE]; + std::fill(part2_hash, part2_hash + PART_HASH_SIZE, -1); + + int part3_hash[PART_HASH_SIZE]; + std::fill(part3_hash, part3_hash + PART_HASH_SIZE, -1); + + arith::arith_bit_model use_part_hash_model[4]; + arith::arith_data_model part2_hash_index_model(PART_HASH_SIZE, true); + arith::arith_data_model part3_hash_index_model(PART_HASH_SIZE, true); + + //if (debug_output) + // debug_printf("Decompressor init time finish: {} secs\n", itm.get_elapsed_secs()); + + //itm.start(); + + dct_syms syms; + + fvec dct_work; + + for (uint32_t by = 0; by < num_blocks_y; by++) + { + for (uint32_t bx = 0; bx < 
num_blocks_x; bx++) + { + prev_block_state& new_prev_state = prev_block_states(bx, by & 1); + new_prev_state.clear(); + + const prev_block_state* pLeft_state = bx ? &prev_block_states(bx - 1, by & 1) : nullptr; + const prev_block_state* pUpper_state = by ? &prev_block_states(bx, (by - 1) & 1) : nullptr; + const prev_block_state* pDiag_state = (bx && by) ? &prev_block_states(bx - 1, (by - 1) & 1) : nullptr; + const prev_block_state* pPred_state = pLeft_state ? pLeft_state : pUpper_state; // left or upper, or nullptr on first block + + astc_helpers::log_astc_block& log_blk = log_blocks(bx, by & 7); + + if (cur_run_len) + { + const prev_block_state* pPrev_block_state = pLeft_state ? pLeft_state : pUpper_state; + const astc_helpers::log_astc_block& prev_log_blk = bx ? log_blocks(bx - 1, by & 7) : log_blocks(bx, (by - 1) & 7); + + log_blk = prev_log_blk; + + if (pBlock_callback) + { + if (!(*pBlock_callback)(bx, by, prev_log_blk, pBlock_callback_data)) + return false; + } + + new_prev_state.m_was_solid_color = pPrev_block_state->m_was_solid_color; + new_prev_state.m_used_weight_dct = pPrev_block_state->m_used_weight_dct; + new_prev_state.m_first_endpoint_uses_bc = pPrev_block_state->m_first_endpoint_uses_bc; + new_prev_state.m_reused_full_cfg = true; + new_prev_state.m_tm_index = pPrev_block_state->m_tm_index; + new_prev_state.m_base_cem_index = pPrev_block_state->m_base_cem_index; + new_prev_state.m_subset_index = pPrev_block_state->m_subset_index; + new_prev_state.m_ccs_index = pPrev_block_state->m_ccs_index; + new_prev_state.m_grid_size = pPrev_block_state->m_grid_size; + new_prev_state.m_grid_aniso = pPrev_block_state->m_grid_aniso; + new_prev_state.m_used_part_hash = pPrev_block_state->m_used_part_hash; + + cur_run_len--; + continue; + } + + log_blk.clear(); + + uint32_t mode_index = dec.decode_sym(mode_model); + + switch (mode_index) + { + case (uint32_t)xuastc_mode::cMODE_SOLID: + { + const astc_helpers::log_astc_block* pPrev_log_blk = bx ? 
&log_blocks(bx - 1, by & 7) : (by ? &log_blocks(bx, (by - 1) & 7) : nullptr); + + uint32_t prev_solid_color[4] = { 0 }; + + if (pPrev_log_blk) + { + if (pPrev_log_blk->m_solid_color_flag_ldr) + { + prev_solid_color[0] = pPrev_log_blk->m_solid_color[0] >> 8; + prev_solid_color[1] = pPrev_log_blk->m_solid_color[1] >> 8; + prev_solid_color[2] = pPrev_log_blk->m_solid_color[2] >> 8; + prev_solid_color[3] = pPrev_log_blk->m_solid_color[3] >> 8; + } + else + { + // Decode previous block's first CEM, use the halfway point as the predictor. + color_rgba prev_l, prev_h; + decode_endpoints(pPrev_log_blk->m_color_endpoint_modes[0], pPrev_log_blk->m_endpoints, pPrev_log_blk->m_endpoint_ise_range, prev_l, prev_h); + + prev_solid_color[0] = (prev_l[0] + prev_h[0] + 1) >> 1; + prev_solid_color[1] = (prev_l[1] + prev_h[1] + 1) >> 1; + prev_solid_color[2] = (prev_l[2] + prev_h[2] + 1) >> 1; + prev_solid_color[3] = (prev_l[3] + prev_h[3] + 1) >> 1; + } + } + + uint32_t r = (prev_solid_color[0] + dec.decode_sym(solid_color_dpcm_model[0])) & 0xFF; + uint32_t g = (prev_solid_color[1] + dec.decode_sym(solid_color_dpcm_model[1])) & 0xFF; + uint32_t b = (prev_solid_color[2] + dec.decode_sym(solid_color_dpcm_model[2])) & 0xFF; + + uint32_t a = 255; + if (has_alpha) + a = (prev_solid_color[3] + dec.decode_sym(solid_color_dpcm_model[3])) & 0xFF; + + log_blk.m_solid_color_flag_ldr = true; + log_blk.m_solid_color[0] = (uint16_t)(r | (r << 8)); + log_blk.m_solid_color[1] = (uint16_t)(g | (g << 8)); + log_blk.m_solid_color[2] = (uint16_t)(b | (b << 8)); + log_blk.m_solid_color[3] = (uint16_t)(a | (a << 8)); + + if (pBlock_callback) + { + if (!(*pBlock_callback)(bx, by, log_blk, pBlock_callback_data)) + return false; + } + + // Bias the statistics towards using DCT (most common case). 
+ if (use_dct) + new_prev_state.m_used_weight_dct = true; + + new_prev_state.m_first_endpoint_uses_bc = true; + new_prev_state.m_was_solid_color = true; + new_prev_state.m_tm_index = -1; + new_prev_state.m_base_cem_index = astc_helpers::CEM_LDR_RGB_DIRECT; + new_prev_state.m_subset_index = 0; + new_prev_state.m_ccs_index = 0; + new_prev_state.m_grid_size = 0; + new_prev_state.m_grid_aniso = 0; + new_prev_state.m_reused_full_cfg = false; + new_prev_state.m_used_part_hash = true; // bias to true + + break; + } + case (uint32_t)xuastc_mode::cMODE_RUN: + { + if (!bx && !by) + { + BASISU_DEVEL_ERROR("astc_ldr_t::decompress_image::Invalid run command\n"); + return false; + } + + cur_run_len = dec.decode_gamma(m_run_len_contexts); + if (!cur_run_len) + { + BASISU_DEVEL_ERROR("astc_ldr_t::decompress_image::Invalid run len\n"); + return false; + } + + const uint32_t max_possible_run_len = num_blocks_x - bx; + if (cur_run_len > max_possible_run_len) + { + BASISU_DEVEL_ERROR("astc_ldr_t::decompress_image::Invalid run len\n"); + return false; + } + + const prev_block_state* pPrev_block_state = pLeft_state ? pLeft_state : pUpper_state; + const astc_helpers::log_astc_block& prev_log_blk = bx ? 
log_blocks(bx - 1, by & 7) : log_blocks(bx, (by - 1) & 7); + + log_blk = prev_log_blk; + + if (pBlock_callback) + { + if (!(*pBlock_callback)(bx, by, prev_log_blk, pBlock_callback_data)) + return false; + } + + new_prev_state.m_was_solid_color = pPrev_block_state->m_was_solid_color; + new_prev_state.m_used_weight_dct = pPrev_block_state->m_used_weight_dct; + new_prev_state.m_first_endpoint_uses_bc = pPrev_block_state->m_first_endpoint_uses_bc; + new_prev_state.m_reused_full_cfg = true; + new_prev_state.m_tm_index = pPrev_block_state->m_tm_index; + new_prev_state.m_base_cem_index = pPrev_block_state->m_base_cem_index; + new_prev_state.m_subset_index = pPrev_block_state->m_subset_index; + new_prev_state.m_ccs_index = pPrev_block_state->m_ccs_index; + new_prev_state.m_grid_size = pPrev_block_state->m_grid_size; + new_prev_state.m_grid_aniso = pPrev_block_state->m_grid_aniso; + new_prev_state.m_used_part_hash = pPrev_block_state->m_used_part_hash; + + cur_run_len--; + + break; + } + case (uint32_t)xuastc_mode::cMODE_RAW: + case (uint32_t)xuastc_mode::cMODE_REUSE_CFG_ENDPOINTS_LEFT: + case (uint32_t)xuastc_mode::cMODE_REUSE_CFG_ENDPOINTS_UP: + case (uint32_t)xuastc_mode::cMODE_REUSE_CFG_ENDPOINTS_DIAG: + { + uint32_t tm_index = 0; + uint32_t actual_cem = 0; + + if (mode_index != (uint32_t)xuastc_mode::cMODE_RAW) + { + // Full config+part ID+endpoint reuse from an immediate neighbor + // + // 0 = left, 1 = upper, 2 = left-upper + int cfg_dx = 0, cfg_dy = 0; + const prev_block_state* pCfg_state = nullptr; + + switch (mode_index) + { + case (uint32_t)xuastc_mode::cMODE_REUSE_CFG_ENDPOINTS_LEFT: cfg_dx = -1; pCfg_state = pLeft_state; break; + case (uint32_t)xuastc_mode::cMODE_REUSE_CFG_ENDPOINTS_UP: cfg_dx = 0; cfg_dy = -1; pCfg_state = pUpper_state; break; + case (uint32_t)xuastc_mode::cMODE_REUSE_CFG_ENDPOINTS_DIAG: cfg_dx = -1; cfg_dy = -1; pCfg_state = pDiag_state; break; + default: assert(0); break; + } + + if ((((cfg_dx + (int)bx) < 0) || + ((cfg_dy + (int)by) < 0)) 
|| + (!pCfg_state)) + { + BASISU_DEVEL_ERROR("astc_ldr_t::decompress_image::Invalid config reuse\n"); + return false; + } + + if (pCfg_state->m_tm_index < 0) + { + BASISU_DEVEL_ERROR("astc_ldr_t::xuastc_ldr_decompress_image::Invalid config reuse\n"); + return false; + } + + const astc_helpers::log_astc_block& cfg_log_blk = log_blocks((int)bx + cfg_dx, ((int)by + cfg_dy) & 7); + + tm_index = pCfg_state->m_tm_index; + actual_cem = cfg_log_blk.m_color_endpoint_modes[0]; + + for (uint32_t i = 0; i < cfg_log_blk.m_num_partitions; i++) + log_blk.m_color_endpoint_modes[i] = (uint8_t)actual_cem; + log_blk.m_dual_plane = cfg_log_blk.m_dual_plane; + log_blk.m_color_component_selector = cfg_log_blk.m_color_component_selector; + log_blk.m_num_partitions = cfg_log_blk.m_num_partitions; + log_blk.m_partition_id = cfg_log_blk.m_partition_id; + log_blk.m_endpoint_ise_range = cfg_log_blk.m_endpoint_ise_range; + log_blk.m_weight_ise_range = cfg_log_blk.m_weight_ise_range; + log_blk.m_grid_width = cfg_log_blk.m_grid_width; + log_blk.m_grid_height = cfg_log_blk.m_grid_height; + + const uint32_t total_endpoint_vals = astc_helpers::get_num_cem_values(actual_cem) * log_blk.m_num_partitions; + memcpy(log_blk.m_endpoints, cfg_log_blk.m_endpoints, total_endpoint_vals); + + new_prev_state.m_tm_index = pCfg_state->m_tm_index; + new_prev_state.m_base_cem_index = pCfg_state->m_base_cem_index; // base cem not including base+ofs, not actual + new_prev_state.m_subset_index = pCfg_state->m_subset_index; + new_prev_state.m_ccs_index = pCfg_state->m_ccs_index; + new_prev_state.m_grid_size = pCfg_state->m_grid_size; + new_prev_state.m_grid_aniso = pCfg_state->m_grid_aniso; + new_prev_state.m_used_part_hash = pCfg_state->m_used_part_hash; + new_prev_state.m_reused_full_cfg = true; + + const bool actual_cem_supports_bc = astc_helpers::cem_supports_bc(actual_cem); + if (actual_cem_supports_bc) + { + new_prev_state.m_first_endpoint_uses_bc = astc_helpers::used_blue_contraction(actual_cem, 
log_blk.m_endpoints, log_blk.m_endpoint_ise_range); + assert(new_prev_state.m_first_endpoint_uses_bc == pCfg_state->m_first_endpoint_uses_bc); + } + } + else + { + uint32_t reused_full_cfg_model_index = 0; + if (pLeft_state) + reused_full_cfg_model_index = pLeft_state->m_reused_full_cfg; + else + reused_full_cfg_model_index = 1; + + if (pUpper_state) + reused_full_cfg_model_index |= pUpper_state->m_reused_full_cfg ? 2 : 0; + else + reused_full_cfg_model_index |= 2; + + const uint32_t config_reuse_index = dec.decode_sym(config_reuse_model[reused_full_cfg_model_index]); + + // TODO: Shared with encoder, make global constant + + //if (config_reuse_index < ldr_astc_block_encode_image_output::cMaxConfigReuseNeighbors) + if (config_reuse_index < cMaxConfigReuseNeighbors) + { + // 0 = left, 1 = upper, 2 = left-upper + int cfg_dx = 0, cfg_dy = 0; + const prev_block_state* pCfg_state = nullptr; + + switch (config_reuse_index) + { + case 0: cfg_dx = -1; pCfg_state = pLeft_state; break; + case 1: cfg_dx = 0; cfg_dy = -1; pCfg_state = pUpper_state; break; + case 2: cfg_dx = -1; cfg_dy = -1; pCfg_state = pDiag_state; break; + default: assert(0); break; + } + + if ((((cfg_dx + (int)bx) < 0) || + ((cfg_dy + (int)by) < 0)) || + (!pCfg_state)) + { + BASISU_DEVEL_ERROR("astc_ldr_t::decompress_image::Invalid config reuse\n"); + return false; + } + + if (pCfg_state->m_tm_index < 0) + { + BASISU_DEVEL_ERROR("astc_ldr_t::decompress_image::Invalid config reuse\n"); + return false; + } + + astc_helpers::log_astc_block& cfg_log_blk = log_blocks((int)bx + cfg_dx, ((int)by + cfg_dy) & 7); + + tm_index = pCfg_state->m_tm_index; + log_blk.m_partition_id = cfg_log_blk.m_partition_id; + actual_cem = cfg_log_blk.m_color_endpoint_modes[0]; + + new_prev_state.m_tm_index = pCfg_state->m_tm_index; + new_prev_state.m_base_cem_index = pCfg_state->m_base_cem_index; // base cem not including base+ofs, not actual + new_prev_state.m_subset_index = pCfg_state->m_subset_index; + new_prev_state.m_ccs_index = 
pCfg_state->m_ccs_index; + new_prev_state.m_grid_size = pCfg_state->m_grid_size; + new_prev_state.m_grid_aniso = pCfg_state->m_grid_aniso; + new_prev_state.m_used_part_hash = pCfg_state->m_used_part_hash; + new_prev_state.m_reused_full_cfg = true; + } + else + { + // -------------------- Decode full ASTC config + { + uint32_t prev_cem_index = astc_helpers::CEM_LDR_RGB_DIRECT; + uint32_t prev_subset_index = 0, prev_ccs_index = 0, prev_grid_size = 0, prev_grid_aniso = 0; + + if (pPred_state) + { + prev_cem_index = pPred_state->m_base_cem_index; + prev_subset_index = pPred_state->m_subset_index; + prev_ccs_index = pPred_state->m_ccs_index; + prev_grid_size = pPred_state->m_grid_size; + prev_grid_aniso = pPred_state->m_grid_aniso; + } + + const uint32_t ldrcem_index = cem_to_ldrcem_index(prev_cem_index); + + uint32_t cem_index = dec.decode_sym(cem_index_model[ldrcem_index]); + uint32_t subset_index = dec.decode_sym(subset_index_model[prev_subset_index]); + uint32_t ccs_index = dec.decode_sym(ccs_index_model[prev_ccs_index]); + uint32_t grid_size_index = dec.decode_sym(grid_size_model[prev_grid_size]); + uint32_t grid_aniso_index = dec.decode_sym(grid_aniso_model[prev_grid_aniso]); + + const basisu::uint_vec& modes = get_tm_candidates(grouped_encoder_trial_modes, cem_index, subset_index, ccs_index, grid_size_index, grid_aniso_index); + uint32_t submode_index = 0; + + if (modes.size() > 1) + { + arith::arith_data_model& submode_model = submode_models[cem_index][subset_index][ccs_index][grid_size_index][grid_aniso_index]; + if (!submode_model.get_num_data_syms()) + submode_model.init(modes.size_u32(), true); + + submode_index = dec.decode_sym(submode_model); + } + + if (submode_index >= modes.size()) + { + BASISU_DEVEL_ERROR("astc_ldr_t::decompress_image::Invalid mode index\n"); + return false; + } + + tm_index = modes[submode_index]; + + new_prev_state.m_tm_index = tm_index; + new_prev_state.m_base_cem_index = cem_index; + new_prev_state.m_subset_index = subset_index; + 
new_prev_state.m_ccs_index = ccs_index; + new_prev_state.m_grid_size = grid_size_index; + new_prev_state.m_grid_aniso = grid_aniso_index; + new_prev_state.m_reused_full_cfg = false; + } + + if (tm_index >= encoder_trial_modes.size()) + { + BASISU_DEVEL_ERROR("astc_ldr_t::decompress_image: invalid tm_index, decompression failed (file corrupt)\n"); + return false; + } + + const trial_mode& tm = encoder_trial_modes[tm_index]; + + actual_cem = tm.m_cem; + if ((tm.m_cem == astc_helpers::CEM_LDR_RGB_DIRECT) || (tm.m_cem == astc_helpers::CEM_LDR_RGBA_DIRECT)) + { + // Decode is_base_ofs bit + bool is_base_ofs = dec.decode_bit(is_base_ofs_model); + if (is_base_ofs) + { + if (actual_cem == astc_helpers::CEM_LDR_RGB_DIRECT) + actual_cem = astc_helpers::CEM_LDR_RGB_BASE_PLUS_OFFSET; + else if (actual_cem == astc_helpers::CEM_LDR_RGBA_DIRECT) + actual_cem = astc_helpers::CEM_LDR_RGBA_BASE_PLUS_OFFSET; + } + } + + if (tm.m_num_parts > 1) + { + const uint32_t total_unique_indices = get_total_unique_patterns(astc_block_size_index, tm.m_num_parts); + + uint32_t use_part_model_index = 0; + if (pLeft_state) + use_part_model_index = pLeft_state->m_used_part_hash; + else + use_part_model_index = 1; + if (pUpper_state) + use_part_model_index |= pUpper_state->m_used_part_hash ? 2 : 0; + else + use_part_model_index |= 2; + + int* pPart_hash = (tm.m_num_parts == 2) ? part2_hash : part3_hash; + + bool use_part_hash_flag = dec.decode_bit(use_part_hash_model[use_part_model_index]); + + uint32_t unique_pat_index; + if (!use_part_hash_flag) + { + unique_pat_index = dec.decode_truncated_binary(total_unique_indices); + pPart_hash[part_hash_index(unique_pat_index)] = unique_pat_index; + + new_prev_state.m_used_part_hash = false; + } + else + { + uint32_t hash_index = dec.decode_sym((tm.m_num_parts == 2) ? 
part2_hash_index_model : part3_hash_index_model); + unique_pat_index = pPart_hash[hash_index]; + + if ((int)unique_pat_index < 0) + { + BASISU_DEVEL_ERROR("astc_ldr_t::decompress_image: invalid hash_index, decompression failed (file corrupt)\n"); + return false; + } + + new_prev_state.m_used_part_hash = true; + } + + if (unique_pat_index >= get_total_unique_patterns(astc_block_size_index, tm.m_num_parts)) + { + BASISU_DEVEL_ERROR("astc_ldr_t::decompress_image: invalid unique_pat_index, decompression failed (file corrupt)\n"); + return false; + } + + log_blk.m_partition_id = unique_pat_index_to_part_seed(astc_block_size_index, tm.m_num_parts, unique_pat_index); + } + else + { + new_prev_state.m_used_part_hash = true; // bias to true + } + + } // if (config_reuse_index < ldr_astc_block_encode_image_output::cMaxConfigReuseNeighbors) + + if (tm_index >= encoder_trial_modes.size()) + { + BASISU_DEVEL_ERROR("astc_ldr_t::decompress_image: invalid tm_index, decompression failed (file corrupt)\n"); + return false; + } + + const trial_mode& tm = encoder_trial_modes[tm_index]; + + const bool actual_cem_supports_bc = astc_helpers::cem_supports_bc(actual_cem); + + const uint32_t total_endpoint_vals = astc_helpers::get_num_cem_values(actual_cem); + + for (uint32_t part_iter = 0; part_iter < tm.m_num_parts; part_iter++) + log_blk.m_color_endpoint_modes[part_iter] = (uint8_t)actual_cem; + + log_blk.m_num_partitions = (uint8_t)tm.m_num_parts; + log_blk.m_dual_plane = (tm.m_ccs_index >= 0); + if (log_blk.m_dual_plane) + log_blk.m_color_component_selector = (uint8_t)tm.m_ccs_index; + + log_blk.m_weight_ise_range = (uint8_t)tm.m_weight_ise_range; + log_blk.m_endpoint_ise_range = (uint8_t)tm.m_endpoint_ise_range; + log_blk.m_grid_width = (uint8_t)tm.m_grid_width; + log_blk.m_grid_height = (uint8_t)tm.m_grid_height; + + // --------------------------------- Decode endpoints + const bool used_dpcm_endpoints_flag = dec.decode_bit(use_dpcm_endpoints_model); + + if 
(!used_dpcm_endpoints_flag) + { + auto& raw_model = raw_endpoint_models[log_blk.m_endpoint_ise_range - astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE]; + + for (uint32_t part_iter = 0; part_iter < tm.m_num_parts; part_iter++) + { + for (uint32_t val_iter = 0; val_iter < total_endpoint_vals; val_iter++) + { + log_blk.m_endpoints[part_iter * total_endpoint_vals + val_iter] = (uint8_t)dec.decode_sym(raw_model); + } // val_iter + + } // part_iter + } + else + { + // Endpoint DPCM + const int num_endpoint_levels = astc_helpers::get_ise_levels(log_blk.m_endpoint_ise_range); + const auto& endpoint_rank_to_ise = astc_helpers::g_dequant_tables.get_endpoint_tab(log_blk.m_endpoint_ise_range).m_rank_to_ISE; + const auto& endpoint_ise_to_rank = astc_helpers::g_dequant_tables.get_endpoint_tab(log_blk.m_endpoint_ise_range).m_ISE_to_rank; + + const uint32_t reuse_delta_index = dec.decode_sym(endpoint_reuse_delta_model); + const int reuse_bx = (int)bx + basist::astc_6x6_hdr::g_reuse_xy_deltas[reuse_delta_index].m_x; + const int reuse_by = (int)by + basist::astc_6x6_hdr::g_reuse_xy_deltas[reuse_delta_index].m_y; + + if ((reuse_bx < 0) || (reuse_by < 0) || (reuse_bx >= (int)num_blocks_x) || (reuse_by >= (int)num_blocks_y)) + { + BASISU_DEVEL_ERROR("astc_ldr_t::decompress_image::Invalid reuse delta\n"); + return false; + } + + const astc_helpers::log_astc_block* pEndpoint_pred_log_blk = &log_blocks(reuse_bx, reuse_by & 7); + if (pEndpoint_pred_log_blk->m_solid_color_flag_ldr) + { + BASISU_DEVEL_ERROR("astc_ldr_t::decompress_image::Invalid reuse delta\n"); + return false; + } + + uint32_t bc_model_index = 0; + if (pLeft_state) + bc_model_index = pLeft_state->m_first_endpoint_uses_bc; + else + bc_model_index = 1; + + if (pUpper_state) + bc_model_index |= pUpper_state->m_first_endpoint_uses_bc ? 
2 : 0; + else + bc_model_index |= 2; + + if (!pEndpoint_pred_log_blk) + { + BASISU_DEVEL_ERROR("astc_ldr_t::decompress_image::Can't use endpoint DPCM here\n"); + return false; + } + + bool endpoints_use_bc[astc_helpers::MAX_PARTITIONS] = { false }; + + if (actual_cem_supports_bc) + { + for (uint32_t part_iter = 0; part_iter < log_blk.m_num_partitions; part_iter++) + { + endpoints_use_bc[part_iter] = dec.decode_bit(endpoints_use_bc_models[bc_model_index]); + } + } + + uint8_t predicted_endpoints[astc_helpers::MAX_PARTITIONS][astc_helpers::MAX_CEM_ENDPOINT_VALS] = { }; + + for (uint32_t part_iter = 0; part_iter < log_blk.m_num_partitions; part_iter++) + { + const bool always_repack_flag = false; + bool blue_contraction_clamped_flag = false, base_ofs_clamped_flag = false; + + // Mini-CEM encoder, to cross CEM domains. + bool conv_status = convert_endpoints_across_cems( + pEndpoint_pred_log_blk->m_color_endpoint_modes[0], pEndpoint_pred_log_blk->m_endpoint_ise_range, pEndpoint_pred_log_blk->m_endpoints, + log_blk.m_color_endpoint_modes[0], log_blk.m_endpoint_ise_range, predicted_endpoints[part_iter], + always_repack_flag, + endpoints_use_bc[part_iter], false, + blue_contraction_clamped_flag, base_ofs_clamped_flag); + + if (!conv_status) + { + BASISU_DEVEL_ERROR("astc_ldr_t::decompress_image::Failed predicting endpoints\n"); + return false; + } + } + + auto& dpcm_model = dpcm_endpoint_models[log_blk.m_endpoint_ise_range - astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE]; + + for (uint32_t part_iter = 0; part_iter < tm.m_num_parts; part_iter++) + { + for (uint32_t val_iter = 0; val_iter < total_endpoint_vals; val_iter++) + { + const uint32_t endpoint_idx = part_iter * total_endpoint_vals + val_iter; + + int delta = (uint8_t)dec.decode_sym(dpcm_model); + int e_val = basisu::imod(delta + endpoint_ise_to_rank[predicted_endpoints[part_iter][val_iter]], num_endpoint_levels); + + log_blk.m_endpoints[endpoint_idx] = endpoint_rank_to_ise[e_val]; + + } // val_iter + + } // part_iter 
+ + } // if (!used_dpcm_endpoints_flag) + + if (actual_cem_supports_bc) + { + new_prev_state.m_first_endpoint_uses_bc = astc_helpers::used_blue_contraction(actual_cem, log_blk.m_endpoints, log_blk.m_endpoint_ise_range); + } + + } // if (mode_index != cMODE_RAW) + + // ----------------------------------- Decode weights + if (tm_index >= encoder_trial_modes.size()) + { + BASISU_DEVEL_ERROR("astc_ldr_t::decompress_image: invalid tm_index, decompression failed (file corrupt)\n"); + return false; + } + + const trial_mode& tm = encoder_trial_modes[tm_index]; + + const uint32_t total_planes = (tm.m_ccs_index >= 0) ? 2 : 1; + const uint32_t total_weights = tm.m_grid_width * tm.m_grid_height; + + uint32_t use_dct_model_index = 0; + if (use_dct) + { + if (pLeft_state) + use_dct_model_index = pLeft_state->m_used_weight_dct; + else + use_dct_model_index = 1; + + if (pUpper_state) + use_dct_model_index |= pUpper_state->m_used_weight_dct ? 2 : 0; + else + use_dct_model_index |= 2; + } + + bool block_used_dct = false; + if (use_dct) + block_used_dct = dec.decode_bit(use_dct_model[use_dct_model_index]); + + if (use_fast_decoding) + { + if (block_used_dct) + { + new_prev_state.m_used_weight_dct = true; + + const astc_block_grid_data* pGrid_data = find_astc_block_grid_data(block_width, block_height, log_blk.m_grid_width, log_blk.m_grid_height); + + const uint32_t num_dc_levels = grid_weight_dct::get_num_weight_dc_levels(log_blk.m_weight_ise_range); + syms.m_num_dc_levels = num_dc_levels; + + for (uint32_t plane_iter = 0; plane_iter < total_planes; plane_iter++) + { + syms.m_coeffs.resize(0); + + if (num_dc_levels == DCT_MEAN_LEVELS1) + syms.m_dc_sym = mean1_bytes.get_bits8(); + else + syms.m_dc_sym = mean0_bits.get_bits4(); + + uint32_t cur_zig_ofs = 1; + + while (cur_zig_ofs < total_weights) + { + uint32_t run_len = run_bytes.get_bits8(); + if (run_len == DCT_RUN_LEN_EOB_SYM_INDEX) + break; + + cur_zig_ofs += run_len; + + if (cur_zig_ofs >= total_weights) + { + 
BASISU_DEVEL_ERROR("astc_ldr_t::decompress_image::DCT decode error\n"); + return false; + } + + int sign = sign_bits.get_bits1(); + int coeff = coeff_bytes.get_bits8() + 1; + + if (sign) + coeff = -coeff; + + syms.m_coeffs.push_back(dct_syms::coeff(basisu::safe_cast_uint16(run_len), basisu::safe_cast_int16(coeff))); + cur_zig_ofs++; + } + + // weight grid IDCT + if (!grid_dct.decode_block_weights(dct_q, plane_iter, log_blk, nullptr, pGrid_data, nullptr, dct_work, &syms)) + { + BASISU_DEVEL_ERROR("astc_ldr_t::decompress_image::DCT decode failed\n"); + return false; + } + + } // plane_iter + } + else + { + // Weight grid DPCM (no dependency on other blocks, or between planes, for determinism even when IDCT is used) + const uint32_t num_weight_levels = astc_helpers::get_ise_levels(log_blk.m_weight_ise_range); + const auto& weight_rank_to_ise = astc_helpers::g_dequant_tables.get_weight_tab(log_blk.m_weight_ise_range).m_rank_to_ISE; + + for (uint32_t plane_iter = 0; plane_iter < total_planes; plane_iter++) + { + int prev_w = num_weight_levels / 2; + + if (num_weight_levels <= 4) + { + for (uint32_t weight_iter = 0; weight_iter < total_weights; weight_iter++) + { + uint32_t r = weight2_bits.get_bits2(); + + uint32_t w = r; + w = basisu::imod(prev_w + r, num_weight_levels); + + prev_w = w; + + log_blk.m_weights[plane_iter + weight_iter * total_planes] = (uint8_t)weight_rank_to_ise[w]; + + } // weight_iter + } + else if (num_weight_levels <= 8) + { + for (uint32_t weight_iter = 0; weight_iter < total_weights; weight_iter++) + { + uint32_t r = weight3_bits.get_bits4(); + + uint32_t w = r; + w = basisu::imod(prev_w + r, num_weight_levels); + + prev_w = w; + + log_blk.m_weights[plane_iter + weight_iter * total_planes] = (uint8_t)weight_rank_to_ise[w]; + + } // weight_iter + } + else if (num_weight_levels <= 16) + { + for (uint32_t weight_iter = 0; weight_iter < total_weights; weight_iter++) + { + uint32_t r = weight4_bits.get_bits4(); + + uint32_t w = r; + w = 
basisu::imod(prev_w + r, num_weight_levels); + + prev_w = w; + + log_blk.m_weights[plane_iter + weight_iter * total_planes] = (uint8_t)weight_rank_to_ise[w]; + + } // weight_iter + } + else + { + for (uint32_t weight_iter = 0; weight_iter < total_weights; weight_iter++) + { + uint32_t r = weight8_bytes.get_bits8(); + + uint32_t w = r; + w = basisu::imod(prev_w + r, num_weight_levels); + + prev_w = w; + + log_blk.m_weights[plane_iter + weight_iter * total_planes] = (uint8_t)weight_rank_to_ise[w]; + + } // weight_iter + } + + } // plane_iter + + } // if (block_used_dct) + } + else + { + if (block_used_dct) + { + new_prev_state.m_used_weight_dct = true; + + const astc_block_grid_data* pGrid_data = find_astc_block_grid_data(block_width, block_height, log_blk.m_grid_width, log_blk.m_grid_height); + + const uint32_t num_dc_levels = grid_weight_dct::get_num_weight_dc_levels(log_blk.m_weight_ise_range); + syms.m_num_dc_levels = num_dc_levels; + + for (uint32_t plane_iter = 0; plane_iter < total_planes; plane_iter++) + { + syms.m_coeffs.resize(0); + + syms.m_dc_sym = dec.decode_sym(weight_mean_models[(num_dc_levels == DCT_MEAN_LEVELS1) ? 
1 : 0]); + + uint32_t cur_zig_ofs = 1; + + while (cur_zig_ofs < total_weights) + { + uint32_t run_len = dec.decode_sym(dct_run_len_model); + if (run_len == DCT_RUN_LEN_EOB_SYM_INDEX) + break; + + cur_zig_ofs += run_len; + + if (cur_zig_ofs >= total_weights) + { + BASISU_DEVEL_ERROR("astc_ldr_t::decompress_image::DCT decode error\n"); + return false; + } + + int sign = dec.get_bit(); + int coeff = dec.decode_sym(dct_coeff_mag) + 1; + + if (sign) + coeff = -coeff; + + syms.m_coeffs.push_back(dct_syms::coeff(basisu::safe_cast_uint16(run_len), basisu::safe_cast_int16(coeff))); + cur_zig_ofs++; + } + + // weight grid IDCT + if (!grid_dct.decode_block_weights(dct_q, plane_iter, log_blk, nullptr, pGrid_data, nullptr, dct_work, &syms)) + { + BASISU_DEVEL_ERROR("astc_ldr_t::decompress_image::DCT decode failed\n"); + return false; + } + + } // plane_iter + } + else + { + // Weight grid DPCM (no dependency on other blocks, or between planes, for determinism even when IDCT is used) + const uint32_t num_weight_levels = astc_helpers::get_ise_levels(log_blk.m_weight_ise_range); + const auto& weight_rank_to_ise = astc_helpers::g_dequant_tables.get_weight_tab(log_blk.m_weight_ise_range).m_rank_to_ISE; + + for (uint32_t plane_iter = 0; plane_iter < total_planes; plane_iter++) + { + int prev_w = num_weight_levels / 2; + + for (uint32_t weight_iter = 0; weight_iter < total_weights; weight_iter++) + { + uint32_t r = dec.decode_sym(raw_weight_models[log_blk.m_weight_ise_range - astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE]); + + uint32_t w = r; + w = basisu::imod(prev_w + r, num_weight_levels); + + prev_w = w; + + log_blk.m_weights[plane_iter + weight_iter * total_planes] = (uint8_t)weight_rank_to_ise[w]; + + } // weight_iter + + } // plane_iter + } + + } // use_fast_decoding + + if (pBlock_callback) + { + if (!(*pBlock_callback)(bx, by, log_blk, pBlock_callback_data)) + return false; + } + + break; + } + default: + { + BASISU_DEVEL_ERROR("astc_ldr_t::decompress_image::Invalid mode\n"); + 
return false; + } + } + + } // bx + + assert(!cur_run_len); + + } // by + + //if (debug_output) + // debug_printf("Decomp time: {3.3}ms\n", itm.get_elapsed_ms()); + + const uint32_t final_sync_marker = dec.get_bits(FINAL_SYNC_MARKER_BITS); + if (final_sync_marker != FINAL_SYNC_MARKER) + { + BASISU_DEVEL_ERROR("astc_ldr_t::decompress_image::Final sync failed\n"); + return false; + } + + if (debug_output) + basisu::debug_printf("astc_ldr_t::decompress_image: Decode sync OK\n"); + + return true; + } + +} // namespace astc_ldr_t + +#endif // #if BASISD_SUPPORT_XUASTC + +#if BASISD_SUPPORT_XUASTC + +namespace bc7u +{ + //------------------------------------------------------------------------------------------------ + // BC7 mode 0-7 decompression. + // Instead of one monster routine to unpack all the BC7 modes, we're lumping the 3 subset, 2 subset, 1 subset, and dual plane modes together into simple shared routines. + + static inline uint32_t bc7_dequant(uint32_t val, uint32_t pbit, uint32_t val_bits) { assert(val < (1U << val_bits)); assert(pbit < 2); assert(val_bits >= 4 && val_bits <= 8); const uint32_t total_bits = val_bits + 1; val = (val << 1) | pbit; val <<= (8 - total_bits); val |= (val >> total_bits); assert(val <= 255); return val; } + static inline uint32_t bc7_dequant(uint32_t val, uint32_t val_bits) { assert(val < (1U << val_bits)); assert(val_bits >= 4 && val_bits <= 8); val <<= (8 - val_bits); val |= (val >> val_bits); assert(val <= 255); return val; } + + static inline uint32_t bc7_interp2(uint32_t l, uint32_t h, uint32_t w) { assert(w < 4); return (l * (64 - basist::g_bc7_weights2[w]) + h * basist::g_bc7_weights2[w] + 32) >> 6; } + static inline uint32_t bc7_interp3(uint32_t l, uint32_t h, uint32_t w) { assert(w < 8); return (l * (64 - basist::g_bc7_weights3[w]) + h * basist::g_bc7_weights3[w] + 32) >> 6; } + static inline uint32_t bc7_interp4(uint32_t l, uint32_t h, uint32_t w) { assert(w < 16); return (l * (64 - basist::g_bc7_weights4[w]) + h * 
basist::g_bc7_weights4[w] + 32) >> 6; } + static inline uint32_t bc7_interp(uint32_t l, uint32_t h, uint32_t w, uint32_t bits) + { + assert(l <= 255 && h <= 255); + switch (bits) + { + case 2: return bc7_interp2(l, h, w); + case 3: return bc7_interp3(l, h, w); + case 4: return bc7_interp4(l, h, w); + default: + break; + } + return 0; + } + + inline uint32_t read_bits32(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize) + { + assert(codesize <= 32); + uint32_t bits = 0; + uint32_t total_bits = 0; + + while (total_bits < codesize) + { + uint32_t byte_bit_offset = bit_offset & 7; + uint32_t bits_to_read = basisu::minimum(codesize - total_bits, 8 - byte_bit_offset); + + uint32_t byte_bits = pBuf[bit_offset >> 3] >> byte_bit_offset; + byte_bits &= ((1 << bits_to_read) - 1); + + bits |= (byte_bits << total_bits); + + total_bits += bits_to_read; + bit_offset += bits_to_read; + } + + return bits; + } + + bool unpack_bc7_mode0_2(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) + { + //const uint32_t SUBSETS = 3; + const uint32_t ENDPOINTS = 6; + const uint32_t COMPS = 3; + const uint32_t WEIGHT_BITS = (mode == 0) ? 3 : 2; + const uint32_t ENDPOINT_BITS = (mode == 0) ? 4 : 5; + const uint32_t PBITS = (mode == 0) ? 6 : 0; + const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; + + uint32_t bit_offset = 0; + const uint8_t* pBuf = static_cast<const uint8_t*>(pBlock_bits); + + if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false; + + const uint32_t part = read_bits32(pBuf, bit_offset, (mode == 0) ? 
4 : 6); + + color_rgba endpoints[ENDPOINTS]; + for (uint32_t c = 0; c < COMPS; c++) + for (uint32_t e = 0; e < ENDPOINTS; e++) + endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS); + + uint32_t pbits[6]; + for (uint32_t p = 0; p < PBITS; p++) + pbits[p] = read_bits32(pBuf, bit_offset, 1); + + uint32_t weights[16]; + for (uint32_t i = 0; i < 16; i++) + weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == basist::g_bc7_table_anchor_index_third_subset_1[part]) || (i == basist::g_bc7_table_anchor_index_third_subset_2[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS); + + assert(bit_offset == 128); + + for (uint32_t e = 0; e < ENDPOINTS; e++) + for (uint32_t c = 0; c < 4; c++) + endpoints[e][c] = (uint8_t)((c == 3) ? 255 : (PBITS ? bc7_dequant(endpoints[e][c], pbits[e], ENDPOINT_BITS) : bc7_dequant(endpoints[e][c], ENDPOINT_BITS))); + + color_rgba block_colors[3][8]; + for (uint32_t s = 0; s < 3; s++) + for (uint32_t i = 0; i < WEIGHT_VALS; i++) + { + for (uint32_t c = 0; c < 3; c++) + block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS); + block_colors[s][i][3] = 255; + } + + for (uint32_t i = 0; i < 16; i++) + pPixels[i] = block_colors[basist::g_bc7_partition3[part * 16 + i]][weights[i]]; + + return true; + } + + bool unpack_bc7_mode1_3_7(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) + { + //const uint32_t SUBSETS = 2; + const uint32_t ENDPOINTS = 4; + const uint32_t COMPS = (mode == 7) ? 4 : 3; + const uint32_t WEIGHT_BITS = (mode == 1) ? 3 : 2; + const uint32_t ENDPOINT_BITS = (mode == 7) ? 5 : ((mode == 1) ? 6 : 7); + const uint32_t PBITS = (mode == 1) ? 2 : 4; + const uint32_t SHARED_PBITS = (mode == 1) ? 
true : false; + const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; + + uint32_t bit_offset = 0; + const uint8_t* pBuf = static_cast<const uint8_t*>(pBlock_bits); + + if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false; + + const uint32_t part = read_bits32(pBuf, bit_offset, 6); + + color_rgba endpoints[ENDPOINTS]; + for (uint32_t c = 0; c < COMPS; c++) + for (uint32_t e = 0; e < ENDPOINTS; e++) + endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS); + + uint32_t pbits[4]; + for (uint32_t p = 0; p < PBITS; p++) + pbits[p] = read_bits32(pBuf, bit_offset, 1); + + uint32_t weights[16]; + for (uint32_t i = 0; i < 16; i++) + weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == basist::g_bc7_table_anchor_index_second_subset[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS); + + assert(bit_offset == 128); + + for (uint32_t e = 0; e < ENDPOINTS; e++) + for (uint32_t c = 0; c < 4; c++) + endpoints[e][c] = (uint8_t)((c == ((mode == 7U) ? 4U : 3U)) ? 255 : bc7_dequant(endpoints[e][c], pbits[SHARED_PBITS ? (e >> 1) : e], ENDPOINT_BITS)); + + color_rgba block_colors[2][8]; + for (uint32_t s = 0; s < 2; s++) + for (uint32_t i = 0; i < WEIGHT_VALS; i++) + { + for (uint32_t c = 0; c < COMPS; c++) + block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS); + block_colors[s][i][3] = (COMPS == 3) ? 255 : block_colors[s][i][3]; + } + + for (uint32_t i = 0; i < 16; i++) + pPixels[i] = block_colors[basist::g_bc7_partition2[part * 16 + i]][weights[i]]; + + return true; + } + + bool unpack_bc7_mode4_5(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) + { + const uint32_t ENDPOINTS = 2; + const uint32_t COMPS = 4; + const uint32_t WEIGHT_BITS = 2; + const uint32_t A_WEIGHT_BITS = (mode == 4) ? 3 : 2; + const uint32_t ENDPOINT_BITS = (mode == 4) ? 5 : 7; + const uint32_t A_ENDPOINT_BITS = (mode == 4) ? 
6 : 8; + //const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; + //const uint32_t A_WEIGHT_VALS = 1 << A_WEIGHT_BITS; + + uint32_t bit_offset = 0; + const uint8_t* pBuf = static_cast<const uint8_t*>(pBlock_bits); + + if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false; + + const uint32_t comp_rot = read_bits32(pBuf, bit_offset, 2); + const uint32_t index_mode = (mode == 4) ? read_bits32(pBuf, bit_offset, 1) : 0; + + color_rgba endpoints[ENDPOINTS]; + for (uint32_t c = 0; c < COMPS; c++) + for (uint32_t e = 0; e < ENDPOINTS; e++) + endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, (c == 3) ? A_ENDPOINT_BITS : ENDPOINT_BITS); + + const uint32_t weight_bits[2] = { index_mode ? A_WEIGHT_BITS : WEIGHT_BITS, index_mode ? WEIGHT_BITS : A_WEIGHT_BITS }; + + uint32_t weights[16], a_weights[16]; + + for (uint32_t i = 0; i < 16; i++) + (index_mode ? a_weights : weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[index_mode] - ((!i) ? 1 : 0)); + + for (uint32_t i = 0; i < 16; i++) + (index_mode ? weights : a_weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[1 - index_mode] - ((!i) ? 1 : 0)); + + assert(bit_offset == 128); + + for (uint32_t e = 0; e < ENDPOINTS; e++) + for (uint32_t c = 0; c < 4; c++) + endpoints[e][c] = (uint8_t)bc7_dequant(endpoints[e][c], (c == 3) ? 
A_ENDPOINT_BITS : ENDPOINT_BITS); + + color_rgba block_colors[8]; + for (uint32_t i = 0; i < (1U << weight_bits[0]); i++) + for (uint32_t c = 0; c < 3; c++) + block_colors[i][c] = (uint8_t)bc7_interp(endpoints[0][c], endpoints[1][c], i, weight_bits[0]); + + for (uint32_t i = 0; i < (1U << weight_bits[1]); i++) + block_colors[i][3] = (uint8_t)bc7_interp(endpoints[0][3], endpoints[1][3], i, weight_bits[1]); + + for (uint32_t i = 0; i < 16; i++) + { + pPixels[i] = block_colors[weights[i]]; + pPixels[i].a = block_colors[a_weights[i]].a; + if (comp_rot >= 1) + std::swap(pPixels[i].a, pPixels[i].m_comps[comp_rot - 1]); + } + + return true; + } + + struct bc7_mode_6 + { + struct + { + uint64_t m_mode : 7; + uint64_t m_r0 : 7; + uint64_t m_r1 : 7; + uint64_t m_g0 : 7; + uint64_t m_g1 : 7; + uint64_t m_b0 : 7; + uint64_t m_b1 : 7; + uint64_t m_a0 : 7; + uint64_t m_a1 : 7; + uint64_t m_p0 : 1; + } m_lo; + + union + { + struct + { + uint64_t m_p1 : 1; + uint64_t m_s00 : 3; + uint64_t m_s10 : 4; + uint64_t m_s20 : 4; + uint64_t m_s30 : 4; + + uint64_t m_s01 : 4; + uint64_t m_s11 : 4; + uint64_t m_s21 : 4; + uint64_t m_s31 : 4; + + uint64_t m_s02 : 4; + uint64_t m_s12 : 4; + uint64_t m_s22 : 4; + uint64_t m_s32 : 4; + + uint64_t m_s03 : 4; + uint64_t m_s13 : 4; + uint64_t m_s23 : 4; + uint64_t m_s33 : 4; + + } m_hi; + + uint64_t m_hi_bits; + }; + }; + + bool unpack_bc7_mode6(const void* pBlock_bits, color_rgba* pPixels) + { + static_assert(sizeof(bc7_mode_6) == 16, "sizeof(bc7_mode_6) == 16"); + + const bc7_mode_6& block = *static_cast<const bc7_mode_6*>(pBlock_bits); + + if (block.m_lo.m_mode != (1 << 6)) + return false; + + const uint32_t r0 = (uint32_t)((block.m_lo.m_r0 << 1) | block.m_lo.m_p0); + const uint32_t g0 = (uint32_t)((block.m_lo.m_g0 << 1) | block.m_lo.m_p0); + const uint32_t b0 = (uint32_t)((block.m_lo.m_b0 << 1) | block.m_lo.m_p0); + const uint32_t a0 = (uint32_t)((block.m_lo.m_a0 << 1) | block.m_lo.m_p0); + const uint32_t r1 = (uint32_t)((block.m_lo.m_r1 << 1) | block.m_hi.m_p1); 
+ const uint32_t g1 = (uint32_t)((block.m_lo.m_g1 << 1) | block.m_hi.m_p1); + const uint32_t b1 = (uint32_t)((block.m_lo.m_b1 << 1) | block.m_hi.m_p1); + const uint32_t a1 = (uint32_t)((block.m_lo.m_a1 << 1) | block.m_hi.m_p1); + + color_rgba vals[16]; + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t w = basist::g_bc7_weights4[i]; + const uint32_t iw = 64 - w; + vals[i].set_noclamp_rgba( + (r0 * iw + r1 * w + 32) >> 6, + (g0 * iw + g1 * w + 32) >> 6, + (b0 * iw + b1 * w + 32) >> 6, + (a0 * iw + a1 * w + 32) >> 6); + } + + pPixels[0] = vals[block.m_hi.m_s00]; + pPixels[1] = vals[block.m_hi.m_s10]; + pPixels[2] = vals[block.m_hi.m_s20]; + pPixels[3] = vals[block.m_hi.m_s30]; + + pPixels[4] = vals[block.m_hi.m_s01]; + pPixels[5] = vals[block.m_hi.m_s11]; + pPixels[6] = vals[block.m_hi.m_s21]; + pPixels[7] = vals[block.m_hi.m_s31]; + + pPixels[8] = vals[block.m_hi.m_s02]; + pPixels[9] = vals[block.m_hi.m_s12]; + pPixels[10] = vals[block.m_hi.m_s22]; + pPixels[11] = vals[block.m_hi.m_s32]; + + pPixels[12] = vals[block.m_hi.m_s03]; + pPixels[13] = vals[block.m_hi.m_s13]; + pPixels[14] = vals[block.m_hi.m_s23]; + pPixels[15] = vals[block.m_hi.m_s33]; + + return true; + } + + int determine_bc7_mode(const void* pBlock) + { + const uint32_t first_byte = static_cast<const uint8_t*>(pBlock)[0]; + + for (uint32_t mode = 0; mode <= 7; mode++) + { + if (first_byte & (1U << mode)) + return mode; + } + + return -1; + } + + int determine_bc7_mode_4_index_mode(const void* pBlock) + { + const uint32_t first_byte = static_cast<const uint8_t*>(pBlock)[0]; + + // check for mode 4 + if ((first_byte & 31) != 0b10000) + return -1; + + return (first_byte >> 7); + } + + int determine_bc7_mode_4_or_5_rotation(const void* pBlock) + { + const uint32_t first_byte = static_cast<const uint8_t*>(pBlock)[0]; + if ((first_byte & 31) == 0b10000) + { + // mode 4 + return (first_byte >> 5) & 3; + } + + if ((first_byte & 63) == 0b100000) + { + // mode 5 + return first_byte >> 6; + } + + return -1; + } + + bool unpack_bc7(const void* pBlock, 
color_rgba* pPixels) + { + const uint32_t first_byte = static_cast(pBlock)[0]; + + for (uint32_t mode = 0; mode <= 7; mode++) + { + if (first_byte & (1U << mode)) + { + switch (mode) + { + case 0: + case 2: + return unpack_bc7_mode0_2(mode, pBlock, pPixels); + case 1: + case 3: + case 7: + return unpack_bc7_mode1_3_7(mode, pBlock, pPixels); + case 4: + case 5: + return unpack_bc7_mode4_5(mode, pBlock, pPixels); + case 6: + return unpack_bc7_mode6(pBlock, pPixels); + default: + break; + } + } + } + + return false; + } +} + +// BASISU_BC7F_USE_SSE41 - only very minimally tested. Needs more testing and more variants. Only improves transcoding perf by ~10% in native so far. +#define BASISU_BC7F_USE_SSE41 (0) +#define BASISU_BC7F_PERF_STATS (0) + +namespace bc7f +{ + const uint32_t MAX_PATTERNS2_TO_CHECK = 64; + const uint32_t MAX_PATTERNS3_TO_CHECK = 64; + + const float UNIQUE_PBIT_DISCOUNT = .85f; + const float SHARED_PBIT_DISCOUNT = .95f; + + //static inline uint8_t mul_8(uint32_t v, uint32_t q) { v = v * q + 128; return (uint8_t)((v + (v >> 8)) >> 8); } + //static inline int mul_8bit(int a, int b) { int t = a * b + 128; return (t + (t >> 8)) >> 8; } + //static inline int lerp_8bit(int a, int b, int s) { assert(a >= 0 && a <= 255); assert(b >= 0 && b <= 255); assert(s >= 0 && s <= 255); return a + mul_8bit(b - a, s); } + + static int popcount32(uint32_t x) + { +#if defined(__EMSCRIPTEN__) || defined(__clang__) || defined(__GNUC__) + return __builtin_popcount(x); +#elif defined(_MSC_VER) + return __popcnt(x); +#else + int count = 0; + while (x) + { + x &= (x - 1); + ++count; + } + return count; +#endif + } + +#if BASISU_BC7F_PERF_STATS + // not thread safe (no need/for dev) + uint32_t g_total_rgb_calls; + uint32_t g_total_rgba_calls; + uint32_t g_total_solid_blocks; + + uint32_t g_total_trivial_mode6_blocks; + + uint32_t g_total_dp_valid_chans_rgb; + uint32_t g_total_dp_valid_chans_a; + uint32_t g_total_high_ortho_energy; + + uint32_t g_total_mode02_evals; + uint32_t 
g_total_mode02_bailouts; + + uint32_t g_total_mode13_evals; + uint32_t g_total_mode13_bailouts; + + uint32_t g_total_mode45_evals; + uint32_t g_total_mode45_bailouts; + + uint32_t g_total_mode7_evals; + uint32_t g_total_mode7_bailouts; +#endif + + inline int fast_roundf_pos_int(float x) + { + assert(x >= 0.0f); + return (int)(x + 0.5f); + } + + inline int fast_roundf_int(float x) + { + return (x >= 0.0f) ? (int)(x + 0.5f) : (int)(x - 0.5f); + } + + inline int fast_floorf_int(float x) + { + int xi = (int)x; // Truncate towards zero + return ((x < 0.0f) && (x != (float)xi)) ? (xi - 1) : xi; + } + + static inline uint32_t from_7(uint32_t v) + { + assert(v < 128); + return (v << 1) | (v >> 6); + } + + static inline uint32_t from_7(uint32_t v, uint32_t p) + { + assert((v < 128) && (p <= 1)); + return (v << 1) | p; + } + + static inline int to_7(int c8, int pbit) + { + assert((c8 >= 0) && (c8 <= 255) && (pbit >= 0) && (pbit <= 1)); + uint32_t e = (uint32_t(c8) + uint32_t(pbit ^ 1)) >> 1; + return basisu::minimum(127, e); + } + + static inline int to_7(int c8) + { + assert((c8 >= 0) && (c8 <= 255)); + return (c8 * 127 + 127) / 255; + } + + static inline int to_7(float c, int pbit) + { + assert((c >= 0) && (c <= 255.0f)); + return to_7(fast_roundf_pos_int(c), pbit); + } + + static inline int to_7_clamp(float c, int pbit) + { + return to_7(basisu::clamp(fast_roundf_int(c), 0, 255), pbit); + } + + static inline int to_5(int c8) + { + assert((c8 >= 0) && (c8 <= 255)); + return (c8 * 31 + 127) / 255; + } + + static inline int to_5_clamp(float c) + { + return basisu::clamp(fast_roundf_int(c * (31.0f / 255.0f)), 0, 31); + } + + static inline int to_6(int c8) + { + assert((c8 >= 0) && (c8 <= 255)); + return (c8 * 63 + 127) / 255; + } + + static inline int to_6(int c8, int pbit) + { + assert((c8 >= 0) && (c8 <= 255)); + assert((pbit == 0) || (pbit == 1)); + + int q7 = (c8 * 127 + 127) / 255; + + if ((q7 & 1) != pbit) + { + const int lhs = c8 * 127; + const int rhs = 255 * q7; + + 
if (lhs >= rhs) + { + q7 = (q7 < 127) ? (q7 + 1) : (q7 - 1); + } + else + { + q7 = (q7 > 0) ? (q7 - 1) : (q7 + 1); + } + } + + return q7 >> 1; + } + + static inline int to_6_clamp(float c, int pbit) + { + return to_6(basisu::clamp(fast_roundf_int(c), 0, 255), pbit); + } + + static inline uint32_t from_6(uint32_t v, uint32_t p) + { + assert((v < 64) && (p <= 1)); + v = (v << 1) | p; + v = (v << 1) | (v >> 6); + return v; + } + + static inline uint32_t from_4(uint32_t v, uint32_t p) + { + assert((v < 16) && (p <= 1)); + v = (v << 1) | p; + v = (v << 3) | (v >> 2); + return v; + } + + static inline uint32_t from_5(uint32_t v) + { + assert(v < 32); + v = (v << 3) | (v >> 2); + return v; + } + + static inline uint32_t from_5(uint32_t v, uint32_t p) + { + assert((v < 32) && (p <= 1)); + v = (v << 1) | p; + v = (v << 2) | (v >> 4); + return v; + } + + static inline uint32_t from_6(uint32_t v) + { + assert(v < 64); + v = (v << 2) | (v >> 4); + return v; + } + + static inline int to_5(int c8, int pbit) + { + assert((c8 >= 0) && (c8 <= 255)); + assert((pbit == 0) || (pbit == 1)); + + int q6 = (c8 * 63 + 127) / 255; + + if ((q6 & 1) != pbit) + { + const int lhs = c8 * 63; + const int rhs = 255 * q6; + + if (lhs >= rhs) + { + q6 = (q6 < 63) ? (q6 + 1) : (q6 - 1); + } + else + { + q6 = (q6 > 0) ? 
(q6 - 1) : (q6 + 1); + } + } + + return q6 >> 1; + } + +#if 0 + static inline int to_5(float c, int pbit) + { + assert((c >= 0.0f) && (c <= 255.0f)); + return to_5((int)fast_roundf_pos_int(c), pbit); + } +#endif + + static inline int to_5_clamp(float c, uint32_t pbit) + { + return to_5(basisu::clamp(fast_roundf_int(c), 0, 255), pbit); + } + + //static inline uint32_t bc7_interp(uint32_t l, uint32_t h, uint32_t w) { assert(w <= 64); return (l * (64 - w) + h * w + 32) >> 6; } + //static inline uint32_t bc7_interp2(uint32_t l, uint32_t h, uint32_t w) { assert(w <= 64); int d = h - l; return (int)l + ((d * (int)w + 32) >> 6); } + //static inline uint32_t bc7_interp3(int l, int d, uint32_t w) { assert(w <= 64); return l + ((d * (int)w + 32) >> 6); } + + static vec4F g_bc7_2bit_ls_tab[4]; + static vec4F g_bc7_3bit_ls_tab[8]; + static vec4F g_bc7_4bit_ls_tab[16]; + static uint16_t g_bc7_part2_bitmasks[64]; + static uint32_t g_part3_bitmasks[64]; + + void init() + { + for (uint32_t i = 0; i < 4; i++) + { + float w = (float)basist::g_bc7_weights2[i] * (1.0f / 64.0f); + g_bc7_2bit_ls_tab[i].set(w * w, (1.0f - w) * w, (1.0f - w) * (1.0f - w), w); + } + + for (uint32_t i = 0; i < 8; i++) + { + float w = (float)basist::g_bc7_weights3[i] * (1.0f / 64.0f); + g_bc7_3bit_ls_tab[i].set(w * w, (1.0f - w) * w, (1.0f - w) * (1.0f - w), w); + } + + for (uint32_t i = 0; i < 16; i++) + { + float w = (float)basist::g_bc7_weights4[i] * (1.0f / 64.0f); + g_bc7_4bit_ls_tab[i].set(w * w, (1.0f - w) * w, (1.0f - w) * (1.0f - w), w); + } + + for (uint32_t i = 0; i < 64; i++) + { + uint16_t y = 0; + + for (uint32_t x = 0; x < 16; x++) + y |= (g_bc7_partition2[i * 16 + x] << x); + + g_bc7_part2_bitmasks[i] = y; + } + + for (uint32_t i = 0; i < 64; i++) + { + const uint8_t* pPat = &g_bc7_partition3[i * 16]; + + for (uint32_t j = 0; j < 16; j++) + { + const uint32_t s = pPat[j]; + + if (s == 0) + g_part3_bitmasks[i] |= (1 << j); + else if (s == 1) + g_part3_bitmasks[i] |= (0x10000 << j); + } + } + } 
+ + void encode_mode0_rgb_block(uint8_t* pBlock, uint32_t part_id, // 3 subsets, 4-bits part ID + uint32_t lr[3], uint32_t lg[3], uint32_t lb[3], // 4 bit endpoints + uint32_t hr[3], uint32_t hg[3], uint32_t hb[3], + uint32_t p[6], + const uint8_t* pWeights) // 3-bit weights + { + assert(part_id < 16); + assert((lr[0] | lr[1] | lr[2] | lg[0] | lg[1] | lg[2] | lb[0] | lb[1] | lb[2]) <= 15); + assert((hr[0] | hr[1] | hr[2] | hg[0] | hg[1] | hg[2] | hb[0] | hb[1] | hb[2]) <= 15); + assert((p[0] | p[1] | p[2] | p[3] | p[4] | p[5]) <= 1); + + const uint8_t* pPart_map = &g_bc7_partition3[part_id * 16]; + const uint32_t anchor_index0 = g_bc7_table_anchor_index_third_subset_1[part_id]; + const uint32_t anchor_index1 = g_bc7_table_anchor_index_third_subset_2[part_id]; + + uint32_t weight_inv[3] = { 0, 0, 0 }; + + if (pWeights[0] & 4) + { + std::swap(lr[0], hr[0]); + std::swap(lg[0], hg[0]); + std::swap(lb[0], hb[0]); + std::swap(p[0], p[1]); + weight_inv[0] = 7; + } + + if (pWeights[anchor_index0] & 4) + { + std::swap(lr[1], hr[1]); + std::swap(lg[1], hg[1]); + std::swap(lb[1], hb[1]); + std::swap(p[2], p[3]); + weight_inv[1] = 7; + } + + if (pWeights[anchor_index1] & 4) + { + std::swap(lr[2], hr[2]); + std::swap(lg[2], hg[2]); + std::swap(lb[2], hb[2]); + std::swap(p[4], p[5]); + weight_inv[2] = 7; + } + + uint64_t low = 1ULL | ((part_id) << 1) | + ((lr[0]) << 5) | ((hr[0]) << 9) | + ((lr[1]) << 13) | ((hr[1]) << 17) | + ((lr[2]) << 21) | ((hr[2]) << 25) | + (uint64_t(lg[0]) << 29) | (uint64_t(hg[0]) << 33) | + (uint64_t(lg[1]) << 37) | (uint64_t(hg[1]) << 41) | + (uint64_t(lg[2]) << 45) | (uint64_t(hg[2]) << 49) | + (uint64_t(lb[0]) << 53) | (uint64_t(hb[0]) << 57) | + (uint64_t(lb[1]) << 61); + + pBlock[0] = (uint8_t)low; + pBlock[1] = (uint8_t)(low >> 8); + pBlock[2] = (uint8_t)(low >> 16); + pBlock[3] = (uint8_t)(low >> 24); + pBlock[4] = (uint8_t)(low >> 32); + pBlock[5] = (uint8_t)(low >> 40); + pBlock[6] = (uint8_t)(low >> 48); + pBlock[7] = (uint8_t)(low >> 56); + 
+ uint64_t high = (lb[1] >> 3) | ((hb[1]) << 1) | ((lb[2]) << 5) | ((hb[2]) << 9) | + ((p[0]) << 13) | ((p[1]) << 14) | ((p[2]) << 15) | ((p[3]) << 16) | ((p[4]) << 17) | ((p[5]) << 18); + + uint32_t ofs = 19; + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t subset_index = pPart_map[i]; + uint64_t w = pWeights[i] ^ weight_inv[subset_index]; + +#ifdef _DEBUG + assert(w <= 7); + if ((i == 0) || (i == anchor_index0) || (i == anchor_index1)) + { + assert((w & 4) == 0); + } +#endif + + high |= (w << ofs); + ofs += (3 - ((i == 0) || (i == anchor_index0) || (i == anchor_index1))); + } + assert(64 == ofs); + + pBlock[8] = (uint8_t)high; + pBlock[9] = (uint8_t)(high >> 8); + pBlock[10] = (uint8_t)(high >> 16); + pBlock[11] = (uint8_t)(high >> 24); + pBlock[12] = (uint8_t)(high >> 32); + pBlock[13] = (uint8_t)(high >> 40); + pBlock[14] = (uint8_t)(high >> 48); + pBlock[15] = (uint8_t)(high >> 56); + } + + void encode_mode1_rgb_block(uint8_t* pBlock, uint32_t part_id, // 2 subsets, 6-bits part ID + uint32_t lr[2], uint32_t lg[2], uint32_t lb[2], // 6-bit endpoints, 2 shared pbits + uint32_t hr[2], uint32_t hg[2], uint32_t hb[2], + uint32_t p0, uint32_t p1, + const uint8_t* pWeights) // 3-bit weights + { + assert(part_id < 64); + assert((lr[0] | lr[1] | lg[0] | lg[1] | lb[0] | lb[1]) <= 63); + assert((hr[0] | hr[1] | hg[0] | hg[1] | hb[0] | hb[1]) <= 63); + assert((p0 | p1) <= 1); + + const uint8_t* pPart_map = &g_bc7_partition2[part_id * 16]; + const uint32_t anchor_index = g_bc7_table_anchor_index_second_subset[part_id]; + + uint32_t weight_inv[2] = { 0, 0 }; + if (pWeights[0] & 4) + { + std::swap(lr[0], hr[0]); + std::swap(lg[0], hg[0]); + std::swap(lb[0], hb[0]); + weight_inv[0] = 7; + } + + if (pWeights[anchor_index] & 4) + { + std::swap(lr[1], hr[1]); + std::swap(lg[1], hg[1]); + std::swap(lb[1], hb[1]); + weight_inv[1] = 7; + } + + pBlock[0] = (uint8_t)(0b10 | (part_id << 2)); + + uint64_t x = lr[0] | (hr[0] << (6 * 1)); + x |= (lr[1] << (6 * 2)) | (hr[1] << (6 
* 3)); + + x |= (lg[0] << (6 * 4)) | (uint64_t(hg[0]) << (6 * 5)); + x |= (uint64_t(lg[1]) << (6 * 6)) | (uint64_t(hg[1]) << (6 * 7)); + + x |= (uint64_t(lb[0]) << (6 * 8)) | (uint64_t(hb[0]) << (6 * 9)); + x |= (uint64_t(lb[1]) << (6 * 10)); + + // 11*6=66 bits total, write first 64 + + pBlock[1] = (uint8_t)x; + pBlock[2] = (uint8_t)(x >> 8); + pBlock[3] = (uint8_t)(x >> 16); + pBlock[4] = (uint8_t)(x >> 24); + + pBlock[5] = (uint8_t)(x >> 32); + pBlock[6] = (uint8_t)(x >> 40); + pBlock[7] = (uint8_t)(x >> 48); + pBlock[8] = (uint8_t)(x >> 56); + + pBlock[9] = (uint8_t)((lb[1] >> 4) | (hb[1] << 2)); + + uint64_t y = p0 | (p1 << 1); + uint32_t ofs = 2; + + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t subset_index = pPart_map[i]; + + uint64_t w = pWeights[i] ^ weight_inv[subset_index]; + +#ifdef _DEBUG + assert(w <= 7); + if ((i == 0) || (i == anchor_index)) + { + assert((w & 4) == 0); + } +#endif + y |= (w << ofs); + + ofs += (3 - ((i == 0) || (i == anchor_index))); + } + assert(48 == ofs); + + pBlock[10] = (uint8_t)y; + pBlock[11] = (uint8_t)(y >> 8); + pBlock[12] = (uint8_t)(y >> 16); + pBlock[13] = (uint8_t)(y >> 24); + pBlock[14] = (uint8_t)(y >> 32); + pBlock[15] = (uint8_t)(y >> 40); + } + + void encode_mode2_rgb_block(uint8_t* pBlock, uint32_t part_id, // 3 subsets, 6-bits part ID + uint32_t lr[3], uint32_t lg[3], uint32_t lb[3], // 5 bit endpoints, no pbits + uint32_t hr[3], uint32_t hg[3], uint32_t hb[3], + const uint8_t* pWeights) // 2-bit weights + { + assert(part_id < 64); + assert((lr[0] | lr[1] | lr[2] | lg[0] | lg[1] | lg[2] | lb[0] | lb[1] | lb[2]) <= 31); + assert((hr[0] | hr[1] | hr[2] | hg[0] | hg[1] | hg[2] | hb[0] | hb[1] | hb[2]) <= 31); + + const uint8_t* pPart_map = &g_bc7_partition3[part_id * 16]; + + uint32_t weight_inv[3] = { 0 }; + if (pWeights[0] & 2) + { + std::swap(lr[0], hr[0]); + std::swap(lg[0], hg[0]); + std::swap(lb[0], hb[0]); + weight_inv[0] = 3; + } + + const uint32_t anchor_index0 = 
g_bc7_table_anchor_index_third_subset_1[part_id]; + if (pWeights[anchor_index0] & 2) + { + std::swap(lr[1], hr[1]); + std::swap(lg[1], hg[1]); + std::swap(lb[1], hb[1]); + weight_inv[1] = 3; + } + + const uint32_t anchor_index1 = g_bc7_table_anchor_index_third_subset_2[part_id]; + if (pWeights[anchor_index1] & 2) + { + std::swap(lr[2], hr[2]); + std::swap(lg[2], hg[2]); + std::swap(lb[2], hb[2]); + weight_inv[2] = 3; + } + + uint64_t v = 0b100 | (part_id << 3); + v |= (lr[0] << 9) | (hr[0] << (9 + 5 * 1)); + v |= (lr[1] << (9 + 5 * 2)) | (hr[1] << (9 + 5 * 3)); + v |= (uint64_t(lr[2]) << (9 + 5 * 4)) | (uint64_t(hr[2]) << (9 + 5 * 5)); + + v |= (uint64_t(lg[0]) << (9 + 5 * 6)) | (uint64_t(hg[0]) << (9 + 5 * 7)); + v |= (uint64_t(lg[1]) << (9 + 5 * 8)) | (uint64_t(hg[1]) << (9 + 5 * 9)); + v |= (uint64_t(lg[2]) << (9 + 5 * 10)); + + pBlock[0] = (uint8_t)v; + pBlock[1] = (uint8_t)(v >> 8); + pBlock[2] = (uint8_t)(v >> 16); + pBlock[3] = (uint8_t)(v >> 24); + pBlock[4] = (uint8_t)(v >> 32); + pBlock[5] = (uint8_t)(v >> 40); + pBlock[6] = (uint8_t)(v >> 48); + pBlock[7] = (uint8_t)(v >> 56); + + uint64_t v1 = hg[2]; + v1 |= (lb[0] << (5 * 1)) | (hb[0] << (5 * 2)); + v1 |= (lb[1] << (5 * 3)) | (hb[1] << (5 * 4)); + v1 |= (lb[2] << (5 * 5)) | (uint64_t(hb[2]) << (5 * 6)); + + pBlock[8] = (uint8_t)(v1); + pBlock[9] = (uint8_t)(v1 >> 8); + pBlock[10] = (uint8_t)(v1 >> 16); + pBlock[11] = (uint8_t)(v1 >> 24); + + v1 >>= 32; + + // 3 bits left over + uint32_t ofs = 3; + + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t subset_index = pPart_map[i]; + + uint64_t w = pWeights[i] ^ weight_inv[subset_index]; + +#ifdef _DEBUG + assert(w <= 3); + if ((i == 0) || (i == anchor_index0) || (i == anchor_index1)) + { + assert((w & 2) == 0); + } +#endif + v1 |= (w << ofs); + + ofs += (2 - ((i == 0) || (i == anchor_index0) || (i == anchor_index1))); + } + assert(32 == ofs); + + pBlock[12] = (uint8_t)v1; + pBlock[13] = (uint8_t)(v1 >> 8); + pBlock[14] = (uint8_t)(v1 >> 16); + 
pBlock[15] = (uint8_t)(v1 >> 24); + } + + void encode_mode3_rgb_block(uint8_t* pBlock, uint32_t part_id, // 2 subsets, 6-bits part ID + uint32_t lr[2], uint32_t lg[2], uint32_t lb[2], // 7-bit endpoints, 4 unique pbits + uint32_t hr[2], uint32_t hg[2], uint32_t hb[2], + uint32_t p[4], + const uint8_t* pWeights) // 2-bit weights + { + assert(part_id < 64); + assert((lr[0] | lr[1] | lg[0] | lg[1] | lb[0] | lb[1]) <= 127); + assert((hr[0] | hr[1] | hg[0] | hg[1] | hb[0] | hb[1]) <= 127); + assert((p[0] | p[1] | p[2] | p[3]) <= 1); + + const uint8_t* pPart_map = &g_bc7_partition2[part_id * 16]; + const uint32_t anchor_index = g_bc7_table_anchor_index_second_subset[part_id]; + + uint32_t weight_inv[2] = { 0, 0 }; + if (pWeights[0] & 2) + { + std::swap(lr[0], hr[0]); + std::swap(lg[0], hg[0]); + std::swap(lb[0], hb[0]); + std::swap(p[0], p[1]); + weight_inv[0] = 3; + } + + if (pWeights[anchor_index] & 2) + { + std::swap(lr[1], hr[1]); + std::swap(lg[1], hg[1]); + std::swap(lb[1], hb[1]); + std::swap(p[2], p[3]); + weight_inv[1] = 3; + } + + uint64_t x = 0b1000 | (part_id << 4) | + (lr[0] << 10) | (hr[0] << 17) | + (lr[1] << 24) | (uint64_t(hr[1]) << 31) | + (uint64_t(lg[0]) << 38) | (uint64_t(hg[0]) << 45) | + (uint64_t(lg[1]) << 52) | (uint64_t(hg[1]) << 59); + + pBlock[0] = (uint8_t)x; + pBlock[1] = (uint8_t)(x >> 8); + pBlock[2] = (uint8_t)(x >> 16); + pBlock[3] = (uint8_t)(x >> 24); + pBlock[4] = (uint8_t)(x >> 32); + pBlock[5] = (uint8_t)(x >> 40); + pBlock[6] = (uint8_t)(x >> 48); + pBlock[7] = (uint8_t)(x >> 56); + + // 2 bits of hg[1] remaining to pack + + uint64_t y = (hg[1] >> 5) | (lb[0] << 2) | (hb[0] << 9) | + (lb[1] << (9 + 7 * 1)) | (hb[1] << (9 + 7 * 2)) | + (uint64_t(p[0]) << (9 + 7 * 3)) | (uint64_t(p[1]) << (9 + 7 * 3 + 1)) | + (uint64_t(p[2]) << (9 + 7 * 3 + 2)) | (uint64_t(p[3]) << (9 + 7 * 3 + 3)); + + // now 34 total bits + + uint32_t ofs = 34; + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t subset_index = pPart_map[i]; + uint64_t w = 
pWeights[i] ^ weight_inv[subset_index]; + +#ifdef _DEBUG + assert(w <= 3); + if ((i == 0) || (i == anchor_index)) + { + assert((w & 2) == 0); + } +#endif + + y |= (w << ofs); + ofs += (2 - ((i == 0) || (i == anchor_index))); + } + assert(64 == ofs); + + pBlock[8] = (uint8_t)y; + pBlock[9] = (uint8_t)(y >> 8); + pBlock[10] = (uint8_t)(y >> 16); + pBlock[11] = (uint8_t)(y >> 24); + pBlock[12] = (uint8_t)(y >> 32); + pBlock[13] = (uint8_t)(y >> 40); + pBlock[14] = (uint8_t)(y >> 48); + pBlock[15] = (uint8_t)(y >> 56); + } + + void encode_mode4_rgba_block(uint8_t* pBlock, + uint32_t lr, uint32_t lg, uint32_t lb, uint32_t la, // 5-bit RGB endpoints, 6-bit A endpoints, no p-bits + uint32_t hr, uint32_t hg, uint32_t hb, uint32_t ha, + const uint8_t* pWeights0, const uint8_t* pWeights1, // weights0 are 3-bits (RGB), weights1 are 2-bits (alpha) + uint32_t rot_index, uint32_t index_flag) // rot_index=0 no rotation, if index_flag is 1, the 3-bit indices are for RGB + { + assert((lr | lg | lb | hr | hg | hb) <= 31); + assert((la | ha) <= 63); + assert(rot_index <= 3); + assert(index_flag <= 1); + + // defaults: 2nd plane=always alpha, RGB=3-bit indices, A=2-bits (favoring RGB) + //const uint32_t rot_index = 0, index_flag = 1; + + uint32_t weights_inv[2] = { }; + + const uint8_t* p2BitWeights = index_flag ? pWeights1 : pWeights0; + const uint8_t* p3BitWeights = index_flag ? 
pWeights0 : pWeights1; + + // 3-bits + if (p3BitWeights[0] & 4) + { + weights_inv[0] = 7; + if (index_flag) + { + std::swap(lr, hr); + std::swap(lg, hg); + std::swap(lb, hb); + } + else + { + std::swap(la, ha); + } + } + + // 2-bits + if (p2BitWeights[0] & 2) + { + weights_inv[1] = 3; + if (index_flag) + { + std::swap(la, ha); + } + else + { + std::swap(lr, hr); + std::swap(lg, hg); + std::swap(lb, hb); + } + } + + pBlock[0] = (uint8_t)(0b10000 | (rot_index << 5) | (index_flag << 7)); + + // 6*5+6*2=42 bits + uint64_t x = lr | (hr << (5 * 1)); + x |= (lg << (5 * 2)) | (hg << (5 * 3)); + x |= (lb << (5 * 4)) | (hb << (5 * 5)); + x |= (uint64_t(la) << (5 * 6)) | (uint64_t(ha) << (5 * 6 + 6)); + + pBlock[1] = (uint8_t)x; + pBlock[2] = (uint8_t)(x >> 8); + pBlock[3] = (uint8_t)(x >> 16); + pBlock[4] = (uint8_t)(x >> 24); + + pBlock[5] = (uint8_t)(x >> 32); + + // 2 leftover bits + x >>= 40; + uint32_t ofs0 = 2; + + // alpha indices (2-bits) + for (uint32_t i = 0; i < 16; i++) + { + assert(p2BitWeights[i] <= 3); + uint64_t w = p2BitWeights[i] ^ weights_inv[1]; + + assert(i || ((w & 2) == 0)); + + x |= (w << ofs0); + + ofs0 += 2 - (i == 0); + } + + // x = 31+2=33 bits + + pBlock[6] = (uint8_t)x; + pBlock[7] = (uint8_t)(x >> 8); + pBlock[8] = (uint8_t)(x >> 16); + pBlock[9] = (uint8_t)(x >> 24); + + x >>= 32; + + // x = 1 bits + uint32_t ofs1 = 1; + + // rgb indices (3-bits) + for (uint32_t i = 0; i < 16; i++) + { + assert(p3BitWeights[i] <= 7); + uint64_t w = p3BitWeights[i] ^ weights_inv[0]; + + assert(i || ((w & 4) == 0)); + + x |= (w << ofs1); + + ofs1 += 3 - (i == 0); + } + + assert(ofs1 == 48); + + // x=48 bits + pBlock[10] = (uint8_t)x; + pBlock[11] = (uint8_t)(x >> 8); + pBlock[12] = (uint8_t)(x >> 16); + pBlock[13] = (uint8_t)(x >> 24); + pBlock[14] = (uint8_t)(x >> 32); + pBlock[15] = (uint8_t)(x >> 40); + } + + // lossless in RGBA + void pack_mode5_solid(uint8_t* pBlock, const color_rgba& c) + { + pBlock[0] = 0b00100000; + + uint32_t lr = 
basist::g_bc7_mode_5_optimal_endpoints[c[0]].m_lo; + uint32_t hr = basist::g_bc7_mode_5_optimal_endpoints[c[0]].m_hi; + + uint32_t lg = basist::g_bc7_mode_5_optimal_endpoints[c[1]].m_lo; + uint32_t hg = basist::g_bc7_mode_5_optimal_endpoints[c[1]].m_hi; + + uint32_t lb = basist::g_bc7_mode_5_optimal_endpoints[c[2]].m_lo; + uint32_t hb = basist::g_bc7_mode_5_optimal_endpoints[c[2]].m_hi; + + // 8 endpoints are 8-bits, nothing fancy needed + uint32_t a = c[3]; + + // 58 total bits + uint64_t x = lr | (hr << (7 * 1)); + x |= (lg << (7 * 2)) | (hg << (7 * 3)); + x |= (((uint64_t)lb) << (7 * 4)) | (((uint64_t)hb) << (7 * 5)); + x |= (((uint64_t)a) << (7 * 6)) | (((uint64_t)a) << (7 * 6 + 8)); + + // write 56 bits, leaving 2 left over + pBlock[1] = (uint8_t)(x); + pBlock[2] = (uint8_t)(x >> 8); + pBlock[3] = (uint8_t)(x >> 16); + pBlock[4] = (uint8_t)(x >> 24); + pBlock[5] = (uint8_t)(x >> 32); + pBlock[6] = (uint8_t)(x >> 40); + pBlock[7] = (uint8_t)(x >> 48); + + x >>= 56; + assert(x <= 3); + +#if 0 + x |= (0b0101010101010101010101010101011ull << 2); + + pBlock[8] = (uint8_t)(x); + pBlock[9] = (uint8_t)(x >> 8); + pBlock[10] = (uint8_t)(x >> 16); + pBlock[11] = (uint8_t)(x >> 24); + pBlock[12] = (uint8_t)(x >> 32); + pBlock[13] = 0; + pBlock[14] = 0; + pBlock[15] = 0; +#elif 0 + // 0xaaaaaaac | x + pBlock[8] = (uint8_t)(x) | 0xAC; + + static const uint8_t s_tail_bytes[7] = { 0xaa, 0xaa, 0xaa, 0, 0, 0, 0 }; + memcpy(pBlock + 9, s_tail_bytes, 7); +#elif 1 + static const uint8_t s_tail_bytes[8] = { 0xac, 0xaa, 0xaa, 0xaa, 0, 0, 0, 0 }; + memcpy(pBlock + 8, s_tail_bytes, 8); + pBlock[8] |= (uint8_t)x; +#endif + } + + void encode_mode5_rgba_block(uint8_t* pBlock, + uint32_t lr, uint32_t lg, uint32_t lb, uint32_t la, // 7-bit RGB endpoints, 8-bit alpha endpoints + uint32_t hr, uint32_t hg, uint32_t hb, uint32_t ha, + const uint8_t* pColorWeights, const uint8_t* pAlphaWeights, // both 2-bit weights + uint32_t rot_index = 0) // rot_index=0 no rotation + { + assert((lr | lg | 
lb | hr | hg | hb) <= 127); + assert((la | ha) <= 255); + assert(rot_index <= 3); + + uint32_t color_inv = 0, alpha_inv = 0; + + if (pColorWeights[0] & 2) + { + std::swap(lr, hr); + std::swap(lg, hg); + std::swap(lb, hb); + color_inv = 3; + } + + if (pAlphaWeights[0] & 2) + { + std::swap(la, ha); + alpha_inv = 3; + } + + uint64_t low = (1ULL << 5) | (rot_index << 6) | + (lr << 8) | (hr << 15) | + (lg << 22) | (uint64_t(hg) << 29) | + (uint64_t(lb) << 36) | (uint64_t(hb) << 43) | + (uint64_t(la) << 50) | (uint64_t(ha) << 58); + + pBlock[0] = (uint8_t)low; + pBlock[1] = (uint8_t)(low >> 8); + pBlock[2] = (uint8_t)(low >> 16); + pBlock[3] = (uint8_t)(low >> 24); + pBlock[4] = (uint8_t)(low >> 32); + pBlock[5] = (uint8_t)(low >> 40); + pBlock[6] = (uint8_t)(low >> 48); + pBlock[7] = (uint8_t)(low >> 56); + + uint64_t high = (ha >> 6) & 3; + + uint32_t ofs = 2; + + for (uint32_t i = 0; i < 16; i++) + { + uint64_t w = pColorWeights[i] ^ color_inv; +#ifdef _DEBUG + assert(w <= 3); + if (i == 0) + { + assert((w & 2) == 0); + } +#endif + high |= (w << ofs); + ofs += (2 - (i == 0)); + } + + assert(33 == ofs); + + for (uint32_t i = 0; i < 16; i++) + { + uint64_t w = pAlphaWeights[i] ^ alpha_inv; +#ifdef _DEBUG + assert(w <= 3); + if (i == 0) + { + assert((w & 2) == 0); + } +#endif + high |= (w << ofs); + ofs += (2 - (i == 0)); + } + + assert(64 == ofs); + + pBlock[8] = (uint8_t)high; + pBlock[9] = (uint8_t)(high >> 8); + pBlock[10] = (uint8_t)(high >> 16); + pBlock[11] = (uint8_t)(high >> 24); + pBlock[12] = (uint8_t)(high >> 32); + pBlock[13] = (uint8_t)(high >> 40); + pBlock[14] = (uint8_t)(high >> 48); + pBlock[15] = (uint8_t)(high >> 56); + } + + void encode_mode6_rgba_block(uint8_t* pBlock, + uint32_t lr, uint32_t lg, uint32_t lb, uint32_t la, uint32_t p0, // 7-bit endpoints, 2 shared p-bits + uint32_t hr, uint32_t hg, uint32_t hb, uint32_t ha, uint32_t p1, + const uint8_t* pWeights) // 4-bit weights + { + assert((lr | lg | lb | la | hr | hg | hb | ha) <= 127); + 
assert((p0 | p1) <= 1); + + uint32_t weight_inv = 0; + if (pWeights[0] & 8) + { + std::swap(lr, hr); + std::swap(lg, hg); + std::swap(lb, hb); + std::swap(la, ha); + std::swap(p0, p1); + weight_inv = 15; + } + + // 9*7=63 bits + uint64_t x = 0b1000000 | (lr << (7 * 1)) | (hr << (7 * 2)); + x |= (lg << (7 * 3)) | (uint64_t(hg) << (7 * 4)); + x |= (uint64_t(lb) << (7 * 5)) | (uint64_t(hb) << (7 * 6)); + x |= (uint64_t(la) << (7 * 7)) | (uint64_t(ha) << (7 * 8)); + + pBlock[0] = (uint8_t)x; + pBlock[1] = (uint8_t)(x >> 8); + pBlock[2] = (uint8_t)(x >> 16); + pBlock[3] = (uint8_t)(x >> 24); + + pBlock[4] = (uint8_t)(x >> 32); + pBlock[5] = (uint8_t)(x >> 40); + pBlock[6] = (uint8_t)(x >> 48); + x >>= 56; + + // x=7 bits + x |= (p0 << 7); + pBlock[7] = (uint8_t)x; + + uint64_t y = p1; + uint32_t ofs = 1; + // TODO: Unroll/optimize + for (uint32_t i = 0; i < 16; i++) + { + uint64_t w = pWeights[i] ^ weight_inv; + assert(w <= 15); + assert(i || ((w & 8) == 0)); + y |= (w << ofs); + ofs += 3 + (i > 0); + } + assert(64 == ofs); + + pBlock[8] = (uint8_t)y; + pBlock[9] = (uint8_t)(y >> 8); + pBlock[10] = (uint8_t)(y >> 16); + pBlock[11] = (uint8_t)(y >> 24); + pBlock[12] = (uint8_t)(y >> 32); + pBlock[13] = (uint8_t)(y >> 40); + pBlock[14] = (uint8_t)(y >> 48); + pBlock[15] = (uint8_t)(y >> 56); + } + + void encode_mode7_rgba_block(uint8_t* pBlock, uint32_t part_id, // 2 subsets, 6-bits part ID + uint32_t lr[2], uint32_t lg[2], uint32_t lb[2], uint32_t la[2], // 5-bit endpoints, unique pbits + uint32_t hr[2], uint32_t hg[2], uint32_t hb[2], uint32_t ha[2], + uint32_t p[4], + const uint8_t* pWeights) // 2-bit weights + { + assert(part_id < 64); + assert((lr[0] | lr[1] | lg[0] | lg[1] | lb[0] | lb[1] | la[0] | la[1]) <= 31); + assert((hr[0] | hr[1] | hg[0] | hg[1] | hb[0] | hb[1] | ha[0] | ha[1]) <= 31); + assert((p[0] | p[1] | p[2] | p[3]) <= 1); + + const uint8_t* pPart_map = &g_bc7_partition2[part_id * 16]; + const uint32_t anchor_index = 
g_bc7_table_anchor_index_second_subset[part_id]; + + uint32_t weight_inv[2] = { 0, 0 }; + if (pWeights[0] & 2) + { + std::swap(lr[0], hr[0]); std::swap(lg[0], hg[0]); std::swap(lb[0], hb[0]); std::swap(la[0], ha[0]); + std::swap(p[0], p[1]); + weight_inv[0] = 3; + } + + if (pWeights[anchor_index] & 2) + { + std::swap(lr[1], hr[1]); std::swap(lg[1], hg[1]); std::swap(lb[1], hb[1]); std::swap(la[1], ha[1]); + std::swap(p[2], p[3]); + weight_inv[1] = 3; + } + + uint64_t x = 0x80ULL | (part_id << 8) | + (lr[0] << 14) | (hr[0] << 19) | (lr[1] << 24) | (uint64_t(hr[1]) << 29) | + (uint64_t(lg[0]) << 34) | (uint64_t(hg[0]) << 39) | (uint64_t(lg[1]) << 44) | (uint64_t(hg[1]) << 49) | + (uint64_t(lb[0]) << 54) | (uint64_t(hb[0]) << 59); + + pBlock[0] = (uint8_t)x; + pBlock[1] = (uint8_t)(x >> 8); + pBlock[2] = (uint8_t)(x >> 16); + pBlock[3] = (uint8_t)(x >> 24); + + pBlock[4] = (uint8_t)(x >> 32); + pBlock[5] = (uint8_t)(x >> 40); + pBlock[6] = (uint8_t)(x >> 48); + pBlock[7] = (uint8_t)(x >> 56); + + uint64_t y = (lb[1] << 0) | (hb[1] << 5) | + (la[0] << 10) | (ha[0] << 15) | (la[1] << 20) | (ha[1] << 25) | + (uint64_t(p[0]) << 30) | (uint64_t(p[1]) << 31) | (uint64_t(p[2]) << 32) | (uint64_t(p[3]) << 33); + + uint32_t ofs = 34; + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t subset_index = pPart_map[i]; + uint64_t w = pWeights[i] ^ weight_inv[subset_index]; + +#ifdef _DEBUG + assert(w <= 3); + if ((i == 0) || (i == anchor_index)) + { + assert((w & 2) == 0); + } +#endif + + y |= (w << ofs); + ofs += (2 - ((i == 0) || (i == anchor_index))); + } + assert(64 == ofs); + + pBlock[8] = (uint8_t)y; + pBlock[9] = (uint8_t)(y >> 8); + pBlock[10] = (uint8_t)(y >> 16); + pBlock[11] = (uint8_t)(y >> 24); + + pBlock[12] = (uint8_t)(y >> 32); + pBlock[13] = (uint8_t)(y >> 40); + pBlock[14] = (uint8_t)(y >> 48); + pBlock[15] = (uint8_t)(y >> 56); + } + + static bool compute_least_squares_endpoints_1D( + uint32_t N, const uint8_t* pWeights, uint32_t num_weights, + const vec4F* 
pSelector_weights, + float& xl, float& xh, + const color_rgba* pColors, uint32_t comp_index, + float t_r) + { + BASISU_NOTE_UNUSED(num_weights); + + float z00 = 0.0f, z10 = 0.0f, z11 = 0.0f; + float q00_r = 0.0f; + + for (uint32_t i = 0; i < N; i++) + { + const uint32_t sel = pWeights[i]; + assert(sel < num_weights); + + z00 += pSelector_weights[sel][0]; + z10 += pSelector_weights[sel][1]; + z11 += pSelector_weights[sel][2]; + + const float w = pSelector_weights[sel][3]; + + q00_r += w * (float)pColors[i][comp_index]; + } + + float q10_r = t_r - q00_r; + + float z01 = z10; + + float det = z00 * z11 - z01 * z10; + if (fabs(det) < 1e-8f) + return false; + + det = 1.0f / det; + + float iz00, iz01, iz10, iz11; + iz00 = z11 * det; + iz01 = -z01 * det; + iz10 = -z10 * det; + iz11 = z00 * det; + + xh = basisu::clamp(iz00 * q00_r + iz01 * q10_r, 0.0f, 255.0f); + xl = basisu::clamp(iz10 * q00_r + iz11 * q10_r, 0.0f, 255.0f); + + return true; + } + + static bool compute_least_squares_endpoints_3D( + uint32_t N, const uint8_t* pWeights, uint32_t num_weights, + const vec4F* pSelector_weights, + vec4F& xl, vec4F& xh, + const color_rgba* pColors, + float t_r, float t_g, float t_b) + { + BASISU_NOTE_UNUSED(num_weights); + + float z00 = 0.0f, z10 = 0.0f, z11 = 0.0f; + float q00_r = 0.0f, q00_g = 0.0f, q00_b = 0.0f; + + for (uint32_t i = 0; i < N; i++) + { + const uint32_t sel = pWeights[i]; + assert(sel < num_weights); + + z00 += pSelector_weights[sel][0]; + z10 += pSelector_weights[sel][1]; + z11 += pSelector_weights[sel][2]; + + const float w = pSelector_weights[sel][3]; + + q00_r += w * (float)pColors[i][0]; + q00_g += w * (float)pColors[i][1]; + q00_b += w * (float)pColors[i][2]; + } + + float q10_r = t_r - q00_r; + float q10_g = t_g - q00_g; + float q10_b = t_b - q00_b; + + float z01 = z10; + + float det = z00 * z11 - z01 * z10; + if (fabs(det) < 1e-8f) + return false; + + det = 1.0f / det; + + float iz00, iz01, iz10, iz11; + iz00 = z11 * det; + iz01 = -z01 * det; + iz10 = 
-z10 * det; + iz11 = z00 * det; + + xh[0] = basisu::clamp(iz00 * q00_r + iz01 * q10_r, 0.0f, 255.0f); + xl[0] = basisu::clamp(iz10 * q00_r + iz11 * q10_r, 0.0f, 255.0f); + + xh[1] = basisu::clamp(iz00 * q00_g + iz01 * q10_g, 0.0f, 255.0f); + xl[1] = basisu::clamp(iz10 * q00_g + iz11 * q10_g, 0.0f, 255.0f); + + xh[2] = basisu::clamp(iz00 * q00_b + iz01 * q10_b, 0.0f, 255.0f); + xl[2] = basisu::clamp(iz10 * q00_b + iz11 * q10_b, 0.0f, 255.0f); + + xh[3] = 0; + xl[3] = 0; + + return true; + } + + static bool compute_least_squares_endpoints_4D( + uint32_t N, const uint8_t* pWeights, uint32_t num_weights, + const vec4F* pSelector_weights, + vec4F& xl, vec4F& xh, + const color_rgba* pColors, + float t_r, float t_g, float t_b, float t_a) + { + BASISU_NOTE_UNUSED(num_weights); + + float z00 = 0.0f, z10 = 0.0f, z11 = 0.0f; + float q00_r = 0.0f, q00_g = 0.0f, q00_b = 0.0f, q00_a = 0.0f; + + for (uint32_t i = 0; i < N; i++) + { + const uint32_t sel = pWeights[i]; + assert(sel < num_weights); + + z00 += pSelector_weights[sel][0]; + z10 += pSelector_weights[sel][1]; + z11 += pSelector_weights[sel][2]; + + const float w = pSelector_weights[sel][3]; + + q00_r += w * (float)pColors[i][0]; + q00_g += w * (float)pColors[i][1]; + q00_b += w * (float)pColors[i][2]; + q00_a += w * (float)pColors[i][3]; + } + + float q10_r = t_r - q00_r; + float q10_g = t_g - q00_g; + float q10_b = t_b - q00_b; + float q10_a = t_a - q00_a; + + float z01 = z10; + + float det = z00 * z11 - z01 * z10; + if (fabs(det) < 1e-8f) + return false; + + det = 1.0f / det; + + float iz00, iz01, iz10, iz11; + iz00 = z11 * det; + iz01 = -z01 * det; + iz10 = -z10 * det; + iz11 = z00 * det; + + xh[0] = basisu::clamp(iz00 * q00_r + iz01 * q10_r, 0.0f, 255.0f); + xl[0] = basisu::clamp(iz10 * q00_r + iz11 * q10_r, 0.0f, 255.0f); + + xh[1] = basisu::clamp(iz00 * q00_g + iz01 * q10_g, 0.0f, 255.0f); + xl[1] = basisu::clamp(iz10 * q00_g + iz11 * q10_g, 0.0f, 255.0f); + + xh[2] = basisu::clamp(iz00 * q00_b + iz01 * q10_b, 
0.0f, 255.0f); + xl[2] = basisu::clamp(iz10 * q00_b + iz11 * q10_b, 0.0f, 255.0f); + + xh[3] = basisu::clamp(iz00 * q00_a + iz01 * q10_a, 0.0f, 255.0f); + xl[3] = basisu::clamp(iz10 * q00_a + iz11 * q10_a, 0.0f, 255.0f); + + return true; + } + +#if BASISU_BC7F_USE_SSE41 + /* Stores in out_min_idx and out_max_idx the indices (0 to 15) of the pixels whose RGB projection onto the axis (saxis_r, saxis_g, saxis_b) is smallest and largest. The axis components are shifted right by 4 and packed to signed 16-bit with saturation, alpha gets coefficient 0, and every dot product is shifted left by 4 with the pixel index added into the low 4 bits, so a plain packed min/max carries the index along with the value. Reads 16 pixels through four unaligned 128-bit loads at pPixels[0], [4], [8] and [12]. */ void bc7_proj_minmax_indices_sse41(const color_rgba* __restrict pPixels, int saxis_r, int saxis_g, int saxis_b, int* out_min_idx, int* out_max_idx) + { + __m128i coef32 = _mm_setr_epi32(saxis_r, saxis_g, saxis_b, 0); // 32-bit lanes + coef32 = _mm_srai_epi32(coef32, 4); // arithmetic >>4 in 32-bit + __m128i COEF = _mm_packs_epi32(coef32, coef32); + + const __m128i ZERO = _mm_setzero_si128(); + + __m128i vmin, vmax; + { + const __m128i px = _mm_loadu_si128((const __m128i*) & pPixels[0]); + const __m128i lo16 = _mm_unpacklo_epi8(px, ZERO); // [r0 g0 b0 a0 r1 g1 b1 a1] + const __m128i hi16 = _mm_unpackhi_epi8(px, ZERO); // [r2 g2 b2 a2 r3 g3 b3 a3] + + const __m128i lo32p = _mm_madd_epi16(lo16, COEF); + const __m128i hi32p = _mm_madd_epi16(hi16, COEF); + + const __m128i lo_sum = _mm_add_epi32(lo32p, _mm_shuffle_epi32(lo32p, _MM_SHUFFLE(2, 3, 0, 1))); + const __m128i hi_sum = _mm_add_epi32(hi32p, _mm_shuffle_epi32(hi32p, _MM_SHUFFLE(2, 3, 0, 1))); + + const __m128i pair01 = _mm_shuffle_epi32(lo_sum, _MM_SHUFFLE(2, 0, 2, 0)); + const __m128i pair23 = _mm_shuffle_epi32(hi_sum, _MM_SHUFFLE(2, 0, 2, 0)); + + const __m128i p32p = _mm_unpacklo_epi64(pair01, pair23); + + const __m128i p32 = _mm_slli_epi32(p32p, 4); + + const __m128i keyed = _mm_add_epi32(p32, _mm_set_epi32(3, 2, 1, 0)); + + vmin = keyed; + vmax = keyed; + } + + { + const __m128i px = _mm_loadu_si128((const __m128i*) & pPixels[4]); + const __m128i lo16 = _mm_unpacklo_epi8(px, ZERO); // [r0 g0 b0 a0 r1 g1 b1 a1] + const __m128i hi16 = _mm_unpackhi_epi8(px, ZERO); // [r2 g2 b2 a2 r3 g3 b3 a3] + + const __m128i lo32p = _mm_madd_epi16(lo16, COEF); + const __m128i hi32p = _mm_madd_epi16(hi16, COEF); + + const __m128i lo_sum = _mm_add_epi32(lo32p, 
_mm_shuffle_epi32(lo32p, _MM_SHUFFLE(2, 3, 0, 1))); + const __m128i hi_sum = _mm_add_epi32(hi32p, _mm_shuffle_epi32(hi32p, _MM_SHUFFLE(2, 3, 0, 1))); + + const __m128i pair01 = _mm_shuffle_epi32(lo_sum, _MM_SHUFFLE(2, 0, 2, 0)); + const __m128i pair23 = _mm_shuffle_epi32(hi_sum, _MM_SHUFFLE(2, 0, 2, 0)); + + const __m128i p32p = _mm_unpacklo_epi64(pair01, pair23); + + const __m128i p32 = _mm_slli_epi32(p32p, 4); + + const __m128i keyed = _mm_add_epi32(p32, _mm_set_epi32(7, 6, 5, 4)); + + vmin = _mm_min_epi32(vmin, keyed); + vmax = _mm_max_epi32(vmax, keyed); + } + + { + /* Pixels 8 to 11: widen bytes to 16 bits, multiply-add against the axis, then combine the partial sums into one 32-bit dot product per pixel. */ const __m128i px = _mm_loadu_si128((const __m128i*) & pPixels[8]); + const __m128i lo16 = _mm_unpacklo_epi8(px, ZERO); // [r0 g0 b0 a0 r1 g1 b1 a1] + const __m128i hi16 = _mm_unpackhi_epi8(px, ZERO); // [r2 g2 b2 a2 r3 g3 b3 a3] + + const __m128i lo32p = _mm_madd_epi16(lo16, COEF); + const __m128i hi32p = _mm_madd_epi16(hi16, COEF); + + const __m128i lo_sum = _mm_add_epi32(lo32p, _mm_shuffle_epi32(lo32p, _MM_SHUFFLE(2, 3, 0, 1))); + const __m128i hi_sum = _mm_add_epi32(hi32p, _mm_shuffle_epi32(hi32p, _MM_SHUFFLE(2, 3, 0, 1))); + + const __m128i pair01 = _mm_shuffle_epi32(lo_sum, _MM_SHUFFLE(2, 0, 2, 0)); + const __m128i pair23 = _mm_shuffle_epi32(hi_sum, _MM_SHUFFLE(2, 0, 2, 0)); + + const __m128i p32p = _mm_unpacklo_epi64(pair01, pair23); + + /* Shift left by 4 to free the low 4 bits for the pixel index. */ const __m128i p32 = _mm_slli_epi32(p32p, 4); + + /* _mm_set_epi32 lists lanes from highest to lowest, so lane 0 receives index 8. */ const __m128i keyed = _mm_add_epi32(p32, _mm_set_epi32(11, 10, 9, 8)); + + vmin = _mm_min_epi32(vmin, keyed); + vmax = _mm_max_epi32(vmax, keyed); + } + + { + /* Pixels 12 to 15, processed the same way. */ const __m128i px = _mm_loadu_si128((const __m128i*) & pPixels[12]); + const __m128i lo16 = _mm_unpacklo_epi8(px, ZERO); // [r0 g0 b0 a0 r1 g1 b1 a1] + const __m128i hi16 = _mm_unpackhi_epi8(px, ZERO); // [r2 g2 b2 a2 r3 g3 b3 a3] + + const __m128i lo32p = _mm_madd_epi16(lo16, COEF); + const __m128i hi32p = _mm_madd_epi16(hi16, COEF); + + const __m128i lo_sum = _mm_add_epi32(lo32p, _mm_shuffle_epi32(lo32p, _MM_SHUFFLE(2, 3, 0, 1))); + const __m128i hi_sum = 
_mm_add_epi32(hi32p, _mm_shuffle_epi32(hi32p, _MM_SHUFFLE(2, 3, 0, 1))); + + const __m128i pair01 = _mm_shuffle_epi32(lo_sum, _MM_SHUFFLE(2, 0, 2, 0)); + const __m128i pair23 = _mm_shuffle_epi32(hi_sum, _MM_SHUFFLE(2, 0, 2, 0)); + + const __m128i p32p = _mm_unpacklo_epi64(pair01, pair23); + + const __m128i p32 = _mm_slli_epi32(p32p, 4); + + const __m128i keyed = _mm_add_epi32(p32, _mm_set_epi32(15, 14, 13, 12)); + + vmin = _mm_min_epi32(vmin, keyed); + vmax = _mm_max_epi32(vmax, keyed); + } + + /* Horizontal reduction: two shuffle-and-min steps fold the four lanes of vmin into lane 0; vmax is folded the same way with max below. */ __m128i t = _mm_shuffle_epi32(vmin, _MM_SHUFFLE(2, 3, 0, 1)); + vmin = _mm_min_epi32(vmin, t); + t = _mm_shuffle_epi32(vmin, _MM_SHUFFLE(1, 0, 3, 2)); + vmin = _mm_min_epi32(vmin, t); + const int min_keyed = _mm_cvtsi128_si32(vmin); + + t = _mm_shuffle_epi32(vmax, _MM_SHUFFLE(2, 3, 0, 1)); + vmax = _mm_max_epi32(vmax, t); + t = _mm_shuffle_epi32(vmax, _MM_SHUFFLE(1, 0, 3, 2)); + vmax = _mm_max_epi32(vmax, t); + const int max_keyed = _mm_cvtsi128_si32(vmax); + + /* The low 4 bits of each key hold the pixel index. */ *out_min_idx = (min_keyed & 0xF); + *out_max_idx = (max_keyed & 0xF); + } + + /* SSE4.1 selector search for one RGB endpoint pair with 4-bit weights. The six endpoint values are expanded with from_7(value, p-bit), p0 for the low and p1 for the high endpoint. Each pixel's selector is dot(pixel - low, high - low) scaled by 15 over the squared endpoint distance, plus 0.5, truncated and clamped to 0 through 15, and stored as one byte in pWeights. Alpha is ignored because its lane is 0 in both EP16 and COEF. Processes 16 pixels, four per loop iteration. */ void eval_weights_mode6_rgb_sse41( + const color_rgba* __restrict pPixels, uint8_t* __restrict pWeights, + int lr, int lg, int lb, + int hr, int hg, int hb, + uint32_t p0, uint32_t p1) + { + lr = from_7(lr, p0); lg = from_7(lg, p0); lb = from_7(lb, p0); + hr = from_7(hr, p1); hg = from_7(hg, p1); hb = from_7(hb, p1); + + const int dr = hr - lr; + const int dg = hg - lg; + const int db = hb - lb; + + /* The small constant keeps the divisor non-zero when both endpoints are identical. */ const float denom = (float)(basisu::squarei(dr) + basisu::squarei(dg) + basisu::squarei(db)) + 0.00000125f; + const float f = 15.0f / denom; + + const __m128i ZEROi = _mm_setzero_si128(); + const __m128i FIFTEEN = _mm_set1_epi32(15); + const __m128 F = _mm_set1_ps(f); + const __m128 HALF = _mm_set1_ps(0.5f); + + const __m128i EP16 = _mm_setr_epi16((short)lr, (short)lg, (short)lb, 0, + (short)lr, (short)lg, (short)lb, 0); + + const __m128i COEF = _mm_setr_epi16((short)dr, (short)dg, (short)db, 0, + (short)dr, (short)dg, (short)db, 0); + + for (int 
i = 0; i < 16; i += 4) + { + const __m128i px = _mm_loadu_si128((const __m128i*) & pPixels[i]); + + const __m128i lo16 = _mm_unpacklo_epi8(px, ZEROi); + const __m128i hi16 = _mm_unpackhi_epi8(px, ZEROi); + + /* Subtract the low endpoint from every pixel before projecting onto the endpoint delta. */ const __m128i lo_adj = _mm_sub_epi16(lo16, EP16); + const __m128i hi_adj = _mm_sub_epi16(hi16, EP16); + + const __m128i lo32p = _mm_madd_epi16(lo_adj, COEF); + const __m128i hi32p = _mm_madd_epi16(hi_adj, COEF); + + const __m128i lo_sum = _mm_add_epi32(lo32p, _mm_shuffle_epi32(lo32p, _MM_SHUFFLE(2, 3, 0, 1))); + const __m128i hi_sum = _mm_add_epi32(hi32p, _mm_shuffle_epi32(hi32p, _MM_SHUFFLE(2, 3, 0, 1))); + + const __m128i pair01 = _mm_shuffle_epi32(lo_sum, _MM_SHUFFLE(2, 0, 2, 0)); + const __m128i pair23 = _mm_shuffle_epi32(hi_sum, _MM_SHUFFLE(2, 0, 2, 0)); + const __m128i dot32 = _mm_unpacklo_epi64(pair01, pair23); + + /* Scale by F, add 0.5, then convert with truncation. */ __m128 y = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(dot32), F), HALF); + __m128i sel32 = _mm_cvttps_epi32(y); + + /* Clamp the four selectors to the range 0 through 15. */ sel32 = _mm_min_epi32(_mm_max_epi32(sel32, ZEROi), FIFTEEN); + + __m128i sel16 = _mm_packs_epi32(sel32, ZEROi); + __m128i sel8 = _mm_packus_epi16(sel16, ZEROi); + /* Writes four selector bytes with a single 32-bit store through a cast pointer. NOTE(review): this relies on the build using -fno-strict-aliasing and on unaligned 32-bit stores being acceptable on the target; confirm. */ *(uint32_t*)&pWeights[i] = (uint32_t)_mm_cvtsi128_si32(sel8); + } + } +#endif + + /* Squared error of a single component: pr compared with the interpolated value lr + ((dr * w + 32) >> 6). The weight w is asserted to lie in 0 through 64. */ BASISU_FORCE_INLINE uint32_t bc7_sse( + int pr, + int lr, + int dr, + int w) + { + assert((w >= 0) && (w <= 64)); + int re = pr - (lr + ((dr * (int)w + 32) >> 6)); + return (re * re); + } + + /* RGB overload: sum of the squared per-channel errors, each channel interpolated as l + ((d * w + 32) >> 6) with w asserted in 0 through 64. */ BASISU_FORCE_INLINE uint32_t bc7_sse( + int pr, int pg, int pb, + int lr, int lg, int lb, + int dr, int dg, int db, + int w) + { + assert((w >= 0) && (w <= 64)); + int re = pr - (lr + ((dr * (int)w + 32) >> 6)); + int ge = pg - (lg + ((dg * (int)w + 32) >> 6)); + int be = pb - (lb + ((db * (int)w + 32) >> 6)); + return (re * re) + (ge * ge) + (be * be); + } + + /* RGBA overload: sum of the squared errors of four channels, each interpolated as l + ((d * w + 32) >> 6) with w asserted in 0 through 64. */ BASISU_FORCE_INLINE uint32_t bc7_sse( + int pr, int pg, int pb, int pa, + int lr, int lg, int lb, int la, + int dr, int dg, int db, int da, + int w) + { + assert((w >= 0) && (w <= 64)); + int re = pr - (lr + ((dr * (int)w + 32) >> 6)); + int ge = pg 
- (lg + ((dg * (int)w + 32) >> 6)); + int be = pb - (lb + ((db * (int)w + 32) >> 6)); + int ae = pa - (la + ((da * (int)w + 32) >> 6)); + return (re * re) + (ge * ge) + (be * be) + (ae * ae); + } + + /* Scalar selector search for one RGB endpoint pair with 4-bit weights. The endpoint values are expanded with from_7(value, p-bit), p0 for the low and p1 for the high endpoint. Each of the 16 pixels gets the selector (int)(dot(pixel - low, high - low) * 15 / squared endpoint distance + 0.5), clamped to 0 through 15 and written to pWeights[i]. Alpha is not read. */ void eval_weights_mode6_rgb(const color_rgba* pPixels, uint8_t* pWeights, // 4-bits + int lr, int lg, int lb, + int hr, int hg, int hb, + uint32_t p0, uint32_t p1) + { + lr = from_7(lr, p0); lg = from_7(lg, p0); lb = from_7(lb, p0); + hr = from_7(hr, p1); hg = from_7(hg, p1); hb = from_7(hb, p1); + + int dr = hr - lr; + int dg = hg - lg; + int db = hb - lb; + + /* The small constant keeps the divisor non-zero when both endpoints are identical. */ const float f = 15.0f / (float)(basisu::squarei(dr) + basisu::squarei(dg) + basisu::squarei(db) + .00000125f); + + /* Negated dot(low, delta), so adding it to dot(pixel, delta) yields dot(pixel - low, delta). */ const int sofs = -(lr * dr + lg * dg + lb * db); + + for (uint32_t i = 0; i < 16; i += 4) + { + int sel0 = (int)(float(pPixels[i + 0][0] * dr + pPixels[i + 0][1] * dg + pPixels[i + 0][2] * db + sofs) * f + .5f); + int sel1 = (int)(float(pPixels[i + 1][0] * dr + pPixels[i + 1][1] * dg + pPixels[i + 1][2] * db + sofs) * f + .5f); + int sel2 = (int)(float(pPixels[i + 2][0] * dr + pPixels[i + 2][1] * dg + pPixels[i + 2][2] * db + sofs) * f + .5f); + int sel3 = (int)(float(pPixels[i + 3][0] * dr + pPixels[i + 3][1] * dg + pPixels[i + 3][2] * db + sofs) * f + .5f); + + /* Out-of-range selectors: a negative value becomes 0 and a value above 15 becomes 15 (relies on an arithmetic right shift of a negative int). */ if ((uint32_t)sel0 > 15) sel0 = (~sel0 >> 31) & 15; + if ((uint32_t)sel1 > 15) sel1 = (~sel1 >> 31) & 15; + if ((uint32_t)sel2 > 15) sel2 = (~sel2 >> 31) & 15; + if ((uint32_t)sel3 > 15) sel3 = (~sel3 >> 31) & 15; + + pWeights[i + 0] = (uint8_t)sel0; + pWeights[i + 1] = (uint8_t)sel1; + pWeights[i + 2] = (uint8_t)sel2; + pWeights[i + 3] = (uint8_t)sel3; + } + } + + /* Selector search for one RGB endpoint pair with 4-bit weights that also returns the summed squared RGBA error of the 16 pixels. Selectors are chosen from the RGB channels only; the error is computed by bc7_sse with basist::g_bc7_weights4[selector] as the interpolation weight, and the alpha endpoints are taken as from_7(127, p0) and from_7(127, p1). */ uint32_t eval_weights_mode6_rgb_sse(const color_rgba* pPixels, uint8_t* pWeights, // 4-bits + int lr, int lg, int lb, + int hr, int hg, int hb, + uint32_t p0, uint32_t p1) + { + lr = from_7(lr, p0); lg = from_7(lg, p0); lb = from_7(lb, p0); + hr = from_7(hr, p1); hg = from_7(hg, p1); hb = from_7(hb, p1); + + // assumes packed a's are always 127 + const int la = from_7(127, p0); + 
const int ha = from_7(127, p1); + const int da = ha - la; + + int dr = hr - lr; + int dg = hg - lg; + int db = hb - lb; + + const float f = 15.0f / (float)(basisu::squarei(dr) + basisu::squarei(dg) + basisu::squarei(db) + .00000125f); + + const int sofs = -(lr * dr + lg * dg + lb * db); + + uint32_t sse = 0; + + for (uint32_t i = 0; i < 16; i += 4) + { + int sel0 = (int)(float(pPixels[i + 0][0] * dr + pPixels[i + 0][1] * dg + pPixels[i + 0][2] * db + sofs) * f + .5f); + int sel1 = (int)(float(pPixels[i + 1][0] * dr + pPixels[i + 1][1] * dg + pPixels[i + 1][2] * db + sofs) * f + .5f); + int sel2 = (int)(float(pPixels[i + 2][0] * dr + pPixels[i + 2][1] * dg + pPixels[i + 2][2] * db + sofs) * f + .5f); + int sel3 = (int)(float(pPixels[i + 3][0] * dr + pPixels[i + 3][1] * dg + pPixels[i + 3][2] * db + sofs) * f + .5f); + + /* Out-of-range selectors: a negative value becomes 0 and a value above 15 becomes 15. */ if ((uint32_t)sel0 > 15) sel0 = (~sel0 >> 31) & 15; + if ((uint32_t)sel1 > 15) sel1 = (~sel1 >> 31) & 15; + if ((uint32_t)sel2 > 15) sel2 = (~sel2 >> 31) & 15; + if ((uint32_t)sel3 > 15) sel3 = (~sel3 >> 31) & 15; + + pWeights[i + 0] = (uint8_t)sel0; + pWeights[i + 1] = (uint8_t)sel1; + pWeights[i + 2] = (uint8_t)sel2; + pWeights[i + 3] = (uint8_t)sel3; + + /* Accumulate the squared RGBA error of each chosen selector; alpha is compared against the la and da values derived from 127. */ sse += bc7_sse(pPixels[i + 0][0], pPixels[i + 0][1], pPixels[i + 0][2], pPixels[i + 0][3], lr, lg, lb, la, dr, dg, db, da, basist::g_bc7_weights4[sel0]); + sse += bc7_sse(pPixels[i + 1][0], pPixels[i + 1][1], pPixels[i + 1][2], pPixels[i + 1][3], lr, lg, lb, la, dr, dg, db, da, basist::g_bc7_weights4[sel1]); + sse += bc7_sse(pPixels[i + 2][0], pPixels[i + 2][1], pPixels[i + 2][2], pPixels[i + 2][3], lr, lg, lb, la, dr, dg, db, da, basist::g_bc7_weights4[sel2]); + sse += bc7_sse(pPixels[i + 3][0], pPixels[i + 3][1], pPixels[i + 3][2], pPixels[i + 3][3], lr, lg, lb, la, dr, dg, db, da, basist::g_bc7_weights4[sel3]); + } + + return sse; + } + + /* Scalar selector search for one RGBA endpoint pair with 4-bit weights. The endpoint values are expanded with from_7(value, p-bit), p0 for the low and p1 for the high endpoint. Each of the 16 pixels gets the rounded projection of (pixel - low) onto (high - low) over all four channels, scaled to 0 through 15, clamped, and written to pWeights[i]. */ void eval_weights_mode6_rgba(const color_rgba* pPixels, uint8_t* pWeights, // 4-bits + int lr, int lg, int lb, int la, int p0, + int hr, int hg, int hb, int 
ha, int p1) + { + lr = from_7(lr, p0); lg = from_7(lg, p0); lb = from_7(lb, p0); la = from_7(la, p0); + hr = from_7(hr, p1); hg = from_7(hg, p1); hb = from_7(hb, p1); ha = from_7(ha, p1); + + int dr = hr - lr; + int dg = hg - lg; + int db = hb - lb; + int da = ha - la; + + /* The small constant keeps the divisor non-zero when both endpoints are identical. */ const float f = 15.0f / (float)(basisu::squarei(dr) + basisu::squarei(dg) + basisu::squarei(db) + basisu::squarei(da) + .00000125f); + + /* Negated dot(low, delta), so adding it to dot(pixel, delta) yields dot(pixel - low, delta). */ const int sofs = -(lr * dr + lg * dg + lb * db + la * da); + + for (uint32_t i = 0; i < 16; i += 4) + { + int sel0 = (int)(float(pPixels[i + 0][0] * dr + pPixels[i + 0][1] * dg + pPixels[i + 0][2] * db + pPixels[i + 0][3] * da + sofs) * f + .5f); + int sel1 = (int)(float(pPixels[i + 1][0] * dr + pPixels[i + 1][1] * dg + pPixels[i + 1][2] * db + pPixels[i + 1][3] * da + sofs) * f + .5f); + int sel2 = (int)(float(pPixels[i + 2][0] * dr + pPixels[i + 2][1] * dg + pPixels[i + 2][2] * db + pPixels[i + 2][3] * da + sofs) * f + .5f); + int sel3 = (int)(float(pPixels[i + 3][0] * dr + pPixels[i + 3][1] * dg + pPixels[i + 3][2] * db + pPixels[i + 3][3] * da + sofs) * f + .5f); + + /* Out-of-range selectors: a negative value becomes 0 and a value above 15 becomes 15. */ if ((uint32_t)sel0 > 15) sel0 = (~sel0 >> 31) & 15; + if ((uint32_t)sel1 > 15) sel1 = (~sel1 >> 31) & 15; + if ((uint32_t)sel2 > 15) sel2 = (~sel2 >> 31) & 15; + if ((uint32_t)sel3 > 15) sel3 = (~sel3 >> 31) & 15; + + pWeights[i + 0] = (uint8_t)sel0; + pWeights[i + 1] = (uint8_t)sel1; + pWeights[i + 2] = (uint8_t)sel2; + pWeights[i + 3] = (uint8_t)sel3; + } + } + + /* Selector search for one RGBA endpoint pair with 4-bit weights that also returns the summed squared RGBA error of the 16 pixels, computed by bc7_sse with basist::g_bc7_weights4[selector] as the interpolation weight. Endpoints are expanded with from_7(value, p-bit), p0 for low and p1 for high. */ uint32_t eval_weights_mode6_rgba_sse(const color_rgba* pPixels, uint8_t* pWeights, // 4-bits + int lr, int lg, int lb, int la, int p0, + int hr, int hg, int hb, int ha, int p1) + { + lr = from_7(lr, p0); lg = from_7(lg, p0); lb = from_7(lb, p0); la = from_7(la, p0); + hr = from_7(hr, p1); hg = from_7(hg, p1); hb = from_7(hb, p1); ha = from_7(ha, p1); + + int dr = hr - lr; + int dg = hg - lg; + int db = hb - lb; + int da = ha - la; + + const float f = 15.0f / (float)(basisu::squarei(dr) + basisu::squarei(dg) + basisu::squarei(db) + 
basisu::squarei(da) + .00000125f); + + const int sofs = -(lr * dr + lg * dg + lb * db + la * da); + + uint32_t sse = 0; + + for (uint32_t i = 0; i < 16; i += 4) + { + int sel0 = (int)(float(pPixels[i + 0][0] * dr + pPixels[i + 0][1] * dg + pPixels[i + 0][2] * db + pPixels[i + 0][3] * da + sofs) * f + .5f); + int sel1 = (int)(float(pPixels[i + 1][0] * dr + pPixels[i + 1][1] * dg + pPixels[i + 1][2] * db + pPixels[i + 1][3] * da + sofs) * f + .5f); + int sel2 = (int)(float(pPixels[i + 2][0] * dr + pPixels[i + 2][1] * dg + pPixels[i + 2][2] * db + pPixels[i + 2][3] * da + sofs) * f + .5f); + int sel3 = (int)(float(pPixels[i + 3][0] * dr + pPixels[i + 3][1] * dg + pPixels[i + 3][2] * db + pPixels[i + 3][3] * da + sofs) * f + .5f); + + /* Out-of-range selectors: a negative value becomes 0 and a value above 15 becomes 15. */ if ((uint32_t)sel0 > 15) sel0 = (~sel0 >> 31) & 15; + if ((uint32_t)sel1 > 15) sel1 = (~sel1 >> 31) & 15; + if ((uint32_t)sel2 > 15) sel2 = (~sel2 >> 31) & 15; + if ((uint32_t)sel3 > 15) sel3 = (~sel3 >> 31) & 15; + + pWeights[i + 0] = (uint8_t)sel0; + pWeights[i + 1] = (uint8_t)sel1; + pWeights[i + 2] = (uint8_t)sel2; + pWeights[i + 3] = (uint8_t)sel3; + + /* Accumulate the squared RGBA error of each chosen selector. */ sse += bc7_sse(pPixels[i + 0][0], pPixels[i + 0][1], pPixels[i + 0][2], pPixels[i + 0][3], lr, lg, lb, la, dr, dg, db, da, basist::g_bc7_weights4[sel0]); + sse += bc7_sse(pPixels[i + 1][0], pPixels[i + 1][1], pPixels[i + 1][2], pPixels[i + 1][3], lr, lg, lb, la, dr, dg, db, da, basist::g_bc7_weights4[sel1]); + sse += bc7_sse(pPixels[i + 2][0], pPixels[i + 2][1], pPixels[i + 2][2], pPixels[i + 2][3], lr, lg, lb, la, dr, dg, db, da, basist::g_bc7_weights4[sel2]); + sse += bc7_sse(pPixels[i + 3][0], pPixels[i + 3][1], pPixels[i + 3][2], pPixels[i + 3][3], lr, lg, lb, la, dr, dg, db, da, basist::g_bc7_weights4[sel3]); + } + + return sse; + } + + /* Selector search for a two-subset RGB block with 3-bit weights. Each subset's endpoints are expanded with from_6 using the single p-bit pbits[s] for both its low and high endpoint. Bit i of subset_bitmask selects the subset of pixel i. Each selector is the rounded projection of (pixel - low) onto (high - low), scaled to 0 through 7 and clamped, written to pWeights[i]. */ void eval_weights_mode1_rgb(const color_rgba* pPixels, uint8_t* pWeights, // 3-bits + uint32_t blr[2], uint32_t blg[2], uint32_t blb[2], uint32_t bhr[2], uint32_t bhg[2], uint32_t bhb[2], + uint32_t pbits[2], uint32_t subset_bitmask) + { + int 
lr[2], lg[2], lb[2], hr[2], hg[2], hb[2], dr[2], dg[2], db[2]; + + for (uint32_t s = 0; s < 2; s++) + { + lr[s] = from_6(blr[s], pbits[s]); + lg[s] = from_6(blg[s], pbits[s]); + lb[s] = from_6(blb[s], pbits[s]); + + hr[s] = from_6(bhr[s], pbits[s]); + hg[s] = from_6(bhg[s], pbits[s]); + hb[s] = from_6(bhb[s], pbits[s]); + + dr[s] = hr[s] - lr[s]; + dg[s] = hg[s] - lg[s]; + db[s] = hb[s] - lb[s]; + } + + /* Per-subset scale factors; the small constant keeps the divisor non-zero when a subset's endpoints are identical. */ const float f[2] = + { + 7.0f / (float)(basisu::squarei(dr[0]) + basisu::squarei(dg[0]) + basisu::squarei(db[0]) + .00000125f), + 7.0f / (float)(basisu::squarei(dr[1]) + basisu::squarei(dg[1]) + basisu::squarei(db[1]) + .00000125f) + }; + + /* Per-subset dot(low, delta), subtracted from dot(pixel, delta) in the loop. */ const int sofs[2] = { + lr[0] * dr[0] + lg[0] * dg[0] + lb[0] * db[0], + lr[1] * dr[1] + lg[1] * dg[1] + lb[1] * db[1] }; + + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t subset_index = (subset_bitmask >> i) & 1; + + int sel = (int)((float)( + ((int)pPixels[i][0]) * dr[subset_index] + ((int)pPixels[i][1]) * dg[subset_index] + ((int)pPixels[i][2]) * db[subset_index] - sofs[subset_index]) * f[subset_index] + .5f); + + /* Out-of-range selectors: a negative value becomes 0 and a value above 7 becomes 7. */ if ((uint32_t)sel > 7) + sel = (~sel >> 31) & 7; + + pWeights[i] = (uint8_t)sel; + } + } + + /* Selector search for a two-subset RGB block with 3-bit weights that also returns the summed squared RGB error of the 16 pixels, computed by bc7_sse with basist::g_bc7_weights3[selector] as the interpolation weight. Endpoints are expanded with from_6 using pbits[s]; bit i of subset_bitmask selects the subset of pixel i. */ uint32_t eval_weights_mode1_rgb_sse(const color_rgba* pPixels, uint8_t* pWeights, // 3-bits + uint32_t blr[2], uint32_t blg[2], uint32_t blb[2], uint32_t bhr[2], uint32_t bhg[2], uint32_t bhb[2], + uint32_t pbits[2], uint32_t subset_bitmask) + { + int lr[2], lg[2], lb[2], hr[2], hg[2], hb[2], dr[2], dg[2], db[2]; + + for (uint32_t s = 0; s < 2; s++) + { + lr[s] = from_6(blr[s], pbits[s]); + lg[s] = from_6(blg[s], pbits[s]); + lb[s] = from_6(blb[s], pbits[s]); + + hr[s] = from_6(bhr[s], pbits[s]); + hg[s] = from_6(bhg[s], pbits[s]); + hb[s] = from_6(bhb[s], pbits[s]); + + dr[s] = hr[s] - lr[s]; + dg[s] = hg[s] - lg[s]; + db[s] = hb[s] - lb[s]; + } + + const float f[2] = + { + 7.0f / (float)(basisu::squarei(dr[0]) + basisu::squarei(dg[0]) + basisu::squarei(db[0]) + .00000125f), + 7.0f / (float)(basisu::squarei(dr[1]) + 
basisu::squarei(dg[1]) + basisu::squarei(db[1]) + .00000125f) + }; + + const int sofs[2] = { + lr[0] * dr[0] + lg[0] * dg[0] + lb[0] * db[0], + lr[1] * dr[1] + lg[1] * dg[1] + lb[1] * db[1] }; + + uint32_t sse = 0; + + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t subset_index = (subset_bitmask >> i) & 1; + + int sel = (int)((float)( + ((int)pPixels[i][0]) * dr[subset_index] + ((int)pPixels[i][1]) * dg[subset_index] + ((int)pPixels[i][2]) * db[subset_index] - sofs[subset_index]) * f[subset_index] + .5f); + + /* Out-of-range selectors: a negative value becomes 0 and a value above 7 becomes 7. */ if ((uint32_t)sel > 7) + sel = (~sel >> 31) & 7; + + pWeights[i] = (uint8_t)sel; + + /* Accumulate the squared RGB error of the chosen selector against this pixel's subset endpoints. */ sse += bc7_sse(pPixels[i][0], pPixels[i][1], pPixels[i][2], lr[subset_index], lg[subset_index], lb[subset_index], dr[subset_index], dg[subset_index], db[subset_index], basist::g_bc7_weights3[sel]); + } + + return sse; + } + + /* Selector search for a two-subset RGBA block with 2-bit weights. Each subset's endpoints are expanded with from_5, using pbits[s * 2 + 0] for the low and pbits[s * 2 + 1] for the high endpoint. Bit i of subset_bitmask selects the subset of pixel i. Each selector is the rounded projection of (pixel - low) onto (high - low) over four channels, scaled to 0 through 3 and clamped, written to pWeights[i]. */ void eval_weights_mode7_rgba(const color_rgba* pPixels, uint8_t* pWeights, // 2-bits + uint32_t blr[2], uint32_t blg[2], uint32_t blb[2], uint32_t bla[2], + uint32_t bhr[2], uint32_t bhg[2], uint32_t bhb[2], uint32_t bha[2], + uint32_t pbits[4], uint32_t subset_bitmask) + { + int lr[2], lg[2], lb[2], la[2]; + int hr[2], hg[2], hb[2], ha[2]; + int dr[2], dg[2], db[2], da[2]; + + for (uint32_t s = 0; s < 2; s++) + { + const uint32_t l_pbit = pbits[s * 2 + 0], h_pbit = pbits[s * 2 + 1]; + + lr[s] = from_5(blr[s], l_pbit); + lg[s] = from_5(blg[s], l_pbit); + lb[s] = from_5(blb[s], l_pbit); + la[s] = from_5(bla[s], l_pbit); + + hr[s] = from_5(bhr[s], h_pbit); + hg[s] = from_5(bhg[s], h_pbit); + hb[s] = from_5(bhb[s], h_pbit); + ha[s] = from_5(bha[s], h_pbit); + + dr[s] = hr[s] - lr[s]; + dg[s] = hg[s] - lg[s]; + db[s] = hb[s] - lb[s]; + da[s] = ha[s] - la[s]; + } + + const float f[2] = + { + 3.0f / (float)(basisu::squarei(dr[0]) + basisu::squarei(dg[0]) + basisu::squarei(db[0]) + basisu::squarei(da[0]) + .00000125f), + 3.0f / (float)(basisu::squarei(dr[1]) + basisu::squarei(dg[1]) + basisu::squarei(db[1]) + basisu::squarei(da[1]) + .00000125f) + }; + + 
const int sofs[2] = { + lr[0] * dr[0] + lg[0] * dg[0] + lb[0] * db[0] + la[0] * da[0], + lr[1] * dr[1] + lg[1] * dg[1] + lb[1] * db[1] + la[1] * da[1] }; + + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t subset_index = (subset_bitmask >> i) & 1; + + int sel = (int)((float)( + ((int)pPixels[i][0]) * dr[subset_index] + ((int)pPixels[i][1]) * dg[subset_index] + ((int)pPixels[i][2]) * db[subset_index] + ((int)pPixels[i][3]) * da[subset_index] - sofs[subset_index]) * f[subset_index] + .5f); + + /* Out-of-range selectors: a negative value becomes 0 and a value above 3 becomes 3. */ if ((uint32_t)sel > 3) + sel = (~sel >> 31) & 3; + + pWeights[i] = (uint8_t)sel; + } + } + + /* Selector search for a two-subset RGBA block with 2-bit weights that also returns the summed squared RGBA error of the 16 pixels, computed by bc7_sse with basist::g_bc7_weights2[selector] as the interpolation weight. Endpoints are expanded with from_5, using pbits[s * 2 + 0] for the low and pbits[s * 2 + 1] for the high endpoint; bit i of subset_bitmask selects the subset of pixel i. */ uint32_t eval_weights_mode7_rgba_sse(const color_rgba* pPixels, uint8_t* pWeights, // 2-bits + uint32_t blr[2], uint32_t blg[2], uint32_t blb[2], uint32_t bla[2], + uint32_t bhr[2], uint32_t bhg[2], uint32_t bhb[2], uint32_t bha[2], + uint32_t pbits[4], uint32_t subset_bitmask) + { + int lr[2], lg[2], lb[2], la[2]; + int hr[2], hg[2], hb[2], ha[2]; + int dr[2], dg[2], db[2], da[2]; + + for (uint32_t s = 0; s < 2; s++) + { + const uint32_t l_pbit = pbits[s * 2 + 0], h_pbit = pbits[s * 2 + 1]; + + lr[s] = from_5(blr[s], l_pbit); + lg[s] = from_5(blg[s], l_pbit); + lb[s] = from_5(blb[s], l_pbit); + la[s] = from_5(bla[s], l_pbit); + + hr[s] = from_5(bhr[s], h_pbit); + hg[s] = from_5(bhg[s], h_pbit); + hb[s] = from_5(bhb[s], h_pbit); + ha[s] = from_5(bha[s], h_pbit); + + dr[s] = hr[s] - lr[s]; + dg[s] = hg[s] - lg[s]; + db[s] = hb[s] - lb[s]; + da[s] = ha[s] - la[s]; + } + + /* Per-subset scale factors; the small constant keeps the divisor non-zero when a subset's endpoints are identical. */ const float f[2] = + { + 3.0f / (float)(basisu::squarei(dr[0]) + basisu::squarei(dg[0]) + basisu::squarei(db[0]) + basisu::squarei(da[0]) + .00000125f), + 3.0f / (float)(basisu::squarei(dr[1]) + basisu::squarei(dg[1]) + basisu::squarei(db[1]) + basisu::squarei(da[1]) + .00000125f) + }; + + const int sofs[2] = { + lr[0] * dr[0] + lg[0] * dg[0] + lb[0] * db[0] + la[0] * da[0], + lr[1] * dr[1] + lg[1] * dg[1] + lb[1] * db[1] + la[1] * da[1] }; + + uint32_t sse = 0; + + for (uint32_t i = 0; i < 16; i++) + { + const 
uint32_t subset_index = (subset_bitmask >> i) & 1; + + int sel = (int)((float)( + ((int)pPixels[i][0]) * dr[subset_index] + ((int)pPixels[i][1]) * dg[subset_index] + ((int)pPixels[i][2]) * db[subset_index] + ((int)pPixels[i][3]) * da[subset_index] - sofs[subset_index]) * f[subset_index] + .5f); + + /* Out-of-range selectors: a negative value becomes 0 and a value above 3 becomes 3. */ if ((uint32_t)sel > 3) + sel = (~sel >> 31) & 3; + + pWeights[i] = (uint8_t)sel; + + /* Accumulate the squared RGBA error of the chosen selector against this pixel's subset endpoints. */ sse += bc7_sse(pPixels[i][0], pPixels[i][1], pPixels[i][2], pPixels[i][3], + lr[subset_index], lg[subset_index], lb[subset_index], la[subset_index], + dr[subset_index], dg[subset_index], db[subset_index], da[subset_index], basist::g_bc7_weights2[sel]); + } + + return sse; + } + + /* Selector search for a two-subset RGB block with 2-bit weights. Each subset's endpoints are expanded with from_7, using pbits[s * 2 + 0] for the low and pbits[s * 2 + 1] for the high endpoint. Bit i of subset_bitmask selects the subset of pixel i. Each selector is the rounded projection of (pixel - low) onto (high - low), scaled to 0 through 3 and clamped, written to pWeights[i]. */ void eval_weights_mode3_rgb(const color_rgba* pPixels, uint8_t* pWeights, // 2-bits + uint32_t blr[2], uint32_t blg[2], uint32_t blb[2], uint32_t bhr[2], uint32_t bhg[2], uint32_t bhb[2], + uint32_t pbits[4], uint32_t subset_bitmask) + { + int lr[2], lg[2], lb[2], hr[2], hg[2], hb[2], dr[2], dg[2], db[2]; + + for (uint32_t s = 0; s < 2; s++) + { + lr[s] = from_7(blr[s], pbits[s * 2 + 0]); + lg[s] = from_7(blg[s], pbits[s * 2 + 0]); + lb[s] = from_7(blb[s], pbits[s * 2 + 0]); + + hr[s] = from_7(bhr[s], pbits[s * 2 + 1]); + hg[s] = from_7(bhg[s], pbits[s * 2 + 1]); + hb[s] = from_7(bhb[s], pbits[s * 2 + 1]); + + dr[s] = hr[s] - lr[s]; + dg[s] = hg[s] - lg[s]; + db[s] = hb[s] - lb[s]; + } + + /* Per-subset scale factors; the small constant keeps the divisor non-zero when a subset's endpoints are identical. */ const float f[2] = + { + 3.0f / (float)(basisu::squarei(dr[0]) + basisu::squarei(dg[0]) + basisu::squarei(db[0]) + .00000125f), + 3.0f / (float)(basisu::squarei(dr[1]) + basisu::squarei(dg[1]) + basisu::squarei(db[1]) + .00000125f) + }; + + const int sofs[2] = { + lr[0] * dr[0] + lg[0] * dg[0] + lb[0] * db[0], + lr[1] * dr[1] + lg[1] * dg[1] + lb[1] * db[1] }; + + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t subset_index = (subset_bitmask >> i) & 1; + + int sel = (int)((float)( + ((int)pPixels[i][0]) * dr[subset_index] + ((int)pPixels[i][1]) * dg[subset_index] + ((int)pPixels[i][2]) * db[subset_index] - 
sofs[subset_index]) * f[subset_index] + .5f); + + if ((uint32_t)sel > 3) + sel = (~sel >> 31) & 3; + + pWeights[i] = (uint8_t)sel; + } + } + + /* Selector search for a two-subset RGB block with 2-bit weights that also returns the summed squared RGB error of the 16 pixels, computed by bc7_sse with basist::g_bc7_weights2[selector] as the interpolation weight. Endpoints are expanded with from_7, using pbits[s * 2 + 0] for the low and pbits[s * 2 + 1] for the high endpoint; bit i of subset_bitmask selects the subset of pixel i. */ uint32_t eval_weights_mode3_rgb_sse(const color_rgba* pPixels, uint8_t* pWeights, // 2-bits + uint32_t blr[2], uint32_t blg[2], uint32_t blb[2], uint32_t bhr[2], uint32_t bhg[2], uint32_t bhb[2], + uint32_t pbits[4], uint32_t subset_bitmask) + { + int lr[2], lg[2], lb[2], hr[2], hg[2], hb[2], dr[2], dg[2], db[2]; + + for (uint32_t s = 0; s < 2; s++) + { + lr[s] = from_7(blr[s], pbits[s * 2 + 0]); + lg[s] = from_7(blg[s], pbits[s * 2 + 0]); + lb[s] = from_7(blb[s], pbits[s * 2 + 0]); + + hr[s] = from_7(bhr[s], pbits[s * 2 + 1]); + hg[s] = from_7(bhg[s], pbits[s * 2 + 1]); + hb[s] = from_7(bhb[s], pbits[s * 2 + 1]); + + dr[s] = hr[s] - lr[s]; + dg[s] = hg[s] - lg[s]; + db[s] = hb[s] - lb[s]; + } + + const float f[2] = + { + 3.0f / (float)(basisu::squarei(dr[0]) + basisu::squarei(dg[0]) + basisu::squarei(db[0]) + .00000125f), + 3.0f / (float)(basisu::squarei(dr[1]) + basisu::squarei(dg[1]) + basisu::squarei(db[1]) + .00000125f) + }; + + const int sofs[2] = { + lr[0] * dr[0] + lg[0] * dg[0] + lb[0] * db[0], + lr[1] * dr[1] + lg[1] * dg[1] + lb[1] * db[1] }; + + uint32_t sse = 0; + + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t subset_index = (subset_bitmask >> i) & 1; + + int sel = (int)((float)( + ((int)pPixels[i][0]) * dr[subset_index] + ((int)pPixels[i][1]) * dg[subset_index] + ((int)pPixels[i][2]) * db[subset_index] - sofs[subset_index]) * f[subset_index] + .5f); + + /* Out-of-range selectors: a negative value becomes 0 and a value above 3 becomes 3. */ if ((uint32_t)sel > 3) + sel = (~sel >> 31) & 3; + + pWeights[i] = (uint8_t)sel; + + sse += bc7_sse(pPixels[i][0], pPixels[i][1], pPixels[i][2], lr[subset_index], lg[subset_index], lb[subset_index], dr[subset_index], dg[subset_index], db[subset_index], basist::g_bc7_weights2[sel]); + } + + return sse; + } + + /* Selector search for a three-subset RGB block with 3-bit weights. Each subset's endpoints are expanded with from_4, using pbits[s * 2 + 0] for the low and pbits[s * 2 + 1] for the high endpoint. The subset of pixel i is read from g_bc7_partition3[pat_index * 16 + i]; pat_index is asserted to be at most 15. Each selector is the rounded projection of (pixel - low) onto (high - low), scaled to 0 through 7 and clamped, written to pWeights[i]. */ void eval_weights_mode0_rgb(const color_rgba* pPixels, uint8_t* pWeights, // 3-bits + uint32_t blr[3], uint32_t blg[3], uint32_t blb[3], + 
uint32_t bhr[3], uint32_t bhg[3], uint32_t bhb[3], + uint32_t pbits[6], + uint32_t pat_index) + { + assert(pat_index <= 15); + int lr[3], lg[3], lb[3], hr[3], hg[3], hb[3], dr[3], dg[3], db[3]; + + for (uint32_t s = 0; s < 3; s++) + { + lr[s] = from_4(blr[s], pbits[s * 2 + 0]); + lg[s] = from_4(blg[s], pbits[s * 2 + 0]); + lb[s] = from_4(blb[s], pbits[s * 2 + 0]); + + hr[s] = from_4(bhr[s], pbits[s * 2 + 1]); + hg[s] = from_4(bhg[s], pbits[s * 2 + 1]); + hb[s] = from_4(bhb[s], pbits[s * 2 + 1]); + + dr[s] = hr[s] - lr[s]; + dg[s] = hg[s] - lg[s]; + db[s] = hb[s] - lb[s]; + } + + /* Per-subset scale factors; the small constant keeps the divisor non-zero when a subset's endpoints are identical. */ const float f[3] = + { + 7.0f / (float)(basisu::squarei(dr[0]) + basisu::squarei(dg[0]) + basisu::squarei(db[0]) + .00000125f), + 7.0f / (float)(basisu::squarei(dr[1]) + basisu::squarei(dg[1]) + basisu::squarei(db[1]) + .00000125f), + 7.0f / (float)(basisu::squarei(dr[2]) + basisu::squarei(dg[2]) + basisu::squarei(db[2]) + .00000125f) + }; + + /* Per-subset dot(low, delta), subtracted from dot(pixel, delta) in the loop. */ const int sofs[3] = { + lr[0] * dr[0] + lg[0] * dg[0] + lb[0] * db[0], + lr[1] * dr[1] + lg[1] * dg[1] + lb[1] * db[1], + lr[2] * dr[2] + lg[2] * dg[2] + lb[2] * db[2] }; + + /* 16 subset indices for this pattern, one per pixel. */ const uint8_t* pPart_map = &g_bc7_partition3[pat_index * 16]; + + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t subset_index = pPart_map[i]; + + int sel = (int)((float)( + (int)pPixels[i][0] * dr[subset_index] + (int)pPixels[i][1] * dg[subset_index] + (int)pPixels[i][2] * db[subset_index] - sofs[subset_index]) * f[subset_index] + .5f); + + /* Out-of-range selectors: a negative value becomes 0 and a value above 7 becomes 7. */ if ((uint32_t)sel > 7) + sel = (~sel >> 31) & 7; + + pWeights[i] = (uint8_t)sel; + } + } + + /* Selector search for a three-subset RGB block with 3-bit weights that also returns the summed squared RGB error of the 16 pixels, computed by bc7_sse with basist::g_bc7_weights3[selector] as the interpolation weight. Endpoints are expanded with from_4 and per-endpoint p-bits; subsets come from g_bc7_partition3[pat_index * 16 + i], with pat_index asserted to be at most 15. */ uint32_t eval_weights_mode0_rgb_sse(const color_rgba* pPixels, uint8_t* pWeights, // 3-bits + uint32_t blr[3], uint32_t blg[3], uint32_t blb[3], + uint32_t bhr[3], uint32_t bhg[3], uint32_t bhb[3], + uint32_t pbits[6], + uint32_t pat_index) + { + assert(pat_index <= 15); + int lr[3], lg[3], lb[3], hr[3], hg[3], hb[3], dr[3], dg[3], db[3]; + + for (uint32_t s = 0; s < 3; s++) + { + lr[s] = from_4(blr[s], pbits[s * 2 + 0]); + lg[s] = 
from_4(blg[s], pbits[s * 2 + 0]); + lb[s] = from_4(blb[s], pbits[s * 2 + 0]); + + hr[s] = from_4(bhr[s], pbits[s * 2 + 1]); + hg[s] = from_4(bhg[s], pbits[s * 2 + 1]); + hb[s] = from_4(bhb[s], pbits[s * 2 + 1]); + + dr[s] = hr[s] - lr[s]; + dg[s] = hg[s] - lg[s]; + db[s] = hb[s] - lb[s]; + } + + const float f[3] = + { + 7.0f / (float)(basisu::squarei(dr[0]) + basisu::squarei(dg[0]) + basisu::squarei(db[0]) + .00000125f), + 7.0f / (float)(basisu::squarei(dr[1]) + basisu::squarei(dg[1]) + basisu::squarei(db[1]) + .00000125f), + 7.0f / (float)(basisu::squarei(dr[2]) + basisu::squarei(dg[2]) + basisu::squarei(db[2]) + .00000125f) + }; + + const int sofs[3] = { + lr[0] * dr[0] + lg[0] * dg[0] + lb[0] * db[0], + lr[1] * dr[1] + lg[1] * dg[1] + lb[1] * db[1], + lr[2] * dr[2] + lg[2] * dg[2] + lb[2] * db[2] }; + + const uint8_t* pPart_map = &g_bc7_partition3[pat_index * 16]; + + uint32_t sse = 0; + + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t subset_index = pPart_map[i]; + + int sel = (int)((float)( + (int)pPixels[i][0] * dr[subset_index] + (int)pPixels[i][1] * dg[subset_index] + (int)pPixels[i][2] * db[subset_index] - sofs[subset_index]) * f[subset_index] + .5f); + + /* Out-of-range selectors: a negative value becomes 0 and a value above 7 becomes 7. */ if ((uint32_t)sel > 7) + sel = (~sel >> 31) & 7; + + pWeights[i] = (uint8_t)sel; + + /* Accumulate the squared RGB error of the chosen selector against this pixel's subset endpoints. */ sse += bc7_sse(pPixels[i][0], pPixels[i][1], pPixels[i][2], + lr[subset_index], lg[subset_index], lb[subset_index], + dr[subset_index], dg[subset_index], db[subset_index], basist::g_bc7_weights3[sel]); + } + + return sse; + } + + /* Selector search for a three-subset RGB block with 2-bit weights. Endpoints are expanded with the single-argument from_5 (no p-bit is passed). The subset of pixel i is read from g_bc7_partition3[pat_index * 16 + i]; NOTE(review): unlike the from_4 based routine, pat_index is not range-checked here, so the caller must keep it within the table. Each selector is the rounded projection of (pixel - low) onto (high - low), scaled to 0 through 3 and clamped, written to pWeights[i]. */ void eval_weights_mode2_rgb(const color_rgba* pPixels, uint8_t* pWeights, // 2-bits + uint32_t blr[3], uint32_t blg[3], uint32_t blb[3], uint32_t bhr[3], uint32_t bhg[3], uint32_t bhb[3], + uint32_t pat_index) + { + int lr[3], lg[3], lb[3], hr[3], hg[3], hb[3], dr[3], dg[3], db[3]; + + for (uint32_t s = 0; s < 3; s++) + { + lr[s] = from_5(blr[s]); + lg[s] = from_5(blg[s]); + lb[s] = from_5(blb[s]); + + hr[s] = from_5(bhr[s]); + hg[s] = from_5(bhg[s]); + hb[s] = from_5(bhb[s]); + + dr[s] = 
hr[s] - lr[s]; + dg[s] = hg[s] - lg[s]; + db[s] = hb[s] - lb[s]; + } + + /* Per-subset scale factors; the small constant keeps the divisor non-zero when a subset's endpoints are identical. */ const float f[3] = + { + 3.0f / (float)(basisu::squarei(dr[0]) + basisu::squarei(dg[0]) + basisu::squarei(db[0]) + .00000125f), + 3.0f / (float)(basisu::squarei(dr[1]) + basisu::squarei(dg[1]) + basisu::squarei(db[1]) + .00000125f), + 3.0f / (float)(basisu::squarei(dr[2]) + basisu::squarei(dg[2]) + basisu::squarei(db[2]) + .00000125f) + }; + + /* Per-subset dot(low, delta), subtracted from dot(pixel, delta) in the loop. */ const int sofs[3] = { + lr[0] * dr[0] + lg[0] * dg[0] + lb[0] * db[0], + lr[1] * dr[1] + lg[1] * dg[1] + lb[1] * db[1], + lr[2] * dr[2] + lg[2] * dg[2] + lb[2] * db[2] }; + + /* 16 subset indices for this pattern, one per pixel. */ const uint8_t* pPart_map = &g_bc7_partition3[pat_index * 16]; + + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t subset_index = pPart_map[i]; + + int sel = (int)((float)( + (int)pPixels[i][0] * dr[subset_index] + (int)pPixels[i][1] * dg[subset_index] + (int)pPixels[i][2] * db[subset_index] - sofs[subset_index]) * f[subset_index] + .5f); + + /* Out-of-range selectors: a negative value becomes 0 and a value above 3 becomes 3. */ if ((uint32_t)sel > 3) + sel = (~sel >> 31) & 3; + + pWeights[i] = (uint8_t)sel; + } + } + + /* Selector search for a three-subset RGB block with 2-bit weights that also returns the summed squared RGB error of the 16 pixels, computed by bc7_sse with basist::g_bc7_weights2[selector] as the interpolation weight. Endpoints are expanded with the single-argument from_5; subsets come from g_bc7_partition3[pat_index * 16 + i], and pat_index is not range-checked here. */ uint32_t eval_weights_mode2_rgb_sse(const color_rgba* pPixels, uint8_t* pWeights, // 2-bits + uint32_t blr[3], uint32_t blg[3], uint32_t blb[3], uint32_t bhr[3], uint32_t bhg[3], uint32_t bhb[3], + uint32_t pat_index) + { + int lr[3], lg[3], lb[3], hr[3], hg[3], hb[3], dr[3], dg[3], db[3]; + + for (uint32_t s = 0; s < 3; s++) + { + lr[s] = from_5(blr[s]); + lg[s] = from_5(blg[s]); + lb[s] = from_5(blb[s]); + + hr[s] = from_5(bhr[s]); + hg[s] = from_5(bhg[s]); + hb[s] = from_5(bhb[s]); + + dr[s] = hr[s] - lr[s]; + dg[s] = hg[s] - lg[s]; + db[s] = hb[s] - lb[s]; + } + + const float f[3] = + { + 3.0f / (float)(basisu::squarei(dr[0]) + basisu::squarei(dg[0]) + basisu::squarei(db[0]) + .00000125f), + 3.0f / (float)(basisu::squarei(dr[1]) + basisu::squarei(dg[1]) + basisu::squarei(db[1]) + .00000125f), + 3.0f / (float)(basisu::squarei(dr[2]) + basisu::squarei(dg[2]) + basisu::squarei(db[2]) + .00000125f) + }; + + const int sofs[3] = { + lr[0] * dr[0] + 
lg[0] * dg[0] + lb[0] * db[0], + lr[1] * dr[1] + lg[1] * dg[1] + lb[1] * db[1], + lr[2] * dr[2] + lg[2] * dg[2] + lb[2] * db[2] }; + + const uint8_t* pPart_map = &g_bc7_partition3[pat_index * 16]; + + uint32_t sse = 0; + + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t subset_index = pPart_map[i]; + + int sel = (int)((float)( + (int)pPixels[i][0] * dr[subset_index] + (int)pPixels[i][1] * dg[subset_index] + (int)pPixels[i][2] * db[subset_index] - sofs[subset_index]) * f[subset_index] + .5f); + + /* Out-of-range selectors: a negative value becomes 0 and a value above 3 becomes 3. */ if ((uint32_t)sel > 3) + sel = (~sel >> 31) & 3; + + pWeights[i] = (uint8_t)sel; + + /* Accumulate the squared RGB error of the chosen selector against this pixel's subset endpoints. */ sse += bc7_sse(pPixels[i][0], pPixels[i][1], pPixels[i][2], + lr[subset_index], lg[subset_index], lb[subset_index], + dr[subset_index], dg[subset_index], db[subset_index], basist::g_bc7_weights2[sel]); + } + + return sse; + } + + /* Selector search for one RGB endpoint pair with 3-bit weights. Endpoints are expanded with the single-argument from_5 (no p-bit is passed). Each of the 16 pixels gets the rounded projection of (pixel - low) onto (high - low), scaled to 0 through 7 and clamped, written to pWeights0[i]. Alpha is not read. */ void eval_weights_mode4_3bit_rgb(const color_rgba* pPixels, uint8_t* pWeights0, // 3-bits + int lr, int lg, int lb, + int hr, int hg, int hb) + { + lr = from_5(lr); lg = from_5(lg); lb = from_5(lb); + hr = from_5(hr); hg = from_5(hg); hb = from_5(hb); + + int dr = hr - lr; + int dg = hg - lg; + int db = hb - lb; + + /* The small constant keeps the divisor non-zero when both endpoints are identical. */ const float f = 7.0f / (float)(basisu::squarei(dr) + basisu::squarei(dg) + basisu::squarei(db) + .00000125f); + + /* dot(low, delta), subtracted from dot(pixel, delta) in the loop. */ const int sofs = lr * dr + lg * dg + lb * db; + + for (uint32_t i = 0; i < 16; i += 4) + { + int sel0 = (int)((float)((int)pPixels[i + 0][0] * dr + (int)pPixels[i + 0][1] * dg + (int)pPixels[i + 0][2] * db - sofs) * f + .5f); + int sel1 = (int)((float)((int)pPixels[i + 1][0] * dr + (int)pPixels[i + 1][1] * dg + (int)pPixels[i + 1][2] * db - sofs) * f + .5f); + int sel2 = (int)((float)((int)pPixels[i + 2][0] * dr + (int)pPixels[i + 2][1] * dg + (int)pPixels[i + 2][2] * db - sofs) * f + .5f); + int sel3 = (int)((float)((int)pPixels[i + 3][0] * dr + (int)pPixels[i + 3][1] * dg + (int)pPixels[i + 3][2] * db - sofs) * f + .5f); + + /* Out-of-range selectors: a negative value becomes 0 and a value above 7 becomes 7. */ if ((uint32_t)sel0 > 7) sel0 = (~sel0 >> 31) & 7; + if ((uint32_t)sel1 > 7) sel1 = (~sel1 >> 31) & 7; + if 
((uint32_t)sel2 > 7) sel2 = (~sel2 >> 31) & 7; + if ((uint32_t)sel3 > 7) sel3 = (~sel3 >> 31) & 7; + + pWeights0[i + 0] = (uint8_t)sel0; + pWeights0[i + 1] = (uint8_t)sel1; + pWeights0[i + 2] = (uint8_t)sel2; + pWeights0[i + 3] = (uint8_t)sel3; + } + } + + uint32_t eval_weights_mode4_3bit_rgb_sse(const color_rgba* pPixels, uint8_t* pWeights0, // 3-bits + int lr, int lg, int lb, + int hr, int hg, int hb) + { + lr = from_5(lr); lg = from_5(lg); lb = from_5(lb); + hr = from_5(hr); hg = from_5(hg); hb = from_5(hb); + + int dr = hr - lr; + int dg = hg - lg; + int db = hb - lb; + + const float f = 7.0f / (float)(basisu::squarei(dr) + basisu::squarei(dg) + basisu::squarei(db) + .00000125f); + + const int sofs = lr * dr + lg * dg + lb * db; + + uint32_t sse = 0; + + for (uint32_t i = 0; i < 16; i += 4) + { + int sel0 = (int)((float)((int)pPixels[i + 0][0] * dr + (int)pPixels[i + 0][1] * dg + (int)pPixels[i + 0][2] * db - sofs) * f + .5f); + int sel1 = (int)((float)((int)pPixels[i + 1][0] * dr + (int)pPixels[i + 1][1] * dg + (int)pPixels[i + 1][2] * db - sofs) * f + .5f); + int sel2 = (int)((float)((int)pPixels[i + 2][0] * dr + (int)pPixels[i + 2][1] * dg + (int)pPixels[i + 2][2] * db - sofs) * f + .5f); + int sel3 = (int)((float)((int)pPixels[i + 3][0] * dr + (int)pPixels[i + 3][1] * dg + (int)pPixels[i + 3][2] * db - sofs) * f + .5f); + + if ((uint32_t)sel0 > 7) sel0 = (~sel0 >> 31) & 7; + if ((uint32_t)sel1 > 7) sel1 = (~sel1 >> 31) & 7; + if ((uint32_t)sel2 > 7) sel2 = (~sel2 >> 31) & 7; + if ((uint32_t)sel3 > 7) sel3 = (~sel3 >> 31) & 7; + + pWeights0[i + 0] = (uint8_t)sel0; + pWeights0[i + 1] = (uint8_t)sel1; + pWeights0[i + 2] = (uint8_t)sel2; + pWeights0[i + 3] = (uint8_t)sel3; + + sse += bc7_sse(pPixels[i + 0][0], pPixels[i + 0][1], pPixels[i + 0][2], lr, lg, lb, dr, dg, db, basist::g_bc7_weights3[sel0]); + sse += bc7_sse(pPixels[i + 1][0], pPixels[i + 1][1], pPixels[i + 1][2], lr, lg, lb, dr, dg, db, basist::g_bc7_weights3[sel1]); + sse += bc7_sse(pPixels[i + 
2][0], pPixels[i + 2][1], pPixels[i + 2][2], lr, lg, lb, dr, dg, db, basist::g_bc7_weights3[sel2]); + sse += bc7_sse(pPixels[i + 3][0], pPixels[i + 3][1], pPixels[i + 3][2], lr, lg, lb, dr, dg, db, basist::g_bc7_weights3[sel3]); + } + + return sse; + } + + void eval_weights_mode4_2bit_rgb(const color_rgba* pPixels, uint8_t* pWeights0, // 2-bits + int lr, int lg, int lb, + int hr, int hg, int hb) + { + lr = from_5(lr); lg = from_5(lg); lb = from_5(lb); + hr = from_5(hr); hg = from_5(hg); hb = from_5(hb); + + int dr = hr - lr; + int dg = hg - lg; + int db = hb - lb; + + const float f = 3.0f / (float)(basisu::squarei(dr) + basisu::squarei(dg) + basisu::squarei(db) + .00000125f); + + const int sofs = lr * dr + lg * dg + lb * db; + + for (uint32_t i = 0; i < 16; i += 4) + { + int sel0 = (int)((float)((int)pPixels[i + 0][0] * dr + (int)pPixels[i + 0][1] * dg + (int)pPixels[i + 0][2] * db - sofs) * f + .5f); + int sel1 = (int)((float)((int)pPixels[i + 1][0] * dr + (int)pPixels[i + 1][1] * dg + (int)pPixels[i + 1][2] * db - sofs) * f + .5f); + int sel2 = (int)((float)((int)pPixels[i + 2][0] * dr + (int)pPixels[i + 2][1] * dg + (int)pPixels[i + 2][2] * db - sofs) * f + .5f); + int sel3 = (int)((float)((int)pPixels[i + 3][0] * dr + (int)pPixels[i + 3][1] * dg + (int)pPixels[i + 3][2] * db - sofs) * f + .5f); + + if ((uint32_t)sel0 > 3) sel0 = (~sel0 >> 31) & 3; + if ((uint32_t)sel1 > 3) sel1 = (~sel1 >> 31) & 3; + if ((uint32_t)sel2 > 3) sel2 = (~sel2 >> 31) & 3; + if ((uint32_t)sel3 > 3) sel3 = (~sel3 >> 31) & 3; + + pWeights0[i + 0] = (uint8_t)sel0; + pWeights0[i + 1] = (uint8_t)sel1; + pWeights0[i + 2] = (uint8_t)sel2; + pWeights0[i + 3] = (uint8_t)sel3; + } + } + + uint32_t eval_weights_mode4_2bit_rgb_sse(const color_rgba* pPixels, uint8_t* pWeights0, // 2-bits + int lr, int lg, int lb, + int hr, int hg, int hb) + { + lr = from_5(lr); lg = from_5(lg); lb = from_5(lb); + hr = from_5(hr); hg = from_5(hg); hb = from_5(hb); + + int dr = hr - lr; + int dg = hg - lg; + int db 
= hb - lb; + + const float f = 3.0f / (float)(basisu::squarei(dr) + basisu::squarei(dg) + basisu::squarei(db) + .00000125f); + + const int sofs = lr * dr + lg * dg + lb * db; + + uint32_t sse = 0; + + for (uint32_t i = 0; i < 16; i += 4) + { + int sel0 = (int)((float)((int)pPixels[i + 0][0] * dr + (int)pPixels[i + 0][1] * dg + (int)pPixels[i + 0][2] * db - sofs) * f + .5f); + int sel1 = (int)((float)((int)pPixels[i + 1][0] * dr + (int)pPixels[i + 1][1] * dg + (int)pPixels[i + 1][2] * db - sofs) * f + .5f); + int sel2 = (int)((float)((int)pPixels[i + 2][0] * dr + (int)pPixels[i + 2][1] * dg + (int)pPixels[i + 2][2] * db - sofs) * f + .5f); + int sel3 = (int)((float)((int)pPixels[i + 3][0] * dr + (int)pPixels[i + 3][1] * dg + (int)pPixels[i + 3][2] * db - sofs) * f + .5f); + + if ((uint32_t)sel0 > 3) sel0 = (~sel0 >> 31) & 3; + if ((uint32_t)sel1 > 3) sel1 = (~sel1 >> 31) & 3; + if ((uint32_t)sel2 > 3) sel2 = (~sel2 >> 31) & 3; + if ((uint32_t)sel3 > 3) sel3 = (~sel3 >> 31) & 3; + + pWeights0[i + 0] = (uint8_t)sel0; + pWeights0[i + 1] = (uint8_t)sel1; + pWeights0[i + 2] = (uint8_t)sel2; + pWeights0[i + 3] = (uint8_t)sel3; + + sse += bc7_sse(pPixels[i + 0][0], pPixels[i + 0][1], pPixels[i + 0][2], lr, lg, lb, dr, dg, db, basist::g_bc7_weights2[sel0]); + sse += bc7_sse(pPixels[i + 1][0], pPixels[i + 1][1], pPixels[i + 1][2], lr, lg, lb, dr, dg, db, basist::g_bc7_weights2[sel1]); + sse += bc7_sse(pPixels[i + 2][0], pPixels[i + 2][1], pPixels[i + 2][2], lr, lg, lb, dr, dg, db, basist::g_bc7_weights2[sel2]); + sse += bc7_sse(pPixels[i + 3][0], pPixels[i + 3][1], pPixels[i + 3][2], lr, lg, lb, dr, dg, db, basist::g_bc7_weights2[sel3]); + } + + return sse; + } + + void eval_weights_mode4_2bit_a(const color_rgba* pPixels, uint8_t* pWeights1, // 2-bits + int la, int ha) + { + la = from_6(la); + ha = from_6(ha); + + int da = ha - la; + + const float f = 3.0f / (float)(da + .00000125f); + + for (uint32_t i = 0; i < 16; i += 4) + { + int sel0 = (int)((float)(pPixels[i + 0][3] - 
la) * f + .5f); + int sel1 = (int)((float)(pPixels[i + 1][3] - la) * f + .5f); + int sel2 = (int)((float)(pPixels[i + 2][3] - la) * f + .5f); + int sel3 = (int)((float)(pPixels[i + 3][3] - la) * f + .5f); + + if ((uint32_t)sel0 > 3) sel0 = (~sel0 >> 31) & 3; + if ((uint32_t)sel1 > 3) sel1 = (~sel1 >> 31) & 3; + if ((uint32_t)sel2 > 3) sel2 = (~sel2 >> 31) & 3; + if ((uint32_t)sel3 > 3) sel3 = (~sel3 >> 31) & 3; + + pWeights1[i + 0] = (uint8_t)sel0; + pWeights1[i + 1] = (uint8_t)sel1; + pWeights1[i + 2] = (uint8_t)sel2; + pWeights1[i + 3] = (uint8_t)sel3; + } + } + + uint32_t eval_weights_mode4_2bit_a_sse(const color_rgba* pPixels, uint8_t* pWeights1, // 2-bits + int la, int ha) + { + la = from_6(la); + ha = from_6(ha); + + int da = ha - la; + + const float f = 3.0f / (float)(da + .00000125f); + + uint32_t sse = 0; + + for (uint32_t i = 0; i < 16; i += 4) + { + int sel0 = (int)((float)(pPixels[i + 0][3] - la) * f + .5f); + int sel1 = (int)((float)(pPixels[i + 1][3] - la) * f + .5f); + int sel2 = (int)((float)(pPixels[i + 2][3] - la) * f + .5f); + int sel3 = (int)((float)(pPixels[i + 3][3] - la) * f + .5f); + + if ((uint32_t)sel0 > 3) sel0 = (~sel0 >> 31) & 3; + if ((uint32_t)sel1 > 3) sel1 = (~sel1 >> 31) & 3; + if ((uint32_t)sel2 > 3) sel2 = (~sel2 >> 31) & 3; + if ((uint32_t)sel3 > 3) sel3 = (~sel3 >> 31) & 3; + + pWeights1[i + 0] = (uint8_t)sel0; + pWeights1[i + 1] = (uint8_t)sel1; + pWeights1[i + 2] = (uint8_t)sel2; + pWeights1[i + 3] = (uint8_t)sel3; + + sse += bc7_sse(pPixels[i + 0][3], la, da, basist::g_bc7_weights2[sel0]); + sse += bc7_sse(pPixels[i + 1][3], la, da, basist::g_bc7_weights2[sel1]); + sse += bc7_sse(pPixels[i + 2][3], la, da, basist::g_bc7_weights2[sel2]); + sse += bc7_sse(pPixels[i + 3][3], la, da, basist::g_bc7_weights2[sel3]); + } + + return sse; + } + + void eval_weights_mode4_3bit_a(const color_rgba* pPixels, uint8_t* pWeights1, // 3-bits + int la, int ha) + { + la = from_6(la); + ha = from_6(ha); + + int da = ha - la; + + const float f = 
7.0f / (float)(da + .00000125f); + + for (uint32_t i = 0; i < 16; i += 4) + { + int sel0 = (int)((float)(pPixels[i + 0][3] - la) * f + .5f); + int sel1 = (int)((float)(pPixels[i + 1][3] - la) * f + .5f); + int sel2 = (int)((float)(pPixels[i + 2][3] - la) * f + .5f); + int sel3 = (int)((float)(pPixels[i + 3][3] - la) * f + .5f); + + if ((uint32_t)sel0 > 7) sel0 = (~sel0 >> 31) & 7; + if ((uint32_t)sel1 > 7) sel1 = (~sel1 >> 31) & 7; + if ((uint32_t)sel2 > 7) sel2 = (~sel2 >> 31) & 7; + if ((uint32_t)sel3 > 7) sel3 = (~sel3 >> 31) & 7; + + pWeights1[i + 0] = (uint8_t)sel0; + pWeights1[i + 1] = (uint8_t)sel1; + pWeights1[i + 2] = (uint8_t)sel2; + pWeights1[i + 3] = (uint8_t)sel3; + } + } + + uint32_t eval_weights_mode4_3bit_a_sse(const color_rgba* pPixels, uint8_t* pWeights1, // 3-bits + int la, int ha) + { + la = from_6(la); + ha = from_6(ha); + + int da = ha - la; + + const float f = 7.0f / (float)(da + .00000125f); + + uint32_t sse = 0; + + for (uint32_t i = 0; i < 16; i += 4) + { + int sel0 = (int)((float)(pPixels[i + 0][3] - la) * f + .5f); + int sel1 = (int)((float)(pPixels[i + 1][3] - la) * f + .5f); + int sel2 = (int)((float)(pPixels[i + 2][3] - la) * f + .5f); + int sel3 = (int)((float)(pPixels[i + 3][3] - la) * f + .5f); + + if ((uint32_t)sel0 > 7) sel0 = (~sel0 >> 31) & 7; + if ((uint32_t)sel1 > 7) sel1 = (~sel1 >> 31) & 7; + if ((uint32_t)sel2 > 7) sel2 = (~sel2 >> 31) & 7; + if ((uint32_t)sel3 > 7) sel3 = (~sel3 >> 31) & 7; + + pWeights1[i + 0] = (uint8_t)sel0; + pWeights1[i + 1] = (uint8_t)sel1; + pWeights1[i + 2] = (uint8_t)sel2; + pWeights1[i + 3] = (uint8_t)sel3; + + sse += bc7_sse(pPixels[i + 0][3], la, da, basist::g_bc7_weights3[sel0]); + sse += bc7_sse(pPixels[i + 1][3], la, da, basist::g_bc7_weights3[sel1]); + sse += bc7_sse(pPixels[i + 2][3], la, da, basist::g_bc7_weights3[sel2]); + sse += bc7_sse(pPixels[i + 3][3], la, da, basist::g_bc7_weights3[sel3]); + } + + return sse; + } + + void eval_weights_mode5_2bit_rgb(const color_rgba* pPixels, 
uint8_t* pWeights0, // 2-bits + int lr, int lg, int lb, + int hr, int hg, int hb) + { + lr = from_7(lr); lg = from_7(lg); lb = from_7(lb); + hr = from_7(hr); hg = from_7(hg); hb = from_7(hb); + + int dr = hr - lr; + int dg = hg - lg; + int db = hb - lb; + + const float f = 3.0f / (float)(basisu::squarei(dr) + basisu::squarei(dg) + basisu::squarei(db) + .00000125f); + + const int sofs = lr * dr + lg * dg + lb * db; + + for (uint32_t i = 0; i < 16; i += 4) + { + int sel0 = (int)((float)((int)pPixels[i + 0][0] * dr + (int)pPixels[i + 0][1] * dg + (int)pPixels[i + 0][2] * db - sofs) * f + .5f); + int sel1 = (int)((float)((int)pPixels[i + 1][0] * dr + (int)pPixels[i + 1][1] * dg + (int)pPixels[i + 1][2] * db - sofs) * f + .5f); + int sel2 = (int)((float)((int)pPixels[i + 2][0] * dr + (int)pPixels[i + 2][1] * dg + (int)pPixels[i + 2][2] * db - sofs) * f + .5f); + int sel3 = (int)((float)((int)pPixels[i + 3][0] * dr + (int)pPixels[i + 3][1] * dg + (int)pPixels[i + 3][2] * db - sofs) * f + .5f); + + if ((uint32_t)sel0 > 3) sel0 = (~sel0 >> 31) & 3; + if ((uint32_t)sel1 > 3) sel1 = (~sel1 >> 31) & 3; + if ((uint32_t)sel2 > 3) sel2 = (~sel2 >> 31) & 3; + if ((uint32_t)sel3 > 3) sel3 = (~sel3 >> 31) & 3; + + pWeights0[i + 0] = (uint8_t)sel0; + pWeights0[i + 1] = (uint8_t)sel1; + pWeights0[i + 2] = (uint8_t)sel2; + pWeights0[i + 3] = (uint8_t)sel3; + } + } + + uint32_t eval_weights_mode5_2bit_rgb_sse(const color_rgba* pPixels, uint8_t* pWeights0, // 2-bits + int lr, int lg, int lb, + int hr, int hg, int hb) + { + lr = from_7(lr); lg = from_7(lg); lb = from_7(lb); + hr = from_7(hr); hg = from_7(hg); hb = from_7(hb); + + int dr = hr - lr; + int dg = hg - lg; + int db = hb - lb; + + const float f = 3.0f / (float)(basisu::squarei(dr) + basisu::squarei(dg) + basisu::squarei(db) + .00000125f); + + const int sofs = lr * dr + lg * dg + lb * db; + + uint32_t sse = 0; + + for (uint32_t i = 0; i < 16; i += 4) + { + int sel0 = (int)((float)((int)pPixels[i + 0][0] * dr + (int)pPixels[i + 
0][1] * dg + (int)pPixels[i + 0][2] * db - sofs) * f + .5f); + int sel1 = (int)((float)((int)pPixels[i + 1][0] * dr + (int)pPixels[i + 1][1] * dg + (int)pPixels[i + 1][2] * db - sofs) * f + .5f); + int sel2 = (int)((float)((int)pPixels[i + 2][0] * dr + (int)pPixels[i + 2][1] * dg + (int)pPixels[i + 2][2] * db - sofs) * f + .5f); + int sel3 = (int)((float)((int)pPixels[i + 3][0] * dr + (int)pPixels[i + 3][1] * dg + (int)pPixels[i + 3][2] * db - sofs) * f + .5f); + + if ((uint32_t)sel0 > 3) sel0 = (~sel0 >> 31) & 3; + if ((uint32_t)sel1 > 3) sel1 = (~sel1 >> 31) & 3; + if ((uint32_t)sel2 > 3) sel2 = (~sel2 >> 31) & 3; + if ((uint32_t)sel3 > 3) sel3 = (~sel3 >> 31) & 3; + + pWeights0[i + 0] = (uint8_t)sel0; + pWeights0[i + 1] = (uint8_t)sel1; + pWeights0[i + 2] = (uint8_t)sel2; + pWeights0[i + 3] = (uint8_t)sel3; + + sse += bc7_sse(pPixels[i + 0][0], pPixels[i + 0][1], pPixels[i + 0][2], lr, lg, lb, dr, dg, db, basist::g_bc7_weights2[sel0]); + sse += bc7_sse(pPixels[i + 1][0], pPixels[i + 1][1], pPixels[i + 1][2], lr, lg, lb, dr, dg, db, basist::g_bc7_weights2[sel1]); + sse += bc7_sse(pPixels[i + 2][0], pPixels[i + 2][1], pPixels[i + 2][2], lr, lg, lb, dr, dg, db, basist::g_bc7_weights2[sel2]); + sse += bc7_sse(pPixels[i + 3][0], pPixels[i + 3][1], pPixels[i + 3][2], lr, lg, lb, dr, dg, db, basist::g_bc7_weights2[sel3]); + } + + return sse; + } + + void eval_weights_mode5_2bit_a(const color_rgba* pPixels, uint8_t* pWeights1, // 2-bits + int la, int ha) + { + int da = ha - la; + + const float f = 3.0f / (float)(da + .00000125f); + + for (uint32_t i = 0; i < 16; i += 4) + { + int sel0 = (int)((float)(pPixels[i + 0][3] - la) * f + .5f); + int sel1 = (int)((float)(pPixels[i + 1][3] - la) * f + .5f); + int sel2 = (int)((float)(pPixels[i + 2][3] - la) * f + .5f); + int sel3 = (int)((float)(pPixels[i + 3][3] - la) * f + .5f); + + if ((uint32_t)sel0 > 3) sel0 = (~sel0 >> 31) & 3; + if ((uint32_t)sel1 > 3) sel1 = (~sel1 >> 31) & 3; + if ((uint32_t)sel2 > 3) sel2 = (~sel2 >> 
31) & 3; + if ((uint32_t)sel3 > 3) sel3 = (~sel3 >> 31) & 3; + + pWeights1[i + 0] = (uint8_t)sel0; + pWeights1[i + 1] = (uint8_t)sel1; + pWeights1[i + 2] = (uint8_t)sel2; + pWeights1[i + 3] = (uint8_t)sel3; + } + } + + uint32_t eval_weights_mode5_2bit_a_sse(const color_rgba* pPixels, uint8_t* pWeights1, // 2-bits + int la, int ha) + { + int da = ha - la; + + const float f = 3.0f / (float)(da + .00000125f); + + uint32_t sse = 0; + + for (uint32_t i = 0; i < 16; i += 4) + { + int sel0 = (int)((float)(pPixels[i + 0][3] - la) * f + .5f); + int sel1 = (int)((float)(pPixels[i + 1][3] - la) * f + .5f); + int sel2 = (int)((float)(pPixels[i + 2][3] - la) * f + .5f); + int sel3 = (int)((float)(pPixels[i + 3][3] - la) * f + .5f); + + if ((uint32_t)sel0 > 3) sel0 = (~sel0 >> 31) & 3; + if ((uint32_t)sel1 > 3) sel1 = (~sel1 >> 31) & 3; + if ((uint32_t)sel2 > 3) sel2 = (~sel2 >> 31) & 3; + if ((uint32_t)sel3 > 3) sel3 = (~sel3 >> 31) & 3; + + pWeights1[i + 0] = (uint8_t)sel0; + pWeights1[i + 1] = (uint8_t)sel1; + pWeights1[i + 2] = (uint8_t)sel2; + pWeights1[i + 3] = (uint8_t)sel3; + + sse += bc7_sse(pPixels[i + 0][3], la, da, basist::g_bc7_weights2[sel0]); + sse += bc7_sse(pPixels[i + 1][3], la, da, basist::g_bc7_weights2[sel1]); + sse += bc7_sse(pPixels[i + 2][3], la, da, basist::g_bc7_weights2[sel2]); + sse += bc7_sse(pPixels[i + 3][3], la, da, basist::g_bc7_weights2[sel3]); + } + + return sse; + } + + // Determines the best unique pbits to use to encode xl/xh, which are [0,1] + static void determine_unique_pbits( + uint32_t total_comps, uint32_t comp_bits, float xl[4], float xh[4], + color_rgba& bestMinColor, color_rgba& bestMaxColor, uint32_t best_pbits[2]) + { +#ifdef _DEBUG + for (uint32_t c = 0; c < total_comps; c++) + { + assert((xl[c] >= 0.0f) && (xl[c] <= 1.0f)); + assert((xh[c] >= 0.0f) && (xh[c] <= 1.0f)); + } +#endif + + const uint32_t total_bits = comp_bits + 1; + const int iscalep = (1 << total_bits) - 1; + const float scalep = (float)iscalep; + + float best_err0 
= 1e+9f; + float best_err1 = 1e+9f; + + for (int p = 0; p < 2; p++) + { + color_rgba xMinColor, xMaxColor; + + for (uint32_t c = 0; c < 4; c++) + { + xMinColor[c] = (uint8_t)(clampi(((int)((xl[c] * scalep - p) * (1.0f / 2.0f) + .5f)) * 2 + p, p, iscalep - 1 + p)); + xMaxColor[c] = (uint8_t)(clampi(((int)((xh[c] * scalep - p) * (1.0f / 2.0f) + .5f)) * 2 + p, p, iscalep - 1 + p)); + } + + color_rgba scaledLow, scaledHigh; + for (uint32_t i = 0; i < 4; i++) + { + scaledLow[i] = (xMinColor[i] << (8 - total_bits)); + scaledLow[i] |= (scaledLow[i] >> total_bits); + + scaledHigh[i] = (xMaxColor[i] << (8 - total_bits)); + scaledHigh[i] |= (scaledHigh[i] >> total_bits); + } + + float err0 = 0, err1 = 0; + for (uint32_t i = 0; i < total_comps; i++) + { + err0 += basisu::squaref(scaledLow[i] - xl[i] * 255.0f); + err1 += basisu::squaref(scaledHigh[i] - xh[i] * 255.0f); + } + + if (err0 < best_err0) + { + best_err0 = err0; + best_pbits[0] = p; + + bestMinColor[0] = xMinColor[0] >> 1; + bestMinColor[1] = xMinColor[1] >> 1; + bestMinColor[2] = xMinColor[2] >> 1; + bestMinColor[3] = xMinColor[3] >> 1; + } + + if (err1 < best_err1) + { + best_err1 = err1; + best_pbits[1] = p; + + bestMaxColor[0] = xMaxColor[0] >> 1; + bestMaxColor[1] = xMaxColor[1] >> 1; + bestMaxColor[2] = xMaxColor[2] >> 1; + bestMaxColor[3] = xMaxColor[3] >> 1; + } + } + } + + // Determines the best shared pbits to use to encode xl/xh, which are [0,1] + static void determine_shared_pbits( + uint32_t total_comps, uint32_t comp_bits, float xl[4], float xh[4], + color_rgba& bestMinColor, color_rgba& bestMaxColor, uint32_t best_pbits[2]) + { +#ifdef _DEBUG + for (uint32_t c = 0; c < total_comps; c++) + { + assert((xl[c] >= 0.0f) && (xl[c] <= 1.0f)); + assert((xh[c] >= 0.0f) && (xh[c] <= 1.0f)); + } +#endif + + const uint32_t total_bits = comp_bits + 1; + assert((total_bits >= 4) && (total_bits <= 8)); + + const int iscalep = (1 << total_bits) - 1; + const float scalep = (float)iscalep; + + float best_err = 1e+9f; + 
+ for (int p = 0; p < 2; p++) + { + color_rgba xMinColor, xMaxColor; + for (uint32_t c = 0; c < 4; c++) + { + xMinColor[c] = (uint8_t)(clampi(((int)((xl[c] * scalep - p) * (1.0f / 2.0f) + .5f)) * 2 + p, p, iscalep - 1 + p)); + xMaxColor[c] = (uint8_t)(clampi(((int)((xh[c] * scalep - p) * (1.0f / 2.0f) + .5f)) * 2 + p, p, iscalep - 1 + p)); + } + + color_rgba scaledLow, scaledHigh; + + for (uint32_t i = 0; i < 4; i++) + { + scaledLow[i] = (xMinColor[i] << (8 - total_bits)); + scaledLow[i] |= (scaledLow[i] >> total_bits); + + scaledHigh[i] = (xMaxColor[i] << (8 - total_bits)); + scaledHigh[i] |= (scaledHigh[i] >> total_bits); + } + + float err = 0; + for (uint32_t i = 0; i < total_comps; i++) + err += basisu::squaref((scaledLow[i] * (1.0f / 255.0f)) - xl[i]) + basisu::squaref((scaledHigh[i] * (1.0f / 255.0f)) - xh[i]); + + if (err < best_err) + { + best_err = err; + best_pbits[0] = p; + best_pbits[1] = p; + for (uint32_t j = 0; j < 4; j++) + { + bestMinColor[j] = xMinColor[j] >> 1; + bestMaxColor[j] = xMaxColor[j] >> 1; + } + } + } + } + + // 4x4 ASTC blocks only, no dp, no subsets, outputs mode 6 + static void pack_from_astc_4x4_single_subset(uint8_t* pDst_block_u8, const astc_helpers::log_astc_block& log_blk) + { + assert(!log_blk.m_dual_plane && (log_blk.m_num_partitions == 1)); + assert((log_blk.m_grid_width <= 4) && (log_blk.m_grid_height <= 4)); + + color_rgba l, h; + astc_ldr_t::decode_endpoints(log_blk.m_color_endpoint_modes[0], log_blk.m_endpoints, log_blk.m_endpoint_ise_range, l, h); + + uint8_t dequantized_weights[16]; + uint8_t upsampled_weights[16]; + + const uint32_t total_weight_vals = log_blk.m_grid_width * log_blk.m_grid_height; + + const astc_helpers::dequant_table& weight_dequant_tab = astc_helpers::g_dequant_tables.get_weight_tab(log_blk.m_weight_ise_range); + const uint8_t* pWeight_dequant = weight_dequant_tab.m_ISE_to_val.data(); + + for (uint32_t i = 0; i < total_weight_vals; i++) + { + assert(log_blk.m_weights[i] < 
weight_dequant_tab.m_ISE_to_val.size_u32()); + + dequantized_weights[i] = pWeight_dequant[log_blk.m_weights[i]]; + } + + const uint8_t* pUpsampled_weights = dequantized_weights; + if ((log_blk.m_grid_width < 4) || (log_blk.m_grid_height < 4)) + { + astc_helpers::upsample_weight_grid_xuastc_ldr(4, 4, log_blk.m_grid_width, log_blk.m_grid_height, dequantized_weights, upsampled_weights, nullptr, nullptr); + pUpsampled_weights = upsampled_weights; + } + + const float q = 1.0f / 255.0f; + float sxl[4] = { (float)l.r * q, (float)l.g * q, (float)l.b * q, (float)l.a * q }; + float sxh[4] = { (float)h.r * q, (float)h.g * q, (float)h.b * q, (float)h.a * q }; + + color_rgba bestMinColor, bestMaxColor; + uint32_t best_pbits[2]; + determine_unique_pbits(4, 7, sxl, sxh, bestMinColor, bestMaxColor, best_pbits); + + uint8_t bc7_weights[16]; + // TODO: Potentially improve this mapping using a lookup table + for (uint32_t i = 0; i < 16; i++) + bc7_weights[i] = (uint8_t)((pUpsampled_weights[i] * 15 + 32) >> 6); + + encode_mode6_rgba_block(pDst_block_u8, + bestMinColor.r, bestMinColor.g, bestMinColor.b, bestMinColor.a, best_pbits[0], + bestMaxColor.r, bestMaxColor.g, bestMaxColor.b, bestMaxColor.a, best_pbits[1], + bc7_weights); + } + + // Outputs mode 6 + static void pack_from_astc_single_subset( + uint8_t* pDst_block_u8, + const astc_helpers::log_astc_block& log_blk, + const uint8_t *pUpsampled_weights, + uint32_t weight_ofs_x, uint32_t weight_ofs_y, + uint32_t block_width, uint32_t block_height) + { + BASISU_NOTE_UNUSED(block_width); + BASISU_NOTE_UNUSED(block_height); + + assert(!log_blk.m_dual_plane && (log_blk.m_num_partitions == 1)); + assert((log_blk.m_grid_width <= block_width) && (log_blk.m_grid_height <= block_height)); + assert((weight_ofs_x + 3) < block_width); + assert((weight_ofs_y + 3) < block_height); + + color_rgba l, h; + astc_ldr_t::decode_endpoints(log_blk.m_color_endpoint_modes[0], log_blk.m_endpoints, log_blk.m_endpoint_ise_range, l, h); + + const float q = 1.0f 
/ 255.0f; + float sxl[4] = { (float)l.r * q, (float)l.g * q, (float)l.b * q, (float)l.a * q }; + float sxh[4] = { (float)h.r * q, (float)h.g * q, (float)h.b * q, (float)h.a * q }; + + color_rgba bestMinColor, bestMaxColor; + uint32_t best_pbits[2]; + determine_unique_pbits(4, 7, sxl, sxh, bestMinColor, bestMaxColor, best_pbits); + + uint8_t bc7_weights[16]; + + // TODO: Potentially improve this mapping using a lookup table + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t w = pUpsampled_weights[(weight_ofs_x + x) + (weight_ofs_y + y) * block_width]; + assert(w <= 64); + + bc7_weights[x + y * 4] = (uint8_t)((w * 15 + 32) >> 6); + } // x + } // y + + encode_mode6_rgba_block(pDst_block_u8, + bestMinColor.r, bestMinColor.g, bestMinColor.b, bestMinColor.a, best_pbits[0], + bestMaxColor.r, bestMaxColor.g, bestMaxColor.b, bestMaxColor.a, best_pbits[1], + bc7_weights); + } + + // same or super close endpoints, 8x6 or 6x6 only + void pack_from_astc_to_single_subset_same_endpoints( + uint8_t* pDst_block_u8, + const astc_helpers::log_astc_block& b0, const uint8_t* pUpsampled_weights0, + const astc_helpers::log_astc_block& b1, const uint8_t* pUpsampled_weights1, + int dx, int dy, + uint32_t block_width, uint32_t block_height) + { + assert(!b0.m_dual_plane && (b0.m_num_partitions == 1)); + assert((b0.m_grid_width <= block_width) && (b0.m_grid_height <= block_height)); + assert(!b0.m_solid_color_flag_ldr); + + assert(!b1.m_dual_plane && (b1.m_num_partitions == 1)); + assert((b1.m_grid_width <= block_width) && (b1.m_grid_height <= block_height)); + assert(!b1.m_solid_color_flag_ldr); + + const bool is_6x6 = ((block_width == 6) && (block_height == 6)); + + // Only handles particular BC7 blocks in the 3x3 or 2x3 region. 
+ if (is_6x6) + { + // 6x6 + assert( + (!dx && (dy == 1)) || ((dx == 2) && (dy == 1)) || + ((dx == 1) && !dy) || ((dx == 1) && (dy == 2)) + ); + } + else + { + // 8x6 + assert((block_width == 8) && (block_height == 6)); + assert((dx >= 0) && (dx <= 1)); + assert((dy >= 0) && (dy <= 2)); + } + + color_rgba l, h; + astc_ldr_t::decode_endpoints(b0.m_color_endpoint_modes[0], b0.m_endpoints, b0.m_endpoint_ise_range, l, h); + + color_rgba al, ah; + astc_ldr_t::decode_endpoints(b1.m_color_endpoint_modes[0], b1.m_endpoints, b1.m_endpoint_ise_range, al, ah); + + for (uint32_t c = 0; c < 4; c++) + { + l[c] = (l[c] + al[c] + 1) >> 1; + h[c] = (h[c] + ah[c] + 1) >> 1; + } + + const float q = 1.0f / 255.0f; + float sxl[4] = { (float)l.r * q, (float)l.g * q, (float)l.b * q, (float)l.a * q }; + float sxh[4] = { (float)h.r * q, (float)h.g * q, (float)h.b * q, (float)h.a * q }; + + color_rgba bestMinColor, bestMaxColor; + uint32_t best_pbits[2]; + determine_unique_pbits(4, 7, sxl, sxh, bestMinColor, bestMaxColor, best_pbits); + + bool top_or_bottom = false; + + if (is_6x6) + top_or_bottom = (dy == 0) || (dy == 2); + + uint8_t bc7_weights[16]; + + // TODO: Potentially improve this mapping using a lookup table + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t w; + + if (is_6x6) + { + if (top_or_bottom) + { + if (x < 2) + w = pUpsampled_weights0[basisu::open_range_check((x + 4) + (y + ((dy == 2) ? 2 : 0)) * 6, 0, 36)]; + else + w = pUpsampled_weights1[basisu::open_range_check((x - 2) + (y + ((dy == 2) ? 2 : 0)) * 6, 0, 36)]; + } + else + { + if (y < 2) + w = pUpsampled_weights0[basisu::open_range_check((x + ((dx == 2) ? 2 : 0)) + (y + 4) * 6, 0, 36)]; + else + w = pUpsampled_weights1[basisu::open_range_check((x + ((dx == 2) ? 
2 : 0)) + (y - 2) * 6, 0, 36)]; + } + } + else + { + // 8x6 + if (y < 2) + w = pUpsampled_weights0[basisu::open_range_check((dx * 4 + x) + (y + 4) * 8, 0, 48)]; + else + w = pUpsampled_weights1[basisu::open_range_check((dx * 4 + x) + (y - 2) * 8, 0, 48)]; + } + + assert(w <= 64); + + bc7_weights[x + y * 4] = (uint8_t)((w * 15 + 32) >> 6); + } // x + } // y + + encode_mode6_rgba_block(pDst_block_u8, + bestMinColor.r, bestMinColor.g, bestMinColor.b, bestMinColor.a, best_pbits[0], + bestMaxColor.r, bestMaxColor.g, bestMaxColor.b, bestMaxColor.a, best_pbits[1], + bc7_weights); + } + + bool pack_from_astc_6x6_to_two_subsets_different_endpoints( + uint8_t *pDst_block_u8, + const astc_helpers::log_astc_block& b0, const uint8_t* pUpsampled_weights0, + const astc_helpers::log_astc_block& b1, const uint8_t* pUpsampled_weights1, + int dx, int dy) + { + const bool b0_solid = b0.m_solid_color_flag_ldr; + const bool b1_solid = b1.m_solid_color_flag_ldr; + + assert(b0_solid || (!b0.m_dual_plane && (b0.m_num_partitions == 1))); + assert((b0.m_grid_width <= 6) && (b0.m_grid_height <= 6)); + + assert(b1_solid || (!b1.m_dual_plane && (b1.m_num_partitions == 1))); + assert((b1.m_grid_width <= 6) && (b1.m_grid_height <= 6)); + + // Only handles particular BC7 blocks in the 3x3 region. 
+ assert( + (!dx && (dy == 1)) || ((dx == 2) && (dy == 1)) || + ((dx == 1) && !dy) || ((dx == 1) && (dy == 2)) + ); + + color_rgba l[2], h[2]; + if (b0_solid) + { + l[0][0] = h[0][0] = (uint8_t)(b0.m_solid_color[0] >> 8); + l[0][1] = h[0][1] = (uint8_t)(b0.m_solid_color[1] >> 8); + l[0][2] = h[0][2] = (uint8_t)(b0.m_solid_color[2] >> 8); + l[0][3] = h[0][3] = (uint8_t)(b0.m_solid_color[3] >> 8); + } + else + { + astc_ldr_t::decode_endpoints(b0.m_color_endpoint_modes[0], b0.m_endpoints, b0.m_endpoint_ise_range, l[0], h[0]); + } + + if (b1_solid) + { + l[1][0] = h[1][0] = (uint8_t)(b1.m_solid_color[0] >> 8); + l[1][1] = h[1][1] = (uint8_t)(b1.m_solid_color[1] >> 8); + l[1][2] = h[1][2] = (uint8_t)(b1.m_solid_color[2] >> 8); + l[1][3] = h[1][3] = (uint8_t)(b1.m_solid_color[3] >> 8); + } + else + { + astc_ldr_t::decode_endpoints(b1.m_color_endpoint_modes[0], b1.m_endpoints, b1.m_endpoint_ise_range, l[1], h[1]); + } + + float sxl[2][4], sxh[2][4]; + for (uint32_t i = 0; i < 2; i++) + { + for (uint32_t j = 0; j < 4; j++) + { + const float q = 1.0f / 255.0f; + + sxl[i][j] = l[i][j] * q; + sxh[i][j] = h[i][j] * q; + } // j + } // i + + color_rgba bestMinColor[2], bestMaxColor[2]; + + uint32_t best_p0[2]; + determine_shared_pbits(3, 6, sxl[0], &sxh[0][0], bestMinColor[0], bestMaxColor[0], best_p0); + + uint32_t best_p1[2]; + determine_shared_pbits(3, 6, sxl[1], &sxh[1][0], bestMinColor[1], bestMaxColor[1], best_p1); + + const bool top_or_bottom = (dy == 0) || (dy == 2); + + uint8_t bc7_weights[16]; + + uint32_t part_id = 0; + if ((dx == 0) || (dx == 2)) + part_id = 13; + +#if 0 + const uint8_t* pPart_map = &g_bc7_partition2[part_id * 16]; + uint32_t min_qw[2] = { 256, 256 }, max_qw[2] = { 0, 0 }; +#endif + + // TODO: Potentially improve this mapping using a lookup table + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t w; + + if (top_or_bottom) + { + if (x < 2) + w = b0_solid ? 
0 : pUpsampled_weights0[basisu::open_range_check((x + 4) + (y + ((dy == 2) ? 2 : 0)) * 6, 0, 36)]; + else + w = b1_solid ? 0 : pUpsampled_weights1[basisu::open_range_check((x - 2) + (y + ((dy == 2) ? 2 : 0)) * 6, 0, 36)]; + } + else + { + if (y < 2) + w = b0_solid ? 0 : pUpsampled_weights0[basisu::open_range_check((x + ((dx == 2) ? 2 : 0)) + (y + 4) * 6, 0, 36)]; + else + w = b1_solid ? 0 : pUpsampled_weights1[basisu::open_range_check((x + ((dx == 2) ? 2 : 0)) + (y - 2) * 6, 0, 36)]; + } + + assert(w <= 64); + + uint32_t qw = ((w * 7 + 32) >> 6); + +#if 0 + const uint8_t s = pPart_map[x + y * 4]; + min_qw[s] = basisu::minimum(min_qw[s], qw); + max_qw[s] = basisu::maximum(max_qw[s], qw); +#endif + + bc7_weights[x + y * 4] = (uint8_t)qw; + } // x + } // y + +#if 0 + const uint32_t w_range_0 = max_qw[0] - min_qw[0]; + const uint32_t w_range_1 = max_qw[1] - min_qw[1]; + const uint32_t W_RANGE_THRESH = 2; + if ((w_range_0 <= W_RANGE_THRESH) || (w_range_1 <= W_RANGE_THRESH)) + return false; +#endif + + uint32_t lr[2] = { bestMinColor[0][0], bestMinColor[1][0] }; + uint32_t lg[2] = { bestMinColor[0][1], bestMinColor[1][1] }; + uint32_t lb[2] = { bestMinColor[0][2], bestMinColor[1][2] }; + + uint32_t hr[2] = { bestMaxColor[0][0], bestMaxColor[1][0] }; + uint32_t hg[2] = { bestMaxColor[0][1], bestMaxColor[1][1] }; + uint32_t hb[2] = { bestMaxColor[0][2], bestMaxColor[1][2] }; + + encode_mode1_rgb_block(pDst_block_u8, part_id, + lr, lg, lb, + hr, hg, hb, + best_p0[0], best_p1[0], + bc7_weights); + + return true; + } + + bool pack_from_astc_8x6_to_two_subsets_different_endpoints( + uint8_t* pDst_block_u8, + const astc_helpers::log_astc_block& b0, const uint8_t* pUpsampled_weights0, + const astc_helpers::log_astc_block& b1, const uint8_t* pUpsampled_weights1, + int dx, int dy) + { + BASISU_NOTE_UNUSED(dy); + + const bool b0_solid = b0.m_solid_color_flag_ldr; + const bool b1_solid = b1.m_solid_color_flag_ldr; + + assert(b0_solid || (!b0.m_dual_plane && (b0.m_num_partitions == 
1))); + assert((b0.m_grid_width <= 8) && (b0.m_grid_height <= 6)); + + assert(b1_solid || (!b1.m_dual_plane && (b1.m_num_partitions == 1))); + assert((b1.m_grid_width <= 8) && (b1.m_grid_height <= 6)); + + // Only handles particular BC7 blocks in the 2x3 region. + assert((dx >= 0) && (dx <= 1) && + (dy >= 0) && (dy <= 2)); + + color_rgba l[2], h[2]; + if (b0_solid) + { + l[0][0] = h[0][0] = (uint8_t)(b0.m_solid_color[0] >> 8); + l[0][1] = h[0][1] = (uint8_t)(b0.m_solid_color[1] >> 8); + l[0][2] = h[0][2] = (uint8_t)(b0.m_solid_color[2] >> 8); + l[0][3] = h[0][3] = (uint8_t)(b0.m_solid_color[3] >> 8); + } + else + { + astc_ldr_t::decode_endpoints(b0.m_color_endpoint_modes[0], b0.m_endpoints, b0.m_endpoint_ise_range, l[0], h[0]); + } + + if (b1_solid) + { + l[1][0] = h[1][0] = (uint8_t)(b1.m_solid_color[0] >> 8); + l[1][1] = h[1][1] = (uint8_t)(b1.m_solid_color[1] >> 8); + l[1][2] = h[1][2] = (uint8_t)(b1.m_solid_color[2] >> 8); + l[1][3] = h[1][3] = (uint8_t)(b1.m_solid_color[3] >> 8); + } + else + { + astc_ldr_t::decode_endpoints(b1.m_color_endpoint_modes[0], b1.m_endpoints, b1.m_endpoint_ise_range, l[1], h[1]); + } + + float sxl[2][4], sxh[2][4]; + for (uint32_t i = 0; i < 2; i++) + { + for (uint32_t j = 0; j < 4; j++) + { + const float q = 1.0f / 255.0f; + + sxl[i][j] = l[i][j] * q; + sxh[i][j] = h[i][j] * q; + } // j + } // i + + color_rgba bestMinColor[2], bestMaxColor[2]; + + uint32_t best_p0[2]; + determine_shared_pbits(3, 6, sxl[0], &sxh[0][0], bestMinColor[0], bestMaxColor[0], best_p0); + + uint32_t best_p1[2]; + determine_shared_pbits(3, 6, sxl[1], &sxh[1][0], bestMinColor[1], bestMaxColor[1], best_p1); + + uint8_t bc7_weights[16]; + + uint32_t part_id = 13; + + // TODO: Potentially improve this mapping using a lookup table + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t w; + + if (y < 2) + w = b0_solid ? 0 : pUpsampled_weights0[basisu::open_range_check((dx * 4 + x) + (y + 4) * 8, 0, 48)]; + else + w = b1_solid ? 
0 : pUpsampled_weights1[basisu::open_range_check((dx * 4 + x) + (y - 2) * 8, 0, 48)]; + + assert(w <= 64); + + uint32_t qw = ((w * 7 + 32) >> 6); + + bc7_weights[x + y * 4] = (uint8_t)qw; + } // x + } // y + + uint32_t lr[2] = { bestMinColor[0][0], bestMinColor[1][0] }; + uint32_t lg[2] = { bestMinColor[0][1], bestMinColor[1][1] }; + uint32_t lb[2] = { bestMinColor[0][2], bestMinColor[1][2] }; + + uint32_t hr[2] = { bestMaxColor[0][0], bestMaxColor[1][0] }; + uint32_t hg[2] = { bestMaxColor[0][1], bestMaxColor[1][1] }; + uint32_t hb[2] = { bestMaxColor[0][2], bestMaxColor[1][2] }; + + encode_mode1_rgb_block(pDst_block_u8, part_id, + lr, lg, lb, + hr, hg, hb, + best_p0[0], best_p1[0], + bc7_weights); + + return true; + } + + uint32_t fast_pack_bc7_rgb_partial_analytical(uint8_t* pBlock, const color_rgba* pPixels, uint32_t flags); + + bool pack_from_astc_8x6_to_two_subsets_different_endpoints_hq( + uint8_t* pDst_block_u8, + const astc_helpers::log_astc_block& b0, const uint8_t* pUpsampled_weights0, + const astc_helpers::log_astc_block& b1, const uint8_t* pUpsampled_weights1, + int dx, int dy, bool astc_srgb_decode, bool &fallback_encode_flag) + { + BASISU_NOTE_UNUSED(dy); + + const bool b_solid[2] = { b0.m_solid_color_flag_ldr, b1.m_solid_color_flag_ldr }; + + assert(b_solid[0] || (!b0.m_dual_plane && (b0.m_num_partitions == 1))); + assert((b0.m_grid_width <= 8) && (b0.m_grid_height <= 6)); + + assert(b_solid[1] || (!b1.m_dual_plane && (b1.m_num_partitions == 1))); + assert((b1.m_grid_width <= 8) && (b1.m_grid_height <= 6)); + + // Only handles particular BC7 blocks in the 2x3 region. 
+ assert((dx >= 0) && (dx <= 1) && + (dy >= 0) && (dy <= 2)); + + color_rgba l[2], h[2]; + if (b_solid[0]) + { + l[0][0] = h[0][0] = (uint8_t)(b0.m_solid_color[0] >> 8); + l[0][1] = h[0][1] = (uint8_t)(b0.m_solid_color[1] >> 8); + l[0][2] = h[0][2] = (uint8_t)(b0.m_solid_color[2] >> 8); + l[0][3] = h[0][3] = (uint8_t)(b0.m_solid_color[3] >> 8); + } + else + { + astc_ldr_t::decode_endpoints(b0.m_color_endpoint_modes[0], b0.m_endpoints, b0.m_endpoint_ise_range, l[0], h[0]); + } + + if (b_solid[1]) + { + l[1][0] = h[1][0] = (uint8_t)(b1.m_solid_color[0] >> 8); + l[1][1] = h[1][1] = (uint8_t)(b1.m_solid_color[1] >> 8); + l[1][2] = h[1][2] = (uint8_t)(b1.m_solid_color[2] >> 8); + l[1][3] = h[1][3] = (uint8_t)(b1.m_solid_color[3] >> 8); + } + else + { + astc_ldr_t::decode_endpoints(b1.m_color_endpoint_modes[0], b1.m_endpoints, b1.m_endpoint_ise_range, l[1], h[1]); + } + + uint32_t low_w[2] = { UINT32_MAX, UINT32_MAX }; + uint32_t high_w[2] = { 0, 0 }; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t s; + uint32_t w; + + if (y < 2) + { + w = b_solid[0] ? 0 : pUpsampled_weights0[basisu::open_range_check((dx * 4 + x) + (y + 4) * 8, 0, 48)]; + s = 0; + } + else + { + w = b_solid[1] ? 
0 : pUpsampled_weights1[basisu::open_range_check((dx * 4 + x) + (y - 2) * 8, 0, 48)]; + s = 1; + } + + assert(w <= 64); + + low_w[s] = basisu::minimum(low_w[s], w); + high_w[s] = basisu::maximum(high_w[s], w); + } // x + } // y + + color_rgba orig_l[2], orig_h[2]; + memcpy(orig_l, l, sizeof(l)); + memcpy(orig_h, h, sizeof(h)); + +#if 1 + uint32_t num_low_stddev = 0; +#endif + + for (uint32_t s = 0; s < 2; s++) + { + if (b_solid[s]) + continue; + + if ((low_w[s] > 0) || (high_w[s] < 64)) + { + for (uint32_t c = 0; c < 3; c++) + { + l[s][c] = (uint8_t)astc_helpers::channel_interpolate(orig_l[s][c], orig_h[s][c], low_w[s], astc_srgb_decode); + h[s][c] = (uint8_t)astc_helpers::channel_interpolate(orig_l[s][c], orig_h[s][c], high_w[s], astc_srgb_decode); + } + } + +#if 1 + uint32_t e_delta = basisu::squarei((int)h[s][0] - (int)l[s][0]) + + basisu::squarei((int)h[s][1] - (int)l[s][1]) + + basisu::squarei((int)h[s][2] - (int)l[s][2]); + + const uint32_t E_DELTA_THRESH = 60; + num_low_stddev += (e_delta < E_DELTA_THRESH); +#endif + } + +#if 1 + if (num_low_stddev == 2) + { + //bc7f::pack_mode5_solid(pDst_block_u8, color_rgba(200, 0, 0, 255)); + //return true; + + assert(!b_solid[0] && !b_solid[1]); + + color_rgba dec_pixels[16]; + + int ep_l[2][3], ep_h[2][3]; + for (uint32_t s = 0; s < 2; s++) + { + for (uint32_t c = 0; c < 3; c++) + { + int le = l[s][c], he = h[s][c]; + + if (astc_srgb_decode) + { + le = (le << 8) | 0x80; + he = (he << 8) | 0x80; + } + else + { + le = (le << 8) | le; + he = (he << 8) | he; + } + + ep_l[s][c] = le; + ep_h[s][c] = he; + } + } + + color_rgba* pDst = dec_pixels; + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t s = (y < 2) ? 
0 : 1; + + int w; + if (y < 2) + w = pUpsampled_weights0[basisu::open_range_check((dx * 4 + x) + (y + 4) * 8, 0, 48)]; + else + w = pUpsampled_weights1[basisu::open_range_check((dx * 4 + x) + (y - 2) * 8, 0, 48)]; + + pDst->r = (uint8_t)(astc_helpers::weight_interpolate(ep_l[s][0], ep_h[s][0], w) >> 8); + pDst->g = (uint8_t)(astc_helpers::weight_interpolate(ep_l[s][1], ep_h[s][1], w) >> 8); + pDst->b = (uint8_t)(astc_helpers::weight_interpolate(ep_l[s][2], ep_h[s][2], w) >> 8); + pDst->a = 255; + + ++pDst; + + } // x + } // y + + const uint32_t flags = cPackBC7FlagUseTrivialMode6 | cPackBC7FlagPBitOptMode6; + //const uint32_t flags = cPackBC7FlagUse2SubsetsRGB | cPackBC7FlagPBitOpt | cPackBC7FlagPBitOptMode6 | cPackBC7FlagUseTrivialMode6; + bc7f::fast_pack_bc7_rgb_analytical(pDst_block_u8, dec_pixels, flags); + fallback_encode_flag = true; + return true; + } +#endif + + float sxl[2][4], sxh[2][4]; + for (uint32_t i = 0; i < 2; i++) + { + for (uint32_t j = 0; j < 4; j++) + { + const float q = 1.0f / 255.0f; + + sxl[i][j] = (float)l[i][j] * q; + sxh[i][j] = (float)h[i][j] * q; + } // j + } // i + + color_rgba bestMinColor[2], bestMaxColor[2]; + + uint32_t best_p0[2]; + determine_shared_pbits(3, 6, sxl[0], &sxh[0][0], bestMinColor[0], bestMaxColor[0], best_p0); + + uint32_t best_p1[2]; + determine_shared_pbits(3, 6, sxl[1], &sxh[1][0], bestMinColor[1], bestMaxColor[1], best_p1); + + uint8_t bc7_weights[16]; + + uint32_t part_id = 13; + + float one_over_w_range_scaled[2]; + for (uint32_t s = 0; s < 2; s++) + { + if (low_w[s] == high_w[s]) + one_over_w_range_scaled[s] = 0; + else + one_over_w_range_scaled[s] = 7.0f / (high_w[s] - low_w[s]); + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t s = (y < 2) ? 
0 : 1; + + int qw = 0; + + if (!b_solid[s]) + { + int w; + + if (y < 2) + w = pUpsampled_weights0[basisu::open_range_check((dx * 4 + x) + (y + 4) * 8, 0, 48)]; + else + w = pUpsampled_weights1[basisu::open_range_check((dx * 4 + x) + (y - 2) * 8, 0, 48)]; + + assert(w <= 64); + + if (low_w[s] != high_w[s]) + { + float f = ((float)w - (float)low_w[s]) * one_over_w_range_scaled[s]; + + qw = (int)(f + .5f); + + if ((uint32_t)qw > 7) + { + qw = basisu::clamp(qw, 0, 7); + } + } + } + + bc7_weights[x + y * 4] = (uint8_t)qw; + } // x + } // y + + uint32_t lr[2] = { bestMinColor[0][0], bestMinColor[1][0] }; + uint32_t lg[2] = { bestMinColor[0][1], bestMinColor[1][1] }; + uint32_t lb[2] = { bestMinColor[0][2], bestMinColor[1][2] }; + + uint32_t hr[2] = { bestMaxColor[0][0], bestMaxColor[1][0] }; + uint32_t hg[2] = { bestMaxColor[0][1], bestMaxColor[1][1] }; + uint32_t hb[2] = { bestMaxColor[0][2], bestMaxColor[1][2] }; + + encode_mode1_rgb_block(pDst_block_u8, part_id, + lr, lg, lb, + hr, hg, hb, + best_p0[0], best_p1[0], + bc7_weights); + + return true; + } + + void pack_astc_6x6_to_two_subsets_middle_block( + uint8_t* pDst_block_u8, + const astc_helpers::log_astc_block* blocks[2][2], + const uint8_t(&weights)[2][2][36], + bool do_left_right) + { + const astc_helpers::log_astc_block* p[2]; + const astc_helpers::log_astc_block* q[2]; + + if (do_left_right) + { + // left and right into separate subsets + p[0] = blocks[0][0]; + q[0] = blocks[0][1]; + + p[1] = blocks[1][0]; + q[1] = blocks[1][1]; + } + else + { + // top and bottom into separate subsets + p[0] = blocks[0][0]; + q[0] = blocks[1][0]; + + p[1] = blocks[0][1]; + q[1] = blocks[1][1]; + } + + assert(p[0]->m_solid_color_flag_ldr || (!p[0]->m_dual_plane && (p[0]->m_num_partitions == 1))); + assert((p[0]->m_grid_width <= 6) && (p[0]->m_grid_height <= 6)); + + assert(p[1]->m_solid_color_flag_ldr || (!p[1]->m_dual_plane && (p[1]->m_num_partitions == 1))); + assert((p[1]->m_grid_width <= 6) && (p[1]->m_grid_height <= 6)); + 
+ assert(q[0]->m_solid_color_flag_ldr || (!q[0]->m_dual_plane && (q[0]->m_num_partitions == 1))); + assert((q[0]->m_grid_width <= 6) && (q[0]->m_grid_height <= 6)); + + assert(q[1]->m_solid_color_flag_ldr || (!q[1]->m_dual_plane && (q[1]->m_num_partitions == 1))); + assert((q[1]->m_grid_width <= 6) && (q[1]->m_grid_height <= 6)); + + color_rgba el[2], eh[2]; + color_rgba el2[2], eh2[2]; + + for (uint32_t i = 0; i < 2; i++) + { + if (p[i]->m_solid_color_flag_ldr) + { + el[i][0] = eh[i][0] = (uint8_t)(p[i]->m_solid_color[0] >> 8); + el[i][1] = eh[i][1] = (uint8_t)(p[i]->m_solid_color[1] >> 8); + el[i][2] = eh[i][2] = (uint8_t)(p[i]->m_solid_color[2] >> 8); + el[i][3] = eh[i][3] = (uint8_t)(p[i]->m_solid_color[3] >> 8); + } + else + { + astc_ldr_t::decode_endpoints(p[i]->m_color_endpoint_modes[0], p[i]->m_endpoints, p[i]->m_endpoint_ise_range, el[i], eh[i]); + } + + if (q[i]->m_solid_color_flag_ldr) + { + el2[i][0] = eh2[i][0] = (uint8_t)(q[i]->m_solid_color[0] >> 8); + el2[i][1] = eh2[i][1] = (uint8_t)(q[i]->m_solid_color[1] >> 8); + el2[i][2] = eh2[i][2] = (uint8_t)(q[i]->m_solid_color[2] >> 8); + el2[i][3] = eh2[i][3] = (uint8_t)(q[i]->m_solid_color[3] >> 8); + } + else + { + astc_ldr_t::decode_endpoints(q[i]->m_color_endpoint_modes[0], q[i]->m_endpoints, q[i]->m_endpoint_ise_range, el2[i], eh2[i]); + } + + assert(el[i][3] == 255); + assert(el2[i][3] == 255); + + assert(eh[i][3] == 255); + assert(eh2[i][3] == 255); + } + + for (uint32_t i = 0; i < 2; i++) + { + for (uint32_t c = 0; c < 3; c++) + { + el[i][c] = (el[i][c] + el2[i][c] + 1) >> 1; + eh[i][c] = (eh[i][c] + eh2[i][c] + 1) >> 1; + } // c + } // i + + float sxl[2][4], sxh[2][4]; + for (uint32_t i = 0; i < 2; i++) + { + for (uint32_t j = 0; j < 4; j++) + { + const float S = 1.0f / 255.0f; + + sxl[i][j] = el[i][j] * S; + sxh[i][j] = eh[i][j] * S; + } // j + } // i + + color_rgba bestMinColor[2], bestMaxColor[2]; + + uint32_t best_p0[2]; + determine_shared_pbits(3, 6, sxl[0], &sxh[0][0], bestMinColor[0], 
bestMaxColor[0], best_p0); + + uint32_t best_p1[2]; + determine_shared_pbits(3, 6, sxl[1], &sxh[1][0], bestMinColor[1], bestMaxColor[1], best_p1); + + uint8_t bc7_weights[16]; + + // TODO: Potentially improve this mapping using a lookup table + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t w = 0; + + if (y < 2) + { + if (x < 2) + { + if (!blocks[0][0]->m_solid_color_flag_ldr) + w = weights[0][0][(x + 4) + (y + 4) * 6]; + } + else + { + if (!blocks[1][0]->m_solid_color_flag_ldr) + w = weights[1][0][(x - 2) + (y + 4) * 6]; + } + } + else + { + if (x < 2) + { + if (!blocks[0][1]->m_solid_color_flag_ldr) + w = weights[0][1][(x + 4) + (y - 2) * 6]; + } + else + { + if (!blocks[1][1]->m_solid_color_flag_ldr) + w = weights[1][1][(x - 2) + (y - 2) * 6]; + } + } + + assert(w <= 64); + + bc7_weights[x + y * 4] = (uint8_t)((w * 7 + 32) >> 6); + } // x + } // y + + uint32_t part_id = 13; + if (do_left_right) + part_id = 0; + + uint32_t lr[2] = { bestMinColor[0][0], bestMinColor[1][0] }; + uint32_t lg[2] = { bestMinColor[0][1], bestMinColor[1][1] }; + uint32_t lb[2] = { bestMinColor[0][2], bestMinColor[1][2] }; + + uint32_t hr[2] = { bestMaxColor[0][0], bestMaxColor[1][0] }; + uint32_t hg[2] = { bestMaxColor[0][1], bestMaxColor[1][1] }; + uint32_t hb[2] = { bestMaxColor[0][2], bestMaxColor[1][2] }; + + encode_mode1_rgb_block(pDst_block_u8, part_id, + lr, lg, lb, + hr, hg, hb, + best_p0[0], best_p1[0], + bc7_weights); + } + +#if 0 + // var must be variance (divided by N, # pixels), not SSE + static inline int calc_span_est(int min_c, int max_c, int mean_c, float var) + { + // variance-implied span: span_var = ~sqrt(12 * var) + int span_var = (int)fast_roundf_pos_int(std::sqrtf((float)(12.0f * var))); + + // take into account available headroom on the low/high end + span_var = basisu::minimum(span_var, 2 * basisu::minimum(mean_c, 255 - mean_c)); + + return basisu::minimum(max_c - min_c, span_var); + } +#endif + + // Multi-channel estimates 
+ // returns total SSE (pixel SSE * num_pixels), span_weights can be nullptr + float analytical_quant_est_sse(int e_levels, int w_levels, int num_chans, const int spans[4], const float span_weights[4], float endpoint_weight_scale, int num_pixels) + { + assert((e_levels >= 2) && (e_levels <= 256) && (w_levels >= 2) && (num_chans)); + assert(spans); + + const float Dep = 1.0f / (float)(e_levels - 1); // endpoint quant step + const float Dw = 1.0f / (float)(w_levels - 1); // weight quant step + + // TODO: precompute + const float N = float(w_levels); + const float ab_sum = (2.0f * N - 1.0f) / (3.0f * (N - 1.0f)); + + float pixel_sse = (e_levels == 256) ? 0.0f : ((Dep * Dep) * ((1.0f / 12.0f) * ab_sum * (255.0f * 255.0f)) * (float)num_chans * endpoint_weight_scale); + + const float k = (Dw * Dw) * (1.0f / 12.0f); + for (int i = 0; i < num_chans; i++) + { + pixel_sse += k * (float)(spans[i] * spans[i]) * (span_weights ? span_weights[i] : 1.0f); + } + + return pixel_sse * float(num_pixels); + } + + // Single channel estimates + float analytical_quant_est_sse(int e_levels, int w_levels, int span, float span_weight, float endpoint_weight_scale, int num_pixels) + { + assert((e_levels >= 2) && (e_levels <= 256) && (w_levels >= 2)); + + const float Dep = 1.0f / (float)(e_levels - 1); // endpoint quant step + const float Dw = 1.0f / (float)(w_levels - 1); // weight quant step + + // TODO: precompute + const float N = float(w_levels); + const float ab_sum = (2.0f * N - 1.0f) / (3.0f * (N - 1.0f)); + + float pixel_sse = (e_levels == 256) ? 
0.0f : ((Dep * Dep) * ((1.0f / 12.0f) * ab_sum * (255.0f * 255.0f)) * endpoint_weight_scale); + + pixel_sse += (Dw * Dw) * (1.0f / 12.0f) * (float)(span * span) * span_weight; + + return pixel_sse * float(num_pixels); + } + + // if cov[] wasn't divided by the # of pixels, this is SSE + float estimate_slam_to_line_sse_3D(const float cov[6], float xr, float yr, float zr, float* pOrtho_ratio = nullptr) + { + // total var + const float total_var = cov[0] + cov[3] + cov[5]; + + float l = sqrtf(xr * xr + yr * yr + zr * zr); + if (l < basisu::SMALL_FLOAT_VAL) + { + xr = yr = zr = 0.577350269f; + } + else + { + l = 1.0f / l; + xr *= l; yr *= l; zr *= l; + } + + float xr2 = cov[0] * xr + cov[1] * yr + cov[2] * zr; + float xg2 = cov[1] * xr + cov[3] * yr + cov[4] * zr; + float xb2 = cov[2] * xr + cov[4] * yr + cov[5] * zr; + + // Rayleigh quotient/est var of principal axis + const float principal_axis_var = xr2 * xr + xg2 * yr + xb2 * zr; + + // Compute leftover var, this is the var unexplaind by the principal axis + const float ortho_var = basisu::maximum(0.0f, total_var - principal_axis_var); + + if (pOrtho_ratio) + *pOrtho_ratio = (total_var > basisu::SMALL_FLOAT_VAL) ? 
(ortho_var / total_var) : 0.0f; + + return ortho_var; + } + + float estimate_slam_to_line_sse_4D(const float cov[10], float xr, float yr, float zr, float wr, float* pOrtho_ratio = nullptr) + { + // total var + const float total_var = cov[0] + cov[4] + cov[7] + cov[9]; + + float l = sqrtf(xr * xr + yr * yr + zr * zr + wr * wr); + if (l < basisu::SMALL_FLOAT_VAL) + { + xr = yr = zr = wr = .5f; + } + else + { + l = 1.0f / l; + xr *= l; yr *= l; zr *= l; wr *= l; + } + + float xr2 = cov[0] * xr + cov[1] * yr + cov[2] * zr + cov[3] * wr; + float xg2 = cov[1] * xr + cov[4] * yr + cov[5] * zr + cov[6] * wr; + float xb2 = cov[2] * xr + cov[5] * yr + cov[7] * zr + cov[8] * wr; + float xa2 = cov[3] * xr + cov[6] * yr + cov[8] * zr + cov[9] * wr; + + // Rayleigh quotient/est var of principal axis + const float principal_axis_var = xr2 * xr + xg2 * yr + xb2 * zr + xa2 * wr; + + // Compute leftover var, this is the var unexplaind by the principal axis + const float ortho_var = basisu::maximum(0.0f, total_var - principal_axis_var); + + if (pOrtho_ratio) + *pOrtho_ratio = (total_var > basisu::SMALL_FLOAT_VAL) ? 
(ortho_var / total_var) : 0.0f; + + return ortho_var; + } + + uint32_t calc_sse(const uint8_t* pBlock, const color_rgba* pPixels) + { + color_rgba unpacked_pixels[16]; + bool status = bc7u::unpack_bc7(pBlock, unpacked_pixels); + if (!status) + { + assert(0); + return UINT32_MAX; + } + + uint32_t sse = 0; + for (uint32_t i = 0; i < 16; i++) + sse += basisu::squarei(pPixels[i][0] - unpacked_pixels[i][0]) + basisu::squarei(pPixels[i][1] - unpacked_pixels[i][1]) + basisu::squarei(pPixels[i][2] - unpacked_pixels[i][2]) + basisu::squarei(pPixels[i][3] - unpacked_pixels[i][3]); + + return sse; + } + + bool pack_mode1_or_3_rgb(uint8_t* pBlock, const color_rgba* pPixels, + float block_xr, float block_xg, float block_xb, + int block_mean_r, int block_mean_g, int block_mean_b, + float sse_est_to_beat, uint32_t flags, + float* pFinal_sse_est = nullptr, + uint32_t* pActual_sse = nullptr) + { +#if BASISU_BC7F_PERF_STATS + g_total_mode13_evals++; +#endif + + uint32_t desired_pat_bits = 0; + + for (uint32_t i = 0; i < 16; i++) + { + const float r = (float)(pPixels[i].r - block_mean_r); + const float g = (float)(pPixels[i].g - block_mean_g); + const float b = (float)(pPixels[i].b - block_mean_b); + + const uint32_t subset = (r * block_xr + g * block_xg + b * block_xb) > 0.0f; + + desired_pat_bits |= (subset << i); + } + + uint32_t best_diff = UINT32_MAX; + for (uint32_t p = 0; p < MAX_PATTERNS2_TO_CHECK; p++) + { + const uint32_t bc6h_pat_bits = g_bc7_part2_bitmasks[p]; + + int diff = popcount32(bc6h_pat_bits ^ desired_pat_bits); + int diff_inv = 16 - diff; + + uint32_t min_diff = (basisu::minimum(diff, diff_inv) << 8) | p; + if (min_diff < best_diff) + best_diff = min_diff; + } // p + + const uint32_t best_pat_index = best_diff & 0xFF; + const uint32_t best_pat_bits = g_bc7_part2_bitmasks[best_pat_index]; + + int total_r[2] = { }, total_g[2] = { }, total_b[2] = { }, total_c[2] = { }; + for (uint32_t i = 0; i < 16; i++) + { + const int r = pPixels[i].r, g = pPixels[i].g, b = 
pPixels[i].b; + const int subset = (best_pat_bits >> i) & 1; + + total_r[subset] += r; total_g[subset] += g; total_b[subset] += b; + total_c[subset]++; + } + + int mean_r[2], mean_g[2], mean_b[2]; + for (uint32_t s = 0; s < 2; s++) + { + const uint32_t t = total_c[s]; + const uint32_t h = (t >> 1); + + mean_r[s] = (total_r[s] + h) / t; + mean_g[s] = (total_g[s] + h) / t; + mean_b[s] = (total_b[s] + h) / t; + } + + int icov[2][6] = { { }, { } }; + + for (uint32_t i = 0; i < 16; i++) + { + const int subset = (best_pat_bits >> i) & 1; + + int r = (int)pPixels[i].r - mean_r[subset]; + int g = (int)pPixels[i].g - mean_g[subset]; + int b = (int)pPixels[i].b - mean_b[subset]; + icov[subset][0] += r * r; icov[subset][1] += r * g; icov[subset][2] += r * b; + icov[subset][3] += g * g; icov[subset][4] += g * b; + icov[subset][5] += b * b; + } + + int ar[2], ag[2], ab[2]; + + // Slam to line SSE estimate is the same for both mode 1 and 3. + float slam_to_line_sse_est = 0.0f; + + for (uint32_t s = 0; s < 2; s++) + { + int block_max_var = basisu::maximum(icov[s][0], icov[s][3], icov[s][5]); + + float cov[6]; + for (uint32_t i = 0; i < 6; i++) + cov[i] = (float)icov[s][i]; + + const float sc = 1.0f / ((float)block_max_var + .0000125f); + const float wx = sc * cov[0], wy = sc * cov[3], wz = sc * cov[5]; + + const float alt_xr = cov[0] * wx + cov[1] * wy + cov[2] * wz; + const float alt_xg = cov[1] * wx + cov[3] * wy + cov[4] * wz; + const float alt_xb = cov[2] * wx + cov[4] * wy + cov[5] * wz; + + slam_to_line_sse_est += estimate_slam_to_line_sse_3D(cov, alt_xr, alt_xg, alt_xb); + + int saxis_r = 306, saxis_g = 601, saxis_b = 117; + + float k = basisu::maximum(fabsf(alt_xr), fabsf(alt_xg), fabsf(alt_xb)); + if (fabs(k) >= basisu::SMALL_FLOAT_VAL) + { + float m = 2048.0f / k; + saxis_r = (int)(alt_xr * m); + saxis_g = (int)(alt_xg * m); + saxis_b = (int)(alt_xb * m); + } + + ar[s] = (int)((uint32_t)saxis_r << 4U); + ag[s] = (int)((uint32_t)saxis_g << 4U); + ab[s] = 
(int)((uint32_t)saxis_b << 4U); + } // s + + int low_dot[2] = { INT_MAX, INT_MAX }; + int high_dot[2] = { INT_MIN, INT_MIN }; + + for (uint32_t i = 0; i < 16; i++) + { + const int subset = (best_pat_bits >> i) & 1; + const int saxis_r = ar[subset], saxis_g = ag[subset], saxis_b = ab[subset]; + + int dot = (pPixels[i].r * saxis_r + pPixels[i].g * saxis_g + pPixels[i].b * saxis_b) + i; + + low_dot[subset] = basisu::minimum(low_dot[subset], dot); + high_dot[subset] = basisu::maximum(high_dot[subset], dot); + } + + int low_c[2] = { low_dot[0] & 15, low_dot[1] & 15 }; + int high_c[2] = { high_dot[0] & 15, high_dot[1] & 15 }; + + int spans[4]; + spans[3] = 0; + + // Endpoint/weight quant error estimates for modes 1 and 3 + float quant_err_sse_est[2] = { }; + + for (uint32_t subset = 0; subset < 2; subset++) + { + const uint32_t low_pixel = low_c[subset]; + const uint32_t high_pixel = high_c[subset]; + + for (uint32_t c = 0; c < 3; c++) + spans[c] = pPixels[high_pixel][c] - pPixels[low_pixel][c]; + + // mode 1: 6-bit endpoints, unique pbits, 3 bit weights + quant_err_sse_est[0] += analytical_quant_est_sse(64, 8, 3, spans, nullptr, (flags & cPackBC7FlagPBitOpt) ? UNIQUE_PBIT_DISCOUNT : 1.0f, total_c[subset]); + + // mode 3, 7-bit endpoints, shared pbits, 2-bit weights + quant_err_sse_est[1] += analytical_quant_est_sse(128, 4, 3, spans, nullptr, (flags & cPackBC7FlagPBitOpt) ? 
SHARED_PBIT_DISCOUNT : 1.0f, total_c[subset]); + + } // subset + + const float total_mode1_est_sse = slam_to_line_sse_est + quant_err_sse_est[0]; + const float total_mode3_est_sse = slam_to_line_sse_est + quant_err_sse_est[1]; + + if (total_mode1_est_sse < total_mode3_est_sse) + { + if (pFinal_sse_est) + *pFinal_sse_est = total_mode1_est_sse; + + // Mode 1: Large span + if (total_mode1_est_sse >= sse_est_to_beat) + { +#if BASISU_BC7F_PERF_STATS + g_total_mode13_bailouts++; +#endif + return false; + } + + uint32_t lr[2], lg[2], lb[2]; + uint32_t hr[2], hg[2], hb[2]; + uint32_t pbits[2] = { 0, 0 }; + + for (uint32_t s = 0; s < 2; s++) + { + const int lc = low_c[s], hc = high_c[s]; + + if (flags & cPackBC7FlagPBitOpt) + { + const float q = 1.0f / 255.0f; + float sxl[4] = { (float)pPixels[lc].r * q, (float)pPixels[lc].g * q, (float)pPixels[lc].b * q, 0 }; + float sxh[4] = { (float)pPixels[hc].r * q, (float)pPixels[hc].g * q, (float)pPixels[hc].b * q, 0 }; + + color_rgba bestMinColor, bestMaxColor; + uint32_t best_pbits[2]; + determine_shared_pbits(3, 6, sxl, sxh, bestMinColor, bestMaxColor, best_pbits); + + pbits[s] = best_pbits[0]; + lr[s] = bestMinColor.r, lg[s] = bestMinColor.g, lb[s] = bestMinColor.b; + hr[s] = bestMaxColor.r, hg[s] = bestMaxColor.g, hb[s] = bestMaxColor.b; + } + else + { + int l = pPixels[lc].r + pPixels[lc].g + pPixels[lc].b; + int h = pPixels[hc].r + pPixels[hc].g + pPixels[hc].b; + + if (basisu::maximum(l, h) >= 129 * 3) + pbits[s] = 1; + + lr[s] = to_6(pPixels[lc].r, pbits[s]); + lg[s] = to_6(pPixels[lc].g, pbits[s]); + lb[s] = to_6(pPixels[lc].b, pbits[s]); + + hr[s] = to_6(pPixels[hc].r, pbits[s]); + hg[s] = to_6(pPixels[hc].g, pbits[s]); + hb[s] = to_6(pPixels[hc].b, pbits[s]); + } + } // s + + uint8_t cur_weights[16]; + eval_weights_mode1_rgb(pPixels, cur_weights, lr, lg, lb, hr, hg, hb, pbits, best_pat_bits); + + float z00[2] = { 0.0f }, z10[2] = { 0.0f }, z11[2] = { 0.0f }; + float q00_r[2] = { 0.0f }; + float q00_g[2] = { 0.0f }; + 
float q00_b[2] = { 0.0f }; + + for (uint32_t i = 0; i < 16; i++) + { + const int subset = (best_pat_bits >> i) & 1; + const uint32_t sel = cur_weights[i]; + assert(sel <= 7); + + z00[subset] += g_bc7_3bit_ls_tab[sel][0]; + z10[subset] += g_bc7_3bit_ls_tab[sel][1]; + z11[subset] += g_bc7_3bit_ls_tab[sel][2]; + + const float w = g_bc7_3bit_ls_tab[sel][3]; + + q00_r[subset] += w * (float)pPixels[i][0]; + q00_g[subset] += w * (float)pPixels[i][1]; + q00_b[subset] += w * (float)pPixels[i][2]; + } // i + + for (uint32_t s = 0; s < 2; s++) + { + float q10_r = (float)total_r[s] - q00_r[s]; + float q10_g = (float)total_g[s] - q00_g[s]; + float q10_b = (float)total_b[s] - q00_b[s]; + + float z01 = z10[s]; + + float det = z00[s] * z11[s] - z01 * z10[s]; + if (fabs(det) < 1e-8f) + continue; + + det = 1.0f / det; + + float iz00, iz01, iz10, iz11; + iz00 = z11[s] * det; + iz01 = -z01 * det; + iz10 = -z10[s] * det; + iz11 = z00[s] * det; + + const float shr = iz00 * q00_r[s] + iz01 * q10_r; + const float slr = iz10 * q00_r[s] + iz11 * q10_r; + + const float shg = iz00 * q00_g[s] + iz01 * q10_g; + const float slg = iz10 * q00_g[s] + iz11 * q10_g; + + const float shb = iz00 * q00_b[s] + iz01 * q10_b; + const float slb = iz10 * q00_b[s] + iz11 * q10_b; + + if (flags & cPackBC7FlagPBitOpt) + { + const float q = 1.0f / 255.0f; + float sxl[4] = { basisu::clamp(slr * q, 0.0f, 1.0f), basisu::clamp(slg * q, 0.0f, 1.0f), basisu::clamp(slb * q, 0.0f, 1.0f), 0 }; + float sxh[4] = { basisu::clamp(shr * q, 0.0f, 1.0f), basisu::clamp(shg * q, 0.0f, 1.0f), basisu::clamp(shb * q, 0.0f, 1.0f), 0 }; + + color_rgba bestMinColor, bestMaxColor; + uint32_t best_pbits[2]; + determine_shared_pbits(3, 6, sxl, sxh, bestMinColor, bestMaxColor, best_pbits); + + pbits[s] = best_pbits[0]; + lr[s] = bestMinColor.r, lg[s] = bestMinColor.g, lb[s] = bestMinColor.b; + hr[s] = bestMaxColor.r, hg[s] = bestMaxColor.g, hb[s] = bestMaxColor.b; + } + else + { + const float l = slr + slg + slb, h = shr + shg + shb; + + 
pbits[s] = (basisu::maximum(l, h) >= 129.0f * 3.0f); + + lr[s] = to_6_clamp(slr, pbits[s]); + hr[s] = to_6_clamp(shr, pbits[s]); + + lg[s] = to_6_clamp(slg, pbits[s]); + hg[s] = to_6_clamp(shg, pbits[s]); + + lb[s] = to_6_clamp(slb, pbits[s]); + hb[s] = to_6_clamp(shb, pbits[s]); + } + + } // s + + if (pActual_sse) + *pActual_sse = eval_weights_mode1_rgb_sse(pPixels, cur_weights, lr, lg, lb, hr, hg, hb, pbits, best_pat_bits); + else + eval_weights_mode1_rgb(pPixels, cur_weights, lr, lg, lb, hr, hg, hb, pbits, best_pat_bits); + + encode_mode1_rgb_block(pBlock, best_pat_index, + lr, lg, lb, hr, hg, hb, pbits[0], pbits[1], cur_weights); + } + else + { + // Mode 3: Small span + if (pFinal_sse_est) + *pFinal_sse_est = total_mode3_est_sse; + + if (total_mode3_est_sse >= sse_est_to_beat) + { +#if BASISU_BC7F_PERF_STATS + g_total_mode13_bailouts++; +#endif + return false; + } + + uint32_t lr[2], lg[2], lb[2]; + uint32_t hr[2], hg[2], hb[2]; + uint32_t pbits[4]; + + for (uint32_t s = 0; s < 2; s++) + { + const int lc = low_c[s]; + const int hc = high_c[s]; + + if (flags & cPackBC7FlagPBitOpt) + { + const float q = 1.0f / 255.0f; + float sxl[4] = { (float)pPixels[lc].r * q, (float)pPixels[lc].g * q, (float)pPixels[lc].b * q, 0 }; + float sxh[4] = { (float)pPixels[hc].r * q, (float)pPixels[hc].g * q, (float)pPixels[hc].b * q, 0 }; + + color_rgba bestMinColor, bestMaxColor; + uint32_t best_pbits[2]; + determine_unique_pbits(3, 7, sxl, sxh, bestMinColor, bestMaxColor, best_pbits); + + pbits[s * 2 + 0] = best_pbits[0]; + pbits[s * 2 + 1] = best_pbits[1]; + lr[s] = bestMinColor.r, lg[s] = bestMinColor.g, lb[s] = bestMinColor.b; + hr[s] = bestMaxColor.r, hg[s] = bestMaxColor.g, hb[s] = bestMaxColor.b; + } + else + { + const int l = pPixels[lc].r + pPixels[lc].g + pPixels[lc].b; + const int l_pbit = (l >= 129); + pbits[s * 2 + 0] = l_pbit; + + lr[s] = to_7(pPixels[lc].r, l_pbit); + lg[s] = to_7(pPixels[lc].g, l_pbit); + lb[s] = to_7(pPixels[lc].b, l_pbit); + + int h = pPixels[hc].r 
+ pPixels[hc].g + pPixels[hc].b; + const int h_pbit = (h >= 129); + pbits[s * 2 + 1] = h_pbit; + + hr[s] = to_7(pPixels[hc].r, h_pbit); + hg[s] = to_7(pPixels[hc].g, h_pbit); + hb[s] = to_7(pPixels[hc].b, h_pbit); + } + } // s + + uint8_t cur_weights[16]; + eval_weights_mode3_rgb(pPixels, cur_weights, lr, lg, lb, hr, hg, hb, pbits, best_pat_bits); + + float z00[2] = { 0.0f }, z10[2] = { 0.0f }, z11[2] = { 0.0f }; + float q00_r[2] = { 0.0f }; + float q00_g[2] = { 0.0f }; + float q00_b[2] = { 0.0f }; + + for (uint32_t i = 0; i < 16; i++) + { + const int subset = (best_pat_bits >> i) & 1; + const uint32_t sel = cur_weights[i]; + assert(sel <= 3); + + z00[subset] += g_bc7_2bit_ls_tab[sel][0]; + z10[subset] += g_bc7_2bit_ls_tab[sel][1]; + z11[subset] += g_bc7_2bit_ls_tab[sel][2]; + + const float w = g_bc7_2bit_ls_tab[sel][3]; + + q00_r[subset] += w * (float)pPixels[i][0]; + q00_g[subset] += w * (float)pPixels[i][1]; + q00_b[subset] += w * (float)pPixels[i][2]; + } // i + + for (uint32_t s = 0; s < 2; s++) + { + float q10_r = (float)total_r[s] - q00_r[s]; + float q10_g = (float)total_g[s] - q00_g[s]; + float q10_b = (float)total_b[s] - q00_b[s]; + + float z01 = z10[s]; + + float det = z00[s] * z11[s] - z01 * z10[s]; + if (fabs(det) < 1e-8f) + continue; + + det = 1.0f / det; + + float iz00, iz01, iz10, iz11; + iz00 = z11[s] * det; + iz01 = -z01 * det; + iz10 = -z10[s] * det; + iz11 = z00[s] * det; + + const float shr = iz00 * q00_r[s] + iz01 * q10_r; + const float slr = iz10 * q00_r[s] + iz11 * q10_r; + + const float shg = iz00 * q00_g[s] + iz01 * q10_g; + const float slg = iz10 * q00_g[s] + iz11 * q10_g; + + const float shb = iz00 * q00_b[s] + iz01 * q10_b; + const float slb = iz10 * q00_b[s] + iz11 * q10_b; + + if (flags & cPackBC7FlagPBitOpt) + { + const float q = 1.0f / 255.0f; + float sxl[4] = { basisu::clamp(slr * q, 0.0f, 1.0f), basisu::clamp(slg * q, 0.0f, 1.0f), basisu::clamp(slb * q, 0.0f, 1.0f), 0 }; + float sxh[4] = { basisu::clamp(shr * q, 0.0f, 1.0f), 
basisu::clamp(shg * q, 0.0f, 1.0f), basisu::clamp(shb * q, 0.0f, 1.0f), 0 }; + + color_rgba bestMinColor, bestMaxColor; + uint32_t best_pbits[2]; + determine_unique_pbits(3, 7, sxl, sxh, bestMinColor, bestMaxColor, best_pbits); + + pbits[s * 2 + 0] = best_pbits[0]; + pbits[s * 2 + 1] = best_pbits[1]; + lr[s] = bestMinColor.r, lg[s] = bestMinColor.g, lb[s] = bestMinColor.b; + hr[s] = bestMaxColor.r, hg[s] = bestMaxColor.g, hb[s] = bestMaxColor.b; + } + else + { + const float l = slr + slg + slb; + const int l_pbit = (l >= 129.0f * 3.0f); + pbits[s * 2 + 0] = l_pbit; + + lr[s] = to_7_clamp(slr, l_pbit); + lg[s] = to_7_clamp(slg, l_pbit); + lb[s] = to_7_clamp(slb, l_pbit); + + const float h = shr + shg + shb; + const int h_pbit = (h >= 129.0f * 3.0f); + pbits[s * 2 + 1] = h_pbit; + + hr[s] = to_7_clamp(shr, h_pbit); + hg[s] = to_7_clamp(shg, h_pbit); + hb[s] = to_7_clamp(shb, h_pbit); + } + + } // s + + if (pActual_sse) + *pActual_sse = eval_weights_mode3_rgb_sse(pPixels, cur_weights, lr, lg, lb, hr, hg, hb, pbits, best_pat_bits); + else + eval_weights_mode3_rgb(pPixels, cur_weights, lr, lg, lb, hr, hg, hb, pbits, best_pat_bits); + + encode_mode3_rgb_block(pBlock, best_pat_index, + lr, lg, lb, hr, hg, hb, pbits, cur_weights); + } + +#ifdef _DEBUG + if (pActual_sse) + { + const uint32_t expected_sse = calc_sse(pBlock, pPixels); + assert(expected_sse == *pActual_sse); + } +#endif + + return true; + } + + inline int dist3(int lr, int lg, int lb, int hr, int hg, int hb) + { + return basisu::squarei(hr - lr) + basisu::squarei(hg - lg) + basisu::squarei(hb - lb); + } + + bool determine_3subsets(uint8_t* pFinal_3subsets, + const color_rgba* pPixels, + float block_xr, float block_xg, float block_xb, + int block_mean_r, int block_mean_g, int block_mean_b) + { + uint32_t subset_indices[16]; + int subset_means[2][3] = { }; + int subset_total[2] = { }; + + for (uint32_t i = 0; i < 16; i++) + { + const int rd = pPixels[i].r - block_mean_r; + const int gd = pPixels[i].g - 
block_mean_g; + const int bd = pPixels[i].b - block_mean_b; + + const uint32_t subset_index = ((float)rd * block_xr + (float)gd * block_xg + (float)bd * block_xb) > 0.0f; + + subset_indices[i] = subset_index; + + subset_means[subset_index][0] += pPixels[i].r; + subset_means[subset_index][1] += pPixels[i].g; + subset_means[subset_index][2] += pPixels[i].b; + + subset_total[subset_index]++; + } + + for (uint32_t i = 0; i < 2; i++) + { + const uint32_t t = subset_total[i]; + if (!t) + return false; + + subset_means[i][0] = (subset_means[i][0] + (t >> 1)) / t; + subset_means[i][1] = (subset_means[i][1] + (t >> 1)) / t; + subset_means[i][2] = (subset_means[i][2] + (t >> 1)) / t; + } + + int subset_sses[2] = { }; + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t subset_index = subset_indices[i]; + + subset_sses[subset_index] += dist3(pPixels[i].r, pPixels[i].g, pPixels[i].b, subset_means[subset_index][0], subset_means[subset_index][1], subset_means[subset_index][2]); + } + + const uint32_t subset_to_split = (subset_sses[1] > subset_sses[0]); + if (subset_total[subset_to_split] < 2) + return false; + + int lo_y = INT_MAX, hi_y = 0; + for (uint32_t i = 0; i < 16; i++) + { + if (subset_indices[i] != subset_to_split) + continue; + + int y = ((pPixels[i].r + pPixels[i].g + pPixels[i].b) << 4) + i; + + lo_y = basisu::minimum(lo_y, y); + hi_y = basisu::maximum(hi_y, y); + } + + const int lo_y_index = lo_y & 15, hi_y_index = hi_y & 15; + if (lo_y_index == hi_y_index) + return false; + + const int lr = pPixels[lo_y_index].r, lg = pPixels[lo_y_index].g, lb = pPixels[lo_y_index].b; + const int hr = pPixels[hi_y_index].r, hg = pPixels[hi_y_index].g, hb = pPixels[hi_y_index].b; + + memset(pFinal_3subsets, 2, 16); + + for (uint32_t i = 0; i < 16; i++) + { + if (subset_indices[i] == subset_to_split) + { + const int dist0 = dist3(lr, lg, lb, pPixels[i].r, pPixels[i].g, pPixels[i].b); + const int dist1 = dist3(hr, hg, hb, pPixels[i].r, pPixels[i].g, pPixels[i].b); + + 
pFinal_3subsets[i] = dist1 > dist0; + } + } + + return true; + } + + /* pop16: number of set bits in the low 16 bits of x. NOTE(review): the _MSC_VER branch uses __popcnt16, which Microsoft documents as an x86/x64 intrinsic - confirm this path compiles for MSVC ARM64 builds. */ static inline int pop16(uint32_t x) + { +#if defined(_MSC_VER) + return __popcnt16((unsigned short)x); +#else + return __builtin_popcount(x & 0xFFFFu); +#endif + } + + /* pick_3subset_pat_index: picks the 3-subset partition pattern that agrees best with pDesired_subsets (16 labels, each expected to be 0..2 since they index M[3]). Builds one 16-bit membership mask per desired label, then for each of the first MAX_PATTERNS3_TO_CHECK entries of g_part3_bitmasks (low 16 bits used as S0, high 16 bits as S1 - presumably the pattern's subset-0 and subset-1 masks; verify against the table) scores the pattern as the largest number of matching pixels over the 6 label permutations. The earliest pattern with the strictly highest score wins, and a score of 16 stops the search. Returns that pattern index; best_pat_index_first16 receives the best index found among patterns 0..15 (it stays 0 if the loop ends before pattern 15 without a perfect match). */ int pick_3subset_pat_index(const uint8_t* pDesired_subsets, uint32_t& best_pat_index_first16) + { + best_pat_index_first16 = 0; + + uint16_t M[3]; + memset(M, 0, sizeof(M)); + + for (uint32_t i = 0; i < 16; i++) + { + uint32_t s = pDesired_subsets[i]; + M[s] |= (1 << i); + } + + const int n0 = pop16(M[0]), n1 = pop16(M[1]), n2 = 16 - n0 - n1; + + int best_score = -1; + int best_pat = 0; + + for (int p = 0; p < (int)MAX_PATTERNS3_TO_CHECK; ++p) + { + uint16_t S0 = (uint16_t)(g_part3_bitmasks[p] & 0xFFFFu); + uint16_t S1 = (uint16_t)(g_part3_bitmasks[p] >> 16); + + // Overlap counts Cxy (desired label x vs. mask Sy): y = 0 and 1 via 6 popcnts; y = 2 derived by subtraction from the label totals + int C00 = pop16(M[0] & S0), C01 = pop16(M[0] & S1), C02 = n0 - C00 - C01; + int C10 = pop16(M[1] & S0), C11 = pop16(M[1] & S1), C12 = n1 - C10 - C11; + int C20 = pop16(M[2] & S0), C21 = pop16(M[2] & S1), C22 = n2 - C20 - C21; + + int s0 = C00 + C11 + C22; // (0,1,2) + int s1 = C00 + C12 + C21; // (0,2,1) + int s2 = C01 + C10 + C22; // (1,0,2) + int s3 = C01 + C12 + C20; // (1,2,0) + int s4 = C02 + C10 + C21; // (2,0,1) + int s5 = C02 + C11 + C20; // (2,1,0) + + // Argmax over 6 + int s = s0; + if (s1 > s) { s = s1; } + if (s2 > s) { s = s2; } + if (s3 > s) { s = s3; } + if (s4 > s) { s = s4; } + if (s5 > s) { s = s5; } + + if (s > best_score) + { + best_score = s; + best_pat = p; + + if (s == 16) + { + // perfect match so early out + if (p <= 15) + best_pat_index_first16 = best_pat; + break; + } + } + + if (p == 15) + { + // for mode 0 + best_pat_index_first16 = best_pat; + } + } + + return best_pat; + } + +#if 0 + static const uint8_t s_perms3[6][3] = { {0,1,2}, {0,2,1}, {1,0,2}, {1,2,0}, {2,0,1}, {2,1,0} }; + + int pick_3subset_pat_index_slow(const uint8_t* pDesired_subsets, uint32_t&
best_pat_index_first16) + { + int best_pat = 0, best_dist = INT_MAX; + + for (uint32_t m = 0; m < 64; m++) + { + const uint8_t* pPat = &g_bc7_partition3[m * 16]; + + for (uint32_t p = 0; p < 6; p++) + { + int trial_dist = 0; + + for (uint32_t i = 0; i < 16; i++) + { + uint32_t s = s_perms3[p][pDesired_subsets[i]]; + + trial_dist += (s != pPat[i]); + + } // i + + if (trial_dist < best_dist) + { + best_dist = trial_dist; + best_pat = m; + } + + } // p + + if (m == 15) + best_pat_index_first16 = best_pat; + + } // m + + return best_pat; + } +#endif + + // false if packing failed (not enough unique colors) + /* pack_mode0_or_2_rgb: attempts a 3-subset encoding of the 16 RGB pixels as mode 0 or mode 2 (comments further down in this function describe mode 0 as 4-bit endpoints with unique p-bits and 3-bit weights, and mode 2 as 5-bit endpoints with 2-bit weights). The desired labelling comes from determine_3subsets(); pick_3subset_pat_index() returns the pattern used for mode 2 (asserted <= 63) and, through its out-parameter, the pattern used for mode 0 (asserted <= 15). For each mode an SSE estimate (analytical quantization estimate plus slam-to-line estimate) is formed and the lower one selects the mode (mode 2 on a tie). Returns false without writing pBlock when determine_3subsets() fails (*pFinal_sse_est is then set to 1e+9f) or when the selected estimate is >= sse_est_to_beat; otherwise performs one least-squares endpoint refinement, encodes into pBlock and returns true. pFinal_sse_est and pActual_sse are optional outputs; flags is unused. */ bool pack_mode0_or_2_rgb(uint8_t* pBlock, const color_rgba* pPixels, + float block_xr, float block_xg, float block_xb, + int block_mean_r, int block_mean_g, int block_mean_b, float sse_est_to_beat, uint32_t flags, + float* pFinal_sse_est = nullptr, + uint32_t* pActual_sse = nullptr) + { + (void)flags; + +#if BASISU_BC7F_PERF_STATS + g_total_mode02_evals++; +#endif + + uint8_t desired_3subsets[16]; + if (!determine_3subsets(desired_3subsets, pPixels, block_xr, block_xg, block_xb, block_mean_r, block_mean_g, block_mean_b)) + { +#if BASISU_BC7F_PERF_STATS + g_total_mode02_bailouts++; +#endif + if (pFinal_sse_est) + *pFinal_sse_est = 1e+9f; + + return false; + } + + uint32_t best_pat_indices[2]; // mode 0 and 2 + best_pat_indices[1] = pick_3subset_pat_index(desired_3subsets, best_pat_indices[0]); + + assert((best_pat_indices[0] <= 15) && (best_pat_indices[1] <= 63)); + + float total_quant_sse_mode[2] = { }; + float total_slam_to_line_sse_mode[2] = { }; + + int mode_total_c[2][3] = { }; + int mode_low_c[2][3] = { }, mode_high_c[2][3] = { }; + int mode_total_r[2][3] = { }, mode_total_g[2][3] = { }, mode_total_b[2][3] = { }; + + int spans[4] = { }; + + for (uint32_t mode_iter = 0; mode_iter < 2; mode_iter++) // mode 0 vs.
mode 2 + { + if ((mode_iter) && (best_pat_indices[0] == best_pat_indices[1])) + { + for (uint32_t s = 0; s < 3; s++) + { + mode_total_c[1][s] = mode_total_c[0][s]; + + mode_low_c[1][s] = mode_low_c[0][s]; + mode_high_c[1][s] = mode_high_c[0][s]; + + mode_total_r[1][s] = mode_total_r[0][s]; + mode_total_g[1][s] = mode_total_g[0][s]; + mode_total_b[1][s] = mode_total_b[0][s]; + + total_slam_to_line_sse_mode[1] = total_slam_to_line_sse_mode[0]; + + } // subset + } + else + { + const uint32_t best_pat_index = best_pat_indices[mode_iter]; + const uint8_t* pBest_pat = &g_bc7_partition3[best_pat_index * 16]; + + int* pTotal_r = &mode_total_r[mode_iter][0]; + int* pTotal_g = &mode_total_g[mode_iter][0]; + int* pTotal_b = &mode_total_b[mode_iter][0]; + + int* pTotal_c = mode_total_c[mode_iter]; + + for (uint32_t i = 0; i < 16; i++) + { + const int r = pPixels[i].r, g = pPixels[i].g, b = pPixels[i].b; + const int subset = pBest_pat[i]; + + pTotal_r[subset] += r; pTotal_g[subset] += g; pTotal_b[subset] += b; + pTotal_c[subset]++; + } + + int mean_r[3], mean_g[3], mean_b[3]; + for (uint32_t s = 0; s < 3; s++) + { + const uint32_t t = pTotal_c[s]; + const uint32_t h = (t >> 1); + + mean_r[s] = (pTotal_r[s] + h) / t; + mean_g[s] = (pTotal_g[s] + h) / t; + mean_b[s] = (pTotal_b[s] + h) / t; + } + + int icov[3][6] = { }; + + for (uint32_t i = 0; i < 16; i++) + { + const int subset = pBest_pat[i]; + + int r = (int)pPixels[i].r - mean_r[subset]; + int g = (int)pPixels[i].g - mean_g[subset]; + int b = (int)pPixels[i].b - mean_b[subset]; + icov[subset][0] += r * r; icov[subset][1] += r * g; icov[subset][2] += r * b; + icov[subset][3] += g * g; icov[subset][4] += g * b; + icov[subset][5] += b * b; + } + + int ar[3], ag[3], ab[3]; + + float total_slam_to_line_sse = 0.0f; + + for (uint32_t s = 0; s < 3; s++) + { + int block_max_var = basisu::maximum(icov[s][0], icov[s][3], icov[s][5]); + + float cov[6]; + for (uint32_t i = 0; i < 6; i++) + cov[i] = (float)icov[s][i]; + + const float sc = 
1.0f / ((float)block_max_var + .0000125f); + const float wx = sc * cov[0], wy = sc * cov[3], wz = sc * cov[5]; + + const float alt_xr = cov[0] * wx + cov[1] * wy + cov[2] * wz; + const float alt_xg = cov[1] * wx + cov[3] * wy + cov[4] * wz; + const float alt_xb = cov[2] * wx + cov[4] * wy + cov[5] * wz; + + total_slam_to_line_sse += estimate_slam_to_line_sse_3D(cov, alt_xr, alt_xg, alt_xb); + + int saxis_r = 306, saxis_g = 601, saxis_b = 117; + + float k = basisu::maximum(fabsf(alt_xr), fabsf(alt_xg), fabsf(alt_xb)); + if (fabs(k) >= basisu::SMALL_FLOAT_VAL) + { + float m = 2048.0f / k; + saxis_r = (int)(alt_xr * m); + saxis_g = (int)(alt_xg * m); + saxis_b = (int)(alt_xb * m); + } + + ar[s] = (int)((uint32_t)saxis_r << 4U); + ag[s] = (int)((uint32_t)saxis_g << 4U); + ab[s] = (int)((uint32_t)saxis_b << 4U); + } // s + + total_slam_to_line_sse_mode[mode_iter] = total_slam_to_line_sse; + + int low_dot[3] = { INT_MAX, INT_MAX, INT_MAX }; + int high_dot[3] = { INT_MIN, INT_MIN, INT_MIN }; + + for (uint32_t i = 0; i < 16; i++) + { + const int subset = pBest_pat[i]; + const int saxis_r = ar[subset], saxis_g = ag[subset], saxis_b = ab[subset]; + + int dot = (pPixels[i].r * saxis_r + pPixels[i].g * saxis_g + pPixels[i].b * saxis_b) + i; + + low_dot[subset] = basisu::minimum(low_dot[subset], dot); + high_dot[subset] = basisu::maximum(high_dot[subset], dot); + } + + for (uint32_t subset = 0; subset < 3; subset++) + { + mode_low_c[mode_iter][subset] = low_dot[subset] & 15; + mode_high_c[mode_iter][subset] = high_dot[subset] & 15; + } // subset + + } // if ((mode_iter) && (best_pat_indices[0] == best_pat_indices[1])) + + for (uint32_t subset = 0; subset < 3; subset++) + { + const uint32_t low_pixel = mode_low_c[mode_iter][subset]; + const uint32_t high_pixel = mode_high_c[mode_iter][subset]; + + for (uint32_t c = 0; c < 3; c++) + spans[c] = pPixels[high_pixel][c] - pPixels[low_pixel][c]; + + float subset_sse; + if (mode_iter == 0) + { + // mode 0: 4-bit endpoints, unique 
p-bits, 3-bit weights, slight p-bit endpoint scale factor + subset_sse = analytical_quant_est_sse(16, 8, 3, spans, nullptr, UNIQUE_PBIT_DISCOUNT, mode_total_c[mode_iter][subset]); + } + else + { + // mode 2: 5-bit endpoints, no p-bits, 2-bit weights, no endpoint scale factor + subset_sse = analytical_quant_est_sse(32, 4, 3, spans, nullptr, 1.0f, mode_total_c[mode_iter][subset]); + } + + total_quant_sse_mode[mode_iter] += subset_sse; + } // subset + + } // mode_iter + + const float total_sse_est_mode0 = total_quant_sse_mode[0] + total_slam_to_line_sse_mode[0]; + const float total_sse_est_mode2 = total_quant_sse_mode[1] + total_slam_to_line_sse_mode[1]; + + if (total_sse_est_mode0 < total_sse_est_mode2) + { + if (pFinal_sse_est) + *pFinal_sse_est = total_sse_est_mode0; + + // Use mode 0 (high span) + if (total_sse_est_mode0 >= sse_est_to_beat) + { +#if BASISU_BC7F_PERF_STATS + g_total_mode02_bailouts++; +#endif + return false; + } + + const uint32_t best_pat_index = best_pat_indices[0]; + const uint8_t* pBest_pat = &g_bc7_partition3[best_pat_index * 16]; + + const int* pLow_c = &mode_low_c[0][0]; + const int* pHigh_c = &mode_high_c[0][0]; + + const int* pTotal_r = &mode_total_r[0][0]; + const int* pTotal_g = &mode_total_g[0][0]; + const int* pTotal_b = &mode_total_b[0][0]; + + float xl[3][4], xh[3][4]; + + for (uint32_t s = 0; s < 3; s++) + { + const int lc = pLow_c[s]; + const int hc = pHigh_c[s]; + + xl[s][0] = (float)pPixels[lc].r * (1.0f / 255.0f); + xl[s][1] = (float)pPixels[lc].g * (1.0f / 255.0f); + xl[s][2] = (float)pPixels[lc].b * (1.0f / 255.0f); + xl[s][3] = 0.0f; + + xh[s][0] = (float)pPixels[hc].r * (1.0f / 255.0f); + xh[s][1] = (float)pPixels[hc].g * (1.0f / 255.0f); + xh[s][2] = (float)pPixels[hc].b * (1.0f / 255.0f); + xh[s][3] = 0.0f; + } // s + + uint32_t lr[3], lg[3], lb[3], hr[3], hg[3], hb[3], pbits[6]; + + for (uint32_t s = 0; s < 3; s++) + { + color_rgba el, eh; + determine_unique_pbits(3, 4, xl[s], xh[s], el, eh, &pbits[s << 1]); + + lr[s] = 
el[0]; lg[s] = el[1]; lb[s] = el[2]; + hr[s] = eh[0]; hg[s] = eh[1]; hb[s] = eh[2]; + + } // s + + uint8_t cur_weights[16]; + eval_weights_mode0_rgb(pPixels, cur_weights, lr, lg, lb, hr, hg, hb, pbits, best_pat_index); + + float z00[3] = { 0.0f }, z10[3] = { 0.0f }, z11[3] = { 0.0f }; + float q00_r[3] = { 0.0f }; + float q00_g[3] = { 0.0f }; + float q00_b[3] = { 0.0f }; + + for (uint32_t i = 0; i < 16; i++) + { + const int subset = pBest_pat[i]; + const uint32_t sel = cur_weights[i]; + assert(sel <= 7); + + z00[subset] += g_bc7_3bit_ls_tab[sel][0]; + z10[subset] += g_bc7_3bit_ls_tab[sel][1]; + z11[subset] += g_bc7_3bit_ls_tab[sel][2]; + + const float w = g_bc7_3bit_ls_tab[sel][3]; + + q00_r[subset] += w * (float)pPixels[i][0]; + q00_g[subset] += w * (float)pPixels[i][1]; + q00_b[subset] += w * (float)pPixels[i][2]; + } // i + + for (uint32_t s = 0; s < 3; s++) + { + float q10_r = (float)pTotal_r[s] - q00_r[s]; + float q10_g = (float)pTotal_g[s] - q00_g[s]; + float q10_b = (float)pTotal_b[s] - q00_b[s]; + + float z01 = z10[s]; + + float det = z00[s] * z11[s] - z01 * z10[s]; + if (fabs(det) < 1e-8f) + continue; + + det = 1.0f / det; + + float iz00, iz01, iz10, iz11; + iz00 = z11[s] * det; + iz01 = -z01 * det; + iz10 = -z10[s] * det; + iz11 = z00[s] * det; + + const float q = 1.0f / 255.0f; + + xl[s][0] = basisu::clamp(q * (iz10 * q00_r[s] + iz11 * q10_r), 0.0f, 1.0f); + xh[s][0] = basisu::clamp(q * (iz00 * q00_r[s] + iz01 * q10_r), 0.0f, 1.0f); + + xl[s][1] = basisu::clamp(q * (iz10 * q00_g[s] + iz11 * q10_g), 0.0f, 1.0f); + xh[s][1] = basisu::clamp(q * (iz00 * q00_g[s] + iz01 * q10_g), 0.0f, 1.0f); + + xl[s][2] = basisu::clamp(q * (iz10 * q00_b[s] + iz11 * q10_b), 0.0f, 1.0f); + xh[s][2] = basisu::clamp(q * (iz00 * q00_b[s] + iz01 * q10_b), 0.0f, 1.0f); + } // s + + for (uint32_t s = 0; s < 3; s++) + { + color_rgba el, eh; + determine_unique_pbits(3, 4, xl[s], xh[s], el, eh, &pbits[s << 1]); // fills in both pbit entries + + lr[s] = el[0]; lg[s] = el[1]; lb[s] = 
el[2]; + hr[s] = eh[0]; hg[s] = eh[1]; hb[s] = eh[2]; + + } // s + + if (pActual_sse) + *pActual_sse = eval_weights_mode0_rgb_sse(pPixels, cur_weights, lr, lg, lb, hr, hg, hb, pbits, best_pat_index); + else + eval_weights_mode0_rgb(pPixels, cur_weights, lr, lg, lb, hr, hg, hb, pbits, best_pat_index); + + encode_mode0_rgb_block(pBlock, best_pat_index, lr, lg, lb, hr, hg, hb, pbits, cur_weights); + } + else + { + if (pFinal_sse_est) + *pFinal_sse_est = total_sse_est_mode2; + + // Use mode 2 (low span) + if (total_sse_est_mode2 >= sse_est_to_beat) + { +#if BASISU_BC7F_PERF_STATS + g_total_mode02_bailouts++; +#endif + return false; + } + + const uint32_t best_pat_index = best_pat_indices[1]; + const uint8_t* pBest_pat = &g_bc7_partition3[best_pat_index * 16]; + + const int* pLow_c = &mode_low_c[1][0]; + const int* pHigh_c = &mode_high_c[1][0]; + + const int* pTotal_r = &mode_total_r[1][0]; + const int* pTotal_g = &mode_total_g[1][0]; + const int* pTotal_b = &mode_total_b[1][0]; + + uint32_t lr[3], lg[3], lb[3]; + uint32_t hr[3], hg[3], hb[3]; + + for (uint32_t s = 0; s < 3; s++) + { + const int lc = pLow_c[s]; + const int hc = pHigh_c[s]; + + lr[s] = to_5(pPixels[lc].r); + lg[s] = to_5(pPixels[lc].g); + lb[s] = to_5(pPixels[lc].b); + + hr[s] = to_5(pPixels[hc].r); + hg[s] = to_5(pPixels[hc].g); + hb[s] = to_5(pPixels[hc].b); + } + + uint8_t cur_weights[16]; + eval_weights_mode2_rgb(pPixels, cur_weights, lr, lg, lb, hr, hg, hb, best_pat_index); + + float z00[3] = { 0.0f }, z10[3] = { 0.0f }, z11[3] = { 0.0f }; + float q00_r[3] = { 0.0f }; + float q00_g[3] = { 0.0f }; + float q00_b[3] = { 0.0f }; + + for (uint32_t i = 0; i < 16; i++) + { + const int subset = pBest_pat[i]; + const uint32_t sel = cur_weights[i]; + assert(sel <= 3); + + z00[subset] += g_bc7_2bit_ls_tab[sel][0]; + z10[subset] += g_bc7_2bit_ls_tab[sel][1]; + z11[subset] += g_bc7_2bit_ls_tab[sel][2]; + + const float w = g_bc7_2bit_ls_tab[sel][3]; + + q00_r[subset] += w * (float)pPixels[i][0]; + q00_g[subset] 
+= w * (float)pPixels[i][1]; + q00_b[subset] += w * (float)pPixels[i][2]; + } // i + + for (uint32_t s = 0; s < 3; s++) + { + float q10_r = (float)pTotal_r[s] - q00_r[s]; + float q10_g = (float)pTotal_g[s] - q00_g[s]; + float q10_b = (float)pTotal_b[s] - q00_b[s]; + + float z01 = z10[s]; + + float det = z00[s] * z11[s] - z01 * z10[s]; + if (fabs(det) < 1e-8f) + continue; + + det = 1.0f / det; + + float iz00, iz01, iz10, iz11; + iz00 = z11[s] * det; + iz01 = -z01 * det; + iz10 = -z10[s] * det; + iz11 = z00[s] * det; + + hr[s] = to_5_clamp(iz00 * q00_r[s] + iz01 * q10_r); + lr[s] = to_5_clamp(iz10 * q00_r[s] + iz11 * q10_r); + + hg[s] = to_5_clamp(iz00 * q00_g[s] + iz01 * q10_g); + lg[s] = to_5_clamp(iz10 * q00_g[s] + iz11 * q10_g); + + hb[s] = to_5_clamp(iz00 * q00_b[s] + iz01 * q10_b); + lb[s] = to_5_clamp(iz10 * q00_b[s] + iz11 * q10_b); + } // s + + if (pActual_sse) + *pActual_sse = eval_weights_mode2_rgb_sse(pPixels, cur_weights, lr, lg, lb, hr, hg, hb, best_pat_index); + else + eval_weights_mode2_rgb(pPixels, cur_weights, lr, lg, lb, hr, hg, hb, best_pat_index); + + encode_mode2_rgb_block(pBlock, best_pat_index, + lr, lg, lb, hr, hg, hb, cur_weights); + } + +#ifdef _DEBUG + if (pActual_sse) + { + const uint32_t expected_sse = calc_sse(pBlock, pPixels); + assert(expected_sse == *pActual_sse); + } +#endif + + return true; + } + + /* pack_mode4_or_5: attempts a dual-plane encoding (mode 4 or mode 5). dp_chan_index names the channel handled by the scalar plane: for 3 the pixels are used as-is, otherwise a local copy is made with that channel swapped with channel 3, and (dp_chan_index + 1) & 3 is passed to the block encoders. Three candidates are ranked by estimated SSE: mode 5, mode 4 with 3-bit RGB weights and 2-bit scalar weights (index bit 1), and mode 4 with 2-bit RGB weights and 3-bit scalar weights (index bit 0). Returns false without writing pBlock when the best estimate is >= sse_est_to_beat; otherwise refines the RGB and scalar endpoints with one least-squares pass each, encodes into pBlock and returns true. pFinal_sse_est and pActual_sse are optional outputs; flags is unused. */ bool pack_mode4_or_5(uint8_t* pBlock, const color_rgba* pOrig_pixels, uint32_t dp_chan_index, float sse_est_to_beat, uint32_t flags, + float* pFinal_sse_est = nullptr, + uint32_t* pActual_sse = nullptr) + { + (void)flags; + +#if BASISU_BC7F_PERF_STATS + g_total_mode45_evals++; +#endif + + color_rgba pixels[16]; + const color_rgba* pPixels = pOrig_pixels; + + if (dp_chan_index != 3) + { + memcpy(pixels, pOrig_pixels, sizeof(color_rgba) * 16); + pPixels = pixels; + + for (uint32_t i = 0; i < 16; i++) + { + const uint8_t c = pixels[i][dp_chan_index]; + pixels[i][dp_chan_index] = pixels[i][3]; + pixels[i][3] = c; + } + } + + int total_r = 0,
total_g = 0, total_b = 0, total_a = 0; + + int min_r = 255, min_g = 255, min_b = 255, min_a = 255; + int max_r = 0, max_g = 0, max_b = 0, max_a = 0; + + for (uint32_t i = 0; i < 16; i++) + { + const int r = pPixels[i].r, g = pPixels[i].g, b = pPixels[i].b, a = pPixels[i].a; + + total_r += r; total_g += g; total_b += b; total_a += a; + + min_r = basisu::minimum(min_r, r); min_g = basisu::minimum(min_g, g); min_b = basisu::minimum(min_b, b); min_a = basisu::minimum(min_a, a); + max_r = basisu::maximum(max_r, r); max_g = basisu::maximum(max_g, g); max_b = basisu::maximum(max_b, b); max_a = basisu::maximum(max_a, a); + } + + int mean_r = (total_r + 8) >> 4, mean_g = (total_g + 8) >> 4, mean_b = (total_b + 8) >> 4; + + // covar rows are: + // 0, 1, 2 + // 1, 3, 4 + // 2, 4, 5 + int icov[6] = { 0, 0, 0, 0, 0, 0 }; + + for (uint32_t i = 0; i < 16; i++) + { + const int r = (int)pPixels[i].r - mean_r; + const int g = (int)pPixels[i].g - mean_g; + const int b = (int)pPixels[i].b - mean_b; + icov[0] += r * r; icov[1] += r * g; icov[2] += r * b; + icov[3] += g * g; icov[4] += g * b; + icov[5] += b * b; + } + + float cov3[6]; + for (uint32_t i = 0; i < 6; i++) + cov3[i] = (float)icov[i]; + + const int block_max_var3 = basisu::maximum(icov[0], icov[3], icov[5]); // not divided by 16, i.e. scaled by 16 + + const float sc3 = block_max_var3 ? 
(1.0f / (float)block_max_var3) : 0; + const float wx3 = sc3 * cov3[0], wy3 = sc3 * cov3[3], wz3 = sc3 * cov3[5]; + + const float alt_xr = cov3[0] * wx3 + cov3[1] * wy3 + cov3[2] * wz3; + const float alt_xg = cov3[1] * wx3 + cov3[3] * wy3 + cov3[4] * wz3; + const float alt_xb = cov3[2] * wx3 + cov3[4] * wy3 + cov3[5] * wz3; + + // Same for mode 4/5 + const float rgb_slam_to_line_sse_est = estimate_slam_to_line_sse_3D(cov3, alt_xr, alt_xg, alt_xb); + + int saxis_r = 306, saxis_g = 601, saxis_b = 117; + + float k = basisu::maximum(fabsf(alt_xr), fabsf(alt_xg), fabsf(alt_xb)); + if (fabs(k) >= basisu::SMALL_FLOAT_VAL) + { + float m = 2048.0f / k; + saxis_r = (int)(alt_xr * m); + saxis_g = (int)(alt_xg * m); + saxis_b = (int)(alt_xb * m); + } + + saxis_r = (int)((uint32_t)saxis_r << 4U); + saxis_g = (int)((uint32_t)saxis_g << 4U); + saxis_b = (int)((uint32_t)saxis_b << 4U); + + int low_dot = INT_MAX, high_dot = INT_MIN; + + for (uint32_t i = 0; i < 16; i += 4) + { + int dot0 = (pPixels[i].r * saxis_r + pPixels[i].g * saxis_g + pPixels[i].b * saxis_b) + i; + int dot1 = (pPixels[i + 1].r * saxis_r + pPixels[i + 1].g * saxis_g + pPixels[i + 1].b * saxis_b) + i + 1; + int dot2 = (pPixels[i + 2].r * saxis_r + pPixels[i + 2].g * saxis_g + pPixels[i + 2].b * saxis_b) + i + 2; + int dot3 = (pPixels[i + 3].r * saxis_r + pPixels[i + 3].g * saxis_g + pPixels[i + 3].b * saxis_b) + i + 3; + + int min_d01 = basisu::minimum(dot0, dot1); + int max_d01 = basisu::maximum(dot0, dot1); + + int min_d23 = basisu::minimum(dot2, dot3); + int max_d23 = basisu::maximum(dot2, dot3); + + int min_d = basisu::minimum(min_d01, min_d23); + int max_d = basisu::maximum(max_d01, max_d23); + + low_dot = basisu::minimum(low_dot, min_d); + high_dot = basisu::maximum(high_dot, max_d); + } + + const int low_c = low_dot & 15; + const int high_c = high_dot & 15; + + const int rgb_spans[4] = { pPixels[high_c][0] - pPixels[low_c][0], pPixels[high_c][1] - pPixels[low_c][1], pPixels[high_c][2] - pPixels[low_c][2], 
0 }; + const int a_span = max_a - min_a; + + const float SECOND_PLANE_SPAN_WEIGHT = (dp_chan_index == 3) ? 1.0f : 1.0f; /* both arms are 1.0f, so the scalar-plane span is currently weighted the same whether or not a channel was swapped */ + + const float mode_4_rgb_3bit_quant_sse_est = analytical_quant_est_sse(32, 8, 3, rgb_spans, nullptr, 1.0f, 16); // mode 4 rgb: 5-bit endpoints, using 3-bit weights for RGB + const float mode_4_a_2bit_quant_sse_est = analytical_quant_est_sse(64, 4, a_span, SECOND_PLANE_SPAN_WEIGHT, 1.0f, 16); // mode 4 a: 6-bit endpoints, using 2-bit weights for A + + const float mode_4_rgb_2bit_quant_sse_est = analytical_quant_est_sse(32, 4, 3, rgb_spans, nullptr, 1.0f, 16); // mode 4 rgb: 5-bit endpoints, using 2-bit weights for RGB + const float mode_4_a_3bit_quant_sse_est = analytical_quant_est_sse(64, 8, a_span, SECOND_PLANE_SPAN_WEIGHT, 1.0f, 16); // mode 4 a: 6-bit endpoints, using 3-bit weights for A + + const float total_mode_4_rgb3_a2_sse_est = rgb_slam_to_line_sse_est + mode_4_rgb_3bit_quant_sse_est + mode_4_a_2bit_quant_sse_est; + const float total_mode_4_rgb2_a3_sse_est = rgb_slam_to_line_sse_est + mode_4_rgb_2bit_quant_sse_est + mode_4_a_3bit_quant_sse_est; + + const float mode_5_rgb_quant_sse_est = analytical_quant_est_sse(128, 4, 3, rgb_spans, nullptr, 1.0f, 16); // mode 5 rgb: 7-bit endpoints, using 2-bit weights for RGB + const float mode_5_a_quant_sse_est = analytical_quant_est_sse(256, 4, a_span, SECOND_PLANE_SPAN_WEIGHT, 1.0f, 16); // mode 5 a: 8-bit endpoints, using 2-bit weights for A + const float total_mode_5_rgba_sse_est = rgb_slam_to_line_sse_est + mode_5_rgb_quant_sse_est + mode_5_a_quant_sse_est; + + if (total_mode_5_rgba_sse_est < basisu::minimum(total_mode_4_rgb3_a2_sse_est, total_mode_4_rgb2_a3_sse_est)) + { + if (pFinal_sse_est) + *pFinal_sse_est = total_mode_5_rgba_sse_est; + + // Mode 5 - low RGB/A span + if (total_mode_5_rgba_sse_est >= sse_est_to_beat) + { +#if BASISU_BC7F_PERF_STATS + g_total_mode45_bailouts++; +#endif + return false; + } + + int lr = to_7(pPixels[low_c].r), lg = to_7(pPixels[low_c].g), lb =
to_7(pPixels[low_c].b), la = min_a; + int hr = to_7(pPixels[high_c].r), hg = to_7(pPixels[high_c].g), hb = to_7(pPixels[high_c].b), ha = max_a; + + uint8_t cur_weights0[16]; // rgb 2-bits + if (pActual_sse) + *pActual_sse = eval_weights_mode5_2bit_rgb_sse(pPixels, cur_weights0, lr, lg, lb, hr, hg, hb); + else + eval_weights_mode5_2bit_rgb(pPixels, cur_weights0, lr, lg, lb, hr, hg, hb); + + vec4F xl, xh; + bool res = compute_least_squares_endpoints_3D( + 16, cur_weights0, 4, + g_bc7_2bit_ls_tab, + xl, xh, + pPixels, + (float)total_r, (float)total_g, (float)total_b); + + if (res) + { + lr = fast_roundf_int(xl[0] * (127.0f / 255.0f)); + lg = fast_roundf_int(xl[1] * (127.0f / 255.0f)); + lb = fast_roundf_int(xl[2] * (127.0f / 255.0f)); + + hr = fast_roundf_int(xh[0] * (127.0f / 255.0f)); + hg = fast_roundf_int(xh[1] * (127.0f / 255.0f)); + hb = fast_roundf_int(xh[2] * (127.0f / 255.0f)); + + if (pActual_sse) + *pActual_sse = eval_weights_mode5_2bit_rgb_sse(pPixels, cur_weights0, lr, lg, lb, hr, hg, hb); + else + eval_weights_mode5_2bit_rgb(pPixels, cur_weights0, lr, lg, lb, hr, hg, hb); + } + + uint8_t cur_weights1[16]; // alpha 2-bits + uint32_t a_sse = 0; + if (pActual_sse) + a_sse = eval_weights_mode5_2bit_a_sse(pPixels, cur_weights1, la, ha); + else + eval_weights_mode5_2bit_a(pPixels, cur_weights1, la, ha); + + float nal, nah; + if (compute_least_squares_endpoints_1D( + 16, cur_weights1, 4, + g_bc7_2bit_ls_tab, + nal, nah, + pPixels, 3, + (float)total_a)) + { + la = fast_roundf_int(nal); + ha = fast_roundf_int(nah); + + if (pActual_sse) + a_sse = eval_weights_mode5_2bit_a_sse(pPixels, cur_weights1, la, ha); + else + eval_weights_mode5_2bit_a(pPixels, cur_weights1, la, ha); + } + + if (pActual_sse) + *pActual_sse += a_sse; + + encode_mode5_rgba_block(pBlock, + lr, lg, lb, la, + hr, hg, hb, ha, + cur_weights0, cur_weights1, (dp_chan_index + 1) & 3); + } + else if (total_mode_4_rgb3_a2_sse_est < total_mode_4_rgb2_a3_sse_est) + { + if (pFinal_sse_est) + 
*pFinal_sse_est = total_mode_4_rgb3_a2_sse_est; + + // mode 4, rgb 3-bits, alpha 2-bits - high span RGB, low span in A, index bit=1 + if (total_mode_4_rgb3_a2_sse_est >= sse_est_to_beat) + { +#if BASISU_BC7F_PERF_STATS + g_total_mode45_bailouts++; +#endif + return false; + } + + int lr = to_5(pPixels[low_c].r), lg = to_5(pPixels[low_c].g), lb = to_5(pPixels[low_c].b), la = to_6(min_a); + int hr = to_5(pPixels[high_c].r), hg = to_5(pPixels[high_c].g), hb = to_5(pPixels[high_c].b), ha = to_6(max_a); + + uint8_t cur_weights0[16]; // rgb 3-bits + if (pActual_sse) + *pActual_sse = eval_weights_mode4_3bit_rgb_sse(pPixels, cur_weights0, lr, lg, lb, hr, hg, hb); + else + eval_weights_mode4_3bit_rgb(pPixels, cur_weights0, lr, lg, lb, hr, hg, hb); + + vec4F xl, xh; + bool res = compute_least_squares_endpoints_3D( + 16, cur_weights0, 8, + g_bc7_3bit_ls_tab, + xl, xh, + pPixels, + (float)total_r, (float)total_g, (float)total_b); + + if (res) + { + lr = fast_roundf_int(xl[0] * (31.0f / 255.0f)); + lg = fast_roundf_int(xl[1] * (31.0f / 255.0f)); + lb = fast_roundf_int(xl[2] * (31.0f / 255.0f)); + + hr = fast_roundf_int(xh[0] * (31.0f / 255.0f)); + hg = fast_roundf_int(xh[1] * (31.0f / 255.0f)); + hb = fast_roundf_int(xh[2] * (31.0f / 255.0f)); + + if (pActual_sse) + *pActual_sse = eval_weights_mode4_3bit_rgb_sse(pPixels, cur_weights0, lr, lg, lb, hr, hg, hb); + else + eval_weights_mode4_3bit_rgb(pPixels, cur_weights0, lr, lg, lb, hr, hg, hb); + } + + uint8_t cur_weights1[16]; // alpha 2-bits + + uint32_t a_sse = 0; + if (pActual_sse) + a_sse = eval_weights_mode4_2bit_a_sse(pPixels, cur_weights1, la, ha); + else + eval_weights_mode4_2bit_a(pPixels, cur_weights1, la, ha); + + float nal, nah; + if (compute_least_squares_endpoints_1D( + 16, cur_weights1, 4, + g_bc7_2bit_ls_tab, + nal, nah, + pPixels, 3, + (float)total_a)) + { + la = fast_roundf_int(nal * (63.0f / 255.0f)); + ha = fast_roundf_int(nah * (63.0f / 255.0f)); + + if (pActual_sse) + a_sse = 
eval_weights_mode4_2bit_a_sse(pPixels, cur_weights1, la, ha); + else + eval_weights_mode4_2bit_a(pPixels, cur_weights1, la, ha); + } + + if (pActual_sse) + *pActual_sse += a_sse; + + encode_mode4_rgba_block(pBlock, + lr, lg, lb, la, + hr, hg, hb, ha, + cur_weights0, cur_weights1, (dp_chan_index + 1) & 3, 1); + } + else + { + if (pFinal_sse_est) + *pFinal_sse_est = total_mode_4_rgb2_a3_sse_est; + + // mode 4, rgb 2-bits, alpha 3-bits - low span RGB, high span in A, index bit=0 + if (total_mode_4_rgb2_a3_sse_est >= sse_est_to_beat) + { +#if BASISU_BC7F_PERF_STATS + g_total_mode45_bailouts++; +#endif + return false; + } + + int lr = to_5(pPixels[low_c].r), lg = to_5(pPixels[low_c].g), lb = to_5(pPixels[low_c].b), la = to_6(min_a); + int hr = to_5(pPixels[high_c].r), hg = to_5(pPixels[high_c].g), hb = to_5(pPixels[high_c].b), ha = to_6(max_a); + + uint8_t cur_weights0[16]; // rgb 2-bits + if (pActual_sse) + *pActual_sse = eval_weights_mode4_2bit_rgb_sse(pPixels, cur_weights0, lr, lg, lb, hr, hg, hb); + else + eval_weights_mode4_2bit_rgb(pPixels, cur_weights0, lr, lg, lb, hr, hg, hb); + + vec4F xl, xh; + bool res = compute_least_squares_endpoints_3D( + 16, cur_weights0, 4, + g_bc7_2bit_ls_tab, + xl, xh, + pPixels, + (float)total_r, (float)total_g, (float)total_b); + + if (res) + { + lr = fast_roundf_int(xl[0] * (31.0f / 255.0f)); + lg = fast_roundf_int(xl[1] * (31.0f / 255.0f)); + lb = fast_roundf_int(xl[2] * (31.0f / 255.0f)); + + hr = fast_roundf_int(xh[0] * (31.0f / 255.0f)); + hg = fast_roundf_int(xh[1] * (31.0f / 255.0f)); + hb = fast_roundf_int(xh[2] * (31.0f / 255.0f)); + + if (pActual_sse) + *pActual_sse = eval_weights_mode4_2bit_rgb_sse(pPixels, cur_weights0, lr, lg, lb, hr, hg, hb); + else + eval_weights_mode4_2bit_rgb(pPixels, cur_weights0, lr, lg, lb, hr, hg, hb); + } + + uint8_t cur_weights1[16]; // alpha 3-bits + uint32_t a_sse = 0; + if (pActual_sse) + a_sse = eval_weights_mode4_3bit_a_sse(pPixels, cur_weights1, la, ha); + else +
eval_weights_mode4_3bit_a(pPixels, cur_weights1, la, ha); + + float nal, nah; + if (compute_least_squares_endpoints_1D( + 16, cur_weights1, 8, + g_bc7_3bit_ls_tab, + nal, nah, + pPixels, 3, + (float)total_a)) + { + la = fast_roundf_int(nal * (63.0f / 255.0f)); + ha = fast_roundf_int(nah * (63.0f / 255.0f)); + + if (pActual_sse) + a_sse = eval_weights_mode4_3bit_a_sse(pPixels, cur_weights1, la, ha); + else + eval_weights_mode4_3bit_a(pPixels, cur_weights1, la, ha); + } + + if (pActual_sse) + *pActual_sse += a_sse; + + encode_mode4_rgba_block(pBlock, + lr, lg, lb, la, + hr, hg, hb, ha, + cur_weights0, cur_weights1, (dp_chan_index + 1) & 3, 0); + } + +#ifdef _DEBUG + if (pActual_sse) + { + const uint32_t expected_sse = calc_sse(pBlock, pOrig_pixels); + assert(expected_sse == *pActual_sse); + } +#endif + + return true; + } + + bool pack_mode7_rgba(uint8_t* pBlock, const color_rgba* pPixels, + float block_xr, float block_xg, float block_xb, float block_xa, + int block_mean_r, int block_mean_g, int block_mean_b, int block_mean_a, + float sse_est_to_beat, uint32_t flags, + float* pFinal_sse_est = nullptr, + uint32_t* pActual_sse = nullptr) + { +#if BASISU_BC7F_PERF_STATS + g_total_mode7_evals++; +#endif + + uint32_t desired_pat_bits = 0; + + for (uint32_t i = 0; i < 16; i++) + { + const float r = (float)(pPixels[i].r - block_mean_r); + const float g = (float)(pPixels[i].g - block_mean_g); + const float b = (float)(pPixels[i].b - block_mean_b); + const float a = (float)(pPixels[i].a - block_mean_a); + + const uint32_t subset = (r * block_xr + g * block_xg + b * block_xb + a * block_xa) > 0.0f; + + desired_pat_bits |= (subset << i); + } + + uint32_t best_diff = UINT32_MAX; + for (uint32_t p = 0; p < MAX_PATTERNS2_TO_CHECK; p++) + { + const uint32_t bc6h_pat_bits = g_bc7_part2_bitmasks[p]; + + int diff = popcount32(bc6h_pat_bits ^ desired_pat_bits); + int diff_inv = 16 - diff; + + uint32_t min_diff = (basisu::minimum(diff, diff_inv) << 8) | p; + if (min_diff < best_diff) + 
best_diff = min_diff; + } // p + + const uint32_t best_pat_index = best_diff & 0xFF; + const uint32_t best_pat_bits = g_bc7_part2_bitmasks[best_pat_index]; + + int total_r[2] = { }, total_g[2] = { }, total_b[2] = { }, total_a[2] = { }, total_c[2] = { }; + for (uint32_t i = 0; i < 16; i++) + { + const int r = pPixels[i].r, g = pPixels[i].g, b = pPixels[i].b, a = pPixels[i].a; + const int subset = (best_pat_bits >> i) & 1; + + total_r[subset] += r; total_g[subset] += g; total_b[subset] += b; total_a[subset] += a; + total_c[subset]++; + } + + int mean_r[2], mean_g[2], mean_b[2], mean_a[2]; + for (uint32_t s = 0; s < 2; s++) + { + const uint32_t t = total_c[s]; + const uint32_t h = (t >> 1); + + mean_r[s] = (total_r[s] + h) / t; + mean_g[s] = (total_g[s] + h) / t; + mean_b[s] = (total_b[s] + h) / t; + mean_a[s] = (total_a[s] + h) / t; + } + + int icov4[2][10] = { { }, { } }; + + // 0=rr + // 1=rg + // 2=rb + // 3=ra + // + // 4=gg + // 5=gb + // 6=ga + // + // 7=bb + // 8=ba + // + // 9=aa + + // 0 1 2 3 + // 4 5 6 + // 7 8 + // 9 + + // 0 1 2 3 + // 1 4 5 6 + // 2 5 7 8 + // 3 6 8 9 + + // trace at 0,4,7,9 + + for (uint32_t i = 0; i < 16; i++) + { + const int s = (best_pat_bits >> i) & 1; + + int r = (int)pPixels[i].r - mean_r[s]; + int g = (int)pPixels[i].g - mean_g[s]; + int b = (int)pPixels[i].b - mean_b[s]; + int a = (int)pPixels[i].a - mean_a[s]; + + icov4[s][0] += r * r; icov4[s][1] += r * g; icov4[s][2] += r * b; icov4[s][3] += r * a; + icov4[s][4] += g * g; icov4[s][5] += g * b; icov4[s][6] += g * a; + icov4[s][7] += b * b; icov4[s][8] += b * a; + icov4[s][9] += a * a; + } + + int ar[2], ag[2], ab[2], aa[2]; + + float slam_to_line_sse_est = 0.0f; + + for (uint32_t s = 0; s < 2; s++) + { + const int block_max_var4 = basisu::maximum(icov4[s][0], icov4[s][4], icov4[s][7], icov4[s][9]); + + float cov4[10]; + for (uint32_t i = 0; i < 10; i++) + cov4[i] = (float)icov4[s][i]; + + const float sc4 = block_max_var4 ? 
(1.0f / (float)block_max_var4) : 0; + const float wx = sc4 * cov4[0], wy = sc4 * cov4[4], wz = sc4 * cov4[7], wa = sc4 * cov4[9]; + + // 0 1 2 3 + // 1 4 5 6 + // 2 5 7 8 + // 3 6 8 9 + + const float x0 = cov4[0] * wx + cov4[1] * wy + cov4[2] * wz + cov4[3] * wa; + const float y0 = cov4[1] * wx + cov4[4] * wy + cov4[5] * wz + cov4[6] * wa; + const float z0 = cov4[2] * wx + cov4[5] * wy + cov4[7] * wz + cov4[8] * wa; + const float w0 = cov4[3] * wx + cov4[6] * wy + cov4[8] * wz + cov4[9] * wa; + + const float x1 = cov4[0] * x0 + cov4[1] * y0 + cov4[2] * z0 + cov4[3] * w0; + const float y1 = cov4[1] * x0 + cov4[4] * y0 + cov4[5] * z0 + cov4[6] * w0; + const float z1 = cov4[2] * x0 + cov4[5] * y0 + cov4[7] * z0 + cov4[8] * w0; + const float w1 = cov4[3] * x0 + cov4[6] * y0 + cov4[8] * z0 + cov4[9] * w0; + + slam_to_line_sse_est += estimate_slam_to_line_sse_4D(cov4, x1, y1, z1, w1); + + int saxis_r = 256, saxis_g = 256, saxis_b = 256, saxis_a = 256; + + float k = basisu::maximum(fabsf(x1), fabsf(y1), fabsf(z1), fabsf(w1)); + if (fabsf(k) >= basisu::SMALL_FLOAT_VAL) + { + float m = 2048.0f / k; + saxis_r = (int)(x1 * m); + saxis_g = (int)(y1 * m); + saxis_b = (int)(z1 * m); + saxis_a = (int)(w1 * m); + } + + ar[s] = (int)((uint32_t)saxis_r << 4U); + ag[s] = (int)((uint32_t)saxis_g << 4U); + ab[s] = (int)((uint32_t)saxis_b << 4U); + aa[s] = (int)((uint32_t)saxis_a << 4U); + } // s + + int low_dot[2] = { INT_MAX, INT_MAX }; + int high_dot[2] = { INT_MIN, INT_MIN }; + + for (uint32_t i = 0; i < 16; i++) + { + const int subset = (best_pat_bits >> i) & 1; + const int saxis_r = ar[subset], saxis_g = ag[subset], saxis_b = ab[subset], saxis_a = aa[subset]; + + assert(((pPixels[i].r * saxis_r + pPixels[i].g * saxis_g + pPixels[i].b * saxis_b + pPixels[i].a * saxis_a) & 0xF) == 0); // sanity + const int dot = (pPixels[i].r * saxis_r + pPixels[i].g * saxis_g + pPixels[i].b * saxis_b + pPixels[i].a * saxis_a) + i; + + low_dot[subset] = basisu::minimum(low_dot[subset], dot); + 
high_dot[subset] = basisu::maximum(high_dot[subset], dot); + } + + int low_c[2] = { low_dot[0] & 15, low_dot[1] & 15 }; + int high_c[2] = { high_dot[0] & 15, high_dot[1] & 15 }; + + float quant_err_sse_est = 0; + + for (uint32_t subset = 0; subset < 2; subset++) + { + const uint32_t low_pixel = low_c[subset]; + const uint32_t high_pixel = high_c[subset]; + + int spans[4]; + for (uint32_t c = 0; c < 4; c++) + spans[c] = pPixels[high_pixel][c] - pPixels[low_pixel][c]; + + // mode 7: 5-bit endpoints, unique pbits, 2 bit weights, 4 chans + quant_err_sse_est += analytical_quant_est_sse(32, 4, 4, spans, nullptr, (flags & cPackBC7FlagPBitOpt) ? UNIQUE_PBIT_DISCOUNT : 1.0f, total_c[subset]); + + } // subset + + const float total_mode7_est_sse = slam_to_line_sse_est + quant_err_sse_est; + + if (pFinal_sse_est) + *pFinal_sse_est = total_mode7_est_sse; + + if (total_mode7_est_sse >= sse_est_to_beat) + { +#if BASISU_BC7F_PERF_STATS + g_total_mode7_bailouts++; +#endif + return false; + } + + uint32_t lr[2], lg[2], lb[2], la[2]; + uint32_t hr[2], hg[2], hb[2], ha[2]; + uint32_t pbits[4]; + + for (uint32_t s = 0; s < 2; s++) + { + const int lc = low_c[s], hc = high_c[s]; + + if (flags & cPackBC7FlagPBitOpt) + { + const float q = 1.0f / 255.0f; + float sxl[4] = { (float)pPixels[lc].r * q, (float)pPixels[lc].g * q, (float)pPixels[lc].b * q, (float)pPixels[lc].a * q }; + float sxh[4] = { (float)pPixels[hc].r * q, (float)pPixels[hc].g * q, (float)pPixels[hc].b * q, (float)pPixels[hc].a * q }; + + color_rgba bestMinColor, bestMaxColor; + determine_unique_pbits(4, 5, sxl, sxh, bestMinColor, bestMaxColor, &pbits[s * 2]); + + lr[s] = bestMinColor.r, lg[s] = bestMinColor.g, lb[s] = bestMinColor.b; la[s] = bestMinColor.a; + hr[s] = bestMaxColor.r, hg[s] = bestMaxColor.g, hb[s] = bestMaxColor.b; ha[s] = bestMaxColor.a; + } + else + { + const uint32_t l_pbit = (pPixels[lc].a >= 129); + const uint32_t h_pbit = (pPixels[hc].a >= 129); + + pbits[s * 2 + 0] = l_pbit; + pbits[s * 2 + 1] = h_pbit; 
+ + lr[s] = to_5(pPixels[lc].r, l_pbit); + lg[s] = to_5(pPixels[lc].g, l_pbit); + lb[s] = to_5(pPixels[lc].b, l_pbit); + la[s] = to_5(pPixels[lc].a, l_pbit); + + hr[s] = to_5(pPixels[hc].r, h_pbit); + hg[s] = to_5(pPixels[hc].g, h_pbit); + hb[s] = to_5(pPixels[hc].b, h_pbit); + ha[s] = to_5(pPixels[hc].a, h_pbit); + } + } // s + + uint8_t cur_weights[16]; + + eval_weights_mode7_rgba(pPixels, cur_weights, + lr, lg, lb, la, + hr, hg, hb, ha, + pbits, best_pat_bits); + + float z00[2] = { 0.0f }, z10[2] = { 0.0f }, z11[2] = { 0.0f }; + float q00_r[2] = { 0.0f }; + float q00_g[2] = { 0.0f }; + float q00_b[2] = { 0.0f }; + float q00_a[2] = { 0.0f }; + + for (uint32_t i = 0; i < 16; i++) + { + const int subset = (best_pat_bits >> i) & 1; + const uint32_t sel = cur_weights[i]; + assert(sel <= 3); + + z00[subset] += g_bc7_2bit_ls_tab[sel][0]; + z10[subset] += g_bc7_2bit_ls_tab[sel][1]; + z11[subset] += g_bc7_2bit_ls_tab[sel][2]; + + const float w = g_bc7_2bit_ls_tab[sel][3]; + + q00_r[subset] += w * (float)pPixels[i][0]; + q00_g[subset] += w * (float)pPixels[i][1]; + q00_b[subset] += w * (float)pPixels[i][2]; + q00_a[subset] += w * (float)pPixels[i][3]; + } // i + + for (uint32_t s = 0; s < 2; s++) + { + float q10_r = (float)total_r[s] - q00_r[s]; + float q10_g = (float)total_g[s] - q00_g[s]; + float q10_b = (float)total_b[s] - q00_b[s]; + float q10_a = (float)total_a[s] - q00_a[s]; + + float z01 = z10[s]; + + float det = z00[s] * z11[s] - z01 * z10[s]; + if (fabsf(det) < 1e-8f) + continue; + + det = 1.0f / det; + + float iz00, iz01, iz10, iz11; + iz00 = z11[s] * det; + iz01 = -z01 * det; + iz10 = -z10[s] * det; + iz11 = z00[s] * det; + + const float slr = iz10 * q00_r[s] + iz11 * q10_r; + const float shr = iz00 * q00_r[s] + iz01 * q10_r; + + const float slg = iz10 * q00_g[s] + iz11 * q10_g; + const float shg = iz00 * q00_g[s] + iz01 * q10_g; + + const float slb = iz10 * q00_b[s] + iz11 * q10_b; + const float shb = iz00 * q00_b[s] + iz01 * q10_b; + + const float sla = iz10 
* q00_a[s] + iz11 * q10_a; + const float sha = iz00 * q00_a[s] + iz01 * q10_a; + + if (flags & cPackBC7FlagPBitOpt) + { + const float q = 1.0f / 255.0f; + float sxl[4] = { basisu::clamp(slr * q, 0.0f, 1.0f), basisu::clamp(slg * q, 0.0f, 1.0f), basisu::clamp(slb * q, 0.0f, 1.0f), basisu::clamp(sla * q, 0.0f, 1.0f) }; + float sxh[4] = { basisu::clamp(shr * q, 0.0f, 1.0f), basisu::clamp(shg * q, 0.0f, 1.0f), basisu::clamp(shb * q, 0.0f, 1.0f), basisu::clamp(sha * q, 0.0f, 1.0f) }; + + color_rgba bestMinColor, bestMaxColor; + determine_unique_pbits(4, 5, sxl, sxh, bestMinColor, bestMaxColor, &pbits[s * 2]); + + lr[s] = bestMinColor.r, lg[s] = bestMinColor.g, lb[s] = bestMinColor.b; la[s] = bestMinColor.a; + hr[s] = bestMaxColor.r, hg[s] = bestMaxColor.g, hb[s] = bestMaxColor.b; ha[s] = bestMaxColor.a; + } + else + { + const uint32_t l_pbit = (sla >= 129.0f); + const uint32_t h_pbit = (sha >= 129.0f); + + pbits[s * 2 + 0] = l_pbit; + pbits[s * 2 + 1] = h_pbit; + + lr[s] = to_5_clamp(slr, l_pbit); + lg[s] = to_5_clamp(slg, l_pbit); + lb[s] = to_5_clamp(slb, l_pbit); + la[s] = to_5_clamp(sla, l_pbit); + + hr[s] = to_5_clamp(shr, h_pbit); + hg[s] = to_5_clamp(shg, h_pbit); + hb[s] = to_5_clamp(shb, h_pbit); + ha[s] = to_5_clamp(sha, h_pbit); + } + + } // s + + if (pActual_sse) + { + *pActual_sse = eval_weights_mode7_rgba_sse(pPixels, cur_weights, + lr, lg, lb, la, + hr, hg, hb, ha, + pbits, best_pat_bits); + } + else + { + eval_weights_mode7_rgba(pPixels, cur_weights, + lr, lg, lb, la, + hr, hg, hb, ha, + pbits, best_pat_bits); + } + + encode_mode7_rgba_block(pBlock, best_pat_index, + lr, lg, lb, la, + hr, hg, hb, ha, + pbits, cur_weights); + +#ifdef _DEBUG + if (pActual_sse) + { + const uint32_t expected_sse = calc_sse(pBlock, pPixels); + assert(expected_sse == *pActual_sse); + } +#endif + + return true; + } + + const int TRIVIAL_BLOCK_THRESH_RGB = 20 * 16; // skip PCA/LS threshold (uses trivial mode 6 encoder) + const int TRIVIAL_BLOCK_THRESH_RGBA = 2 * 16; + + // dual 
plane + // Mode-selection tuning thresholds. Block variances in this file are summed over the 16 texels (not divided by 16), hence the `N * 16` form. + // The `#if 0` branches hold alternative tunings that are compiled out. + // RGB dual plane (modes 4/5): considered when block_max_var >= DP_BLOCK_VAR_THRESH and the weakest pairwise channel |correlation| is below STRONG_CORR_THRESH. +#if 0 + const int DP_BLOCK_VAR_THRESH = 1 * 16; // use dual plane threshold + const float STRONG_CORR_THRESH = .98f; +#else + const int DP_BLOCK_VAR_THRESH = 2 * 16; // use dual plane threshold + const float STRONG_CORR_THRESH = .85f; +#endif + + // 2-3 subsets + // Gates used by the analytical RGB paths before trying the multi-subset packers: block_max_var >= MIN_BLOCK_MAX_VAR_23SUBSETS, mode 6 ortho ratio > ORTHO_RATIO_23SUBSET_RATIO_THRESH, and mode 6 slam-to-line SSE estimate >= HIGH_ORTHO_ENERGY_THRESH. +#if 0 + const float HIGH_ORTHO_ENERGY_THRESH = 1.0f * 16.0f; // use 2+ subsets threshold + const int MIN_BLOCK_MAX_VAR_23SUBSETS = 4 * 16; + const float ORTHO_RATIO_23SUBSET_RATIO_THRESH = .004f; +#else + const int MIN_BLOCK_MAX_VAR_23SUBSETS = 100 * 16; + const float HIGH_ORTHO_ENERGY_THRESH = 1.0f * 16.0f; // use 2+ subsets threshold + const float ORTHO_RATIO_23SUBSET_RATIO_THRESH = .004f; +#endif + + // RGBA dual plane: considered when block_max_var4 >= DP_BLOCK_VAR_THRESH_RGBA. + // NOTE(review): ALPHA_DECORR_THRESHOLD and STRONG_DECORR_THRESH_RGBA are consumed further down the RGBA path - presumably as correlation cutoffs; confirm at the use sites. +#if 0 + const int DP_BLOCK_VAR_THRESH_RGBA = 2 * 16; // use dual plane threshold + const float ALPHA_DECORR_THRESHOLD = .9f; + const float STRONG_DECORR_THRESH_RGBA = .85f; +#else + const int DP_BLOCK_VAR_THRESH_RGBA = 1 * 16; // use dual plane threshold + //const float ALPHA_DECORR_THRESHOLD = .98f; + const float ALPHA_DECORR_THRESHOLD = .995f; + const float STRONG_DECORR_THRESH_RGBA = .85f; +#endif + + // 3 subsets + // In the analytical RGB paths, pack_mode0_or_2_rgb is only tried when block_max_var >= MIN_BLOCK_MAX_VAR_3SUBSETS, on top of the 2-3 subset gates above. + const int MIN_BLOCK_MAX_VAR_3SUBSETS = 500 * 16; // use 3 subsets threshold + + //------------------------------------------------------------------------------------------------------- + + // Note: solid block check assumes A's all == 255. 
+ void fast_pack_bc7_rgb_analytical(uint8_t* pBlock, const color_rgba* pPixels, uint32_t flags) + { + assert(g_bc7_4bit_ls_tab[1][0]); + +#if BASISU_BC7F_PERF_STATS + g_total_rgb_calls++; +#endif + + const uint32_t fc = *(const uint32_t*)&pPixels[0]; + if (fc == *(const uint32_t*)&pPixels[15]) + { + int k; + for (k = 1; k < 15; k++) + if (*(const uint32_t*)&pPixels[k] != fc) + break; + + if (k == 15) + { +#if BASISU_BC7F_PERF_STATS + g_total_solid_blocks++; +#endif + + pack_mode5_solid(pBlock, pPixels[0]); + return; + } + } + + int total_r = 0, total_g = 0, total_b = 0; + + int min_r = 255, min_g = 255, min_b = 255; + int max_r = 0, max_g = 0, max_b = 0; + + for (uint32_t i = 0; i < 16; i++) + { + int r = pPixels[i].r, g = pPixels[i].g, b = pPixels[i].b; + + total_r += r; total_g += g; total_b += b; + + min_r = basisu::minimum(min_r, r); min_g = basisu::minimum(min_g, g); min_b = basisu::minimum(min_b, b); + max_r = basisu::maximum(max_r, r); max_g = basisu::maximum(max_g, g); max_b = basisu::maximum(max_b, b); + } + + int mean_r = (total_r + 8) >> 4, mean_g = (total_g + 8) >> 4, mean_b = (total_b + 8) >> 4; + + // covar rows are: + // 0, 1, 2 + // 1, 3, 4 + // 2, 4, 5 + int icov[6] = { 0, 0, 0, 0, 0, 0 }; + + for (uint32_t i = 0; i < 16; i++) + { + int r = (int)pPixels[i].r - mean_r; + int g = (int)pPixels[i].g - mean_g; + int b = (int)pPixels[i].b - mean_b; + icov[0] += r * r; icov[1] += r * g; icov[2] += r * b; + icov[3] += g * g; icov[4] += g * b; + icov[5] += b * b; + } + + int block_max_var = basisu::maximum(icov[0], icov[3], icov[5]); // not divided by 16, i.e. 
scaled by 16 + + // not redundant due to uint32_t test above, which could be fooled by alpha accidentally passed in + if (!block_max_var) + { +#if BASISU_BC7F_PERF_STATS + g_total_solid_blocks++; +#endif + pack_mode5_solid(pBlock, pPixels[0]); + return; + } + + // check for dual plane, if a single component is very strongly decorrelated then switch to modes 4/5 + int desired_dp_chan = -1; + + if ((flags & cPackBC7FlagUseDualPlaneRGB) && (block_max_var >= DP_BLOCK_VAR_THRESH)) + { + // 0,1 + // 0,2 + // 1,2 + const bool has_r = icov[0] > 16, has_g = icov[3] > 16, has_b = icov[5] > 16; + + const uint32_t total_active_chans = has_r + has_g + has_b; + + if (total_active_chans >= 2) + { + const float r_var = (float)icov[0], g_var = (float)icov[3], b_var = (float)icov[5]; + + const float rg_corr = (has_r && has_g) ? fabs((float)icov[1] / sqrtf(r_var * g_var)) : 1.0f; + const float rb_corr = (has_r && has_b) ? fabs((float)icov[2] / sqrtf(r_var * b_var)) : 1.0f; + const float gb_corr = (has_g && has_b) ? fabs((float)icov[4] / sqrtf(g_var * b_var)) : 1.0f; + + float min_p = basisu::minimum(rg_corr, rb_corr, gb_corr); + if (min_p < STRONG_CORR_THRESH) + { + if (total_active_chans == 2) + { + if (!has_r) + desired_dp_chan = 1; + else if (!has_g) + desired_dp_chan = 0; + else + desired_dp_chan = 0; + } + else + { + // see if rg/rb is weakly correlated vs. gb + if ((rg_corr < gb_corr) && (rb_corr < gb_corr)) + desired_dp_chan = 0; + // see if gr/gb is weakly correlated vs. 
rb + else if ((rg_corr < rb_corr) && (gb_corr < rb_corr)) + desired_dp_chan = 1; + // assume b is weakest + else + desired_dp_chan = 2; + } +#if BASISU_BC7F_PERF_STATS + g_total_dp_valid_chans_rgb++; +#endif + } + } + } + + if ((flags & cPackBC7FlagUseTrivialMode6) && ((desired_dp_chan == -1) && (block_max_var < TRIVIAL_BLOCK_THRESH_RGB))) + { + //pack_mode5_solid(pBlock, color_rgba(0, 255, 0, 255)); + //return; + + int low_c = INT_MAX, high_c = 0; + + for (uint32_t i = 0; i < 16; i++) + { + int y = ((16 * 2) * pPixels[i].r + (16 * 4) * pPixels[i].g + 16 * pPixels[i].b) + i; + low_c = basisu::minimum(low_c, y); + high_c = basisu::maximum(high_c, y); + } + + low_c &= 0xF; + high_c &= 0xF; + + int p0, p1, lr, lg, lb, hr, hg, hb; + + if (flags & cPackBC7FlagPBitOptMode6) + { + // An alternative would be to set A's=1.0 here and bias the p-bit optimizer to lower A RMSE. + const float q = 1.0f / 255.0f; + float sxl[4] = { (float)pPixels[low_c].r * q, (float)pPixels[low_c].g * q, (float)pPixels[low_c].b * q, 0 }; + float sxh[4] = { (float)pPixels[high_c].r * q, (float)pPixels[high_c].g * q, (float)pPixels[high_c].b * q, 0 }; + + color_rgba bestMinColor, bestMaxColor; + uint32_t best_pbits[2]; + determine_unique_pbits(3, 7, sxl, sxh, bestMinColor, bestMaxColor, best_pbits); + + p0 = best_pbits[0], p1 = best_pbits[1]; + lr = bestMinColor.r, lg = bestMinColor.g, lb = bestMinColor.b; + hr = bestMaxColor.r, hg = bestMaxColor.g, hb = bestMaxColor.b; + } + else + { + p0 = 1; + p1 = 1; + + lr = to_7(pPixels[low_c].r, p0), lg = to_7(pPixels[low_c].g, p0), lb = to_7(pPixels[low_c].b, p0); + hr = to_7(pPixels[high_c].r, p1), hg = to_7(pPixels[high_c].g, p1), hb = to_7(pPixels[high_c].b, p1); + } + + uint8_t cur_weights[16]; + +#if BASISU_BC7F_USE_SSE41 + eval_weights_mode6_rgb_sse41(pPixels, cur_weights, lr, lg, lb, hr, hg, hb, p0, p1); +#else + eval_weights_mode6_rgb(pPixels, cur_weights, lr, lg, lb, hr, hg, hb, p0, p1); +#endif + + encode_mode6_rgba_block(pBlock, + lr, lg, lb, 
127, p0, + hr, hg, hb, 127, p1, + cur_weights); + +#if BASISU_BC7F_PERF_STATS + g_total_trivial_mode6_blocks++; +#endif + return; + } + + float cov[6]; + for (uint32_t i = 0; i < 6; i++) + cov[i] = (float)icov[i]; + + const float sc = block_max_var ? (1.0f / (float)block_max_var) : 0; + const float wx = sc * cov[0], wy = sc * cov[3], wz = sc * cov[5]; + + const float alt_xr = cov[0] * wx + cov[1] * wy + cov[2] * wz; + const float alt_xg = cov[1] * wx + cov[3] * wy + cov[4] * wz; + const float alt_xb = cov[2] * wx + cov[4] * wy + cov[5] * wz; + + // quite rough mode 6 SSE estimate (explictly higher bound): if some other mode can't even beat this, don't use it and we fall back to a decently strong mode 6 + const int spans[4] = { max_r - min_r, max_g - min_g, max_b - min_b, 0 }; + + // need_sse_estimates MUST be set correctly or subtle mode selection issues will occur. + const bool need_sse_estimates = ((flags & cPackBC7FlagUse2SubsetsRGB) != 0) || (desired_dp_chan >= 0); + + float mode6_ortho_ratio = 0; + const float mode6_slam_to_line_sse_est = need_sse_estimates ? estimate_slam_to_line_sse_3D(cov, alt_xr, alt_xg, alt_xb, &mode6_ortho_ratio) : 0; + const float mode6_sse_est = need_sse_estimates ? (mode6_slam_to_line_sse_est + analytical_quant_est_sse(128, 16, 3, spans, nullptr, 1.0f, 16)) : 0; + + // Prefer 2/3-subsets over dual plane + // TODO: Use mode 6 sse est? 
+ if ((flags & cPackBC7FlagUse2SubsetsRGB) && (block_max_var >= MIN_BLOCK_MAX_VAR_23SUBSETS) && (mode6_ortho_ratio > ORTHO_RATIO_23SUBSET_RATIO_THRESH)) + { + assert(need_sse_estimates); + + const bool high_ortho_energy_flag = (mode6_slam_to_line_sse_est >= HIGH_ORTHO_ENERGY_THRESH); + + if (high_ortho_energy_flag) + { +#if BASISU_BC7F_PERF_STATS + g_total_high_ortho_energy++; +#endif + //pack_mode5_solid(pBlock, color_rgba(255, 255, 0, 255)); + //return; + + if ((flags & cPackBC7FlagUse3SubsetsRGB) && (block_max_var >= MIN_BLOCK_MAX_VAR_3SUBSETS)) + { + //pack_mode5_solid(pBlock, color_rgba(255, 0, 255, 255)); + //return; + +#if 0 + if (pack_mode0_or_2_rgb(pBlock, pPixels, alt_xr, alt_xg, alt_xb, mean_r, mean_g, mean_b, mode6_sse_est, flags)) + { + return; + } +#else + float mode0_or_2_sse_est = 1e+9f; + if (pack_mode0_or_2_rgb(pBlock, pPixels, alt_xr, alt_xg, alt_xb, mean_r, mean_g, mean_b, mode6_sse_est, flags, &mode0_or_2_sse_est)) + { + float mode1_or_3_sse_est = 1e+9f; + + uint8_t temp_2subset_block[sizeof(basist::bc7_block)]; + if (pack_mode1_or_3_rgb(temp_2subset_block, pPixels, alt_xr, alt_xg, alt_xb, mean_r, mean_g, mean_b, mode0_or_2_sse_est, flags, &mode1_or_3_sse_est)) + { + assert(mode1_or_3_sse_est < mode0_or_2_sse_est); + memcpy(pBlock, temp_2subset_block, sizeof(basist::bc7_block)); + } + + return; + } +#endif + } + + if (pack_mode1_or_3_rgb(pBlock, pPixels, alt_xr, alt_xg, alt_xb, mean_r, mean_g, mean_b, mode6_sse_est, flags)) + return; + } + } + + // Use dual plane over mode 6 + if (desired_dp_chan >= 0) + { + assert(need_sse_estimates); + + if (pack_mode4_or_5(pBlock, pPixels, desired_dp_chan, mode6_sse_est, flags)) + return; + + } // if (desired_dp_chan >= 0) + + int saxis_r = 306, saxis_g = 601, saxis_b = 117; + + float k = basisu::maximum(fabsf(alt_xr), fabsf(alt_xg), fabsf(alt_xb)); + if (fabs(k) >= basisu::SMALL_FLOAT_VAL) + { + float m = 2048.0f / k; + saxis_r = (int)(alt_xr * m); + saxis_g = (int)(alt_xg * m); + saxis_b = (int)(alt_xb * 
m); + } + + saxis_r = (int)((uint32_t)saxis_r << 4U); + saxis_g = (int)((uint32_t)saxis_g << 4U); + saxis_b = (int)((uint32_t)saxis_b << 4U); + + int low_dot = INT_MAX, high_dot = INT_MIN; + +#if BASISU_BC7F_USE_SSE41 + int low_c, high_c; + bc7_proj_minmax_indices_sse41(pPixels, saxis_r, saxis_g, saxis_b, &low_c, &high_c); +#else + for (uint32_t i = 0; i < 16; i += 4) + { + assert(((pPixels[i].r * saxis_r + pPixels[i].g * saxis_g + pPixels[i].b * saxis_b) & 0xF) == 0); // sanity + assert(((pPixels[i + 1].r * saxis_r + pPixels[i + 1].g * saxis_g + pPixels[i + 1].b * saxis_b) & 0xF) == 0); + assert(((pPixels[i + 2].r * saxis_r + pPixels[i + 2].g * saxis_g + pPixels[i + 2].b * saxis_b) & 0xF) == 0); + assert(((pPixels[i + 3].r * saxis_r + pPixels[i + 3].g * saxis_g + pPixels[i + 3].b * saxis_b) & 0xF) == 0); + + const int dot0 = (pPixels[i].r * saxis_r + pPixels[i].g * saxis_g + pPixels[i].b * saxis_b) + i; + const int dot1 = (pPixels[i + 1].r * saxis_r + pPixels[i + 1].g * saxis_g + pPixels[i + 1].b * saxis_b) + i + 1; + const int dot2 = (pPixels[i + 2].r * saxis_r + pPixels[i + 2].g * saxis_g + pPixels[i + 2].b * saxis_b) + i + 2; + const int dot3 = (pPixels[i + 3].r * saxis_r + pPixels[i + 3].g * saxis_g + pPixels[i + 3].b * saxis_b) + i + 3; + + int min_d01 = basisu::minimum(dot0, dot1); + int max_d01 = basisu::maximum(dot0, dot1); + + int min_d23 = basisu::minimum(dot2, dot3); + int max_d23 = basisu::maximum(dot2, dot3); + + int min_d = basisu::minimum(min_d01, min_d23); + int max_d = basisu::maximum(max_d01, max_d23); + + low_dot = basisu::minimum(low_dot, min_d); + high_dot = basisu::maximum(high_dot, max_d); + } + + int low_c = low_dot & 15; + int high_c = high_dot & 15; +#endif + + int p0, p1, lr, lg, lb, hr, hg, hb; + + if (flags & cPackBC7FlagPBitOptMode6) + { + const float q = 1.0f / 255.0f; + float sxl[4] = { (float)pPixels[low_c].r * q, (float)pPixels[low_c].g * q, (float)pPixels[low_c].b * q, 0 }; + float sxh[4] = { (float)pPixels[high_c].r * q, 
(float)pPixels[high_c].g * q, (float)pPixels[high_c].b * q, 0 }; + + color_rgba bestMinColor, bestMaxColor; + uint32_t best_pbits[2]; + determine_unique_pbits(3, 7, sxl, sxh, bestMinColor, bestMaxColor, best_pbits); + + p0 = best_pbits[0], p1 = best_pbits[1]; + lr = bestMinColor.r, lg = bestMinColor.g, lb = bestMinColor.b; + hr = bestMaxColor.r, hg = bestMaxColor.g, hb = bestMaxColor.b; + } + else + { + // explictly force pbits to 1, that way alpha is always 255 and we don't slow down the entire encoder by 4-8% for a tiny ~.1 dB PSNR gain (not worth it) + p0 = 1, p1 = 1; + lr = to_7(pPixels[low_c].r, p0), lg = to_7(pPixels[low_c].g, p0), lb = to_7(pPixels[low_c].b, p0); + hr = to_7(pPixels[high_c].r, p1), hg = to_7(pPixels[high_c].g, p1), hb = to_7(pPixels[high_c].b, p1); + } + + uint8_t cur_weights[16]; + +#if BASISU_BC7F_USE_SSE41 + eval_weights_mode6_rgb_sse41(pPixels, cur_weights, lr, lg, lb, hr, hg, hb, p0, p1); +#else + eval_weights_mode6_rgb(pPixels, cur_weights, lr, lg, lb, hr, hg, hb, p0, p1); +#endif + + vec4F xl, xh; + bool res = compute_least_squares_endpoints_3D( + 16, cur_weights, 16, + g_bc7_4bit_ls_tab, + xl, xh, + pPixels, + (float)total_r, (float)total_g, (float)total_b); + + if (res) + { + if (flags & cPackBC7FlagPBitOptMode6) + { + const float q = 1.0f / 255.0f; + float sxl[4] = { xl[0] * q, xl[1] * q, xl[2] * q, 0.0f }; + float sxh[4] = { xh[0] * q, xh[1] * q, xh[2] * q, 0.0f }; + + color_rgba bestMinColor, bestMaxColor; + uint32_t best_pbits[2]; + determine_unique_pbits( + 3, 7, sxl, sxh, + bestMinColor, bestMaxColor, best_pbits); + + p0 = best_pbits[0], p1 = best_pbits[1]; + lr = bestMinColor.r, lg = bestMinColor.g, lb = bestMinColor.b; + hr = bestMaxColor.r, hg = bestMaxColor.g, hb = bestMaxColor.b; + } + else + { + p0 = 1; p1 = 1; + lr = to_7(xl[0], p0); + lg = to_7(xl[1], p0); + lb = to_7(xl[2], p0); + + hr = to_7(xh[0], p1); + hg = to_7(xh[1], p1); + hb = to_7(xh[2], p1); + } + +#if BASISU_BC7F_USE_SSE41 + 
eval_weights_mode6_rgb_sse41(pPixels, cur_weights, lr, lg, lb, hr, hg, hb, p0, p1); +#else + eval_weights_mode6_rgb(pPixels, cur_weights, lr, lg, lb, hr, hg, hb, p0, p1); +#endif + } + + //pack_mode5_solid(pBlock, color_rgba(0, 0, 255, 255)); + //return; + + // pbits set to 1 to ensure alpha is always decoded to fully opaque (255) + encode_mode6_rgba_block(pBlock, + lr, lg, lb, 127, p0, + hr, hg, hb, 127, p1, + cur_weights); + } + + //------------------------------------------------------------------------------------------------------- + const int MIN_BLOCK_MAX_VAR_23SUBSETS_RGBA = 100 * 16; + const float HIGH_ORTHO_ENERGY_THRESH_RGBA = 1.0f * 16.0f; // use 2+ subsets threshold + const float ORTHO_RATIO_23SUBSET_RATIO_THRESH_RGBA = .004f; + + uint32_t fast_pack_bc7_rgb_partial_analytical(uint8_t* pBlock, const color_rgba* pPixels, uint32_t flags) + { + assert(g_bc7_4bit_ls_tab[1][0]); + +#if BASISU_BC7F_PERF_STATS + g_total_rgb_calls++; +#endif + + const uint32_t fc = *(const uint32_t*)&pPixels[0]; + if (fc == *(const uint32_t*)&pPixels[15]) + { + int k; + for (k = 1; k < 15; k++) + if (*(const uint32_t*)&pPixels[k] != fc) + break; + + if (k == 15) + { +#if BASISU_BC7F_PERF_STATS + g_total_solid_blocks++; +#endif + + pack_mode5_solid(pBlock, pPixels[0]); + return 0; + } + } + + int total_r = 0, total_g = 0, total_b = 0; + + int min_r = 255, min_g = 255, min_b = 255; + int max_r = 0, max_g = 0, max_b = 0; + + for (uint32_t i = 0; i < 16; i++) + { + int r = pPixels[i].r, g = pPixels[i].g, b = pPixels[i].b; + + total_r += r; total_g += g; total_b += b; + + min_r = basisu::minimum(min_r, r); min_g = basisu::minimum(min_g, g); min_b = basisu::minimum(min_b, b); + max_r = basisu::maximum(max_r, r); max_g = basisu::maximum(max_g, g); max_b = basisu::maximum(max_b, b); + } + + int mean_r = (total_r + 8) >> 4, mean_g = (total_g + 8) >> 4, mean_b = (total_b + 8) >> 4; + + // covar rows are: + // 0, 1, 2 + // 1, 3, 4 + // 2, 4, 5 + int icov[6] = { 0, 0, 0, 0, 0, 0 }; + + for 
(uint32_t i = 0; i < 16; i++) + { + int r = (int)pPixels[i].r - mean_r; + int g = (int)pPixels[i].g - mean_g; + int b = (int)pPixels[i].b - mean_b; + icov[0] += r * r; icov[1] += r * g; icov[2] += r * b; + icov[3] += g * g; icov[4] += g * b; + icov[5] += b * b; + } + + int block_max_var = basisu::maximum(icov[0], icov[3], icov[5]); // not divided by 16, i.e. scaled by 16 + + // not redundant due to uint32_t test above, which could be fooled by alpha accidentally passed in + if (!block_max_var) + { +#if BASISU_BC7F_PERF_STATS + g_total_solid_blocks++; +#endif + pack_mode5_solid(pBlock, pPixels[0]); + return 0; + } + + // check for dual plane, if a single component is very strongly decorrelated then switch to modes 4/5 + int desired_dp_chan = -1; + + const bool non_analytical_flag = (flags & cPackBC7FlagNonAnalyticalRGB) != 0; + + if ((flags & cPackBC7FlagUseDualPlaneRGB) && + ((!non_analytical_flag && (block_max_var >= DP_BLOCK_VAR_THRESH)) || (non_analytical_flag && (block_max_var >= 16)))) + { + // 0,1 + // 0,2 + // 1,2 + const bool has_r = icov[0] > 16, has_g = icov[3] > 16, has_b = icov[5] > 16; + + const uint32_t total_active_chans = has_r + has_g + has_b; + + if (total_active_chans >= 2) + { + const float r_var = (float)icov[0], g_var = (float)icov[3], b_var = (float)icov[5]; + + const float rg_corr = (has_r && has_g) ? fabs((float)icov[1] / sqrtf(r_var * g_var)) : 1.0f; + const float rb_corr = (has_r && has_b) ? fabs((float)icov[2] / sqrtf(r_var * b_var)) : 1.0f; + const float gb_corr = (has_g && has_b) ? fabs((float)icov[4] / sqrtf(g_var * b_var)) : 1.0f; + + float min_p = basisu::minimum(rg_corr, rb_corr, gb_corr); + + const float corr_thresh = non_analytical_flag ? .999f : STRONG_CORR_THRESH; + + if (min_p < corr_thresh) + { + if (total_active_chans == 2) + { + if (!has_r) + desired_dp_chan = 1; + else if (!has_g) + desired_dp_chan = 0; + else + desired_dp_chan = 0; + } + else + { + // see if rg/rb is weakly correlated vs. 
gb + if ((rg_corr < gb_corr) && (rb_corr < gb_corr)) + desired_dp_chan = 0; + // see if gr/gb is weakly correlated vs. rb + else if ((rg_corr < rb_corr) && (gb_corr < rb_corr)) + desired_dp_chan = 1; + // assume b is weakest + else + desired_dp_chan = 2; + } +#if BASISU_BC7F_PERF_STATS + g_total_dp_valid_chans_rgb++; +#endif + } + } + } + + if ((flags & cPackBC7FlagUseTrivialMode6) && ((desired_dp_chan == -1) && (block_max_var < TRIVIAL_BLOCK_THRESH_RGB))) + { + int low_c = INT_MAX, high_c = 0; + + for (uint32_t i = 0; i < 16; i++) + { + int y = ((16 * 2) * pPixels[i].r + (16 * 4) * pPixels[i].g + 16 * pPixels[i].b) + i; + low_c = basisu::minimum(low_c, y); + high_c = basisu::maximum(high_c, y); + } + + low_c &= 0xF; + high_c &= 0xF; + + int p0, p1, lr, lg, lb, hr, hg, hb; + + if (flags & cPackBC7FlagPBitOptMode6) + { + // An alternative would be to set A's=1.0 here and bias the p-bit optimizer to lower A RMSE. + const float q = 1.0f / 255.0f; + float sxl[4] = { (float)pPixels[low_c].r * q, (float)pPixels[low_c].g * q, (float)pPixels[low_c].b * q, 0 }; + float sxh[4] = { (float)pPixels[high_c].r * q, (float)pPixels[high_c].g * q, (float)pPixels[high_c].b * q, 0 }; + + color_rgba bestMinColor, bestMaxColor; + uint32_t best_pbits[2]; + determine_unique_pbits(3, 7, sxl, sxh, bestMinColor, bestMaxColor, best_pbits); + + p0 = best_pbits[0], p1 = best_pbits[1]; + lr = bestMinColor.r, lg = bestMinColor.g, lb = bestMinColor.b; + hr = bestMaxColor.r, hg = bestMaxColor.g, hb = bestMaxColor.b; + } + else + { + p0 = 1; + p1 = 1; + + lr = to_7(pPixels[low_c].r, p0), lg = to_7(pPixels[low_c].g, p0), lb = to_7(pPixels[low_c].b, p0); + hr = to_7(pPixels[high_c].r, p1), hg = to_7(pPixels[high_c].g, p1), hb = to_7(pPixels[high_c].b, p1); + } + + uint8_t cur_weights[16]; + uint32_t mode6_actual_sse = eval_weights_mode6_rgb_sse(pPixels, cur_weights, lr, lg, lb, hr, hg, hb, p0, p1); + + encode_mode6_rgba_block(pBlock, + lr, lg, lb, 127, p0, + hr, hg, hb, 127, p1, + cur_weights); + +#if 
BASISU_BC7F_PERF_STATS + g_total_trivial_mode6_blocks++; +#endif + +#ifdef _DEBUG + { + // Final sanity checking. + uint32_t expected_actual_sse = calc_sse(pBlock, pPixels); + assert(expected_actual_sse == mode6_actual_sse); + } +#endif + + return mode6_actual_sse; + } + + float cov[6]; + for (uint32_t i = 0; i < 6; i++) + cov[i] = (float)icov[i]; + + const float sc = block_max_var ? (1.0f / (float)block_max_var) : 0; + const float wx = sc * cov[0], wy = sc * cov[3], wz = sc * cov[5]; + + const float alt_xr = cov[0] * wx + cov[1] * wy + cov[2] * wz; + const float alt_xg = cov[1] * wx + cov[3] * wy + cov[4] * wz; + const float alt_xb = cov[2] * wx + cov[4] * wy + cov[5] * wz; + + int saxis_r = 306, saxis_g = 601, saxis_b = 117; + + float k = basisu::maximum(fabsf(alt_xr), fabsf(alt_xg), fabsf(alt_xb)); + if (fabs(k) >= basisu::SMALL_FLOAT_VAL) + { + float m = 2048.0f / k; + saxis_r = (int)(alt_xr * m); + saxis_g = (int)(alt_xg * m); + saxis_b = (int)(alt_xb * m); + } + + saxis_r = (int)((uint32_t)saxis_r << 4U); + saxis_g = (int)((uint32_t)saxis_g << 4U); + saxis_b = (int)((uint32_t)saxis_b << 4U); + + int low_dot = INT_MAX, high_dot = INT_MIN; + +#if BASISU_BC7F_USE_SSE41 + int low_c, high_c; + bc7_proj_minmax_indices_sse41(pPixels, saxis_r, saxis_g, saxis_b, &low_c, &high_c); +#else + for (uint32_t i = 0; i < 16; i += 4) + { + assert(((pPixels[i].r * saxis_r + pPixels[i].g * saxis_g + pPixels[i].b * saxis_b) & 0xF) == 0); // sanity + assert(((pPixels[i + 1].r * saxis_r + pPixels[i + 1].g * saxis_g + pPixels[i + 1].b * saxis_b) & 0xF) == 0); + assert(((pPixels[i + 2].r * saxis_r + pPixels[i + 2].g * saxis_g + pPixels[i + 2].b * saxis_b) & 0xF) == 0); + assert(((pPixels[i + 3].r * saxis_r + pPixels[i + 3].g * saxis_g + pPixels[i + 3].b * saxis_b) & 0xF) == 0); + + const int dot0 = (pPixels[i].r * saxis_r + pPixels[i].g * saxis_g + pPixels[i].b * saxis_b) + i; + const int dot1 = (pPixels[i + 1].r * saxis_r + pPixels[i + 1].g * saxis_g + pPixels[i + 1].b * saxis_b) + 
i + 1; + const int dot2 = (pPixels[i + 2].r * saxis_r + pPixels[i + 2].g * saxis_g + pPixels[i + 2].b * saxis_b) + i + 2; + const int dot3 = (pPixels[i + 3].r * saxis_r + pPixels[i + 3].g * saxis_g + pPixels[i + 3].b * saxis_b) + i + 3; + + int min_d01 = basisu::minimum(dot0, dot1); + int max_d01 = basisu::maximum(dot0, dot1); + + int min_d23 = basisu::minimum(dot2, dot3); + int max_d23 = basisu::maximum(dot2, dot3); + + int min_d = basisu::minimum(min_d01, min_d23); + int max_d = basisu::maximum(max_d01, max_d23); + + low_dot = basisu::minimum(low_dot, min_d); + high_dot = basisu::maximum(high_dot, max_d); + } + + int low_c = low_dot & 15; + int high_c = high_dot & 15; +#endif + + int p0, p1, lr, lg, lb, hr, hg, hb; + + if (flags & cPackBC7FlagPBitOptMode6) + { + const float q = 1.0f / 255.0f; + float sxl[4] = { (float)pPixels[low_c].r * q, (float)pPixels[low_c].g * q, (float)pPixels[low_c].b * q, 0 }; + float sxh[4] = { (float)pPixels[high_c].r * q, (float)pPixels[high_c].g * q, (float)pPixels[high_c].b * q, 0 }; + + color_rgba bestMinColor, bestMaxColor; + uint32_t best_pbits[2]; + determine_unique_pbits(3, 7, sxl, sxh, bestMinColor, bestMaxColor, best_pbits); + + p0 = best_pbits[0], p1 = best_pbits[1]; + lr = bestMinColor.r, lg = bestMinColor.g, lb = bestMinColor.b; + hr = bestMaxColor.r, hg = bestMaxColor.g, hb = bestMaxColor.b; + } + else + { + // explictly force pbits to 1, that way alpha is always 255 and we don't slow down the entire encoder by 4-8% for a tiny ~.1 dB PSNR gain (not worth it) + p0 = 1, p1 = 1; + lr = to_7(pPixels[low_c].r, p0), lg = to_7(pPixels[low_c].g, p0), lb = to_7(pPixels[low_c].b, p0); + hr = to_7(pPixels[high_c].r, p1), hg = to_7(pPixels[high_c].g, p1), hb = to_7(pPixels[high_c].b, p1); + } + + uint8_t cur_weights[16]; + + uint32_t mode6_actual_sse = eval_weights_mode6_rgb_sse(pPixels, cur_weights, lr, lg, lb, hr, hg, hb, p0, p1); + + if (mode6_actual_sse) + { + vec4F xl, xh; + bool res = compute_least_squares_endpoints_3D( + 16, 
cur_weights, 16, + g_bc7_4bit_ls_tab, + xl, xh, + pPixels, + (float)total_r, (float)total_g, (float)total_b); + + if (res) + { + int trial_p0, trial_p1, trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb; + + if (flags & cPackBC7FlagPBitOptMode6) + { + const float q = 1.0f / 255.0f; + float sxl[4] = { xl[0] * q, xl[1] * q, xl[2] * q, 0.0f }; + float sxh[4] = { xh[0] * q, xh[1] * q, xh[2] * q, 0.0f }; + + color_rgba bestMinColor, bestMaxColor; + uint32_t best_pbits[2]; + determine_unique_pbits( + 3, 7, sxl, sxh, + bestMinColor, bestMaxColor, best_pbits); + + trial_p0 = best_pbits[0], trial_p1 = best_pbits[1]; + trial_lr = bestMinColor.r, trial_lg = bestMinColor.g, trial_lb = bestMinColor.b; + trial_hr = bestMaxColor.r, trial_hg = bestMaxColor.g, trial_hb = bestMaxColor.b; + } + else + { + trial_p0 = 1; trial_p1 = 1; + trial_lr = to_7(xl[0], trial_p0); + trial_lg = to_7(xl[1], trial_p0); + trial_lb = to_7(xl[2], trial_p0); + + trial_hr = to_7(xh[0], trial_p1); + trial_hg = to_7(xh[1], trial_p1); + trial_hb = to_7(xh[2], trial_p1); + } + + uint8_t trial_weights[16]; + uint32_t mode6_ls_actual_sse = eval_weights_mode6_rgb_sse(pPixels, trial_weights, trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb, trial_p0, trial_p1); + if (mode6_ls_actual_sse < mode6_actual_sse) + { + mode6_actual_sse = mode6_ls_actual_sse; + memcpy(cur_weights, trial_weights, 16); + p0 = trial_p0; p1 = trial_p1; + lr = trial_lr; lg = trial_lg; lb = trial_lb; + hr = trial_hr; hg = trial_hg; hb = trial_hb; + } + } + } + + uint32_t mode02_actual_sse = UINT32_MAX; + uint8_t mode02_candidate_block[sizeof(basist::bc7_block)]; + + uint32_t mode13_actual_sse = UINT32_MAX; + uint8_t mode13_candidate_block[sizeof(basist::bc7_block)]; + + uint32_t mode45_actual_sse = UINT32_MAX; + uint8_t mode45_candidate_block[sizeof(basist::bc7_block)]; + + if (mode6_actual_sse) + { + if (non_analytical_flag) + { + // No gates: very expensive. 
+ if (flags & cPackBC7FlagUse2SubsetsRGB) + { + if (flags & cPackBC7FlagUse3SubsetsRGB) + { + pack_mode0_or_2_rgb(mode02_candidate_block, pPixels, alt_xr, alt_xg, alt_xb, mean_r, mean_g, mean_b, 1e+9f, flags, nullptr, &mode02_actual_sse); + } + + pack_mode1_or_3_rgb(mode13_candidate_block, pPixels, alt_xr, alt_xg, alt_xb, mean_r, mean_g, mean_b, 1e+9f, flags, nullptr, &mode13_actual_sse); + } + + if (flags & cPackBC7FlagUseDualPlaneRGB) + pack_mode4_or_5(mode45_candidate_block, pPixels, (desired_dp_chan >= 0) ? desired_dp_chan : 1, 1e+9f, flags, nullptr, &mode45_actual_sse); // todo: determine best def channel here + } + else + { + float mode6_ortho_ratio; + const float mode6_slam_to_line_sse_est = estimate_slam_to_line_sse_3D(cov, alt_xr, alt_xg, alt_xb, &mode6_ortho_ratio); + + if ((flags & cPackBC7FlagUse2SubsetsRGB) && (block_max_var >= MIN_BLOCK_MAX_VAR_23SUBSETS) && (mode6_ortho_ratio > ORTHO_RATIO_23SUBSET_RATIO_THRESH)) + { + const bool high_ortho_energy_flag = (mode6_slam_to_line_sse_est >= HIGH_ORTHO_ENERGY_THRESH); + + if (high_ortho_energy_flag) + { +#if BASISU_BC7F_PERF_STATS + g_total_high_ortho_energy++; +#endif + + if ((flags & cPackBC7FlagUse3SubsetsRGB) && (block_max_var >= MIN_BLOCK_MAX_VAR_3SUBSETS)) + { + pack_mode0_or_2_rgb(mode02_candidate_block, pPixels, alt_xr, alt_xg, alt_xb, mean_r, mean_g, mean_b, 1e+9f, flags, nullptr, &mode02_actual_sse); + pack_mode1_or_3_rgb(mode13_candidate_block, pPixels, alt_xr, alt_xg, alt_xb, mean_r, mean_g, mean_b, 1e+9f, flags, nullptr, &mode13_actual_sse); + } + else + { + pack_mode1_or_3_rgb(mode13_candidate_block, pPixels, alt_xr, alt_xg, alt_xb, mean_r, mean_g, mean_b, 1e+9f, flags, nullptr, &mode13_actual_sse); + } + } + } + + if (desired_dp_chan >= 0) + { + assert(flags & cPackBC7FlagUseDualPlaneRGB); + + pack_mode4_or_5(mode45_candidate_block, pPixels, desired_dp_chan, 1e+9f, flags, nullptr, &mode45_actual_sse); + + } // if (desired_dp_chan >= 0) + } + } + + const uint32_t best_actual_sse = 
basisu::minimum(mode6_actual_sse, mode02_actual_sse, mode13_actual_sse, mode45_actual_sse); + + if ((mode45_actual_sse != UINT32_MAX) && (best_actual_sse == mode45_actual_sse)) + { + memcpy(pBlock, mode45_candidate_block, sizeof(basist::bc7_block)); + } + else if ((mode02_actual_sse != UINT32_MAX) && (best_actual_sse == mode02_actual_sse)) + { + memcpy(pBlock, mode02_candidate_block, sizeof(basist::bc7_block)); + } + else if ((mode13_actual_sse != UINT32_MAX) && (best_actual_sse == mode13_actual_sse)) + { + memcpy(pBlock, mode13_candidate_block, sizeof(basist::bc7_block)); + } + else + { + assert(mode6_actual_sse == best_actual_sse); + + // pbits set to 1 to ensure alpha is always decoded to fully opaque (255) + encode_mode6_rgba_block(pBlock, + lr, lg, lb, 127, p0, + hr, hg, hb, 127, p1, + cur_weights); + } + +#ifdef _DEBUG + { + // Final sanity checking. + uint32_t expected_actual_sse = calc_sse(pBlock, pPixels); + assert(expected_actual_sse == best_actual_sse); + } +#endif + + return best_actual_sse; + } + + //------------------------------------------------------------------------------------------------------- + + void fast_pack_bc7_rgba_analytical(uint8_t* pBlock, const color_rgba* pPixels, uint32_t flags) + { + assert(g_bc7_4bit_ls_tab[1][0]); + +#if BASISU_BC7F_PERF_STATS + g_total_rgba_calls++; +#endif + + const uint32_t fc = *(const uint32_t*)&pPixels[0]; + if (fc == *(const uint32_t*)&pPixels[15]) + { + int k; + for (k = 1; k < 15; k++) + if (*(const uint32_t*)&pPixels[k] != fc) + break; + + if (k == 15) + { + pack_mode5_solid(pBlock, pPixels[0]); + return; + } + } + + int total_r = 0, total_g = 0, total_b = 0, total_a = 0; + int min_r = 255, min_g = 255, min_b = 255, min_a = 255; + int max_r = 0, max_g = 0, max_b = 0, max_a = 0; + + for (uint32_t i = 0; i < 16; i++) + { + int r = pPixels[i].r, g = pPixels[i].g, b = pPixels[i].b, a = pPixels[i].a; + + total_r += r; total_g += g; total_b += b; total_a += a; + + min_r = basisu::minimum(min_r, r); min_g = 
basisu::minimum(min_g, g); min_b = basisu::minimum(min_b, b); min_a = basisu::minimum(min_a, a); + max_r = basisu::maximum(max_r, r); max_g = basisu::maximum(max_g, g); max_b = basisu::maximum(max_b, b); max_a = basisu::maximum(max_a, a); + } + + assert((min_r != max_r) || (min_g != max_g) || (min_b != max_b) || (min_a != max_a)); + + const int mean_r = (total_r + 8) >> 4, mean_g = (total_g + 8) >> 4, mean_b = (total_b + 8) >> 4, mean_a = (total_a + 8) >> 4; + + // covar rows are: + int icov4[10] = { }; + + // 0=rr + // 1=rg + // 2=rb + // 3=ra + // + // 4=gg + // 5=gb + // 6=ga + // + // 7=bb + // 8=ba + // + // 9=aa + + // 0 1 2 3 + // 4 5 6 + // 7 8 + // 9 + + // 0 1 2 3 + // 1 4 5 6 + // 2 5 7 8 + // 3 6 8 9 + + // trace at 0,4,7,9 + + for (uint32_t i = 0; i < 16; i++) + { + const int r = (int)pPixels[i].r - mean_r, g = (int)pPixels[i].g - mean_g, b = (int)pPixels[i].b - mean_b, a = (int)pPixels[i].a - mean_a; + + icov4[0] += r * r; icov4[1] += r * g; icov4[2] += r * b; icov4[3] += r * a; + icov4[4] += g * g; icov4[5] += g * b; icov4[6] += g * a; + icov4[7] += b * b; icov4[8] += b * a; + icov4[9] += a * a; + } + + const int block_max_var4 = basisu::maximum(icov4[0], icov4[4], icov4[7], icov4[9]); // not divided by 16, i.e. scaled by 16 + assert(block_max_var4); // solid blocks already filtered out + + // check for dual plane, if a single component is very strongly decorrelated then switch to modes 4/5 + int desired_dp_chan = -1; + + if ((flags & cPackBC7FlagUseDualPlaneRGBA) && (block_max_var4 >= DP_BLOCK_VAR_THRESH_RGBA)) + { + // Prefer A, if not strongly decorrelated then check RGB. + const float r_var = (float)icov4[0], g_var = (float)icov4[4], b_var = (float)icov4[7], a_var = (float)icov4[9]; + + const bool has_a = icov4[9] > 0; + + if (has_a) + { + const float p_03 = icov4[0] ? fabs((float)icov4[3] / sqrtf(r_var * a_var)) : 1.0f; + const float p_13 = icov4[4] ? fabs((float)icov4[6] / sqrtf(g_var * a_var)) : 1.0f; + const float p_23 = icov4[7] ? 
fabs((float)icov4[8] / sqrtf(b_var * a_var)) : 1.0f; + + const float min_p = basisu::minimum(p_03, p_13, p_23); + if (min_p < ALPHA_DECORR_THRESHOLD) + { + desired_dp_chan = 3; +#if BASISU_BC7F_PERF_STATS + g_total_dp_valid_chans_a++; +#endif + } + } + + if (desired_dp_chan < 0) + { + const bool has_r = icov4[0] > 16, has_g = icov4[4] > 16, has_b = icov4[7] > 16; + const uint32_t total_active_chans_rgb = has_r + has_g + has_b; + + if (total_active_chans_rgb >= 2) + { + const float rg_corr = (has_r && has_g) ? fabs((float)icov4[1] / sqrtf(r_var * g_var)) : 1.0f; + const float rb_corr = (has_r && has_b) ? fabs((float)icov4[2] / sqrtf(r_var * b_var)) : 1.0f; + const float gb_corr = (has_g && has_b) ? fabs((float)icov4[5] / sqrtf(g_var * b_var)) : 1.0f; + + float min_p = basisu::minimum(rg_corr, rb_corr, gb_corr); + if (min_p < STRONG_DECORR_THRESH_RGBA) + { + if (total_active_chans_rgb == 2) + { + if (!has_r) + desired_dp_chan = 1; + else if (!has_g) + desired_dp_chan = 0; + else + desired_dp_chan = 0; + } + else + { + // see if rg/rb is weakly correlated vs. gb + if ((rg_corr < gb_corr) && (rb_corr < gb_corr)) + desired_dp_chan = 0; + // see if gr/gb is weakly correlated vs. 
rb + else if ((rg_corr < rb_corr) && (gb_corr < rb_corr)) + desired_dp_chan = 1; + // assume b is weakest + else + desired_dp_chan = 2; + } +#if BASISU_BC7F_PERF_STATS + g_total_dp_valid_chans_rgb++; +#endif + } + } + } + } + + if ((flags & cPackBC7FlagUseTrivialMode6) && ((desired_dp_chan == -1) && (block_max_var4 < TRIVIAL_BLOCK_THRESH_RGBA))) + { + //pack_mode5_solid(pBlock, color_rgba(0, 255, 0, 255)); + //return; + + int low_c = INT_MAX, high_c = 0; + + for (uint32_t i = 0; i < 16; i++) + { + int y = ((16 * 2) * pPixels[i].r + (16 * 4) * pPixels[i].g + 16 * pPixels[i].b + (16 * 4) * pPixels[i].a); + assert((y & 0xF) == 0); + y += i; + low_c = basisu::minimum(low_c, y); + high_c = basisu::maximum(high_c, y); + } + + low_c &= 0xF; + high_c &= 0xF; + + int p0, p1, lr, lg, lb, la, hr, hg, hb, ha; + + if (flags & cPackBC7FlagPBitOptMode6) + { + const float q = 1.0f / 255.0f; + float sxl[4] = { (float)pPixels[low_c].r * q, (float)pPixels[low_c].g * q, (float)pPixels[low_c].b * q, (float)pPixels[low_c].a * q }; + float sxh[4] = { (float)pPixels[high_c].r * q, (float)pPixels[high_c].g * q, (float)pPixels[high_c].b * q, (float)pPixels[high_c].a * q }; + + color_rgba bestMinColor, bestMaxColor; + uint32_t best_pbits[2]; + determine_unique_pbits(4, 7, sxl, sxh, bestMinColor, bestMaxColor, best_pbits); + + p0 = best_pbits[0], p1 = best_pbits[1]; + lr = bestMinColor.r, lg = bestMinColor.g, lb = bestMinColor.b, la = bestMinColor.a; + hr = bestMaxColor.r, hg = bestMaxColor.g, hb = bestMaxColor.b, ha = bestMaxColor.a; + } + else + { + p0 = pPixels[low_c].a > 128; + p1 = pPixels[high_c].a > 128; + + lr = to_7(pPixels[low_c].r, p0), lg = to_7(pPixels[low_c].g, p0), lb = to_7(pPixels[low_c].b, p0), la = to_7(pPixels[low_c].a, p0); + hr = to_7(pPixels[high_c].r, p1), hg = to_7(pPixels[high_c].g, p1), hb = to_7(pPixels[high_c].b, p1), ha = to_7(pPixels[high_c].a, p1); + } + + uint8_t cur_weights[16]; + eval_weights_mode6_rgba(pPixels, cur_weights, + lr, lg, lb, la, p0, + hr, hg, 
hb, ha, p1); + + encode_mode6_rgba_block(pBlock, + lr, lg, lb, la, p0, + hr, hg, hb, ha, p1, + cur_weights); + +#if BASISU_BC7F_PERF_STATS + g_total_trivial_mode6_blocks++; +#endif + return; + } + + float cov4[10]; + for (uint32_t i = 0; i < 10; i++) + cov4[i] = (float)icov4[i]; + + // all channel pairs: + // 0,1=1 + // 0,2=2 + // 0,3=3 + // 1,2=5 + // 1,3=6 + // 2,3=8 + //const float r_var = cov4[0], g_var = cov4[4], b_var = cov4[7], a_var = cov4[9]; + + const float sc4 = block_max_var4 ? (1.0f / (float)block_max_var4) : 0; + float wx = sc4 * cov4[0], wy = sc4 * cov4[4], wz = sc4 * cov4[7], wa = sc4 * cov4[9]; + + // 0 1 2 3 + // 1 4 5 6 + // 2 5 7 8 + // 3 6 8 9 + + // TODO + float x1, y1, z1, w1; + for (uint32_t i = 0; i < 4; i++) + { + x1 = cov4[0] * wx + cov4[1] * wy + cov4[2] * wz + cov4[3] * wa; + y1 = cov4[1] * wx + cov4[4] * wy + cov4[5] * wz + cov4[6] * wa; + z1 = cov4[2] * wx + cov4[5] * wy + cov4[7] * wz + cov4[8] * wa; + w1 = cov4[3] * wx + cov4[6] * wy + cov4[8] * wz + cov4[9] * wa; + + float t = sqrtf(x1 * x1 + y1 * y1 + z1 * z1 + w1 * w1); + if (t > basisu::SMALL_FLOAT_VAL) + { + t = 1.0f / t; + x1 *= t; y1 *= t; z1 *= t; w1 *= t; + } + else + { + x1 = y1 = z1 = w1 = .25f; + } + + wx = x1; wy = y1; wz = z1; wa = w1; + } + + const int spans[4] = { max_r - min_r, max_g - min_g, max_b - min_b, max_a - min_a }; + + float mode6_ortho_ratio; + const float mode6_slam_to_line_sse_est = estimate_slam_to_line_sse_4D(cov4, x1, y1, z1, w1, &mode6_ortho_ratio); + const float mode6_sse_est = (mode6_slam_to_line_sse_est + analytical_quant_est_sse(128, 16, 4, spans, nullptr, 1.0f, 16)); + + float mode45_sse_est = 1e+9f, mode7_sse_est = 1e+9f; + + uint8_t mode45_block[sizeof(basist::bc7_block)]; + const bool mode45_valid_flag = (desired_dp_chan >= 0) ? 
pack_mode4_or_5(mode45_block, pPixels, desired_dp_chan, mode6_sse_est, flags, &mode45_sse_est) : false; + BASISU_NOTE_UNUSED(mode45_valid_flag); + + uint8_t mode7_block[sizeof(basist::bc7_block)]; + bool mode7_valid_flag = false; + BASISU_NOTE_UNUSED(mode7_valid_flag); + + if ((flags & cPackBC7FlagUse2SubsetsRGBA) && (block_max_var4 >= MIN_BLOCK_MAX_VAR_23SUBSETS_RGBA) && (mode6_ortho_ratio > ORTHO_RATIO_23SUBSET_RATIO_THRESH_RGBA)) + { + const bool high_ortho_energy_flag = (mode6_slam_to_line_sse_est >= HIGH_ORTHO_ENERGY_THRESH_RGBA); + + if (high_ortho_energy_flag) + { +#if BASISU_BC7F_PERF_STATS + g_total_high_ortho_energy++; +#endif + mode7_valid_flag = pack_mode7_rgba(mode7_block, pPixels, x1, y1, z1, w1, mean_r, mean_g, mean_b, mean_a, mode6_sse_est, flags, &mode7_sse_est); + } + } + + if ((mode45_sse_est < mode7_sse_est) && (mode45_sse_est < mode6_sse_est)) + { + assert(mode45_valid_flag); + memcpy(pBlock, mode45_block, sizeof(basist::bc7_block)); + return; + } + else if ((mode7_sse_est < mode45_sse_est) && (mode7_sse_est < mode6_sse_est)) + { + assert(mode7_valid_flag); + memcpy(pBlock, mode7_block, sizeof(basist::bc7_block)); + return; + } + + // Fall back to mode 6 + int saxis_r = 256, saxis_g = 256, saxis_b = 256, saxis_a = 256; + + float k = basisu::maximum(fabsf(x1), fabsf(y1), fabsf(z1), fabs(w1)); + if (fabs(k) >= basisu::SMALL_FLOAT_VAL) + { + float m = 2048.0f / k; + saxis_r = (int)(x1 * m); + saxis_g = (int)(y1 * m); + saxis_b = (int)(z1 * m); + saxis_a = (int)(w1 * m); + } + + saxis_r = (int)((uint32_t)saxis_r << 4U); + saxis_g = (int)((uint32_t)saxis_g << 4U); + saxis_b = (int)((uint32_t)saxis_b << 4U); + saxis_a = (int)((uint32_t)saxis_a << 4U); + + int low_dot = INT_MAX, high_dot = INT_MIN; + + for (uint32_t i = 0; i < 16; i += 4) + { + assert(((pPixels[i].r * saxis_r + pPixels[i].g * saxis_g + pPixels[i].b * saxis_b + pPixels[i].a * saxis_a) & 0xF) == 0); // sanity + assert(((pPixels[i + 1].r * saxis_r + pPixels[i + 1].g * saxis_g + pPixels[i 
+ 1].b * saxis_b + pPixels[i + 1].a * saxis_a) & 0xF) == 0); + assert(((pPixels[i + 2].r * saxis_r + pPixels[i + 2].g * saxis_g + pPixels[i + 2].b * saxis_b + pPixels[i + 2].a * saxis_a) & 0xF) == 0); + assert(((pPixels[i + 3].r * saxis_r + pPixels[i + 3].g * saxis_g + pPixels[i + 3].b * saxis_b + pPixels[i + 3].a * saxis_a) & 0xF) == 0); + + const int dot0 = (pPixels[i].r * saxis_r + pPixels[i].g * saxis_g + pPixels[i].b * saxis_b + pPixels[i].a * saxis_a) + i; + const int dot1 = (pPixels[i + 1].r * saxis_r + pPixels[i + 1].g * saxis_g + pPixels[i + 1].b * saxis_b + pPixels[i + 1].a * saxis_a) + i + 1; + const int dot2 = (pPixels[i + 2].r * saxis_r + pPixels[i + 2].g * saxis_g + pPixels[i + 2].b * saxis_b + pPixels[i + 2].a * saxis_a) + i + 2; + const int dot3 = (pPixels[i + 3].r * saxis_r + pPixels[i + 3].g * saxis_g + pPixels[i + 3].b * saxis_b + pPixels[i + 3].a * saxis_a) + i + 3; + + int min_d01 = basisu::minimum(dot0, dot1); + int max_d01 = basisu::maximum(dot0, dot1); + + int min_d23 = basisu::minimum(dot2, dot3); + int max_d23 = basisu::maximum(dot2, dot3); + + int min_d = basisu::minimum(min_d01, min_d23); + int max_d = basisu::maximum(max_d01, max_d23); + + low_dot = basisu::minimum(low_dot, min_d); + high_dot = basisu::maximum(high_dot, max_d); + } + + const int low_c = low_dot & 15; + const int high_c = high_dot & 15; + + int p0, p1, lr, lg, lb, la, hr, hg, hb, ha; + + if (flags & cPackBC7FlagPBitOptMode6) + { + const float q = 1.0f / 255.0f; + float sxl[4] = { (float)pPixels[low_c].r * q, (float)pPixels[low_c].g * q, (float)pPixels[low_c].b * q, (float)pPixels[low_c].a * q }; + float sxh[4] = { (float)pPixels[high_c].r * q, (float)pPixels[high_c].g * q, (float)pPixels[high_c].b * q, (float)pPixels[high_c].a * q }; + + color_rgba bestMinColor, bestMaxColor; + uint32_t best_pbits[2]; + determine_unique_pbits(4, 7, sxl, sxh, bestMinColor, bestMaxColor, best_pbits); + + p0 = best_pbits[0], p1 = best_pbits[1]; + lr = bestMinColor.r, lg = bestMinColor.g, lb 
= bestMinColor.b, la = bestMinColor.a; + hr = bestMaxColor.r, hg = bestMaxColor.g, hb = bestMaxColor.b, ha = bestMaxColor.a; + } + else + { + p0 = pPixels[low_c].a > 128; + p1 = pPixels[high_c].a > 128; + + lr = to_7(pPixels[low_c].r, p0), lg = to_7(pPixels[low_c].g, p0), lb = to_7(pPixels[low_c].b, p0), la = to_7(pPixels[low_c].a, p0); + hr = to_7(pPixels[high_c].r, p1), hg = to_7(pPixels[high_c].g, p1), hb = to_7(pPixels[high_c].b, p1), ha = to_7(pPixels[high_c].a, p1); + } + + uint8_t cur_weights[16]; + eval_weights_mode6_rgba(pPixels, cur_weights, + lr, lg, lb, la, p0, + hr, hg, hb, ha, p1); + + vec4F xl, xh; + bool res = compute_least_squares_endpoints_4D( + 16, cur_weights, 16, + g_bc7_4bit_ls_tab, + xl, xh, + pPixels, + (float)total_r, (float)total_g, (float)total_b, (float)total_a); + + if (res) + { + if (flags & cPackBC7FlagPBitOptMode6) + { + const float q = 1.0f / 255.0f; + float sxl[4] = { xl[0] * q, xl[1] * q, xl[2] * q, xl[3] * q }; + float sxh[4] = { xh[0] * q, xh[1] * q, xh[2] * q, xh[3] * q }; + + color_rgba bestMinColor, bestMaxColor; + uint32_t best_pbits[2]; + determine_unique_pbits(4, 7, sxl, sxh, bestMinColor, bestMaxColor, best_pbits); + + p0 = best_pbits[0], p1 = best_pbits[1]; + lr = bestMinColor.r, lg = bestMinColor.g, lb = bestMinColor.b, la = bestMinColor.a; + hr = bestMaxColor.r, hg = bestMaxColor.g, hb = bestMaxColor.b, ha = bestMaxColor.a; + } + else + { + p0 = (xl[3] >= 129.0f); + lr = to_7(xl[0], p0); + lg = to_7(xl[1], p0); + lb = to_7(xl[2], p0); + la = to_7(xl[3], p0); + + p1 = (xh[3] >= 129.0f); + hr = to_7(xh[0], p1); + hg = to_7(xh[1], p1); + hb = to_7(xh[2], p1); + ha = to_7(xh[3], p1); + } + + eval_weights_mode6_rgba(pPixels, cur_weights, + lr, lg, lb, la, p0, + hr, hg, hb, ha, p1); + } + + encode_mode6_rgba_block(pBlock, + lr, lg, lb, la, p0, + hr, hg, hb, ha, p1, + cur_weights); + } + + uint32_t fast_pack_bc7_rgba_partial_analytical(uint8_t* pBlock, const color_rgba* pPixels, uint32_t flags) + { + 
assert(g_bc7_4bit_ls_tab[1][0]); + +#if BASISU_BC7F_PERF_STATS + g_total_rgba_calls++; +#endif + + const uint32_t fc = *(const uint32_t*)&pPixels[0]; + if (fc == *(const uint32_t*)&pPixels[15]) + { + int k; + for (k = 1; k < 15; k++) + if (*(const uint32_t*)&pPixels[k] != fc) + break; + + if (k == 15) + { + pack_mode5_solid(pBlock, pPixels[0]); + return 0; + } + } + + int total_r = 0, total_g = 0, total_b = 0, total_a = 0; + int min_r = 255, min_g = 255, min_b = 255, min_a = 255; + int max_r = 0, max_g = 0, max_b = 0, max_a = 0; + + for (uint32_t i = 0; i < 16; i++) + { + int r = pPixels[i].r, g = pPixels[i].g, b = pPixels[i].b, a = pPixels[i].a; + + total_r += r; total_g += g; total_b += b; total_a += a; + + min_r = basisu::minimum(min_r, r); min_g = basisu::minimum(min_g, g); min_b = basisu::minimum(min_b, b); min_a = basisu::minimum(min_a, a); + max_r = basisu::maximum(max_r, r); max_g = basisu::maximum(max_g, g); max_b = basisu::maximum(max_b, b); max_a = basisu::maximum(max_a, a); + } + + assert((min_r != max_r) || (min_g != max_g) || (min_b != max_b) || (min_a != max_a)); + + const int mean_r = (total_r + 8) >> 4, mean_g = (total_g + 8) >> 4, mean_b = (total_b + 8) >> 4, mean_a = (total_a + 8) >> 4; + + // covar rows are: + int icov4[10] = { }; + + // 0=rr + // 1=rg + // 2=rb + // 3=ra + // + // 4=gg + // 5=gb + // 6=ga + // + // 7=bb + // 8=ba + // + // 9=aa + + // 0 1 2 3 + // 4 5 6 + // 7 8 + // 9 + + // 0 1 2 3 + // 1 4 5 6 + // 2 5 7 8 + // 3 6 8 9 + + // trace at 0,4,7,9 + + for (uint32_t i = 0; i < 16; i++) + { + const int r = (int)pPixels[i].r - mean_r, g = (int)pPixels[i].g - mean_g, b = (int)pPixels[i].b - mean_b, a = (int)pPixels[i].a - mean_a; + + icov4[0] += r * r; icov4[1] += r * g; icov4[2] += r * b; icov4[3] += r * a; + icov4[4] += g * g; icov4[5] += g * b; icov4[6] += g * a; + icov4[7] += b * b; icov4[8] += b * a; + icov4[9] += a * a; + } + + const int block_max_var4 = basisu::maximum(icov4[0], icov4[4], icov4[7], icov4[9]); // not divided by 
16, i.e. scaled by 16 + assert(block_max_var4); // solid blocks already filtered out + + // check for dual plane, if a single component is very strongly decorrelated then switch to modes 4/5 + int desired_dp_chan = -1; + + const bool non_analytical_flag = (flags & cPackBC7FlagNonAnalyticalRGBA) != 0; + + if ((flags & cPackBC7FlagUseDualPlaneRGBA) && + ((!non_analytical_flag && (block_max_var4 >= DP_BLOCK_VAR_THRESH_RGBA)) || (non_analytical_flag && (block_max_var4 > 16))) + ) + { + // Prefer A, if not strongly decorrelated then check RGB. + const float r_var = (float)icov4[0], g_var = (float)icov4[4], b_var = (float)icov4[7], a_var = (float)icov4[9]; + + const bool has_a = icov4[9] > 0; + + if (has_a) + { + const float p_03 = icov4[0] ? fabs((float)icov4[3] / sqrtf(r_var * a_var)) : 1.0f; + const float p_13 = icov4[4] ? fabs((float)icov4[6] / sqrtf(g_var * a_var)) : 1.0f; + const float p_23 = icov4[7] ? fabs((float)icov4[8] / sqrtf(b_var * a_var)) : 1.0f; + + const float min_p = basisu::minimum(p_03, p_13, p_23); + if (min_p < ALPHA_DECORR_THRESHOLD) + { + desired_dp_chan = 3; +#if BASISU_BC7F_PERF_STATS + g_total_dp_valid_chans_a++; +#endif + } + } + + if (desired_dp_chan < 0) + { + const bool has_r = icov4[0] > 16, has_g = icov4[4] > 16, has_b = icov4[7] > 16; + const uint32_t total_active_chans_rgb = has_r + has_g + has_b; + + if (total_active_chans_rgb >= 2) + { + const float rg_corr = (has_r && has_g) ? fabs((float)icov4[1] / sqrtf(r_var * g_var)) : 1.0f; + const float rb_corr = (has_r && has_b) ? fabs((float)icov4[2] / sqrtf(r_var * b_var)) : 1.0f; + const float gb_corr = (has_g && has_b) ? fabs((float)icov4[5] / sqrtf(g_var * b_var)) : 1.0f; + + float min_p = basisu::minimum(rg_corr, rb_corr, gb_corr); + + const float decorr_thresh = non_analytical_flag ? 
.999f : STRONG_DECORR_THRESH_RGBA; + if (min_p < decorr_thresh) + { + if (total_active_chans_rgb == 2) + { + if (!has_r) + desired_dp_chan = 1; + else if (!has_g) + desired_dp_chan = 0; + else + desired_dp_chan = 0; + } + else + { + // see if rg/rb is weakly correlated vs. gb + if ((rg_corr < gb_corr) && (rb_corr < gb_corr)) + desired_dp_chan = 0; + // see if gr/gb is weakly correlated vs. rb + else if ((rg_corr < rb_corr) && (gb_corr < rb_corr)) + desired_dp_chan = 1; + // assume b is weakest + else + desired_dp_chan = 2; + } +#if BASISU_BC7F_PERF_STATS + g_total_dp_valid_chans_rgb++; +#endif + } + } + } + } + + if ((flags & cPackBC7FlagUseTrivialMode6) && ((desired_dp_chan == -1) && (block_max_var4 < TRIVIAL_BLOCK_THRESH_RGBA))) + { + int low_c = INT_MAX, high_c = 0; + + for (uint32_t i = 0; i < 16; i++) + { + int y = ((16 * 2) * pPixels[i].r + (16 * 4) * pPixels[i].g + 16 * pPixels[i].b + (16 * 4) * pPixels[i].a); + assert((y & 0xF) == 0); + y += i; + low_c = basisu::minimum(low_c, y); + high_c = basisu::maximum(high_c, y); + } + + low_c &= 0xF; + high_c &= 0xF; + + int p0, p1, lr, lg, lb, la, hr, hg, hb, ha; + + if (flags & cPackBC7FlagPBitOptMode6) + { + const float q = 1.0f / 255.0f; + float sxl[4] = { (float)pPixels[low_c].r * q, (float)pPixels[low_c].g * q, (float)pPixels[low_c].b * q, (float)pPixels[low_c].a * q }; + float sxh[4] = { (float)pPixels[high_c].r * q, (float)pPixels[high_c].g * q, (float)pPixels[high_c].b * q, (float)pPixels[high_c].a * q }; + + color_rgba bestMinColor, bestMaxColor; + uint32_t best_pbits[2]; + determine_unique_pbits(4, 7, sxl, sxh, bestMinColor, bestMaxColor, best_pbits); + + p0 = best_pbits[0], p1 = best_pbits[1]; + lr = bestMinColor.r, lg = bestMinColor.g, lb = bestMinColor.b, la = bestMinColor.a; + hr = bestMaxColor.r, hg = bestMaxColor.g, hb = bestMaxColor.b, ha = bestMaxColor.a; + } + else + { + p0 = pPixels[low_c].a > 128; + p1 = pPixels[high_c].a > 128; + + lr = to_7(pPixels[low_c].r, p0), lg = to_7(pPixels[low_c].g, 
p0), lb = to_7(pPixels[low_c].b, p0), la = to_7(pPixels[low_c].a, p0); + hr = to_7(pPixels[high_c].r, p1), hg = to_7(pPixels[high_c].g, p1), hb = to_7(pPixels[high_c].b, p1), ha = to_7(pPixels[high_c].a, p1); + } + + uint8_t cur_weights[16]; + uint32_t mode6_actual_sse = eval_weights_mode6_rgba_sse(pPixels, cur_weights, + lr, lg, lb, la, p0, + hr, hg, hb, ha, p1); + + encode_mode6_rgba_block(pBlock, + lr, lg, lb, la, p0, + hr, hg, hb, ha, p1, + cur_weights); + +#if BASISU_BC7F_PERF_STATS + g_total_trivial_mode6_blocks++; +#endif + +#ifdef _DEBUG + { + // Final sanity checking. + uint32_t expected_actual_sse = calc_sse(pBlock, pPixels); + assert(expected_actual_sse == mode6_actual_sse); + } +#endif + + return mode6_actual_sse; + } + + float cov4[10]; + for (uint32_t i = 0; i < 10; i++) + cov4[i] = (float)icov4[i]; + + // all channel pairs: + // 0,1=1 + // 0,2=2 + // 0,3=3 + // 1,2=5 + // 1,3=6 + // 2,3=8 + //const float r_var = cov4[0], g_var = cov4[4], b_var = cov4[7], a_var = cov4[9]; + + const float sc4 = block_max_var4 ? 
(1.0f / (float)block_max_var4) : 0; + float wx = sc4 * cov4[0], wy = sc4 * cov4[4], wz = sc4 * cov4[7], wa = sc4 * cov4[9]; + + // 0 1 2 3 + // 1 4 5 6 + // 2 5 7 8 + // 3 6 8 9 + + // TODO + float x1, y1, z1, w1; + for (uint32_t i = 0; i < 4; i++) + { + x1 = cov4[0] * wx + cov4[1] * wy + cov4[2] * wz + cov4[3] * wa; + y1 = cov4[1] * wx + cov4[4] * wy + cov4[5] * wz + cov4[6] * wa; + z1 = cov4[2] * wx + cov4[5] * wy + cov4[7] * wz + cov4[8] * wa; + w1 = cov4[3] * wx + cov4[6] * wy + cov4[8] * wz + cov4[9] * wa; + + float t = sqrtf(x1 * x1 + y1 * y1 + z1 * z1 + w1 * w1); + if (t > basisu::SMALL_FLOAT_VAL) + { + t = 1.0f / t; + x1 *= t; y1 *= t; z1 *= t; w1 *= t; + } + else + { + x1 = y1 = z1 = w1 = .25f; + } + + wx = x1; wy = y1; wz = z1; wa = w1; + } + + // Fall back to mode 6 + int saxis_r = 256, saxis_g = 256, saxis_b = 256, saxis_a = 256; + + float k = basisu::maximum(fabsf(x1), fabsf(y1), fabsf(z1), fabs(w1)); + if (fabs(k) >= basisu::SMALL_FLOAT_VAL) + { + float m = 2048.0f / k; + saxis_r = (int)(x1 * m); + saxis_g = (int)(y1 * m); + saxis_b = (int)(z1 * m); + saxis_a = (int)(w1 * m); + } + + saxis_r = (int)((uint32_t)saxis_r << 4U); + saxis_g = (int)((uint32_t)saxis_g << 4U); + saxis_b = (int)((uint32_t)saxis_b << 4U); + saxis_a = (int)((uint32_t)saxis_a << 4U); + + int low_dot = INT_MAX, high_dot = INT_MIN; + + for (uint32_t i = 0; i < 16; i += 4) + { + assert(((pPixels[i].r * saxis_r + pPixels[i].g * saxis_g + pPixels[i].b * saxis_b + pPixels[i].a * saxis_a) & 0xF) == 0); // sanity + assert(((pPixels[i + 1].r * saxis_r + pPixels[i + 1].g * saxis_g + pPixels[i + 1].b * saxis_b + pPixels[i + 1].a * saxis_a) & 0xF) == 0); + assert(((pPixels[i + 2].r * saxis_r + pPixels[i + 2].g * saxis_g + pPixels[i + 2].b * saxis_b + pPixels[i + 2].a * saxis_a) & 0xF) == 0); + assert(((pPixels[i + 3].r * saxis_r + pPixels[i + 3].g * saxis_g + pPixels[i + 3].b * saxis_b + pPixels[i + 3].a * saxis_a) & 0xF) == 0); + + const int dot0 = (pPixels[i].r * saxis_r + pPixels[i].g * 
saxis_g + pPixels[i].b * saxis_b + pPixels[i].a * saxis_a) + i; + const int dot1 = (pPixels[i + 1].r * saxis_r + pPixels[i + 1].g * saxis_g + pPixels[i + 1].b * saxis_b + pPixels[i + 1].a * saxis_a) + i + 1; + const int dot2 = (pPixels[i + 2].r * saxis_r + pPixels[i + 2].g * saxis_g + pPixels[i + 2].b * saxis_b + pPixels[i + 2].a * saxis_a) + i + 2; + const int dot3 = (pPixels[i + 3].r * saxis_r + pPixels[i + 3].g * saxis_g + pPixels[i + 3].b * saxis_b + pPixels[i + 3].a * saxis_a) + i + 3; + + int min_d01 = basisu::minimum(dot0, dot1); + int max_d01 = basisu::maximum(dot0, dot1); + + int min_d23 = basisu::minimum(dot2, dot3); + int max_d23 = basisu::maximum(dot2, dot3); + + int min_d = basisu::minimum(min_d01, min_d23); + int max_d = basisu::maximum(max_d01, max_d23); + + low_dot = basisu::minimum(low_dot, min_d); + high_dot = basisu::maximum(high_dot, max_d); + } + + const int low_c = low_dot & 15; + const int high_c = high_dot & 15; + + int p0, p1, lr, lg, lb, la, hr, hg, hb, ha; + + if (flags & cPackBC7FlagPBitOptMode6) + { + const float q = 1.0f / 255.0f; + float sxl[4] = { (float)pPixels[low_c].r * q, (float)pPixels[low_c].g * q, (float)pPixels[low_c].b * q, (float)pPixels[low_c].a * q }; + float sxh[4] = { (float)pPixels[high_c].r * q, (float)pPixels[high_c].g * q, (float)pPixels[high_c].b * q, (float)pPixels[high_c].a * q }; + + color_rgba bestMinColor, bestMaxColor; + uint32_t best_pbits[2]; + determine_unique_pbits(4, 7, sxl, sxh, bestMinColor, bestMaxColor, best_pbits); + + p0 = best_pbits[0], p1 = best_pbits[1]; + lr = bestMinColor.r, lg = bestMinColor.g, lb = bestMinColor.b, la = bestMinColor.a; + hr = bestMaxColor.r, hg = bestMaxColor.g, hb = bestMaxColor.b, ha = bestMaxColor.a; + } + else + { + p0 = pPixels[low_c].a > 128; + p1 = pPixels[high_c].a > 128; + + lr = to_7(pPixels[low_c].r, p0), lg = to_7(pPixels[low_c].g, p0), lb = to_7(pPixels[low_c].b, p0), la = to_7(pPixels[low_c].a, p0); + hr = to_7(pPixels[high_c].r, p1), hg = 
to_7(pPixels[high_c].g, p1), hb = to_7(pPixels[high_c].b, p1), ha = to_7(pPixels[high_c].a, p1); + } + + uint8_t cur_weights[16]; + uint32_t mode6_actual_sse = eval_weights_mode6_rgba_sse(pPixels, cur_weights, + lr, lg, lb, la, p0, + hr, hg, hb, ha, p1); + + if (mode6_actual_sse) + { + vec4F xl, xh; + bool res = compute_least_squares_endpoints_4D( + 16, cur_weights, 16, + g_bc7_4bit_ls_tab, + xl, xh, + pPixels, + (float)total_r, (float)total_g, (float)total_b, (float)total_a); + + if (res) + { + int trial_p0, trial_p1, trial_lr, trial_lg, trial_lb, trial_la, trial_hr, trial_hg, trial_hb, trial_ha; + + if (flags & cPackBC7FlagPBitOptMode6) + { + const float q = 1.0f / 255.0f; + float sxl[4] = { xl[0] * q, xl[1] * q, xl[2] * q, xl[3] * q }; + float sxh[4] = { xh[0] * q, xh[1] * q, xh[2] * q, xh[3] * q }; + + color_rgba bestMinColor, bestMaxColor; + uint32_t best_pbits[2]; + determine_unique_pbits(4, 7, sxl, sxh, bestMinColor, bestMaxColor, best_pbits); + + trial_p0 = best_pbits[0], trial_p1 = best_pbits[1]; + trial_lr = bestMinColor.r, trial_lg = bestMinColor.g, trial_lb = bestMinColor.b, trial_la = bestMinColor.a; + trial_hr = bestMaxColor.r, trial_hg = bestMaxColor.g, trial_hb = bestMaxColor.b, trial_ha = bestMaxColor.a; + } + else + { + trial_p0 = (xl[3] >= 129.0f); + trial_lr = to_7(xl[0], trial_p0); + trial_lg = to_7(xl[1], trial_p0); + trial_lb = to_7(xl[2], trial_p0); + trial_la = to_7(xl[3], trial_p0); + + trial_p1 = (xh[3] >= 129.0f); + trial_hr = to_7(xh[0], trial_p1); + trial_hg = to_7(xh[1], trial_p1); + trial_hb = to_7(xh[2], trial_p1); + trial_ha = to_7(xh[3], trial_p1); + } + + uint8_t trial_weights[16]; + uint32_t mode6_ls_actual_sse = eval_weights_mode6_rgba_sse(pPixels, trial_weights, + trial_lr, trial_lg, trial_lb, trial_la, trial_p0, + trial_hr, trial_hg, trial_hb, trial_ha, trial_p1); + + if (mode6_ls_actual_sse < mode6_actual_sse) + { + mode6_actual_sse = mode6_ls_actual_sse; + memcpy(cur_weights, trial_weights, 16); + p0 = trial_p0; p1 = 
trial_p1; + lr = trial_lr; lg = trial_lg; lb = trial_lb; la = trial_la; + hr = trial_hr; hg = trial_hg; hb = trial_hb; ha = trial_ha; + } + } + } + + uint32_t mode7_actual_sse = UINT32_MAX; + uint8_t mode7_candidate_block[sizeof(basist::bc7_block)]; + + uint32_t mode45_actual_sse = UINT32_MAX; + uint8_t mode45_candidate_block[sizeof(basist::bc7_block)]; + + if (mode6_actual_sse) + { + if (non_analytical_flag) + { + // No gates: very expensive. + if (flags & cPackBC7FlagUse2SubsetsRGBA) + { + pack_mode7_rgba(mode7_candidate_block, pPixels, x1, y1, z1, w1, mean_r, mean_g, mean_b, mean_a, 1e+9f, flags, nullptr, &mode7_actual_sse); + } + + if (flags & cPackBC7FlagUseDualPlaneRGBA) + pack_mode4_or_5(mode45_candidate_block, pPixels, (desired_dp_chan >= 0) ? desired_dp_chan : 3, 1e+9f, flags, nullptr, &mode45_actual_sse); // todo: determine best def channel here + } + else + { + float mode6_ortho_ratio; + const float mode6_slam_to_line_sse_est = estimate_slam_to_line_sse_4D(cov4, x1, y1, z1, w1, &mode6_ortho_ratio); + + if ((flags & cPackBC7FlagUse2SubsetsRGBA) && (block_max_var4 >= MIN_BLOCK_MAX_VAR_23SUBSETS_RGBA) && (mode6_ortho_ratio > ORTHO_RATIO_23SUBSET_RATIO_THRESH_RGBA)) + { + const bool high_ortho_energy_flag = (mode6_slam_to_line_sse_est >= HIGH_ORTHO_ENERGY_THRESH_RGBA); + + if (high_ortho_energy_flag) + { +#if BASISU_BC7F_PERF_STATS + g_total_high_ortho_energy++; +#endif + pack_mode7_rgba(mode7_candidate_block, pPixels, x1, y1, z1, w1, mean_r, mean_g, mean_b, mean_a, 1e+9f, flags, nullptr, &mode7_actual_sse); + } + } + + if (desired_dp_chan >= 0) + pack_mode4_or_5(mode45_candidate_block, pPixels, desired_dp_chan, 1e+9f, flags, nullptr, &mode45_actual_sse); + } + } + + const uint32_t best_actual_sse = basisu::minimum(mode6_actual_sse, mode45_actual_sse, mode7_actual_sse); + + if ((mode45_actual_sse != UINT32_MAX) && (best_actual_sse == mode45_actual_sse)) + { + memcpy(pBlock, mode45_candidate_block, sizeof(basist::bc7_block)); + } + else if ((mode7_actual_sse 
!= UINT32_MAX) && (best_actual_sse == mode7_actual_sse)) + { + memcpy(pBlock, mode7_candidate_block, sizeof(basist::bc7_block)); + } + else + { + assert(mode6_actual_sse == best_actual_sse); + + encode_mode6_rgba_block(pBlock, + lr, lg, lb, la, p0, + hr, hg, hb, ha, p1, + cur_weights); + } + +#ifdef _DEBUG + { + // Final sanity checking. + uint32_t expected_actual_sse = calc_sse(pBlock, pPixels); + assert(expected_actual_sse == best_actual_sse); + } +#endif + + return best_actual_sse; + } + + // Routes to either rgb or rgba automatically + uint32_t fast_pack_bc7_auto_rgba(uint8_t* pBlock, const color_rgba* pPixels, uint32_t flags) + { + for (uint32_t i = 0; i < 16; i += 4) + { + if ((pPixels[i].a < 255) || (pPixels[i + 1].a < 255) || (pPixels[i + 2].a < 255) || (pPixels[i + 3].a < 255)) + { + if (flags & cPackBC7FlagPartiallyAnalyticalRGBA) + return bc7f::fast_pack_bc7_rgba_partial_analytical(pBlock, pPixels, flags); + else + { + bc7f::fast_pack_bc7_rgba_analytical(pBlock, pPixels, flags); + return 0; + } + } + } + + if (flags & cPackBC7FlagPartiallyAnalyticalRGB) + return bc7f::fast_pack_bc7_rgb_partial_analytical(pBlock, pPixels, flags); + else + { + bc7f::fast_pack_bc7_rgb_analytical(pBlock, pPixels, flags); + return 0; + } + } + + // Source block cannot have alpha. + uint32_t fast_pack_bc7_auto_rgb(uint8_t* pBlock, const color_rgba* pPixels, uint32_t flags) + { +// Disabling this check here, because during fuzzing the ktx2 file may lie. In this case it's harmless to output an opaque block. +#if 0 +#if defined(DEBUG) || defined(_DEBUG) + for (uint32_t i = 0; i < 16; i += 4) + { + if ((pPixels[i].a < 255) || (pPixels[i + 1].a < 255) || (pPixels[i + 2].a < 255) || (pPixels[i + 3].a < 255)) + { + // Block can't have alpha here, or the solid color detectors may misfire. 
+ assert(0); + } + } +#endif +#endif + + if (flags & cPackBC7FlagPartiallyAnalyticalRGB) + return bc7f::fast_pack_bc7_rgb_partial_analytical(pBlock, pPixels, flags); + else + { + bc7f::fast_pack_bc7_rgb_analytical(pBlock, pPixels, flags); + return 0; + } + } + + void clear_perf_stats() + { +#if BASISU_BC7F_PERF_STATS +#define BU_CLEAR_BLOCK_STAT(x) x = 0; + BU_CLEAR_BLOCK_STAT(g_total_rgb_calls); + BU_CLEAR_BLOCK_STAT(g_total_rgba_calls); + BU_CLEAR_BLOCK_STAT(g_total_solid_blocks); + BU_CLEAR_BLOCK_STAT(g_total_trivial_mode6_blocks); + BU_CLEAR_BLOCK_STAT(g_total_dp_valid_chans_rgb); + BU_CLEAR_BLOCK_STAT(g_total_dp_valid_chans_a); + BU_CLEAR_BLOCK_STAT(g_total_high_ortho_energy); + BU_CLEAR_BLOCK_STAT(g_total_mode02_evals); + BU_CLEAR_BLOCK_STAT(g_total_mode02_bailouts); + BU_CLEAR_BLOCK_STAT(g_total_mode13_evals); + BU_CLEAR_BLOCK_STAT(g_total_mode13_bailouts); + BU_CLEAR_BLOCK_STAT(g_total_mode45_evals); + BU_CLEAR_BLOCK_STAT(g_total_mode45_bailouts); + BU_CLEAR_BLOCK_STAT(g_total_mode7_evals); + BU_CLEAR_BLOCK_STAT(g_total_mode7_bailouts); +#undef BU_CLEAR_BLOCK_STAT +#endif + } + + void print_perf_stats() + { +#if BASISU_BC7F_PERF_STATS + const uint32_t total_bc7_blocks = g_total_rgb_calls + g_total_rgba_calls; + if (!total_bc7_blocks) + return; + +#define BU_PRINT_BLOCK_STAT(x) printf(#x ": %u %3.2f%%\n", (uint32_t)x, static_cast<float>(x) * 100.0f / (float)total_bc7_blocks); + BU_PRINT_BLOCK_STAT(g_total_rgb_calls); + BU_PRINT_BLOCK_STAT(g_total_rgba_calls); + BU_PRINT_BLOCK_STAT(g_total_solid_blocks); + BU_PRINT_BLOCK_STAT(g_total_trivial_mode6_blocks); + BU_PRINT_BLOCK_STAT(g_total_dp_valid_chans_rgb); + BU_PRINT_BLOCK_STAT(g_total_dp_valid_chans_a); + BU_PRINT_BLOCK_STAT(g_total_high_ortho_energy); + BU_PRINT_BLOCK_STAT(g_total_mode02_evals); + BU_PRINT_BLOCK_STAT(g_total_mode02_bailouts); + BU_PRINT_BLOCK_STAT(g_total_mode13_evals); + BU_PRINT_BLOCK_STAT(g_total_mode13_bailouts); + BU_PRINT_BLOCK_STAT(g_total_mode45_evals); + 
BU_PRINT_BLOCK_STAT(g_total_mode45_bailouts); + BU_PRINT_BLOCK_STAT(g_total_mode7_evals); + BU_PRINT_BLOCK_STAT(g_total_mode7_bailouts); +#undef BU_PRINT_BLOCK_STAT + +#endif + } + +#if 0 + struct bc7_mode_6 + { + struct + { + uint64_t m_mode : 7; + uint64_t m_r0 : 7; + uint64_t m_r1 : 7; + uint64_t m_g0 : 7; + uint64_t m_g1 : 7; + uint64_t m_b0 : 7; + uint64_t m_b1 : 7; + uint64_t m_a0 : 7; + uint64_t m_a1 : 7; + uint64_t m_p0 : 1; + } m_lo; + + union + { + struct + { + uint64_t m_p1 : 1; + uint64_t m_s00 : 3; + uint64_t m_s10 : 4; + uint64_t m_s20 : 4; + uint64_t m_s30 : 4; + + uint64_t m_s01 : 4; + uint64_t m_s11 : 4; + uint64_t m_s21 : 4; + uint64_t m_s31 : 4; + + uint64_t m_s02 : 4; + uint64_t m_s12 : 4; + uint64_t m_s22 : 4; + uint64_t m_s32 : 4; + + uint64_t m_s03 : 4; + uint64_t m_s13 : 4; + uint64_t m_s23 : 4; + uint64_t m_s33 : 4; + + } m_hi; + + uint64_t m_hi_bits; + }; + }; +#endif + +#if 0 + // Very basic ASTC LDR 4x4 packer which transcodes BC7 mode 6 RGB/RGBA only to ASTC LDR 4x4. 
+ void fast_pack_astc(void* pBlock, const color_rgba* pPixels) + { + astc_helpers::astc_block* pDst_block = (astc_helpers::astc_block*)pBlock; + + astc_helpers::log_astc_block log_blk; + log_blk.clear(); + log_blk.m_grid_width = 4; + log_blk.m_grid_height = 4; + + const uint32_t fc = *(const uint32_t*)&pPixels[0]; + if (fc == *(const uint32_t*)&pPixels[15]) + { + int k; + for (k = 1; k < 15; k++) + if (*(const uint32_t*)&pPixels[k] != fc) + break; + + if (k == 15) + { + const uint32_t r = pPixels[0].r, g = pPixels[0].g, b = pPixels[0].b, a = pPixels[0].a; + + log_blk.m_solid_color_flag_ldr = true; + log_blk.m_solid_color[0] = (uint16_t)(r | ((uint32_t)r << 8)); + log_blk.m_solid_color[1] = (uint16_t)(g | ((uint32_t)g << 8)); + log_blk.m_solid_color[2] = (uint16_t)(b | ((uint32_t)b << 8)); + log_blk.m_solid_color[3] = (uint16_t)(a | ((uint32_t)a << 8)); + + bool pack_status = astc_helpers::pack_astc_block(*pDst_block, log_blk); + assert(pack_status); + BASISU_NOTE_UNUSED(pack_status); + + return; + } + } + + basist::bc7_block bc7_block; + fast_pack_bc7_auto_rgba((uint8_t*)&bc7_block, pPixels, cPackBC7FlagPBitOpt | cPackBC7FlagPBitOptMode6 | cPackBC7FlagUseTrivialMode6); + + assert(bc7u::determine_bc7_mode(&bc7_block) == 6); + + bc7u::bc7_mode_6& mode6 = *(bc7u::bc7_mode_6*)&bc7_block; + + log_blk.m_num_partitions = 1; + + if ((mode6.m_lo.m_a0 < 127) || (mode6.m_lo.m_a1 < 127)) + { + log_blk.m_color_endpoint_modes[0] = 12; + log_blk.m_endpoint_ise_range = astc_helpers::BISE_96_LEVELS; + log_blk.m_weight_ise_range = astc_helpers::BISE_12_LEVELS; + + const auto& endpoint_to_ise = astc_helpers::g_dequant_tables.get_endpoint_tab(log_blk.m_endpoint_ise_range).m_val_to_ise; + const auto& endpoint_from_ise = astc_helpers::g_dequant_tables.get_endpoint_tab(log_blk.m_endpoint_ise_range).m_ISE_to_val; + //const auto& weight_to_ise = astc_helpers::g_dequant_tables.get_weight_tab(log_blk.m_weight_ise_range).m_rank_to_ISE; + + int p0 = mode6.m_lo.m_p0; + int r0 = 
bc7f::from_7(mode6.m_lo.m_r0, p0); + int g0 = bc7f::from_7(mode6.m_lo.m_g0, p0); + int b0 = bc7f::from_7(mode6.m_lo.m_b0, p0); + int a0 = bc7f::from_7(mode6.m_lo.m_a0, p0); + + int p1 = mode6.m_hi.m_p1; + int r1 = bc7f::from_7(mode6.m_lo.m_r1, p1); + int g1 = bc7f::from_7(mode6.m_lo.m_g1, p1); + int b1 = bc7f::from_7(mode6.m_lo.m_b1, p1); + int a1 = bc7f::from_7(mode6.m_lo.m_a1, p1); + + log_blk.m_endpoints[0] = endpoint_to_ise[r0]; + log_blk.m_endpoints[1] = endpoint_to_ise[r1]; + + log_blk.m_endpoints[2] = endpoint_to_ise[g0]; + log_blk.m_endpoints[3] = endpoint_to_ise[g1]; + + log_blk.m_endpoints[4] = endpoint_to_ise[b0]; + log_blk.m_endpoints[5] = endpoint_to_ise[b1]; + + log_blk.m_endpoints[6] = endpoint_to_ise[a0]; + log_blk.m_endpoints[7] = endpoint_to_ise[a1]; + + int s0 = endpoint_from_ise[log_blk.m_endpoints[0]] + endpoint_from_ise[log_blk.m_endpoints[2]] + endpoint_from_ise[log_blk.m_endpoints[4]]; + int s1 = endpoint_from_ise[log_blk.m_endpoints[1]] + endpoint_from_ise[log_blk.m_endpoints[3]] + endpoint_from_ise[log_blk.m_endpoints[5]]; + + int invw = 0; + if (s1 < s0) + { + std::swap(log_blk.m_endpoints[0], log_blk.m_endpoints[1]); + std::swap(log_blk.m_endpoints[2], log_blk.m_endpoints[3]); + std::swap(log_blk.m_endpoints[4], log_blk.m_endpoints[5]); + std::swap(log_blk.m_endpoints[6], log_blk.m_endpoints[7]); + std::swap(g0, g1); + std::swap(b0, b1); + invw = 15; + } + + static const uint8_t s_pWeight_to_ise[16] = { 0, 4, 8, 8, 2, 6, 10, 10, 11, 11, 7, 3, 9, 9, 5, 1 }; + + log_blk.m_weights[0] = s_pWeight_to_ise[mode6.m_hi.m_s00 ^ invw]; + log_blk.m_weights[1] = s_pWeight_to_ise[mode6.m_hi.m_s10 ^ invw]; + log_blk.m_weights[2] = s_pWeight_to_ise[mode6.m_hi.m_s20 ^ invw]; + log_blk.m_weights[3] = s_pWeight_to_ise[mode6.m_hi.m_s30 ^ invw]; + + log_blk.m_weights[4] = s_pWeight_to_ise[mode6.m_hi.m_s01 ^ invw]; + log_blk.m_weights[5] = s_pWeight_to_ise[mode6.m_hi.m_s11 ^ invw]; + log_blk.m_weights[6] = s_pWeight_to_ise[mode6.m_hi.m_s21 ^ invw]; + 
log_blk.m_weights[7] = s_pWeight_to_ise[mode6.m_hi.m_s31 ^ invw]; + + log_blk.m_weights[8] = s_pWeight_to_ise[mode6.m_hi.m_s02 ^ invw]; + log_blk.m_weights[9] = s_pWeight_to_ise[mode6.m_hi.m_s12 ^ invw]; + log_blk.m_weights[10] = s_pWeight_to_ise[mode6.m_hi.m_s22 ^ invw]; + log_blk.m_weights[11] = s_pWeight_to_ise[mode6.m_hi.m_s32 ^ invw]; + + log_blk.m_weights[12] = s_pWeight_to_ise[mode6.m_hi.m_s03 ^ invw]; + log_blk.m_weights[13] = s_pWeight_to_ise[mode6.m_hi.m_s13 ^ invw]; + log_blk.m_weights[14] = s_pWeight_to_ise[mode6.m_hi.m_s23 ^ invw]; + log_blk.m_weights[15] = s_pWeight_to_ise[mode6.m_hi.m_s33 ^ invw]; + } + else + { + log_blk.m_color_endpoint_modes[0] = 8; + log_blk.m_endpoint_ise_range = astc_helpers::BISE_192_LEVELS; + log_blk.m_weight_ise_range = astc_helpers::BISE_16_LEVELS; + + const auto& endpoint_to_ise = astc_helpers::g_dequant_tables.get_endpoint_tab(log_blk.m_endpoint_ise_range).m_val_to_ise; + const auto& endpoint_from_ise = astc_helpers::g_dequant_tables.get_endpoint_tab(log_blk.m_endpoint_ise_range).m_ISE_to_val; + const auto& weight_to_ise = astc_helpers::g_dequant_tables.get_weight_tab(log_blk.m_weight_ise_range).m_rank_to_ISE; + + int p0 = mode6.m_lo.m_p0; + int r0 = bc7f::from_7(mode6.m_lo.m_r0, p0); + int g0 = bc7f::from_7(mode6.m_lo.m_g0, p0); + int b0 = bc7f::from_7(mode6.m_lo.m_b0, p0); + + int p1 = mode6.m_hi.m_p1; + int r1 = bc7f::from_7(mode6.m_lo.m_r1, p1); + int g1 = bc7f::from_7(mode6.m_lo.m_g1, p1); + int b1 = bc7f::from_7(mode6.m_lo.m_b1, p1); + + log_blk.m_endpoints[0] = endpoint_to_ise[r0]; + log_blk.m_endpoints[1] = endpoint_to_ise[r1]; + + log_blk.m_endpoints[2] = endpoint_to_ise[g0]; + log_blk.m_endpoints[3] = endpoint_to_ise[g1]; + + log_blk.m_endpoints[4] = endpoint_to_ise[b0]; + log_blk.m_endpoints[5] = endpoint_to_ise[b1]; + + int s0 = endpoint_from_ise[log_blk.m_endpoints[0]] + endpoint_from_ise[log_blk.m_endpoints[2]] + endpoint_from_ise[log_blk.m_endpoints[4]]; + int s1 = endpoint_from_ise[log_blk.m_endpoints[1]] 
+ endpoint_from_ise[log_blk.m_endpoints[3]] + endpoint_from_ise[log_blk.m_endpoints[5]]; + + int invw = 0; + if (s1 < s0) + { + std::swap(log_blk.m_endpoints[0], log_blk.m_endpoints[1]); + std::swap(log_blk.m_endpoints[2], log_blk.m_endpoints[3]); + std::swap(log_blk.m_endpoints[4], log_blk.m_endpoints[5]); + invw = 15; + } + + log_blk.m_weights[0] = weight_to_ise[(size_t)(mode6.m_hi.m_s00 ^ invw)]; + log_blk.m_weights[1] = weight_to_ise[(size_t)(mode6.m_hi.m_s10 ^ invw)]; + log_blk.m_weights[2] = weight_to_ise[(size_t)(mode6.m_hi.m_s20 ^ invw)]; + log_blk.m_weights[3] = weight_to_ise[(size_t)(mode6.m_hi.m_s30 ^ invw)]; + + log_blk.m_weights[4] = weight_to_ise[(size_t)(mode6.m_hi.m_s01 ^ invw)]; + log_blk.m_weights[5] = weight_to_ise[(size_t)(mode6.m_hi.m_s11 ^ invw)]; + log_blk.m_weights[6] = weight_to_ise[(size_t)(mode6.m_hi.m_s21 ^ invw)]; + log_blk.m_weights[7] = weight_to_ise[(size_t)(mode6.m_hi.m_s31 ^ invw)]; + + log_blk.m_weights[8] = weight_to_ise[(size_t)(mode6.m_hi.m_s02 ^ invw)]; + log_blk.m_weights[9] = weight_to_ise[(size_t)(mode6.m_hi.m_s12 ^ invw)]; + log_blk.m_weights[10] = weight_to_ise[(size_t)(mode6.m_hi.m_s22 ^ invw)]; + log_blk.m_weights[11] = weight_to_ise[(size_t)(mode6.m_hi.m_s32 ^ invw)]; + + log_blk.m_weights[12] = weight_to_ise[(size_t)(mode6.m_hi.m_s03 ^ invw)]; + log_blk.m_weights[13] = weight_to_ise[(size_t)(mode6.m_hi.m_s13 ^ invw)]; + log_blk.m_weights[14] = weight_to_ise[(size_t)(mode6.m_hi.m_s23 ^ invw)]; + log_blk.m_weights[15] = weight_to_ise[(size_t)(mode6.m_hi.m_s33 ^ invw)]; + } + + bool pack_status = astc_helpers::pack_astc_block(*pDst_block, log_blk); + assert(pack_status); + BASISU_NOTE_UNUSED(pack_status); + } +#endif + +} // namespace bc7f + +namespace etc1f +{ +#include "basisu_etc1_mods.inl" + + // flip 0: + // 0011 + // 0011 + // 0011 + // 0011 + + // flip 1: + // 0000 + // 0000 + // 1111 + // 1111 + + uint8_t g_nearest5[256], g_nearest4[256]; + + const uint32_t NUM_SOLID_MODS = 4; + + uint8_t 
g_solid8_5_base[256][NUM_SOLID_MODS][4]; // [desired8][mod][sel] + uint8_t g_solid8_5_err[256][NUM_SOLID_MODS][4]; + uint8_t g_solid8_4_base[256][NUM_SOLID_MODS][4]; + uint8_t g_solid8_4_err[256][NUM_SOLID_MODS][4]; + + uint8_t g_solid_grayscale_etc1_blocks[256][8]; + + inline int expand5(int v5) + { + return (v5 << 3) | (v5 >> 2); + } + + inline int expand4(int v4) + { + return (v4 << 4) | v4; + } + + static inline int dequant4(uint32_t v) + { + assert(v < 16); + return (v << 4) | v; + } + + static inline int dequant5(uint32_t v) + { + assert(v < 32); + return (v << 3) | (v >> 2); + } + + void init() + { + for (int i = 0; i < 256; i++) + { + int best_e = INT_MAX, best_idx = 0; + + for (int s = 0; s < 32; s++) + { + int recovered = (s << 3) | (s >> 2); + int e = basisu::iabs(recovered - i); + if (e < best_e) + { + best_e = e; + best_idx = s; + } + } + + g_nearest5[i] = (uint8_t)best_idx; + } + + for (int i = 0; i < 256; i++) + { + int best_e = INT_MAX, best_idx = 0; + + for (int s = 0; s < 16; s++) + { + int recovered = (s << 4) | s; + int e = basisu::iabs(recovered - i); + if (e < best_e) + { + best_e = e; + best_idx = s; + } + } + + g_nearest4[i] = (uint8_t)best_idx; + } + + for (uint32_t desired8 = 0; desired8 < 256; desired8++) + { + for (uint32_t mod = 0; mod < NUM_SOLID_MODS; mod++) + { + for (uint32_t sel = 0; sel < 4; sel++) + { + int32_t best_err = INT32_MAX; + uint32_t best_base = 0; + + for (uint32_t b = 0; b < 32; b++) + { + int val = basisu::clamp(dequant5(b) + g_etc1_inten_tables[mod][sel], 0, 255); + int err = basisu::iabs(val - desired8); + + if (err < best_err) + { + best_err = err; + best_base = b; + if (!best_err) + break; + } + + } // b + + g_solid8_5_base[desired8][mod][sel] = (uint8_t)best_base; + g_solid8_5_err[desired8][mod][sel] = (uint8_t)basisu::minimum(255, best_err * best_err); + + } // sel + + } // mod + + } // desired8 + + for (uint32_t desired8 = 0; desired8 < 256; desired8++) + { + for (uint32_t mod = 0; mod < NUM_SOLID_MODS; mod++) 
+ { + for (uint32_t sel = 0; sel < 4; sel++) + { + int32_t best_err = INT32_MAX; + uint32_t best_base = 0; + + for (uint32_t b = 0; b < 16; b++) + { + int val = basisu::clamp(dequant4(b) + g_etc1_inten_tables[mod][sel], 0, 255); + int err = basisu::iabs(val - desired8); + + if (err < best_err) + { + best_err = err; + best_base = b; + if (!best_err) + break; + } + + } // b + + g_solid8_4_base[desired8][mod][sel] = (uint8_t)best_base; + g_solid8_4_err[desired8][mod][sel] = (uint8_t)basisu::minimum(255, best_err * best_err); + + } // sel + + } // mod + + } // desired8 + + pack_etc1_state pack_state; + + for (uint32_t i = 0; i <= 255; i++) + { + etc1f::pack_etc1_solid(&g_solid_grayscale_etc1_blocks[i][0], color_rgba(i, i, i, 255), pack_state, true); + } + } + + inline int dequant_d3(int8_t v) + { + assert(v <= 7); + return (int8_t(v << 5) >> 5); + } + + void get_block_colors(uint8_t* pBlock, color_rgba* pColors0, color_rgba* pColors1) + { + const uint32_t b0 = pBlock[0], b1 = pBlock[1], b2 = pBlock[2], b3 = pBlock[3]; + + int base8_r[2], base8_g[2], base8_b[2]; + + if (b3 & 2) + { + // diff mode + base8_r[0] = dequant5(b0 >> 3); + base8_r[1] = dequant5(basisu::clamp((b0 >> 3) + dequant_d3(b0 & 7), 0, 31)); + + base8_g[0] = dequant5(b1 >> 3); + base8_g[1] = dequant5(basisu::clamp((b1 >> 3) + dequant_d3(b1 & 7), 0, 31)); + + base8_b[0] = dequant5(b2 >> 3); + base8_b[1] = dequant5(basisu::clamp((b2 >> 3) + dequant_d3(b2 & 7), 0, 31)); + } + else + { + // abs mode + base8_r[0] = dequant4(b0 >> 4); + base8_r[1] = dequant4(b0 & 15); + + base8_g[0] = dequant4(b1 >> 4); + base8_g[1] = dequant4(b1 & 15); + + base8_b[0] = dequant4(b2 >> 4); + base8_b[1] = dequant4(b2 & 15); + } + + const int* pInten_table0 = &g_etc1_inten_tables[b3 >> 5][0]; + const int* pInten_table1 = &g_etc1_inten_tables[(b3 >> 2) & 7][0]; + + for (uint32_t i = 0; i < 4; i++) + { + const int d = pInten_table0[i]; + pColors0[i].r = (uint8_t)clamp255(base8_r[0] + d); + pColors0[i].g = 
(uint8_t)clamp255(base8_g[0] + d); + pColors0[i].b = (uint8_t)clamp255(base8_b[0] + d); + pColors0[i].a = 0; + } + + for (uint32_t i = 0; i < 4; i++) + { + const int d = pInten_table1[i]; + pColors1[i].r = (uint8_t)clamp255(base8_r[1] + d); + pColors1[i].g = (uint8_t)clamp255(base8_g[1] + d); + pColors1[i].b = (uint8_t)clamp255(base8_b[1] + d); + pColors1[i].a = 0; + } + } + + void get_block_colors_y(uint8_t* pBlock, uint8_t* pColors0, uint8_t* pColors1) + { + //const uint32_t b0 = pBlock[0], b1 = pBlock[1], b2 = pBlock[2], b3 = pBlock[3]; + const uint32_t b0 = pBlock[0], b3 = pBlock[3]; + + int base8_y[2]; + + if (b3 & 2) + { + // diff mode + base8_y[0] = dequant5(b0 >> 3); + base8_y[1] = dequant5(basisu::clamp((b0 >> 3) + dequant_d3(b0 & 7), 0, 31)); + } + else + { + // abs mode + base8_y[0] = dequant4(b0 >> 4); + base8_y[1] = dequant4(b0 & 15); + } + + const int* pInten_table0 = g_etc1_inten_tables[b3 >> 5]; + const int* pInten_table1 = g_etc1_inten_tables[(b3 >> 2) & 7]; + + for (uint32_t i = 0; i < 4; i++) + { + const int d = pInten_table0[i]; + pColors0[i] = (uint8_t)clamp255(base8_y[0] + d); + } + + for (uint32_t i = 0; i < 4; i++) + { + const int d = pInten_table1[i]; + pColors1[i] = (uint8_t)clamp255(base8_y[1] + d); + } + } + + static inline int q4_floor(int x) { return (x * 15) / 255; } + static inline int q5_floor(int x) { return (x * 31) / 255; } + + void corr_round_555(int R, int G, int B, int& qR, int& qG, int& qB) + { + int rL = q5_floor(R), gL = q5_floor(G), bL = q5_floor(B); + int rH = (rL < 31) ? (rL + 1) : 31; + int gH = (gL < 31) ? (gL + 1) : 31; + int bH = (bL < 31) ? 
(bL + 1) : 31; + + int r8[2] = { expand5(rL), expand5(rH) }; + int g8[2] = { expand5(gL), expand5(gH) }; + int b8[2] = { expand5(bL), expand5(bH) }; + + int tr = r8[0], tg = g8[0], tb = b8[0]; + int eR = R - tr, eG = G - tg, eB = B - tb; + int bestJ = basisu::squarei(eR - eG) + basisu::squarei(eG - eB) + basisu::squarei(eB - eR); + int br = tr, bg = tg, bb = tb; + + for (int m = 1; m < 8; ++m) + { + tr = r8[m & 1], tg = g8[(m >> 1) & 1], tb = b8[(m >> 2) & 1]; + eR = R - tr, eG = G - tg, eB = B - tb; + + int J = basisu::squarei(eR - eG) + basisu::squarei(eG - eB) + basisu::squarei(eB - eR); + if (J < bestJ) + { + bestJ = J; + br = tr; + bg = tg; + bb = tb; + } + } + + qR = br >> 3; qG = bg >> 3; qB = bb >> 3; + } + + void corr_round_444(int R, int G, int B, int& qR, int& qG, int& qB) + { + int rL = q4_floor(R), gL = q4_floor(G), bL = q4_floor(B); + int rH = (rL < 15) ? (rL + 1) : 15; + int gH = (gL < 15) ? (gL + 1) : 15; + int bH = (bL < 15) ? (bL + 1) : 15; + + int r8[2] = { expand4(rL), expand4(rH) }; + int g8[2] = { expand4(gL), expand4(gH) }; + int b8[2] = { expand4(bL), expand4(bH) }; + + int tr = r8[0], tg = g8[0], tb = b8[0]; + int eR = R - tr, eG = G - tg, eB = B - tb; + int bestJ = basisu::squarei(eR - eG) + basisu::squarei(eG - eB) + basisu::squarei(eB - eR); + int br = tr, bg = tg, bb = tb; + + for (int m = 1; m < 8; ++m) + { + tr = r8[m & 1], tg = g8[(m >> 1) & 1], tb = b8[(m >> 2) & 1]; + eR = R - tr, eG = G - tg, eB = B - tb; + + int J = basisu::squarei(eR - eG) + basisu::squarei(eG - eB) + basisu::squarei(eB - eR); + if (J < bestJ) + { + bestJ = J; + br = tr; + bg = tg; + bb = tb; + } + } + + qR = br >> 4; qG = bg >> 4; qB = bb >> 4; + } + + inline bool quantize_444_color_correlated(int mean8_r, int mean8_g, int mean8_b, int enc_color[3], bool early_out = true) + { + // Floor to low 4-bit + int r4_low = (mean8_r * 15) / 255; + int g4_low = (mean8_g * 15) / 255; + int b4_low = (mean8_b * 15) / 255; + + // High = +1, clamped + int r4_high = 
basisu::clamp(r4_low + 1, 0, 15); + int g4_high = basisu::clamp(g4_low + 1, 0, 15); + int b4_high = basisu::clamp(b4_low + 1, 0, 15); + + const int r8_low = expand4(r4_low); + const int g8_low = expand4(g4_low); + const int b8_low = expand4(b4_low); + const int r8_high = expand4(r4_high); + const int g8_high = expand4(g4_high); + const int b8_high = expand4(b4_high); + + // Errors if we pick "low" + const float dr = float(r8_low) - mean8_r; + const float dg = float(g8_low) - mean8_g; + const float db = float(b8_low) - mean8_b; + + const float dRG = fabsf(dr - dg); + const float dRB = fabsf(dr - db); + const float dGB = fabsf(dg - db); + const float maxSpread = basisu::maximum(dRG, dRB, dGB); + + if ((early_out) && (maxSpread <= 1.0f)) + return false; + + // Step sizes low->high + const float kr = float(r8_high - r8_low); + const float kg = float(g8_high - g8_low); + const float kb = float(b8_high - b8_low); + + // Precompute constants for cost(mask) = S0 + Lsum - Ksum^2 + const float D = dr + dg + db; + const float S0 = 3.0f * (dr * dr + dg * dg + db * db) - D * D; + + const float Lr = 6.0f * dr * kr + 3.0f * kr * kr - 2.0f * D * kr; + const float Lg = 6.0f * dg * kg + 3.0f * kg * kg - 2.0f * D * kg; + const float Lb = 6.0f * db * kb + 3.0f * kb * kb - 2.0f * D * kb; + + float bestCost = basisu::BIG_FLOAT_VAL; + int bestMask = 0; + + for (int mask = 0; mask < 8; ++mask) + { + const float Ksum = ((mask & 1) ? kr : 0.0f) + ((mask & 2) ? kg : 0.0f) + ((mask & 4) ? kb : 0.0f); + const float Lsum = ((mask & 1) ? Lr : 0.0f) + ((mask & 2) ? Lg : 0.0f) + ((mask & 4) ? Lb : 0.0f); + + const float cost = S0 + Lsum - Ksum * Ksum; + + if (cost < bestCost) + { + bestCost = cost; + bestMask = mask; + } + } + + enc_color[0] = (bestMask & 1) ? r4_high : r4_low; + enc_color[1] = (bestMask & 2) ? g4_high : g4_low; + enc_color[2] = (bestMask & 4) ? 
b4_high : b4_low; + + assert((enc_color[0]) >= 0 && (enc_color[0] <= 15)); + assert((enc_color[1]) >= 0 && (enc_color[1] <= 15)); + assert((enc_color[2]) >= 0 && (enc_color[2] <= 15)); + + return true; + } + + void pack_etc1_solid(uint8_t* pBlock, const color_rgba& color, pack_etc1_state& state, bool init_flag) + { + uint32_t r8 = color[0], g8 = color[1], b8 = color[2]; + //const uint32_t r8 = 0, g8 = 0, b8 = 0; + + if (!init_flag) + { + if ((r8 == g8) && (r8 == b8)) + { + memcpy(pBlock, &g_solid_grayscale_etc1_blocks[r8][0], sizeof(decoder_etc_block)); + return; + } + + if ((state.m_prev_solid_r8 == (int)r8) && (state.m_prev_solid_g8 == (int)g8) && (state.m_prev_solid_b8 == (int)b8)) + { + memcpy(pBlock, &state.m_prev_solid_block, sizeof(decoder_etc_block)); + return; + } + } + + uint32_t best_err = UINT32_MAX; + uint32_t best_mod = 0, best_sel = 0; + uint32_t best4_flag = false; + + const int RW = 2, GW = 4; + + for (uint32_t mod = 0; mod < NUM_SOLID_MODS; mod++) + { + for (uint32_t sel = 0; sel < 4; sel++) + { + uint32_t total_err5 = RW * g_solid8_5_err[r8][mod][sel] + GW * g_solid8_5_err[g8][mod][sel] + g_solid8_5_err[b8][mod][sel]; + if (total_err5 < best_err) + { + best_err = total_err5; + best_mod = mod; + best_sel = sel; + best4_flag = false; + if (!best_err) + goto etc1_solid_done; + } + + uint32_t total_err4 = RW * g_solid8_4_err[r8][mod][sel] + GW * g_solid8_4_err[g8][mod][sel] + g_solid8_4_err[b8][mod][sel]; + if (total_err4 < best_err) + { + best_err = total_err4; + best_mod = mod; + best_sel = sel; + best4_flag = true; + } + + } // sel + } // mod + + etc1_solid_done: + + if (best4_flag) + { + const uint32_t best_base_r4 = g_solid8_4_base[r8][best_mod][best_sel]; + const uint32_t best_base_g4 = g_solid8_4_base[g8][best_mod][best_sel]; + const uint32_t best_base_b4 = g_solid8_4_base[b8][best_mod][best_sel]; + + pBlock[0] = (uint8_t)(best_base_r4 | (best_base_r4 << 4)); + pBlock[1] = (uint8_t)(best_base_g4 | (best_base_g4 << 4)); + pBlock[2] = 
(uint8_t)(best_base_b4 | (best_base_b4 << 4)); + } + else + { + pBlock[0] = (uint8_t)(g_solid8_5_base[r8][best_mod][best_sel] << 3); + pBlock[1] = (uint8_t)(g_solid8_5_base[g8][best_mod][best_sel] << 3); + pBlock[2] = (uint8_t)(g_solid8_5_base[b8][best_mod][best_sel] << 3); + } + + const uint32_t flip = 0; + const uint32_t diff = (best4_flag == 0); + pBlock[3] = (uint8_t)(flip | (diff << 1) | (best_mod << 5) | (best_mod << 2)); + + const uint32_t etc1_sels = g_selector_index_to_etc1[best_sel]; + + const uint8_t lb = (etc1_sels & 2) ? 0xFF : 0; + pBlock[4] = lb; + pBlock[5] = lb; + + const uint8_t hb = (etc1_sels & 1) ? 0xFF : 0; + pBlock[6] = hb; + pBlock[7] = hb; + + state.m_prev_solid_r8 = r8; + state.m_prev_solid_g8 = g8; + state.m_prev_solid_b8 = b8; + memcpy(&state.m_prev_solid_block, pBlock, sizeof(decoder_etc_block)); + } + + static const uint8_t s_vi[16] = { 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 }; + static const uint8_t s_hi[16] = { 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3 }; + + static const uint8_t s_subsets[2][16] = + { + { 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 }, + { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 } + }; + + // [flip][subblock][sel][l/h] + static const uint16_t s_sel_bitmasks[2 * 2 * 4][2] = + { + // flip=0, subblock=0, sels=0-3 + { 0xff, 0xff }, + { 0x0, 0xff }, + { 0x0, 0x0 }, + { 0xff, 0x0 }, + + // flip=0, subblock=1, sels=0-3 + { 0xff00, 0xff00 }, + { 0x0, 0xff00 }, + { 0x0, 0x0 }, + { 0xff00, 0x0 }, + + // flip=1, subblock=0, sels=0-3 + { 0x3333, 0x3333 }, + { 0x0, 0x3333 }, + { 0x0, 0x0 }, + { 0x3333, 0x0 }, + + // flip=1, subblock=1, sels=0-3 + { 0xcccc, 0xcccc }, + { 0x0, 0xcccc }, + { 0x0, 0x0 }, + { 0xcccc, 0x0 } + }; + + void pack_etc1_solid_subblocks(uint8_t* pBlock, const color_rgba* pPixels, const color_rgba subblock_means[2], uint32_t flip) + { + (void)pPixels; + + uint32_t best_mod5[2] = {}, best_sel5[2] = {}; + uint32_t best_base5[2][3] = {}; + uint32_t best_err5[2] = { UINT32_MAX, UINT32_MAX }; 
+ + uint32_t best_mod4[2] = {}, best_sel4[2] = {}; + uint32_t best_base4[2][3] = {}; + uint32_t best_err4[2] = { UINT32_MAX, UINT32_MAX }; + + const int RW = 2, GW = 4; + + for (uint32_t t = 0; t < 2; t++) + { + const uint32_t r8 = subblock_means[t][0]; + const uint32_t g8 = subblock_means[t][1]; + const uint32_t b8 = subblock_means[t][2]; + + const uint8_t* pR5 = &g_solid8_5_err[r8][0][0]; + const uint8_t* pG5 = &g_solid8_5_err[g8][0][0]; + const uint8_t* pB5 = &g_solid8_5_err[b8][0][0]; + + const uint8_t* pR4 = &g_solid8_4_err[r8][0][0]; + const uint8_t* pG4 = &g_solid8_4_err[g8][0][0]; + const uint8_t* pB4 = &g_solid8_4_err[b8][0][0]; + + for (uint32_t mod = 0; mod < NUM_SOLID_MODS; mod++) + { + const uint32_t mod4 = mod << 2; + + const uint32_t total_err5_0 = ((RW * pR5[0] + GW * pG5[0] + pB5[0]) << 5) + (mod4 + 0); + const uint32_t total_err5_1 = ((RW * pR5[1] + GW * pG5[1] + pB5[1]) << 5) + (mod4 + 1); + const uint32_t total_err5_2 = ((RW * pR5[2] + GW * pG5[2] + pB5[2]) << 5) + (mod4 + 2); + const uint32_t total_err5_3 = ((RW * pR5[3] + GW * pG5[3] + pB5[3]) << 5) + (mod4 + 3); + + best_err5[t] = basisu::minimum(best_err5[t], basisu::minimum(total_err5_0, total_err5_1), basisu::minimum(total_err5_2, total_err5_3)); + + const uint32_t total_err4_0 = ((RW * pR4[0] + GW * pG4[0] + pB4[0]) << 5) + (mod4 + 0); + const uint32_t total_err4_1 = ((RW * pR4[1] + GW * pG4[1] + pB4[1]) << 5) + (mod4 + 1); + const uint32_t total_err4_2 = ((RW * pR4[2] + GW * pG4[2] + pB4[2]) << 5) + (mod4 + 2); + const uint32_t total_err4_3 = ((RW * pR4[3] + GW * pG4[3] + pB4[3]) << 5) + (mod4 + 3); + + best_err4[t] = basisu::minimum(best_err4[t], basisu::minimum(total_err4_0, total_err4_1), basisu::minimum(total_err4_2, total_err4_3)); + + pR5 += 4; pG5 += 4; pB5 += 4; + pR4 += 4; pG4 += 4; pB4 += 4; + } // mod + + best_mod5[t] = (best_err5[t] >> 2) & 7; + best_sel5[t] = best_err5[t] & 3; + best_err5[t] >>= 5; + + best_mod4[t] = (best_err4[t] >> 2) & 7; + best_sel4[t] = best_err4[t] & 
3; + best_err4[t] >>= 5; + + best_base5[t][0] = g_solid8_5_base[r8][best_mod5[t]][best_sel5[t]]; + best_base5[t][1] = g_solid8_5_base[g8][best_mod5[t]][best_sel5[t]]; + best_base5[t][2] = g_solid8_5_base[b8][best_mod5[t]][best_sel5[t]]; + + best_base4[t][0] = g_solid8_4_base[r8][best_mod4[t]][best_sel4[t]]; + best_base4[t][1] = g_solid8_4_base[g8][best_mod4[t]][best_sel4[t]]; + best_base4[t][2] = g_solid8_4_base[b8][best_mod4[t]][best_sel4[t]]; + + } // t + + uint32_t total_err4 = best_err4[0] + best_err4[1]; + uint32_t total_err5 = best_err5[0] + best_err5[1]; + + bool use_abs = false; + if (total_err4 < total_err5) + { + use_abs = true; + } + else + { + int delta_r = best_base5[1][0] - best_base5[0][0]; + int delta_g = best_base5[1][1] - best_base5[0][1]; + int delta_b = best_base5[1][2] - best_base5[0][2]; + + if ((delta_r < -4) || (delta_r > 3) || + (delta_g < -4) || (delta_g > 3) || + (delta_b < -4) || (delta_b > 3)) + { + use_abs = true; + } + } + + uint32_t* pBest_sels; + + if (use_abs) + { + pBlock[0] = (uint8_t)(best_base4[1][0] | (best_base4[0][0] << 4)); + pBlock[1] = (uint8_t)(best_base4[1][1] | (best_base4[0][1] << 4)); + pBlock[2] = (uint8_t)(best_base4[1][2] | (best_base4[0][2] << 4)); + + const uint32_t diff = false; + pBlock[3] = (uint8_t)(flip | (diff << 1) | (best_mod4[0] << 5) | (best_mod4[1] << 2)); + + pBest_sels = best_sel4; + } + else + { + const int delta_r = (best_base5[1][0] - best_base5[0][0]) & 7; + const int delta_g = (best_base5[1][1] - best_base5[0][1]) & 7; + const int delta_b = (best_base5[1][2] - best_base5[0][2]) & 7; + + pBlock[0] = (uint8_t)(delta_r | (best_base5[0][0] << 3)); + pBlock[1] = (uint8_t)(delta_g | (best_base5[0][1] << 3)); + pBlock[2] = (uint8_t)(delta_b | (best_base5[0][2] << 3)); + + const uint32_t diff = 1; + pBlock[3] = (uint8_t)(flip | (diff << 1) | (best_mod5[0] << 5) | (best_mod5[1] << 2)); + + pBest_sels = best_sel5; + } + + uint16_t l_bitmask = 0, h_bitmask = 0; + + for (uint32_t subblock = 0; subblock < 
2; subblock++) + { + uint32_t best_etc1_sel = pBest_sels[subblock]; + + l_bitmask |= s_sel_bitmasks[flip * 8 + subblock * 4 + best_etc1_sel][0]; + h_bitmask |= s_sel_bitmasks[flip * 8 + subblock * 4 + best_etc1_sel][1]; + } + + pBlock[7] = (uint8_t)(l_bitmask); + pBlock[6] = (uint8_t)(l_bitmask >> 8); + pBlock[5] = (uint8_t)(h_bitmask); + pBlock[4] = (uint8_t)(h_bitmask >> 8); + } + + //------------------------------------------ + + void pack_etc1(uint8_t* pBlock, const color_rgba* pPixels, pack_etc1_state& state) + { + { + // Solid block check, ignoring alpha. + const uint32_t fc = *(const uint32_t*)&pPixels[0] & BASISD_COLOR_RGBA_RGB_MASK; + + if (fc == (*(const uint32_t*)&pPixels[15] & BASISD_COLOR_RGBA_RGB_MASK)) + { + int k; + for (k = 1; k < 15; k++) + if ((*(const uint32_t*)&pPixels[k] & BASISD_COLOR_RGBA_RGB_MASK) != fc) + break; + + if (k == 15) + { + pack_etc1_solid(pBlock, pPixels[0], state, false); + return; + } + } + } + + // [0]=left, [1]=right, [2]=top, [3]=bottom + int accum_y[4] = { 0 }, accum_y2[4] = { 0 }, accum_c2[4] = { 0 }; + int total_c2 = 0, max_c2 = 0; + + for (uint32_t i = 0; i < 16; i++) + { + int r = pPixels[i].r, g = pPixels[i].g, b = pPixels[i].b; + int rg = r - g, bg = b - g; + + int y = (r + g + b + 1) / 3; + int y2 = y * y, c2 = rg * rg + bg * bg; + + total_c2 += c2; + max_c2 = basisu::maximum(max_c2, c2); + + const int vi = s_vi[i], hi = s_hi[i]; + + accum_y[vi] += y; + accum_y2[vi] += y2; + accum_c2[vi] += c2; + + accum_y[hi] += y; + accum_y2[hi] += y2; + accum_c2[hi] += c2; + + } // i + +#if 1 + // sqrt(300/16)=~4.33 + const int CHROMA_ENERGY_SUM_THRESH = 300; + const int CHROMA_ENERGY_MAX_THRESH = 32; + if ((total_c2 < CHROMA_ENERGY_SUM_THRESH) && (max_c2 < CHROMA_ENERGY_MAX_THRESH)) + { + //memset(pBlock, 0, 8); + //return; + + uint8_t y_pixels[16]; + if (total_c2 == 0) + { + for (uint32_t i = 0; i < 16; i++) + y_pixels[i] = pPixels[i].r; + } + else + { + for (uint32_t i = 0; i < 16; i++) + y_pixels[i] = 
(uint8_t)pPixels[i].get_709_luma(); + } + pack_etc1_grayscale(pBlock, y_pixels, state); + return; + } +#endif + + int var_y_scaled[4]; // scaled by x64 (8*8) + for (uint32_t i = 0; i < 4; i++) + var_y_scaled[i] = basisu::maximum(0, (accum_y2[i] << 3) - (accum_y[i] * accum_y[i])); // max not needed + + float std_luma[4], std_chroma[4]; + for (uint32_t i = 0; i < 4; i++) + { + std_luma[i] = sqrtf((float)var_y_scaled[i] * (1.0f / 64.0f)); + std_chroma[i] = sqrtf((float)accum_c2[i] * (1.0f / 8.0f)); + } + + const float LUMA_SCALE = 2, CHROMA_SCALE = 1; + float flip0_score = (std_luma[0] + std_luma[1]) * LUMA_SCALE + (std_chroma[0] + std_chroma[1]) * CHROMA_SCALE; + float flip1_score = (std_luma[2] + std_luma[3]) * LUMA_SCALE + (std_chroma[2] + std_chroma[3]) * CHROMA_SCALE; + + const uint32_t flip = flip1_score < flip0_score; + + int var8_y[2] = {}, mean8_y[2] = {}, mean8_r[2] = {}, mean8_g[2] = {}, mean8_b[2] = {}; + int min_y[2] = { INT_MAX, INT_MAX }, max_y[2] = { INT_MIN, INT_MIN }; + + for (uint32_t i = 0; i < 16; i++) + { + const int r = pPixels[i].r, g = pPixels[i].g, b = pPixels[i].b; + const int y = (r + g + b + 1) / 3; + + const uint32_t s = s_subsets[flip][i]; + + var8_y[s] += y * y; + mean8_y[s] += y; + + mean8_r[s] += r; + mean8_g[s] += g; + mean8_b[s] += b; + + min_y[s] = basisu::minimum(min_y[s], y); + max_y[s] = basisu::maximum(max_y[s], y); + } + + //memset(pBlock, 0, sizeof(etc_block)); + //return; + + if (((max_y[0] - min_y[0]) < 8) && ((max_y[1] - min_y[1]) < 8)) + { + color_rgba subblock_means[2] = { + color_rgba((mean8_r[0] + 4) / 8, (mean8_g[0] + 4) / 8, (mean8_b[0] + 4) / 8, 255), + color_rgba((mean8_r[1] + 4) / 8, (mean8_g[1] + 4) / 8, (mean8_b[1] + 4) / 8, 255), + }; + + if (subblock_means[0] == subblock_means[1]) + pack_etc1_solid(pBlock, subblock_means[0], state, false); + else + pack_etc1_solid_subblocks(pBlock, pPixels, subblock_means, flip); + + return; + } + + //memset(pBlock, 0, sizeof(etc_block)); + //return; + + int half_span8_y[2]; + 
float stddev_y[2]; + + for (uint32_t i = 0; i < 2; i++) + { + var8_y[i] = basisu::maximum(0, (var8_y[i] << 3) - mean8_y[i] * mean8_y[i]); + stddev_y[i] = std::sqrt(static_cast(var8_y[i])) * (1.0f / 8.0f); + + mean8_y[i] = (mean8_y[i] + 4) >> 3; + + mean8_r[i] = (mean8_r[i] + 4) >> 3; + mean8_g[i] = (mean8_g[i] + 4) >> 3; + mean8_b[i] = (mean8_b[i] + 4) >> 3; + + half_span8_y[i] = basisu::maximum(max_y[i] - mean8_y[i], mean8_y[i] - min_y[i]); + } + + int stddev[2] = + { + basisu::clamp((int)ceilf(9.0f * (stddev_y[0] / (float)basisu::maximum(1, half_span8_y[0]))) - 1, 0, 7), + basisu::clamp((int)ceilf(9.0f * (stddev_y[1] / (float)basisu::maximum(1, half_span8_y[1]))) - 1, 0, 7) + }; + + uint32_t mod_tab[2]; + for (uint32_t i = 0; i < 2; i++) + mod_tab[i] = g_etc1_mod_tabs[basisu::clamp(half_span8_y[i], 1, 255)][stddev[i]]; + + int mean5_r[2], mean5_g[2], mean5_b[2]; + + for (uint32_t i = 0; i < 2; i++) + { +#if 1 + corr_round_555(mean8_r[i], mean8_g[i], mean8_b[i], mean5_r[i], mean5_g[i], mean5_b[i]); +#else + mean5_r[i] = g_nearest5[mean8_r[i]]; + mean5_g[i] = g_nearest5[mean8_g[i]]; + mean5_b[i] = g_nearest5[mean8_b[i]]; +#endif + } + + int delta5_r = mean5_r[1] - mean5_r[0]; + int delta5_g = mean5_g[1] - mean5_g[0]; + int delta5_b = mean5_b[1] - mean5_b[0]; + + const uint32_t z = (delta5_r + 4) | (delta5_g + 4) | (delta5_b + 4); + bool use_abs_colors4 = z > 7; + + if (!use_abs_colors4) + { + assert((delta5_r >= -4) && (delta5_r <= 3)); + assert((delta5_g >= -4) && (delta5_g <= 3)); + assert((delta5_b >= -4) && (delta5_b <= 3)); + } + + if (use_abs_colors4) + { + int mean4_r[2], mean4_g[2], mean4_b[2]; + + for (uint32_t i = 0; i < 2; i++) + { +#if 1 + corr_round_444(mean8_r[i], mean8_g[i], mean8_b[i], mean4_r[i], mean4_g[i], mean4_b[i]); +#else + mean4_r[i] = g_nearest4[mean8_r[i]]; + mean4_g[i] = g_nearest4[mean8_g[i]]; + mean4_b[i] = g_nearest4[mean8_b[i]]; +#endif + } // i + + pBlock[0] = (uint8_t)(mean4_r[1] | (mean4_r[0] << 4)); + pBlock[1] = 
(uint8_t)(mean4_g[1] | (mean4_g[0] << 4)); + pBlock[2] = (uint8_t)(mean4_b[1] | (mean4_b[0] << 4)); + + const uint32_t diff = 0; + pBlock[3] = (uint8_t)(flip | (diff << 1) | (mod_tab[0] << 5) | (mod_tab[1] << 2)); + } + else + { + pBlock[0] = (uint8_t)((delta5_r & 7) | (mean5_r[0] << 3)); + pBlock[1] = (uint8_t)((delta5_g & 7) | (mean5_g[0] << 3)); + pBlock[2] = (uint8_t)((delta5_b & 7) | (mean5_b[0] << 3)); + + const uint32_t diff = 1; + pBlock[3] = (uint8_t)(flip | (diff << 1) | (mod_tab[0] << 5) | (mod_tab[1] << 2)); + } + + uint16_t l_bitmask = 0; + uint16_t h_bitmask = 0; + + static const uint8_t s_tran[4] = { 1, 0, 2, 3 }; + + color_rgba subblock_colors[2][4]; + get_block_colors(pBlock, &subblock_colors[0][0], &subblock_colors[1][0]); + + for (uint32_t subblock = 0; subblock < 2; subblock++) + { + const color_rgba* block_colors = &subblock_colors[subblock][0]; + + uint32_t block_y[4]; + for (uint32_t i = 0; i < 4; i++) + block_y[i] = block_colors[i][0] * 54 + block_colors[i][1] * 183 + block_colors[i][2] * 19; + + const uint32_t block_y01 = block_y[0] + block_y[1]; + const uint32_t block_y12 = block_y[1] + block_y[2]; + const uint32_t block_y23 = block_y[2] + block_y[3]; + + if (flip) + { + uint32_t ofs = subblock * 2; + + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const color_rgba& c = pPixels[x + (subblock * 2 + y) * 4]; + const uint32_t l = c[0] * 108 + c[1] * 366 + c[2] * 38; + + uint32_t t = s_tran[(l < block_y01) + (l < block_y12) + (l < block_y23)]; + + assert(ofs < 16); + l_bitmask |= ((t & 1) << ofs); + h_bitmask |= ((t >> 1) << ofs); + ofs += 4; + } + + ofs = (int)ofs + 1 - 4 * 4; + } + } + else + { + uint32_t ofs = (subblock * 2) * 4; + for (uint32_t x = 0; x < 2; x++) + { + for (uint32_t y = 0; y < 4; y++) + { + const color_rgba& c = pPixels[subblock * 2 + x + y * 4]; + const uint32_t l = c[0] * 108 + c[1] * 366 + c[2] * 38; + + uint32_t t = s_tran[(l < block_y01) + (l < block_y12) + (l < block_y23)]; + + 
assert(ofs < 16); + l_bitmask |= ((t & 1) << ofs); + h_bitmask |= ((t >> 1) << ofs); + ++ofs; + } + } + } + + pBlock[7] = (uint8_t)(l_bitmask); + pBlock[6] = (uint8_t)(l_bitmask >> 8); + pBlock[5] = (uint8_t)(h_bitmask); + pBlock[4] = (uint8_t)(h_bitmask >> 8); + + } // subblock + } + + void pack_etc1_grayscale_solid_subblocks(uint8_t* pBlock, const uint8_t* pPixels, const uint8_t subblock_means[2], uint32_t flip) + { + (void)pPixels; + + uint32_t best_mod5[2] = {}, best_sel5[2] = {}; + uint32_t best_base5[2] = {}; + uint32_t best_err5[2] = { UINT32_MAX, UINT32_MAX }; + + uint32_t best_mod4[2] = {}, best_sel4[2] = {}; + uint32_t best_base4[2] = {}; + uint32_t best_err4[2] = { UINT32_MAX, UINT32_MAX }; + + for (uint32_t t = 0; t < 2; t++) + { + const uint32_t y8 = subblock_means[t]; + + const uint8_t* pY5 = &g_solid8_5_err[y8][0][0]; + const uint8_t* pY4 = &g_solid8_4_err[y8][0][0]; + + for (uint32_t mod = 0; mod < NUM_SOLID_MODS; mod++) + { + const uint32_t mod4 = mod << 2; + + const uint32_t total_err5_0 = (pY5[0] << 5) + (mod4 + 0); + const uint32_t total_err5_1 = (pY5[1] << 5) + (mod4 + 1); + const uint32_t total_err5_2 = (pY5[2] << 5) + (mod4 + 2); + const uint32_t total_err5_3 = (pY5[3] << 5) + (mod4 + 3); + + best_err5[t] = basisu::minimum(best_err5[t], basisu::minimum(total_err5_0, total_err5_1), basisu::minimum(total_err5_2, total_err5_3)); + + const uint32_t total_err4_0 = (pY4[0] << 5) + (mod4 + 0); + const uint32_t total_err4_1 = (pY4[1] << 5) + (mod4 + 1); + const uint32_t total_err4_2 = (pY4[2] << 5) + (mod4 + 2); + const uint32_t total_err4_3 = (pY4[3] << 5) + (mod4 + 3); + + best_err4[t] = basisu::minimum(best_err4[t], basisu::minimum(total_err4_0, total_err4_1), basisu::minimum(total_err4_2, total_err4_3)); + + pY5 += 4; + pY4 += 4; + } // mod + + best_mod5[t] = (best_err5[t] >> 2) & 7; + best_sel5[t] = best_err5[t] & 3; + best_err5[t] >>= 5; + + best_mod4[t] = (best_err4[t] >> 2) & 7; + best_sel4[t] = best_err4[t] & 3; + best_err4[t] >>= 5; + + 
best_base5[t] = g_solid8_5_base[y8][best_mod5[t]][best_sel5[t]]; + + best_base4[t] = g_solid8_4_base[y8][best_mod4[t]][best_sel4[t]]; + + } // t + + uint32_t total_err4 = best_err4[0] + best_err4[1]; + uint32_t total_err5 = best_err5[0] + best_err5[1]; + + bool use_abs = false; + if (total_err4 < total_err5) + { + use_abs = true; + } + else + { + int delta_y = best_base5[1] - best_base5[0]; + + if ((delta_y < -4) || (delta_y > 3)) + { + use_abs = true; + } + } + + uint32_t* pBest_sels; + + if (use_abs) + { + pBlock[0] = pBlock[1] = pBlock[2] = (uint8_t)(best_base4[1] | (best_base4[0] << 4)); + + const uint32_t diff = false; + pBlock[3] = (uint8_t)(flip | (diff << 1) | (best_mod4[0] << 5) | (best_mod4[1] << 2)); + + pBest_sels = best_sel4; + } + else + { + const int delta_y = (best_base5[1] - best_base5[0]) & 7; + + pBlock[0] = pBlock[1] = pBlock[2] = (uint8_t)(delta_y | (best_base5[0] << 3)); + + const uint32_t diff = 1; + pBlock[3] = (uint8_t)(flip | (diff << 1) | (best_mod5[0] << 5) | (best_mod5[1] << 2)); + + pBest_sels = best_sel5; + } + + uint16_t l_bitmask = 0, h_bitmask = 0; + + for (uint32_t subblock = 0; subblock < 2; subblock++) + { + uint32_t best_etc1_sel = pBest_sels[subblock]; + + l_bitmask |= s_sel_bitmasks[flip * 8 + subblock * 4 + best_etc1_sel][0]; + h_bitmask |= s_sel_bitmasks[flip * 8 + subblock * 4 + best_etc1_sel][1]; + } + + pBlock[7] = (uint8_t)(l_bitmask); + pBlock[6] = (uint8_t)(l_bitmask >> 8); + pBlock[5] = (uint8_t)(h_bitmask); + pBlock[4] = (uint8_t)(h_bitmask >> 8); + } + + void pack_etc1_grayscale(uint8_t* pBlock, const uint8_t* pPixels, pack_etc1_state& state) + { + (void)state; + + const uint8_t fc = pPixels[0]; + + if (fc == pPixels[15]) + { + int k; + for (k = 1; k < 15; k++) + if (pPixels[k] != fc) + break; + + if (k == 15) + { + memcpy(pBlock, &g_solid_grayscale_etc1_blocks[fc][0], sizeof(decoder_etc_block)); + return; + } + } + + int accum_y[4] = { 0 }, accum_y2[4] = { 0 }; + + for (uint32_t i = 0; i < 16; i++) + { + int y = 
pPixels[i]; + int y2 = y * y; + + const int vi = s_vi[i], hi = s_hi[i]; + + accum_y[vi] += y; + accum_y2[vi] += y2; + + accum_y[hi] += y; + accum_y2[hi] += y2; + + } // i + + int var_y_scaled[4]; // scaled by x64 (8*8) + for (uint32_t i = 0; i < 4; i++) + var_y_scaled[i] = basisu::maximum(0, (accum_y2[i] << 3) - (accum_y[i] * accum_y[i])); // max not needed + + float std_luma[4]; + for (uint32_t i = 0; i < 4; i++) + std_luma[i] = sqrtf((float)var_y_scaled[i] * (1.0f / 64.0f)); + + float flip0_score = std_luma[0] + std_luma[1]; + float flip1_score = std_luma[2] + std_luma[3]; + + const uint32_t flip = flip1_score < flip0_score; + + int var8_y[2] = {}, mean8_y[2] = {}; + int min_y[2] = { INT_MAX, INT_MAX }, max_y[2] = { INT_MIN, INT_MIN }; + + for (uint32_t i = 0; i < 16; i++) + { + const int y = pPixels[i]; + + const uint32_t s = s_subsets[flip][i]; + + var8_y[s] += y * y; + mean8_y[s] += y; + + min_y[s] = basisu::minimum(min_y[s], y); + max_y[s] = basisu::maximum(max_y[s], y); + } + + if (((max_y[0] - min_y[0]) < 8) && ((max_y[1] - min_y[1]) < 8)) + { + uint8_t subblock_means[2] = { + (uint8_t)((mean8_y[0] + 4) / 8), + (uint8_t)((mean8_y[1] + 4) / 8), + }; + + if (subblock_means[0] == subblock_means[1]) + memcpy(pBlock, &g_solid_grayscale_etc1_blocks[subblock_means[0]][0], sizeof(decoder_etc_block)); + else + pack_etc1_grayscale_solid_subblocks(pBlock, pPixels, subblock_means, flip); + + return; + } + + int half_span8_y[2]; + float stddev_y[2]; + + for (uint32_t i = 0; i < 2; i++) + { + var8_y[i] = basisu::maximum(0, (var8_y[i] << 3) - mean8_y[i] * mean8_y[i]); + stddev_y[i] = std::sqrt(static_cast(var8_y[i])) * (1.0f / 8.0f); + + mean8_y[i] = (mean8_y[i] + 4) >> 3; + + half_span8_y[i] = basisu::maximum(max_y[i] - mean8_y[i], mean8_y[i] - min_y[i]); + } + + int stddev[2] = + { + basisu::clamp((int)ceilf(9.0f * (stddev_y[0] / (float)basisu::maximum(1, half_span8_y[0]))) - 1, 0, 7), + basisu::clamp((int)ceilf(9.0f * (stddev_y[1] / (float)basisu::maximum(1, 
half_span8_y[1]))) - 1, 0, 7) + }; + + uint32_t mod_tab[2]; + for (uint32_t i = 0; i < 2; i++) + mod_tab[i] = etc1f::g_etc1_mod_tabs[basisu::clamp(half_span8_y[i], 1, 255)][stddev[i]]; + + int mean5_y[2]; + + for (uint32_t i = 0; i < 2; i++) + mean5_y[i] = g_nearest5[mean8_y[i]]; + + int delta5_y = mean5_y[1] - mean5_y[0]; + + const uint32_t z = delta5_y + 4; + bool use_abs_colors4 = z > 7; + + if (!use_abs_colors4) + { + assert((delta5_y >= -4) && (delta5_y <= 3)); + } + + if (use_abs_colors4) + { + int mean4_y[2]; + + for (uint32_t i = 0; i < 2; i++) + { + mean4_y[i] = g_nearest4[mean8_y[i]]; + } // i + + pBlock[0] = pBlock[1] = pBlock[2] = (uint8_t)(mean4_y[1] | (mean4_y[0] << 4)); + + const uint32_t diff = 0; + pBlock[3] = (uint8_t)(flip | (diff << 1) | (mod_tab[0] << 5) | (mod_tab[1] << 2)); + } + else + { + pBlock[0] = pBlock[1] = pBlock[2] = (uint8_t)((delta5_y & 7) | (mean5_y[0] << 3)); + + const uint32_t diff = 1; + pBlock[3] = (uint8_t)(flip | (diff << 1) | (mod_tab[0] << 5) | (mod_tab[1] << 2)); + } + + //decoder_etc_block& blk = *(decoder_etc_block*)pBlock; + + uint16_t l_bitmask = 0; + uint16_t h_bitmask = 0; + + static const uint8_t s_tran[4] = { 1, 0, 2, 3 }; + + uint8_t subblock_colors[2][4]; + get_block_colors_y(pBlock, &subblock_colors[0][0], &subblock_colors[1][0]); + + for (uint32_t subblock = 0; subblock < 2; subblock++) + { + const uint8_t* block_y = &subblock_colors[subblock][0]; + + //color_rgba block_colors[4]; + //blk.get_block_colors(block_colors, subblock); + + //uint32_t block_y[4]; + //for (uint32_t i = 0; i < 4; i++) + //block_y[i] = block_colors[i]; + + const uint32_t block_y01 = block_y[0] + block_y[1]; + const uint32_t block_y12 = block_y[1] + block_y[2]; + const uint32_t block_y23 = block_y[2] + block_y[3]; + + if (flip) + { + uint32_t ofs = subblock * 2; + + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint8_t c = pPixels[x + (subblock * 2 + y) * 4]; + const uint32_t l = c * 2; + + uint32_t 
t = s_tran[(l < block_y01) + (l < block_y12) + (l < block_y23)]; + + assert(ofs < 16); + l_bitmask |= ((t & 1) << ofs); + h_bitmask |= ((t >> 1) << ofs); + ofs += 4; + } + + ofs = (int)ofs + 1 - 4 * 4; + } + } + else + { + uint32_t ofs = (subblock * 2) * 4; + for (uint32_t x = 0; x < 2; x++) + { + for (uint32_t y = 0; y < 4; y++) + { + const uint8_t c = pPixels[subblock * 2 + x + y * 4]; + const uint32_t l = c * 2; + + uint32_t t = s_tran[(l < block_y01) + (l < block_y12) + (l < block_y23)]; + + assert(ofs < 16); + l_bitmask |= ((t & 1) << ofs); + h_bitmask |= ((t >> 1) << ofs); + ++ofs; + } + } + } + + pBlock[7] = (uint8_t)(l_bitmask); + pBlock[6] = (uint8_t)(l_bitmask >> 8); + pBlock[5] = (uint8_t)(h_bitmask); + pBlock[4] = (uint8_t)(h_bitmask >> 8); + + } // subblock + } + +} // namespace etc1f + +#endif // BASISD_SUPPORT_XUASTC + +//------------------------------------------------------------------------------------------------ +// XUASTC LDR transcoding +//------------------------------------------------------------------------------------------------ +// XUASTC adaptive deblocking threshold +const int XUASTC_LDR_DEBLOCK_SKIP_THRESH = 24; + +block_format xuastc_get_block_format(transcoder_texture_format tex_fmt) +{ + switch (tex_fmt) + { + case transcoder_texture_format::cTFASTC_LDR_4x4_RGBA: return block_format::cASTC_LDR_4x4; + case transcoder_texture_format::cTFASTC_LDR_5x4_RGBA: return block_format::cASTC_LDR_5x4; + case transcoder_texture_format::cTFASTC_LDR_5x5_RGBA: return block_format::cASTC_LDR_5x5; + case transcoder_texture_format::cTFASTC_LDR_6x5_RGBA: return block_format::cASTC_LDR_6x5; + case transcoder_texture_format::cTFASTC_LDR_6x6_RGBA: return block_format::cASTC_LDR_6x6; + case transcoder_texture_format::cTFASTC_LDR_8x5_RGBA: return block_format::cASTC_LDR_8x5; + case transcoder_texture_format::cTFASTC_LDR_8x6_RGBA: return block_format::cASTC_LDR_8x6; + case transcoder_texture_format::cTFASTC_LDR_10x5_RGBA: return 
block_format::cASTC_LDR_10x5; + case transcoder_texture_format::cTFASTC_LDR_10x6_RGBA: return block_format::cASTC_LDR_10x6; + case transcoder_texture_format::cTFASTC_LDR_8x8_RGBA: return block_format::cASTC_LDR_8x8; + case transcoder_texture_format::cTFASTC_LDR_10x8_RGBA: return block_format::cASTC_LDR_10x8; + case transcoder_texture_format::cTFASTC_LDR_10x10_RGBA: return block_format::cASTC_LDR_10x10; + case transcoder_texture_format::cTFASTC_LDR_12x10_RGBA: return block_format::cASTC_LDR_12x10; + case transcoder_texture_format::cTFASTC_LDR_12x12_RGBA: return block_format::cASTC_LDR_12x12; + default: + break; + } + + assert(0); + return block_format::cASTC_LDR_4x4; +} + +basisu_lowlevel_xuastc_ldr_transcoder::basisu_lowlevel_xuastc_ldr_transcoder() +{ +} + +#if BASISD_SUPPORT_XUASTC +void transcode_4x4_block( + block_format fmt, + uint32_t block_x, uint32_t block_y, + void *pDst_blocks, uint8_t* pDst_block_u8, + const color32* block_pixels, + uint32_t output_block_or_pixel_stride_in_bytes, uint32_t output_row_pitch_in_blocks_or_pixels, uint32_t output_rows_in_pixels, + int channel0, int channel1, + bool high_quality, bool from_alpha, + uint32_t bc7f_flags, + etc1f::pack_etc1_state& etc1_pack_state, + int has_alpha) // has_alpha = -1 unknown, 0=definitely no (a all 255's), 1=potentially yes +{ + BASISU_NOTE_UNUSED(output_block_or_pixel_stride_in_bytes); + + switch (fmt) + { + case block_format::cETC1: + { + assert(output_block_or_pixel_stride_in_bytes == 8); + if (from_alpha) + { + // Annoying overhead + uint8_t alpha_pixels[16]; + for (uint32_t i = 0; i < 16; i++) + alpha_pixels[i] = block_pixels[i].a; + + etc1f::pack_etc1_grayscale(pDst_block_u8, alpha_pixels, etc1_pack_state); + } + else + { + etc1f::pack_etc1(pDst_block_u8, (color_rgba *)block_pixels, etc1_pack_state); + } + break; + } + case block_format::cETC2_RGBA: + { + assert(output_block_or_pixel_stride_in_bytes == 16); + + (high_quality ? 
pack_eac_high_quality : pack_eac)(reinterpret_cast(pDst_block_u8)[0], &block_pixels[0].c[3], sizeof(color32)); + etc1f::pack_etc1(pDst_block_u8 + 8, (color_rgba*)block_pixels, etc1_pack_state); + + break; + } + case block_format::cETC2_EAC_R11: + { + assert(output_block_or_pixel_stride_in_bytes == 8); + + // Pack R by default + if (channel0 < 0) + channel0 = 0; + + (high_quality ? pack_eac_high_quality : pack_eac)(reinterpret_cast(pDst_block_u8)[0], &block_pixels[0].c[channel0], sizeof(color32)); + + break; + } + case block_format::cETC2_EAC_RG11: + { + assert(output_block_or_pixel_stride_in_bytes == 16); + + // Pack RA by default + if (channel0 < 0) + channel0 = 0; + if (channel1 < 0) + channel1 = 3; + + (high_quality ? pack_eac_high_quality : pack_eac)(reinterpret_cast(pDst_block_u8)[0], &block_pixels[0].c[channel0], sizeof(color32)); + (high_quality ? pack_eac_high_quality : pack_eac)(reinterpret_cast(pDst_block_u8)[1], &block_pixels[0].c[channel1], sizeof(color32)); + + break; + } + case block_format::cBC1: + { + assert(output_block_or_pixel_stride_in_bytes == 8); + + encode_bc1(pDst_block_u8, (const uint8_t *)block_pixels, high_quality ? cEncodeBC1HighQuality : 0); + break; + } + case block_format::cBC3: + { + assert(output_block_or_pixel_stride_in_bytes == 16); + + encode_bc4(pDst_block_u8, &block_pixels[0].c[3], sizeof(color32)); + encode_bc1(pDst_block_u8 + 8, (const uint8_t *)block_pixels, high_quality ? 
cEncodeBC1HighQuality : 0); + break; + } + case block_format::cBC4: + { + assert(output_block_or_pixel_stride_in_bytes == 8); + + // Pack R by default + if (channel0 < 0) + channel0 = 0; + + encode_bc4(pDst_block_u8, &block_pixels[0].c[channel0], sizeof(color32)); + break; + } + case block_format::cBC5: + { + assert(output_block_or_pixel_stride_in_bytes == 16); + + // Pack RA by default + if (channel0 < 0) + channel0 = 0; + if (channel1 < 0) + channel1 = 3; + + encode_bc4(pDst_block_u8, &block_pixels[0].c[channel0], sizeof(color32)); + encode_bc4(pDst_block_u8 + 8, &block_pixels[0].c[channel1], sizeof(color32)); + + break; + } + case block_format::cBC7: + { + assert(output_block_or_pixel_stride_in_bytes == 16); + + // 0=definitely no alpha, so skip alpha checks + if (has_alpha == 0) + bc7f::fast_pack_bc7_auto_rgb(pDst_block_u8, (const basist::color_rgba*)block_pixels, bc7f_flags); + else + bc7f::fast_pack_bc7_auto_rgba(pDst_block_u8, (const basist::color_rgba*)block_pixels, bc7f_flags); + + break; + } + case block_format::cRGBA32: + { + assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes); + uint8_t* pDst_pixels = static_cast(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t); + + const uint32_t max_x = basisu::minimum(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum(4, (int)output_rows_in_pixels - (int)block_y * 4); + + if ((max_x == 4) && (max_y == 4)) + { + memcpy(pDst_pixels, block_pixels, 4 * sizeof(color32)); + memcpy(pDst_pixels + output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t) * 1, &block_pixels[1 * 4], 4 * sizeof(color32)); + memcpy(pDst_pixels + output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t) * 2, &block_pixels[2 * 4], 4 * sizeof(color32)); + memcpy(pDst_pixels + output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t) * 3, &block_pixels[3 * 4], 4 * sizeof(color32)); + } + else + { + for (uint32_t y = 0; y < max_y; y++) + 
{ + memcpy(pDst_pixels, &block_pixels[y * 4], max_x * sizeof(color32)); + pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t); + } + } + + break; + } + case block_format::cRGB565: + case block_format::cBGR565: + { + // This writes little endian data always. + assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes); + uint8_t* pDst_pixels = static_cast(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t); + + const uint32_t max_x = basisu::minimum(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum(4, (int)output_rows_in_pixels - (int)block_y * 4); + + for (uint32_t y = 0; y < max_y; y++) + { + for (uint32_t x = 0; x < max_x; x++) + { + const color32& c = block_pixels[y * 4 + x]; + + const uint16_t packed = (fmt == block_format::cRGB565) ? static_cast((mul_8(c.r, 31) << 11) | (mul_8(c.g, 63) << 5) | mul_8(c.b, 31)) : + static_cast((mul_8(c.b, 31) << 11) | (mul_8(c.g, 63) << 5) | mul_8(c.r, 31)); + + pDst_pixels[x * 2 + 0] = (uint8_t)(packed & 0xFF); + pDst_pixels[x * 2 + 1] = (uint8_t)((packed >> 8) & 0xFF); + } + + pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t); + } + + break; + } + case block_format::cRGBA4444: + { + // This writes little endian data always. 
+ assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes); + uint8_t* pDst_pixels = static_cast(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t); + + const uint32_t max_x = basisu::minimum(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum(4, (int)output_rows_in_pixels - (int)block_y * 4); + + for (uint32_t y = 0; y < max_y; y++) + { + for (uint32_t x = 0; x < max_x; x++) + { + const color32& c = block_pixels[y * 4 + x]; + + const uint16_t packed = static_cast((mul_8(c.r, 15) << 12) | (mul_8(c.g, 15) << 8) | (mul_8(c.b, 15) << 4) | mul_8(c.a, 15)); + + pDst_pixels[x * 2 + 0] = (uint8_t)(packed & 0xFF); + pDst_pixels[x * 2 + 1] = (uint8_t)((packed >> 8) & 0xFF); + } + + pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t); + } + break; + } + default: + // Unsupported or invalid format + assert(0); + break; + } +} + +static bool xuastc_deblock_filter( + uint32_t filter_block_width, uint32_t filter_block_height, + const basisu::vector2D &src_img, + basisu::vector2D &dst_img, + bool stronger_filtering, int skip_thresh) +{ + basisu::vector2D temp_img; + if (!temp_img.try_resize(src_img.get_width(), src_img.get_height())) + return false; + + if (stronger_filtering) + skip_thresh *= 2; + + //basisu::fmt_printf("stronger filtering: {}, skip_thread: {}\n", stronger_filtering, skip_thresh); + + temp_img = src_img; + + for (int y = 0; y < (int)src_img.get_height(); y++) + { + for (int x = filter_block_width; x < (int)src_img.get_width(); x += filter_block_width) + { + const color32 &ll = src_img.at_clamped(x - 2, y); + const color32 &l = src_img.at_clamped(x - 1, y); + const color32 &r =src_img.at_clamped(x, y); + const color32 &rr = src_img.at_clamped(x + 1, y); + + if (skip_thresh < 256) + { + bool skip_flag = false; + for (uint32_t c = 0; c < 4; c++) + { + int delta = basisu::iabs((int)l[c] - (int)r[c]); + if (delta > skip_thresh) + { + 
skip_flag = true; + break; + } + } + + if (skip_flag) + continue; + } + + color32 ml, mr; + for (uint32_t c = 0; c < 4; c++) + { + if (stronger_filtering) + { + ml[c] = (3 * l[c] + 2 * r[c] + ll[c] + 3) / 6; + mr[c] = (3 * r[c] + 2 * l[c] + rr[c] + 3) / 6; + } + else + { + ml[c] = (5 * l[c] + 2 * r[c] + ll[c] + 4) / 8; + mr[c] = (5 * r[c] + 2 * l[c] + rr[c] + 4) / 8; + } + } + + temp_img.set_clipped(x - 1, y, ml); + temp_img.set_clipped(x, y, mr); + + } // x + + } // y + + dst_img = temp_img; + + for (int x = 0; x < (int)temp_img.get_width(); x++) + { + for (int y = filter_block_height; y < (int)temp_img.get_height(); y += filter_block_height) + { + const color32 &uu = temp_img.at_clamped(x, y - 2); + const color32 &u = temp_img.at_clamped(x, y - 1); + const color32 &d = temp_img.at_clamped(x, y); + const color32 &dd = temp_img.at_clamped(x, y + 1); + + if (skip_thresh < 256) + { + bool skip_flag = false; + for (uint32_t c = 0; c < 4; c++) + { + int delta = basisu::iabs((int)u[c] - (int)d[c]); + if (delta > skip_thresh) + { + skip_flag = true; + break; + } + } + + if (skip_flag) + continue; + } + + color32 mu, md; + for (uint32_t c = 0; c < 4; c++) + { + if (stronger_filtering) + { + mu[c] = (3 * u[c] + 2 * d[c] + uu[c] + 3) / 6; + md[c] = (3 * d[c] + 2 * u[c] + dd[c] + 3) / 6; + } + else + { + mu[c] = (5 * u[c] + 2 * d[c] + uu[c] + 4) / 8; + md[c] = (5 * d[c] + 2 * u[c] + dd[c] + 4) / 8; + } + } + + dst_img.set_clipped(x, y - 1, mu); + dst_img.set_clipped(x, y, md); + + } // x + + } // y + + return true; +} + +static void xuastc_fixup_pvrtc1_4_modulation_rgb( + const basisu::vector2D& temp_image, + const uint32_t* pPVRTC_endpoints, + void* pDst_blocks, + uint32_t num_blocks_x, uint32_t num_blocks_y, bool from_alpha) +{ + const uint32_t x_mask = num_blocks_x - 1; + const uint32_t y_mask = num_blocks_y - 1; + const uint32_t x_bits = basisu::total_bits(x_mask); + const uint32_t y_bits = basisu::total_bits(y_mask); + const uint32_t min_bits = basisu::minimum(x_bits, 
y_bits); + //const uint32_t max_bits = basisu::maximum(x_bits, y_bits); + const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1; + + uint32_t block_index = 0; + + // really 3x3 + int e0[4][4], e1[4][4]; + + for (int y = 0; y < static_cast(num_blocks_y); y++) + { + const uint32_t* pE_rows[3]; + + for (int ey = 0; ey < 3; ey++) + { + int by = y + ey - 1; + + const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x]; + + pE_rows[ey] = pE; + + for (int ex = 0; ex < 3; ex++) + { + int bx = 0 + ex - 1; + + const uint32_t e = pE[bx & x_mask]; + + e0[ex][ey] = (get_opaque_endpoint_l0(e) * 255) / 31; + e1[ex][ey] = (get_opaque_endpoint_l1(e) * 255) / 31; + } + } + + const uint32_t y_swizzle = (g_pvrtc_swizzle_table[y >> 8] << 16) | g_pvrtc_swizzle_table[y & 0xFF]; + + for (int x = 0; x < static_cast(num_blocks_x); x++, block_index++) + { + color32 block_pixels[4][4]; + temp_image.extract_block_clamped(&block_pixels[0][0], x * 4, y * 4, 4, 4); + + if (from_alpha) + { + // Just set RGB to alpha to avoid adding complexity below. 
+ for (uint32_t i = 0; i < 16; i++) + { + const uint8_t a = ((color32*)block_pixels)[i].a; + ((color32*)block_pixels)[i].set(a, a, a, 255); + } + } + + const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1); + + uint32_t swizzled = x_swizzle | y_swizzle; + if (num_blocks_x != num_blocks_y) + { + swizzled &= swizzle_mask; + + if (num_blocks_x > num_blocks_y) + swizzled |= ((x >> min_bits) << (min_bits * 2)); + else + swizzled |= ((y >> min_bits) << (min_bits * 2)); + } + + pvrtc4_block* pDst_block = static_cast(pDst_blocks) + swizzled; + pDst_block->m_endpoints = pPVRTC_endpoints[block_index]; + + { + const uint32_t ex = 2; + int bx = x + ex - 1; + bx &= x_mask; + +#define BUT_DO_ROW(ey) \ + { \ + const uint32_t e = pE_rows[ey][bx]; \ + e0[ex][ey] = (get_opaque_endpoint_l0(e) * 255) / 31; \ + e1[ex][ey] = (get_opaque_endpoint_l1(e) * 255) / 31; \ + } + + BUT_DO_ROW(0); + BUT_DO_ROW(1); + BUT_DO_ROW(2); +#undef BUT_DO_ROW + } + + uint32_t mod = 0; + +#define BUT_DO_PIX(lx, ly, w0, w1, w2, w3) \ + { \ + int ca_l = a0 * w0 + a1 * w1 + a2 * w2 + a3 * w3; \ + int cb_l = b0 * w0 + b1 * w1 + b2 * w2 + b3 * w3; \ + int cl = (block_pixels[ly][lx].r + block_pixels[ly][lx].g + block_pixels[ly][lx].b) * 16; \ + int dl = cb_l - ca_l; \ + int vl = cl - ca_l; \ + int p = vl * 16; \ + if (ca_l > cb_l) { p = -p; dl = -dl; } \ + uint32_t m = 0; \ + if (p > 3 * dl) m = (uint32_t)(1 << ((ly) * 8 + (lx) * 2)); \ + if (p > 8 * dl) m = (uint32_t)(2 << ((ly) * 8 + (lx) * 2)); \ + if (p > 13 * dl) m = (uint32_t)(3 << ((ly) * 8 + (lx) * 2)); \ + mod |= m; \ + } + + { + const uint32_t ex = 0, ey = 0; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + BUT_DO_PIX(0, 0, 4, 4, 4, 4); + BUT_DO_PIX(1, 0, 2, 6, 2, 6); + BUT_DO_PIX(0, 1, 2, 2, 6, 6); + BUT_DO_PIX(1, 1, 1, 3, 3, 9); + } + + { + const uint32_t 
ex = 1, ey = 0; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + BUT_DO_PIX(2, 0, 8, 0, 8, 0); + BUT_DO_PIX(3, 0, 6, 2, 6, 2); + BUT_DO_PIX(2, 1, 4, 0, 12, 0); + BUT_DO_PIX(3, 1, 3, 1, 9, 3); + } + + { + const uint32_t ex = 0, ey = 1; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + BUT_DO_PIX(0, 2, 8, 8, 0, 0); + BUT_DO_PIX(1, 2, 4, 12, 0, 0); + BUT_DO_PIX(0, 3, 6, 6, 2, 2); + BUT_DO_PIX(1, 3, 3, 9, 1, 3); + } + + { + const uint32_t ex = 1, ey = 1; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + BUT_DO_PIX(2, 2, 16, 0, 0, 0); + BUT_DO_PIX(3, 2, 12, 4, 0, 0); + BUT_DO_PIX(2, 3, 12, 0, 4, 0); + BUT_DO_PIX(3, 3, 9, 3, 3, 1); + } +#undef BUT_DO_PIX + + pDst_block->m_modulation = mod; + + e0[0][0] = e0[1][0]; e0[1][0] = e0[2][0]; + e0[0][1] = e0[1][1]; e0[1][1] = e0[2][1]; + e0[0][2] = e0[1][2]; e0[1][2] = e0[2][2]; + + e1[0][0] = e1[1][0]; e1[1][0] = e1[2][0]; + e1[0][1] = e1[1][1]; e1[1][1] = e1[2][1]; + e1[0][2] = e1[1][2]; e1[1][2] = e1[2][2]; + + } // x + } // y +} + +static void xuastc_fixup_pvrtc1_4_modulation_rgba( + const basisu::vector2D& temp_image, + const uint32_t* pPVRTC_endpoints, + void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y) +{ + const uint32_t x_mask = num_blocks_x - 1; + const uint32_t y_mask = num_blocks_y - 1; + const uint32_t x_bits = basisu::total_bits(x_mask); + const uint32_t y_bits = basisu::total_bits(y_mask); + const uint32_t min_bits = basisu::minimum(x_bits, y_bits); + //const uint32_t max_bits = basisu::maximum(x_bits, y_bits); + const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1; + + 
uint32_t block_index = 0; + + // really 3x3 + int e0[4][4], e1[4][4]; + + for (int y = 0; y < static_cast(num_blocks_y); y++) + { + const uint32_t* pE_rows[3]; + + for (int ey = 0; ey < 3; ey++) + { + int by = y + ey - 1; + + const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x]; + + pE_rows[ey] = pE; + + for (int ex = 0; ex < 3; ex++) + { + int bx = 0 + ex - 1; + + const uint32_t e = pE[bx & x_mask]; + + e0[ex][ey] = get_endpoint_l8(e, 0); + e1[ex][ey] = get_endpoint_l8(e, 1); + } + } + + const uint32_t y_swizzle = (g_pvrtc_swizzle_table[y >> 8] << 16) | g_pvrtc_swizzle_table[y & 0xFF]; + + for (int x = 0; x < static_cast(num_blocks_x); x++, block_index++) + { + color32 block_pixels[4][4]; + temp_image.extract_block_clamped(&block_pixels[0][0], x * 4, y * 4, 4, 4); + + const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1); + + uint32_t swizzled = x_swizzle | y_swizzle; + if (num_blocks_x != num_blocks_y) + { + swizzled &= swizzle_mask; + + if (num_blocks_x > num_blocks_y) + swizzled |= ((x >> min_bits) << (min_bits * 2)); + else + swizzled |= ((y >> min_bits) << (min_bits * 2)); + } + + pvrtc4_block* pDst_block = static_cast(pDst_blocks) + swizzled; + pDst_block->m_endpoints = pPVRTC_endpoints[block_index]; + + { + const uint32_t ex = 2; + int bx = x + ex - 1; + bx &= x_mask; + +#define DO_ROW(ey) \ + { \ + const uint32_t e = pE_rows[ey][bx]; \ + e0[ex][ey] = get_endpoint_l8(e, 0); \ + e1[ex][ey] = get_endpoint_l8(e, 1); \ + } + + DO_ROW(0); + DO_ROW(1); + DO_ROW(2); +#undef DO_ROW + } + + uint32_t mod = 0; + +#define DO_PIX(lx, ly, w0, w1, w2, w3) \ + { \ + int ca_l = a0 * w0 + a1 * w1 + a2 * w2 + a3 * w3; \ + int cb_l = b0 * w0 + b1 * w1 + b2 * w2 + b3 * w3; \ + int cl = 16 * (block_pixels[ly][lx].r + block_pixels[ly][lx].g + block_pixels[ly][lx].b + block_pixels[ly][lx].a); \ + int dl = cb_l - ca_l; \ + int vl = cl - ca_l; \ + int p = vl * 16; \ + if (ca_l > cb_l) { p = -p; dl = -dl; } \ + 
uint32_t m = 0; \ + if (p > 3 * dl) m = (uint32_t)(1 << ((ly) * 8 + (lx) * 2)); \ + if (p > 8 * dl) m = (uint32_t)(2 << ((ly) * 8 + (lx) * 2)); \ + if (p > 13 * dl) m = (uint32_t)(3 << ((ly) * 8 + (lx) * 2)); \ + mod |= m; \ + } + + { + const uint32_t ex = 0, ey = 0; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(0, 0, 4, 4, 4, 4); + DO_PIX(1, 0, 2, 6, 2, 6); + DO_PIX(0, 1, 2, 2, 6, 6); + DO_PIX(1, 1, 1, 3, 3, 9); + } + + { + const uint32_t ex = 1, ey = 0; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(2, 0, 8, 0, 8, 0); + DO_PIX(3, 0, 6, 2, 6, 2); + DO_PIX(2, 1, 4, 0, 12, 0); + DO_PIX(3, 1, 3, 1, 9, 3); + } + + { + const uint32_t ex = 0, ey = 1; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(0, 2, 8, 8, 0, 0); + DO_PIX(1, 2, 4, 12, 0, 0); + DO_PIX(0, 3, 6, 6, 2, 2); + DO_PIX(1, 3, 3, 9, 1, 3); + } + + { + const uint32_t ex = 1, ey = 1; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(2, 2, 16, 0, 0, 0); + DO_PIX(3, 2, 12, 4, 0, 0); + DO_PIX(2, 3, 12, 0, 4, 0); + DO_PIX(3, 3, 9, 3, 3, 1); + } +#undef DO_PIX + + pDst_block->m_modulation = mod; + + e0[0][0] = e0[1][0]; e0[1][0] = e0[2][0]; + e0[0][1] = e0[1][1]; e0[1][1] = e0[2][1]; + e0[0][2] = e0[1][2]; e0[1][2] = e0[2][2]; + + e1[0][0] = e1[1][0]; e1[1][0] = e1[2][0]; + e1[0][1] = e1[1][1]; e1[1][1] = e1[2][1]; + e1[0][2] = e1[1][2]; e1[1][2] = e1[2][2]; + + } // x + } // y +} + +void encode_pvrtc1( + 
block_format fmt, void* pDst_blocks, + const basisu::vector2D &temp_image, + uint32_t dst_num_blocks_x, uint32_t dst_num_blocks_y, bool from_alpha) +{ + assert((fmt == block_format::cPVRTC1_4_RGB) || (fmt == block_format::cPVRTC1_4_RGBA)); + + basisu::vector2D pvrtc1_endpoints(dst_num_blocks_x, dst_num_blocks_y); + + // Determine block endpoints + for (uint32_t dst_by = 0; dst_by < dst_num_blocks_y; dst_by++) + { + for (uint32_t dst_bx = 0; dst_bx < dst_num_blocks_x; dst_bx++) + { + color32 block_pixels[4 * 4]; + + temp_image.extract_block_clamped(block_pixels, dst_bx * 4, dst_by * 4, 4, 4); + + color32 low_color(255, 255, 255, 255), high_color(0, 0, 0, 0); + + for (uint32_t i = 0; i < 16; i++) + { + low_color = color32::comp_min(low_color, block_pixels[i]); + high_color = color32::comp_max(high_color, block_pixels[i]); + } + + if ((fmt == block_format::cPVRTC1_4_RGB) && (from_alpha)) + { + low_color.set(low_color.a, low_color.a, low_color.a, 255); + high_color.set(high_color.a, high_color.a, high_color.a, 255); + } + + pvrtc4_block temp; + if (fmt == block_format::cPVRTC1_4_RGBA) + { + temp.set_endpoint_floor(0, low_color); + temp.set_endpoint_ceil(1, high_color); + } + else + { + temp.set_opaque_endpoint_floor(0, low_color); + temp.set_opaque_endpoint_ceil(1, high_color); + } + + pvrtc1_endpoints(dst_bx, dst_by) = temp.m_endpoints; + } // dst_bx + + } // dst_by + + // Create PVRTC1 texture data. 
+ if (fmt == block_format::cPVRTC1_4_RGBA) + xuastc_fixup_pvrtc1_4_modulation_rgba(temp_image, pvrtc1_endpoints.get_ptr(), pDst_blocks, dst_num_blocks_x, dst_num_blocks_y); + else + xuastc_fixup_pvrtc1_4_modulation_rgb(temp_image, pvrtc1_endpoints.get_ptr(), pDst_blocks, dst_num_blocks_x, dst_num_blocks_y, from_alpha); +} + +#endif // BASISD_SUPPORT_XUASTC + +static inline bool blocks_same_solid_colors(const astc_helpers::log_astc_block& a, const astc_helpers::log_astc_block& b, uint32_t tol) +{ + if ((!a.m_solid_color_flag_ldr) || (!b.m_solid_color_flag_ldr)) + return false; + + if (tol == 0) + { + return (a.m_solid_color[0] == b.m_solid_color[0]) && (a.m_solid_color[1] == b.m_solid_color[1]) && + (a.m_solid_color[2] == b.m_solid_color[2]) && (a.m_solid_color[3] == b.m_solid_color[3]); + } + + for (uint32_t i = 0; i < 4; i++) + { + int ac = a.m_solid_color[i] >> 8; + int bc = b.m_solid_color[i] >> 8; + + const int dl = basisu::iabs((int)ac - (int)bc); + if (dl > (int)tol) + return false; + } + + return true; +} + +static inline bool blocks_same_single_subset_endpoints(const astc_helpers::log_astc_block& a, const astc_helpers::log_astc_block& b, uint32_t tol) +{ + if (a.m_solid_color_flag_ldr || b.m_solid_color_flag_ldr) + return false; + + if (a.m_dual_plane || b.m_dual_plane) + return false; + + if ((a.m_num_partitions > 1) || (b.m_num_partitions > 1)) + return false; + + if (a.m_color_endpoint_modes[0] != b.m_color_endpoint_modes[0]) + return false; + + if (a.m_endpoint_ise_range != b.m_endpoint_ise_range) + return false; + + if (tol > 0) + { + // Compare endpoints with tolerance + color_rgba al, ah; + astc_ldr_t::decode_endpoints(a.m_color_endpoint_modes[0], a.m_endpoints, a.m_endpoint_ise_range, al, ah); + + color_rgba bl, bh; + astc_ldr_t::decode_endpoints(b.m_color_endpoint_modes[0], b.m_endpoints, b.m_endpoint_ise_range, bl, bh); + + for (uint32_t i = 0; i < 4; i++) + { + const int dl = basisu::iabs((int)al[i] - (int)bl[i]); + if (dl > (int)tol) + return 
false; + + const int dh = basisu::iabs((int)ah[i] - (int)bh[i]); + if (dh > (int)tol) + return false; + } + } + else + { + uint32_t total_endpoint_vals = astc_helpers::get_num_cem_values(a.m_color_endpoint_modes[0]); + if (memcmp(a.m_endpoints, b.m_endpoints, total_endpoint_vals) != 0) + return false; + } + + return true; +} + +static inline bool block_has_alpha(const astc_helpers::log_astc_block& a) +{ + if (a.m_solid_color_flag_ldr) + { + return (a.m_solid_color[3] >> 8) != 255; + } + + assert(a.m_num_partitions == 1); + + return astc_helpers::does_cem_have_alpha(a.m_color_endpoint_modes[0]); +} + +static void astc_upsample_grid_weights(const astc_helpers::log_astc_block& log_blk, uint8_t* pDst_weights, uint32_t block_width, uint32_t block_height) +{ + // Skip if solid (which is fine) + if (log_blk.m_solid_color_flag_ldr) + { +#if defined(DEBUG) || defined(_DEBUG) + memset(pDst_weights, 0xFF, block_width * block_height); +#endif + return; + } + + assert((log_blk.m_grid_width <= block_width) && (log_blk.m_grid_height <= block_height)); + + uint8_t dequantized_weights[astc_helpers::MAX_BLOCK_PIXELS]; + + const uint32_t total_weight_vals = log_blk.m_grid_width * log_blk.m_grid_height; + + const astc_helpers::dequant_table& weight_dequant_tab = astc_helpers::g_dequant_tables.get_weight_tab(log_blk.m_weight_ise_range); + const uint8_t* pWeight_dequant = weight_dequant_tab.m_ISE_to_val.data(); + + for (uint32_t i = 0; i < total_weight_vals; i++) + { + assert(log_blk.m_weights[i] < weight_dequant_tab.m_ISE_to_val.size_u32()); + + dequantized_weights[i] = pWeight_dequant[log_blk.m_weights[i]]; + } + + if ((log_blk.m_grid_width < block_width) || (log_blk.m_grid_height < block_height)) + { + astc_helpers::upsample_weight_grid_xuastc_ldr(block_width, block_height, log_blk.m_grid_width, log_blk.m_grid_height, dequantized_weights, pDst_weights, nullptr, nullptr); + } + else + { + memcpy(pDst_weights, dequantized_weights, block_width * block_height); + } +} + +bool 
basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice( + basis_tex_format src_format, bool use_astc_srgb_decode_profile, + void* pDst_blocks, + uint32_t src_num_blocks_x, uint32_t src_num_blocks_y, + const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, + const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels, + basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags) +{ + BASISU_NOTE_UNUSED(pState); + BASISU_NOTE_UNUSED(bc1_allow_threecolor_blocks); + +#if BASISD_SUPPORT_XUASTC + + assert(g_transcoder_initialized); + if (!g_transcoder_initialized) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: Transcoder not globally initialized.\n"); + return false; + } + + if (block_format_is_hdr(fmt)) + { + assert(0); + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: Invalid fmt argument\n"); + return false; + } + + //const uint32_t total_blocks = num_blocks_x * num_blocks_y; + + const uint32_t src_block_width = basis_tex_format_get_block_width(src_format), src_block_height = basis_tex_format_get_block_height(src_format); + + const uint32_t dst_fmt_block_width = get_block_width(fmt), dst_fmt_block_height = get_block_height(fmt); + const bool dst_fmt_is_astc = block_format_is_astc(fmt); + const bool dst_fmt_is_pvrtc1 = (fmt == block_format::cPVRTC1_4_RGB) || (fmt == block_format::cPVRTC1_4_RGBA); + + if (dst_fmt_is_pvrtc1) + { + if (!basisu::is_pow2(orig_width) || !basisu::is_pow2(orig_height)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: PVRTC1 requires power of 2 texture dimensions\n"); + return false; + } + } + + const bool is_uncompressed_fmt = basis_block_format_is_uncompressed(fmt); + if (!output_row_pitch_in_blocks_or_pixels) + { + if 
(is_uncompressed_fmt) + output_row_pitch_in_blocks_or_pixels = orig_width; + else + output_row_pitch_in_blocks_or_pixels = (orig_width + dst_fmt_block_width - 1) / dst_fmt_block_width; + } + + if (is_uncompressed_fmt) + { + if (!output_rows_in_pixels) + output_rows_in_pixels = orig_height; + } + + const bool high_quality = (decode_flags & cDecodeFlagsHighQuality) != 0; + const bool enable_fast_bc7_transcoding = (decode_flags & cDecodeFlagXUASTCLDRDisableFastBC7Transcoding) == 0; + const bool from_alpha = has_alpha && (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0; + const bool disable_deblocking = (decode_flags & cDecodeFlagsNoDeblockFiltering) != 0; + const bool stronger_deblocking = ((decode_flags & cDecodeFlagsStrongerDeblockFiltering) != 0) || ((src_block_width > 8) || (src_block_height > 8)); + const bool force_deblocking = (decode_flags & cDecodeFlagsForceDeblockFiltering) != 0; + const bool deblock_filtering = !disable_deblocking && (force_deblocking || ((src_block_width > 8) || (src_block_height > 6))); + + const uint32_t bc7f_flags = high_quality ? bc7f::cPackBC7FlagDefaultPartiallyAnalytical : bc7f::cPackBC7FlagDefault; + etc1f::pack_etc1_state etc1_pack_state; + + if (basis_tex_format_is_astc_ldr(src_format)) + { + // Plain ASTC LDR 4x4-12x12 - note it could be ANY valid/standard ASTC written by any ASTC encoder, so we cannot trust this ASTC data. + // It must be fully validated. + if (dst_fmt_is_astc) + { + assert(output_block_or_pixel_stride_in_bytes == sizeof(astc_helpers::astc_block)); + + if ((dst_fmt_block_width != src_block_width) || (dst_fmt_block_height != src_block_height)) + { + // ASTC block dimensions must match, i.e. we can't change the ASTC block size during transcoding. + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: fmt's ASTC block dimensions don't match the content's block dimensions\n"); + return false; + } + + // No transcoding needed, it's ASTC in->ASTC out. 
+ memcpy(pDst_blocks, pImage_data, src_num_blocks_x * src_num_blocks_y * sizeof(astc_helpers::astc_block)); + } + else if (((src_block_width == 4) && (src_block_height == 4)) && (!dst_fmt_is_pvrtc1) && (!deblock_filtering)) + { + // Block dimensions aren't changing, no pvrtc1, no deblock filtering + if ((dst_fmt_block_width != 4) || (dst_fmt_block_height != 4)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: fmt's ASTC block dimensions don't match the content's block dimensions\n"); + return false; + } + + const astc_helpers::astc_block* pSrc_phys_blk = (const astc_helpers::astc_block*)pImage_data; + + astc_helpers::log_astc_block log_blk; + + for (uint32_t block_y = 0; block_y < src_num_blocks_y; block_y++) + { + uint8_t* pDst_block_u8 = (uint8_t*)pDst_blocks + block_y * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes; + + for (uint32_t block_x = 0; block_x < src_num_blocks_x; block_x++) + { + color32 block_pixels[4 * 4]; + + bool unpack_status = astc_helpers::unpack_block(pSrc_phys_blk, log_blk, 4, 4); + if (!unpack_status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: astc_helpers::unpack_block() failed\n"); + return false; + } + + // TODO: Specially handle solid block case + bool decode_status = astc_helpers::decode_block(log_blk, block_pixels, 4, 4, use_astc_srgb_decode_profile ? 
astc_helpers::cDecodeModeSRGB8 : astc_helpers::cDecodeModeLDR8); + if (!decode_status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: astc_helpers::decode_block() failed\n"); + return false; + } + + transcode_4x4_block( + fmt, + block_x, block_y, + pDst_blocks, pDst_block_u8, + block_pixels, + output_block_or_pixel_stride_in_bytes, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels, + channel0, channel1, + high_quality, from_alpha, + bc7f_flags, + etc1_pack_state); + + pDst_block_u8 += output_block_or_pixel_stride_in_bytes; + ++pSrc_phys_blk; + + } // block_x + + } // block_y + } + else if ((!deblock_filtering) && (!dst_fmt_is_pvrtc1)) + { + assert((dst_fmt_block_width == 4) && (dst_fmt_block_height == 4)); + + // Compute how many source block rows we need to buffer so we have a multiple of 4 scanlines. The max # of scanlines is 20. + uint32_t num_src_block_rows_to_buffer = 1; + while ((num_src_block_rows_to_buffer * src_block_height) & 3) + num_src_block_rows_to_buffer++; + assert((num_src_block_rows_to_buffer >= 1) && (num_src_block_rows_to_buffer <= 4)); + + // Compute how many 4x4 dest blocks fit into these many source rows. 
+ assert(((num_src_block_rows_to_buffer * src_block_height) & 3) == 0); + //const uint32_t num_dst_block_rows_to_buffer = (num_src_block_rows_to_buffer * src_block_height) >> 2; + + const uint32_t dst_num_blocks_x = (orig_width + dst_fmt_block_width - 1) / dst_fmt_block_width; + const uint32_t dst_num_blocks_y = (orig_height + dst_fmt_block_height - 1) / dst_fmt_block_height; + + basisu::vector2D buffered_rows(src_num_blocks_x * src_block_width, num_src_block_rows_to_buffer * src_block_height); + + const astc_helpers::astc_block* pSrc_phys_blk = (const astc_helpers::astc_block*)pImage_data; + + astc_helpers::log_astc_block log_blk; + + for (uint32_t by = 0; by < src_num_blocks_y; by++) + { + const uint32_t buffered_src_block_row_y = (by % num_src_block_rows_to_buffer); + + for (uint32_t bx = 0; bx < src_num_blocks_x; bx++) + { + color32 block_pixels[astc_helpers::MAX_BLOCK_PIXELS]; + + bool unpack_status = astc_helpers::unpack_block(pSrc_phys_blk, log_blk, src_block_width, src_block_height); + if (!unpack_status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: astc_helpers::unpack_block() failed\n"); + return false; + } + + bool decode_status = astc_helpers::decode_block(log_blk, block_pixels, src_block_width, src_block_height, use_astc_srgb_decode_profile ? 
astc_helpers::cDecodeModeSRGB8 : astc_helpers::cDecodeModeLDR8); + if (!decode_status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: astc_helpers::decode_block() failed\n"); + return false; + } + + color32* pSrc_pixels = block_pixels; + color32* pDst_pixels = &buffered_rows(bx * src_block_width, buffered_src_block_row_y * src_block_height); + + for (uint32_t y = 0; y < src_block_height; y++) + { + memcpy(pDst_pixels, pSrc_pixels, src_block_width * sizeof(color32)); + + pSrc_pixels += src_block_width; + pDst_pixels += buffered_rows.get_width(); + } // y + + ++pSrc_phys_blk; + + } // block_x + + const bool final_src_block_row = (by == (src_num_blocks_y - 1)); + + if ((buffered_src_block_row_y != (num_src_block_rows_to_buffer - 1)) && (!final_src_block_row)) + continue; + + // src/destination image Y coordinate of the top of the buffered rows + const uint32_t buffered_src_pixel_y = ((by / num_src_block_rows_to_buffer) * num_src_block_rows_to_buffer) * src_block_height; + assert((buffered_src_pixel_y & 3) == 0); + + // The total # of valid src block rows we can read. + const uint32_t num_buffered_src_block_rows = buffered_src_block_row_y + 1; + + assert((num_buffered_src_block_rows == num_src_block_rows_to_buffer) || (final_src_block_row)); + + // The maximum number of valid buffer scanlines we can fetch from, taking into account the original texture's actual (unpadded) height. 
+ const uint32_t override_buffer_height = basisu::minimum(orig_height - buffered_src_pixel_y, num_buffered_src_block_rows * src_block_height); + assert(override_buffer_height); + + // total_dst_block_rows_to_emit=really an upper bound for the final row of src ASTC blocks + const uint32_t total_dst_block_rows_to_emit = (num_buffered_src_block_rows * src_block_height + 3) >> 2; + + for (uint32_t dst_ofs_by = 0; dst_ofs_by < total_dst_block_rows_to_emit; dst_ofs_by++) + { + const uint32_t dst_by = (buffered_src_pixel_y >> 2) + dst_ofs_by; + if (dst_by >= dst_num_blocks_y) + break; + + for (uint32_t dst_bx = 0; dst_bx < dst_num_blocks_x; dst_bx++) + { + color32 block_pixels[4 * 4]; + + // Extract the 4x4 block pixels from our buffered rows, taking into account the actual # of valid scanlines inside the buffer. + buffered_rows.extract_block_clamped(block_pixels, dst_bx * 4, dst_ofs_by * 4, 4, 4, override_buffer_height); + + uint8_t* pDst_block_u8 = (uint8_t*)pDst_blocks + (dst_by * output_row_pitch_in_blocks_or_pixels + dst_bx) * output_block_or_pixel_stride_in_bytes; + + transcode_4x4_block( + fmt, + dst_bx, dst_by, + pDst_blocks, pDst_block_u8, + block_pixels, + output_block_or_pixel_stride_in_bytes, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels, + channel0, channel1, + high_quality, from_alpha, + bc7f_flags, + etc1_pack_state); + + } // dst_bx + + } // dst_ofs_by + + } // block_y + + } + else + { + // unpack entire 32bpp image into memory (needed for deblocking and PVRTC1) + // TODO: Add more memory efficient non-deblocking code path + basisu::vector2D temp_image; + + const uint32_t actual_width = src_block_width * src_num_blocks_x, actual_height = src_block_height * src_num_blocks_y; + + if (!temp_image.try_resize(actual_width, actual_height)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: out of memory\n"); + return false; + } + + const astc_helpers::astc_block* pSrc_phys_blk = (const 
astc_helpers::astc_block*)pImage_data; + + astc_helpers::log_astc_block log_blk; + color32 block_pixels[astc_helpers::MAX_BLOCK_PIXELS]; + + for (uint32_t src_by = 0; src_by < src_num_blocks_y; src_by++) + { + const uint32_t img_y = src_by * src_block_height; + + for (uint32_t src_bx = 0; src_bx < src_num_blocks_x; src_bx++) + { + bool unpack_status = astc_helpers::unpack_block(pSrc_phys_blk, log_blk, src_block_width, src_block_height); + if (!unpack_status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: astc_helpers::unpack_block() failed\n"); + return false; + } + + bool decode_status = astc_helpers::decode_block(log_blk, block_pixels, src_block_width, src_block_height, use_astc_srgb_decode_profile ? astc_helpers::cDecodeModeSRGB8 : astc_helpers::cDecodeModeLDR8); + if (!decode_status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: astc_helpers::decode_block() failed\n"); + return false; + } + + color32* pSrc_pixels = (color32*)block_pixels; + color32* pDst_pixels = &temp_image(src_bx * src_block_width, img_y); + + for (uint32_t y = 0; y < src_block_height; y++) + { + memcpy(pDst_pixels, pSrc_pixels, src_block_width * sizeof(color32)); + + pSrc_pixels += src_block_width; + pDst_pixels += temp_image.get_width(); + } // y + + ++pSrc_phys_blk; + + } // src_bx + } // src_by + + if (deblock_filtering) + { + if (!xuastc_deblock_filter( + src_block_width, src_block_height, + temp_image, temp_image, + stronger_deblocking, XUASTC_LDR_DEBLOCK_SKIP_THRESH)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: out of memory\n"); + return false; + } + } + + const uint32_t dst_num_blocks_x = (orig_width + dst_fmt_block_width - 1) / dst_fmt_block_width; + const uint32_t dst_num_blocks_y = (orig_height + dst_fmt_block_height - 1) / dst_fmt_block_height; + + if (dst_fmt_is_pvrtc1) + { + assert((dst_fmt_block_width == 4) && (dst_fmt_block_height == 4)); + + encode_pvrtc1(fmt, 
pDst_blocks, temp_image, dst_num_blocks_x, dst_num_blocks_y, from_alpha); + } + else + { + for (uint32_t dst_by = 0; dst_by < dst_num_blocks_y; dst_by++) + { + uint8_t* pDst_block_u8 = (uint8_t*)pDst_blocks + dst_by * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes; + + for (uint32_t dst_bx = 0; dst_bx < dst_num_blocks_x; dst_bx++) + { + temp_image.extract_block_clamped(block_pixels, dst_bx * 4, dst_by * 4, 4, 4); + + transcode_4x4_block( + fmt, + dst_bx, dst_by, + pDst_blocks, pDst_block_u8, + block_pixels, + output_block_or_pixel_stride_in_bytes, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels, + channel0, channel1, + high_quality, from_alpha, + bc7f_flags, + etc1_pack_state); + + pDst_block_u8 += output_block_or_pixel_stride_in_bytes; + + } // dst_bx + + } // dst_by + + } // if (dst_fmt_is_pvrtc1) + } + } + else + { + // Supercompressed XUASTC LDR 4x4-12x12 + // note use_astc_srgb_decode_profile can be ignored here, we'll use the decoded sRGB profile bit from the compressed stream. + + if (dst_fmt_is_astc) + { + // src and dst are ASTC - ideal case, just pack physical ASTC blocks to output buffer during transcoding. 
+ struct decode_state + { + uint32_t m_src_num_blocks_x; + uint32_t m_src_num_blocks_y; + uint32_t m_dst_format_block_width; + uint32_t m_dst_format_block_height; + + void* m_pDst_blocks; + uint32_t m_output_row_pitch_in_blocks_or_pixels; + uint32_t m_output_block_or_pixel_stride_in_bytes; + }; + + decode_state dec_state; + dec_state.m_src_num_blocks_x = src_num_blocks_x; + dec_state.m_src_num_blocks_y = src_num_blocks_y; + dec_state.m_dst_format_block_width = dst_fmt_block_width; + dec_state.m_dst_format_block_height = dst_fmt_block_height; + dec_state.m_pDst_blocks = pDst_blocks; + dec_state.m_output_row_pitch_in_blocks_or_pixels = output_row_pitch_in_blocks_or_pixels; + dec_state.m_output_block_or_pixel_stride_in_bytes = output_block_or_pixel_stride_in_bytes; + + auto init_func = [](uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t block_width, uint32_t block_height, bool srgb_decode_profile, float dct_q, bool has_alpha, void* pData) + { + BASISU_NOTE_UNUSED(srgb_decode_profile); + BASISU_NOTE_UNUSED(dct_q); + BASISU_NOTE_UNUSED(has_alpha); + + if (basisu::g_debug_printf) + basisu::debug_printf("init_func: %u %u %u %u %u %f %u\n", num_blocks_x, num_blocks_y, block_width, block_height, srgb_decode_profile, dct_q, has_alpha); + + decode_state& state = *(decode_state*)pData; + if ((block_width != state.m_dst_format_block_width) || (block_height != state.m_dst_format_block_height)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: header validation failed (1)\n"); + return false; + } + if ((num_blocks_x != state.m_src_num_blocks_x) || (num_blocks_y != state.m_src_num_blocks_y)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: header validation failed (2)\n"); + return false; + } + return true; + }; + + auto src_block_func = [](uint32_t bx, uint32_t by, const astc_helpers::log_astc_block& log_blk, void* pData) + { + decode_state& state = *(decode_state*)pData; + assert((bx < 
state.m_src_num_blocks_x) && (by < state.m_src_num_blocks_y)); + + astc_helpers::astc_block* pDst_astc_block = (astc_helpers::astc_block*)((uint8_t*)state.m_pDst_blocks + (by * state.m_output_row_pitch_in_blocks_or_pixels + bx) * state.m_output_block_or_pixel_stride_in_bytes); + + bool pack_status = astc_helpers::pack_astc_block(*pDst_astc_block, log_blk); + if (!pack_status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: astc_helpers::pack_astc_block() failed\n"); + return false; + } + + return true; + }; + + xuastc_decoded_image decoded_image; + + const bool decomp_flag = decoded_image.decode(pImage_data, image_data_size, init_func, &dec_state, src_block_func, &dec_state); + if (!decomp_flag) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: astc_ldr_t::decompress_image() failed\n"); + return false; + } + } + else if ((fmt == block_format::cBC7) && (src_block_width == 8) && (src_block_height == 6) && + (enable_fast_bc7_transcoding) && (!high_quality) && (!deblock_filtering)) + { + // src is ASTC LDR 8x6, destination is BC7, no deblocking: buffer 2 rows of ASTC logical blocks, favor fast pure transcode to BC7 whenever possible. 
+ // transcodes 2 ASTC 8x6 blocks (a tile of 1x2 or 2*48=96 pixels) to 6 BC7 blocks (a tile of 2x3 of 6*16=96 pixels) + // no BC7 block crosses more than 2 ASTC blocks making this easy if the source blocks are only solid or 1 subset + + const uint32_t num_src_block_rows_to_buffer = 2; + + assert(((num_src_block_rows_to_buffer* src_block_height) & 3) == 0); + + const uint32_t dst_num_blocks_x = (orig_width + dst_fmt_block_width - 1) / dst_fmt_block_width; + const uint32_t dst_num_blocks_y = (orig_height + dst_fmt_block_height - 1) / dst_fmt_block_height; + + basisu::vector2D buffered_rows(src_num_blocks_x, num_src_block_rows_to_buffer); + + struct decode_state + { + uint32_t m_orig_height; + + uint32_t m_src_num_blocks_x; + uint32_t m_src_num_blocks_y; + uint32_t m_src_block_width; + uint32_t m_src_block_height; + + uint32_t m_dst_num_blocks_x; + uint32_t m_dst_num_blocks_y; + + void* m_pDst_blocks; + uint32_t m_output_row_pitch_in_blocks_or_pixels; + uint32_t m_output_block_or_pixel_stride_in_bytes; + uint32_t m_output_rows_in_pixels; + + uint32_t m_num_src_block_rows_to_buffer; + //uint32_t m_num_dst_block_rows_to_buffer; + + basisu::vector2D* m_pBuffered_rows; + + bool m_used_srgb_astc_decode_mode; + bool m_has_alpha; + + uint32_t m_total_src_blocks_unpacked; + uint32_t m_total_src_blocks_partial_unpacked; + uint32_t m_total_blocks_transcoded; + uint32_t m_total_blocks_encoded; + + block_format m_fmt; + int m_channel0, m_channel1; + bool m_high_quality; + bool m_from_alpha; + uint32_t m_bc7f_flags; + etc1f::pack_etc1_state* m_pEtc1_pack_state; + }; + + decode_state dec_state; + dec_state.m_orig_height = orig_height; + dec_state.m_src_num_blocks_x = src_num_blocks_x; + dec_state.m_src_num_blocks_y = src_num_blocks_y; + dec_state.m_src_block_width = src_block_width; + dec_state.m_src_block_height = src_block_height; + dec_state.m_dst_num_blocks_x = dst_num_blocks_x; + dec_state.m_dst_num_blocks_y = dst_num_blocks_y; + dec_state.m_pDst_blocks = pDst_blocks; + 
dec_state.m_output_row_pitch_in_blocks_or_pixels = output_row_pitch_in_blocks_or_pixels; + dec_state.m_output_block_or_pixel_stride_in_bytes = output_block_or_pixel_stride_in_bytes; + dec_state.m_output_rows_in_pixels = output_rows_in_pixels; + + dec_state.m_num_src_block_rows_to_buffer = num_src_block_rows_to_buffer; + //dec_state.m_num_dst_block_rows_to_buffer = num_dst_block_rows_to_buffer; + + dec_state.m_pBuffered_rows = &buffered_rows; + dec_state.m_used_srgb_astc_decode_mode = false; // will be set by init from the compressed stream's header + dec_state.m_has_alpha = true; // will be set by init from the compressed stream's header + + dec_state.m_total_src_blocks_unpacked = 0; + dec_state.m_total_src_blocks_partial_unpacked = 0; + dec_state.m_total_blocks_transcoded = 0; + dec_state.m_total_blocks_encoded = 0; + + dec_state.m_fmt = fmt; + dec_state.m_channel0 = channel0; + dec_state.m_channel1 = channel1; + dec_state.m_high_quality = high_quality; + dec_state.m_from_alpha = from_alpha; + dec_state.m_bc7f_flags = bc7f_flags; + dec_state.m_pEtc1_pack_state = &etc1_pack_state; + + auto init_func = [](uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t block_width, uint32_t block_height, bool srgb_decode_profile, float dct_q, bool has_alpha, void* pData) + { + BASISU_NOTE_UNUSED(srgb_decode_profile); + BASISU_NOTE_UNUSED(dct_q); + + if (basisu::g_debug_printf) + basisu::debug_printf("init_func: %u %u %u %u %u %f %u\n", num_blocks_x, num_blocks_y, block_width, block_height, srgb_decode_profile, dct_q, has_alpha); + + decode_state& state = *(decode_state*)pData; + if ((block_width != state.m_src_block_width) || (block_height != state.m_src_block_height)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: header validation failed (3)\n"); + return false; + } + if ((num_blocks_x != state.m_src_num_blocks_x) || (num_blocks_y != state.m_src_num_blocks_y)) + { + 
BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: header validation failed (4)\n"); + return false; + } + + state.m_used_srgb_astc_decode_mode = srgb_decode_profile; + state.m_has_alpha = has_alpha; + + return true; + }; + + auto src_block_func = [](uint32_t bx, uint32_t by, const astc_helpers::log_astc_block& log_blk, void* pData) + { + decode_state& state = *(decode_state*)pData; + assert((bx < state.m_src_num_blocks_x) && (by < state.m_src_num_blocks_y)); + assert(state.m_num_src_block_rows_to_buffer == 2); // hardcoded for 6x6 + const astc_helpers::decode_mode astc_dec_mode = state.m_used_srgb_astc_decode_mode ? astc_helpers::cDecodeModeSRGB8 : astc_helpers::cDecodeModeLDR8; + + const uint32_t buffered_src_block_row_y = (by & 1); + + memcpy(&(*state.m_pBuffered_rows)(bx, buffered_src_block_row_y), &log_blk, sizeof(log_blk)); + + // Last block on this src row? If not, exit. + if (bx != (state.m_src_num_blocks_x - 1)) + return true; + + // We've written the final src block for this ASTC src row. + // See if we have enough source rows to create full 4x4 destination blocks. + const bool final_src_block_row = (by == (state.m_src_num_blocks_y - 1)); + + if ((buffered_src_block_row_y != (state.m_num_src_block_rows_to_buffer - 1)) && (!final_src_block_row)) + return true; + + // We have a full 1-2 rows of ASTC 6x6 blocks to process to BC7. + + // src/destination image Y coordinate of the top of the buffered rows + const uint32_t buffered_src_pixel_y = ((by / state.m_num_src_block_rows_to_buffer) * state.m_num_src_block_rows_to_buffer) * state.m_src_block_height; + assert((buffered_src_pixel_y & 3) == 0); + + // The total # of valid src block rows we can read. 
+ const uint32_t num_buffered_src_block_rows = buffered_src_block_row_y + 1; + + assert((num_buffered_src_block_rows == state.m_num_src_block_rows_to_buffer) || (final_src_block_row)); + +#if defined(DEBUG) || defined(_DEBUG) + // The maximum number of valid buffer pixel scanlines we can fetch from, taking into account the original texture's actual (unpadded) height. + const uint32_t override_buffer_height = basisu::minimum(state.m_orig_height - buffered_src_pixel_y, num_buffered_src_block_rows * state.m_src_block_height); + assert(override_buffer_height); +#endif + + // total_dst_block_rows_to_emit=really an upper bound for the final row of src ASTC blocks + const uint32_t total_dst_block_rows_to_emit = (num_buffered_src_block_rows * state.m_src_block_height + 3) >> 2; + + color_rgba unpacked_src_blocks[2][8 * 6]; // [astc_by][pixel] + uint8_t upsampled_src_weights[2][8 * 6]; // [astc_by][pixel] + color_rgba temp_pixels_16[16]; + + // Process each source ASTC 8x6 block group, 1x2 ASTC blocks at a time + for (uint32_t src_bx = 0; src_bx < state.m_src_num_blocks_x; src_bx++) + { + bool has_unpacked_src_blocks[2] = { }; // [astc_by] + + // Grab pointers to the 1x2 src ASTC blocks we'll be transcoding + const astc_helpers::log_astc_block* pSrc_log_blocks[2]; // [astc_by] + + pSrc_log_blocks[0] = &state.m_pBuffered_rows->at(src_bx, 0); + pSrc_log_blocks[1] = &state.m_pBuffered_rows->at(src_bx, basisu::minimum(1, num_buffered_src_block_rows - 1)); + + // From here we can always assume 2x2 src ASTC 6x6 blocks, with ASTC block pointers duplicated at the borders if needed. + + const astc_helpers::log_astc_block* pU = pSrc_log_blocks[0]; + const astc_helpers::log_astc_block* pL = pSrc_log_blocks[1]; + + if (blocks_same_solid_colors(*pU, *pL, 0)) + { + // Easy and fast case: All 2 ASTC blocks are solid and the same color, so all 6 BC7 blocks are solid too and the same color. 
+ color_rgba sc; + sc.r = (uint8_t)(pU->m_solid_color[0] >> 8); + sc.g = (uint8_t)(pU->m_solid_color[1] >> 8); + sc.b = (uint8_t)(pU->m_solid_color[2] >> 8); + sc.a = (uint8_t)(pU->m_solid_color[3] >> 8); + + bc7_block temp_blk; + bc7f::pack_mode5_solid((uint8_t*)&temp_blk, sc); + + for (uint32_t dy = 0; dy < total_dst_block_rows_to_emit; dy++) // up to 3 dst block rows + { + const uint32_t dst_by = (buffered_src_pixel_y >> 2) + dy; + if (dst_by >= state.m_dst_num_blocks_y) + break; + + for (uint32_t dx = 0; dx < 2; dx++) + { + const uint32_t dst_bx = (src_bx << 1) + dx; + if (dst_bx >= state.m_dst_num_blocks_x) + break; + + uint8_t* pDst_block_u8 = (uint8_t*)state.m_pDst_blocks + (dst_by * state.m_output_row_pitch_in_blocks_or_pixels + dst_bx) * state.m_output_block_or_pixel_stride_in_bytes; + + memcpy(pDst_block_u8, &temp_blk, sizeof(bc7_block)); + } // dx + } // dy + + continue; + } + + uint32_t num_hard_src_blocks = 0; + + for (uint32_t y = 0; y < 2; y++) + num_hard_src_blocks += (pSrc_log_blocks[y]->m_dual_plane || (pSrc_log_blocks[y]->m_num_partitions > 1)); + + if (num_hard_src_blocks == 2) + { + // All src blocks hard, no easy optimizations, so unpack and encode pixels analytically + for (uint32_t y = 0; y < 2; y++) + { + bool status = astc_helpers::decode_block_xuastc_ldr(*pSrc_log_blocks[y], &unpacked_src_blocks[y][0], 8, 6, astc_dec_mode); + if (!status) + { + return false; + } + state.m_total_src_blocks_unpacked++; + } + + for (uint32_t dy = 0; dy < total_dst_block_rows_to_emit; dy++) // up to 3 dst block rows + { + const uint32_t dst_by = (buffered_src_pixel_y >> 2) + dy; + if (dst_by >= state.m_dst_num_blocks_y) + break; + + for (uint32_t dx = 0; dx < 2; dx++) + { + const uint32_t dst_bx = (src_bx << 1) + dx; + if (dst_bx >= state.m_dst_num_blocks_x) + break; + + for (uint32_t y = 0; y < 4; y++) + { + const uint32_t sy = dy * 4 + y; + + const uint32_t sy_div6 = sy / 6; + const uint32_t src_row_ofs = (sy % 6) * 8; + + for (uint32_t x = 0; x < 4; x++) + 
{ + const uint32_t sx = dx * 4 + x; + temp_pixels_16[x + y * 4] = unpacked_src_blocks[sy_div6][sx + src_row_ofs]; + } // x + } // y + + uint8_t* pDst_block_u8 = (uint8_t*)state.m_pDst_blocks + (dst_by * state.m_output_row_pitch_in_blocks_or_pixels + dst_bx) * state.m_output_block_or_pixel_stride_in_bytes; + + if (state.m_has_alpha) + bc7f::fast_pack_bc7_auto_rgba(pDst_block_u8, temp_pixels_16, state.m_bc7f_flags); + else + bc7f::fast_pack_bc7_auto_rgb(pDst_block_u8, temp_pixels_16, state.m_bc7f_flags); + + state.m_total_blocks_encoded++; + } // dx + } // dy + + continue; + } + + // One or both of the 2 source blocks is solid or 1 subset. + + // For simplicity: First unpack the first plane weight grids (unless solid) + for (uint32_t y = 0; y < 2; y++) + astc_upsample_grid_weights(*pSrc_log_blocks[y], &upsampled_src_weights[y][0], 8, 6); + + const uint32_t ENDPOINT_TOL = 0; + + // Process each of the up to 6 destination BC7 blocks. + for (uint32_t dy = 0; dy < total_dst_block_rows_to_emit; dy++) // up to 3 dst block rows + { + const uint32_t dst_by = (buffered_src_pixel_y >> 2) + dy; + if (dst_by >= state.m_dst_num_blocks_y) + break; + + for (uint32_t dx = 0; dx < 2; dx++) + { + const uint32_t dst_bx = (src_bx << 1) + dx; + if (dst_bx >= state.m_dst_num_blocks_x) + break; + + uint8_t* pDst_block_u8 = (uint8_t*)state.m_pDst_blocks + (dst_by * state.m_output_row_pitch_in_blocks_or_pixels + dst_bx) * state.m_output_block_or_pixel_stride_in_bytes; + + // BC7 block only overlaps 1 or 2 ASTC blocks. 
+ const int top_dy = dy * 4; + const int bot_dy = top_dy + 3; + + const int top_by = top_dy / 6; + const int bot_by = bot_dy / 6; + + const astc_helpers::log_astc_block* pB0 = pSrc_log_blocks[top_by]; + const astc_helpers::log_astc_block* pB1 = pSrc_log_blocks[bot_by]; + + const bool single_src_block = (top_by == bot_by); + + bool full_encode_flag = false; + + if (pB0->m_dual_plane || pB1->m_dual_plane || + (pB0->m_num_partitions > 1) || (pB1->m_num_partitions > 1)) + { + // Either block is complex, fall back to reencoding + full_encode_flag = true; + } + else if (single_src_block) + { + assert(pB0 == pB1); + + // BC7 block is at a corner, and only overlaps a single ASTC block - output solid or single subset BC7 + if (pB0->m_solid_color_flag_ldr) + { + color_rgba sc; + sc.r = (uint8_t)(pB0->m_solid_color[0] >> 8); + sc.g = (uint8_t)(pB0->m_solid_color[1] >> 8); + sc.b = (uint8_t)(pB0->m_solid_color[2] >> 8); + sc.a = (uint8_t)(pB0->m_solid_color[3] >> 8); + + bc7f::pack_mode5_solid(pDst_block_u8, sc); + } + else + { + // Output mode 6 BC7 + bc7f::pack_from_astc_single_subset(pDst_block_u8, *pB0, &upsampled_src_weights[top_by][0], dx * 4, (dy * 4) % 6, 8, 6); + } + + state.m_total_blocks_transcoded++; + } + // must be overlapping 2 ASTC blocks, can't be both solid as we've already checked + else if (blocks_same_single_subset_endpoints(*pB0, *pB1, ENDPOINT_TOL)) + { + // BC7 blocks overlaps 2 single subset ASTC blocks, both have the same or very similar endpoints, output mode 6 BC7 + bc7f::pack_from_astc_to_single_subset_same_endpoints( + pDst_block_u8, + *pB0, &upsampled_src_weights[top_by][0], + *pB1, &upsampled_src_weights[bot_by][0], + dx, dy, + 8, 6); + + state.m_total_blocks_transcoded++; + } + else if (!block_has_alpha(*pB0) && !block_has_alpha(*pB1)) + { + bool fallback_encode_flag = false; + + // BC7 block overlaps 2 ASTC blocks with different endpoints (or solid colors) - output 2 subset mode 1 BC7 + if 
(!bc7f::pack_from_astc_8x6_to_two_subsets_different_endpoints_hq( + pDst_block_u8, + *pB0, &upsampled_src_weights[top_by][0], + *pB1, &upsampled_src_weights[bot_by][0], + dx, dy, state.m_used_srgb_astc_decode_mode, fallback_encode_flag)) + { + full_encode_flag = true; + } + else + { + if (fallback_encode_flag) + state.m_total_src_blocks_partial_unpacked++; + else + state.m_total_blocks_transcoded++; + } + } + else + { + full_encode_flag = true; + } + + if (full_encode_flag) + { + // one or both ASTC blocks are just too complex, unpack and reencode + if (!has_unpacked_src_blocks[top_by]) + { + bool status = astc_helpers::decode_block_xuastc_ldr(*pB0, &unpacked_src_blocks[top_by][0], 8, 6, astc_dec_mode, &upsampled_src_weights[top_by][0]); + if (!status) + { + return false; + } + state.m_total_src_blocks_unpacked++; + has_unpacked_src_blocks[top_by] = true; + } + + if (!has_unpacked_src_blocks[bot_by]) + { + bool status = astc_helpers::decode_block_xuastc_ldr(*pB1, &unpacked_src_blocks[bot_by][0], 8, 6, astc_dec_mode, &upsampled_src_weights[bot_by][0]); + if (!status) + { + return false; + } + state.m_total_src_blocks_unpacked++; + has_unpacked_src_blocks[bot_by] = true; + } + + for (uint32_t y = 0; y < 4; y++) + { + const uint32_t sy = dy * 4 + y; + + assert(has_unpacked_src_blocks[sy / 6]); + + const uint32_t sy_div6 = sy / 6; + const uint32_t src_row_ofs = (sy % 6) * 8; + + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t sx = dx * 4 + x; + + temp_pixels_16[x + y * 4] = unpacked_src_blocks[sy_div6][sx + src_row_ofs]; + } // x + } // y + + if (state.m_has_alpha) + bc7f::fast_pack_bc7_auto_rgba(pDst_block_u8, temp_pixels_16, state.m_bc7f_flags); + else + bc7f::fast_pack_bc7_auto_rgb(pDst_block_u8, temp_pixels_16, state.m_bc7f_flags); + + state.m_total_blocks_encoded++; + } + + } // dx + } // dy + + } // src_bx + + return true; + }; + + xuastc_decoded_image decoded_image; + + const bool decomp_flag = decoded_image.decode(pImage_data, image_data_size, init_func, 
&dec_state, src_block_func, &dec_state); + if (!decomp_flag) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: astc_ldr_t::decompress_image() failed\n"); + return false; + } + + if (basisu::g_debug_printf) + { + basisu::fmt_debug_printf("Total src blocks: {}, Total src blocks fully unpacked to pixels: {}\n", + dec_state.m_src_num_blocks_x * dec_state.m_src_num_blocks_y, + dec_state.m_total_src_blocks_unpacked); + + basisu::fmt_debug_printf("Total dst blocks: {}, Total blocks transcoded: {}, fully encoded: {}, total partially unpacked/fast mode 6 encoded: {}\n", + dec_state.m_dst_num_blocks_x * dec_state.m_dst_num_blocks_y, + dec_state.m_total_blocks_transcoded, dec_state.m_total_blocks_encoded, + dec_state.m_total_src_blocks_partial_unpacked); + } + + // end of 8x6->4x4 transcoder + } + else if ((fmt == block_format::cBC7) && (src_block_width == 6) && (src_block_height == 6) && + (enable_fast_bc7_transcoding) && (!high_quality) && (!deblock_filtering)) + { + // src is ASTC LDR 6x6, destination is BC7, no deblocking: buffer 2 rows of ASTC logical blocks, favor fast pure transcode to BC7 whenever possible. + // This path is maddeningly tricky, but the speed gains in certain use cases are worth it.
+ + const uint32_t num_src_block_rows_to_buffer = 2; + + assert(((num_src_block_rows_to_buffer * src_block_height) & 3) == 0); + + const uint32_t dst_num_blocks_x = (orig_width + dst_fmt_block_width - 1) / dst_fmt_block_width; + const uint32_t dst_num_blocks_y = (orig_height + dst_fmt_block_height - 1) / dst_fmt_block_height; + + basisu::vector2D buffered_rows(src_num_blocks_x, num_src_block_rows_to_buffer); + + struct decode_state + { + uint32_t m_orig_height; + + uint32_t m_src_num_blocks_x; + uint32_t m_src_num_blocks_y; + uint32_t m_src_block_width; + uint32_t m_src_block_height; + + uint32_t m_dst_num_blocks_x; + uint32_t m_dst_num_blocks_y; + + void* m_pDst_blocks; + uint32_t m_output_row_pitch_in_blocks_or_pixels; + uint32_t m_output_block_or_pixel_stride_in_bytes; + uint32_t m_output_rows_in_pixels; + + uint32_t m_num_src_block_rows_to_buffer; + //uint32_t m_num_dst_block_rows_to_buffer; + + basisu::vector2D *m_pBuffered_rows; + + bool m_used_srgb_astc_decode_mode; + bool m_has_alpha; + + uint32_t m_total_src_blocks_unpacked; + uint32_t m_total_src_blocks_partial_unpacked; + uint32_t m_total_blocks_transcoded; + uint32_t m_total_blocks_encoded; + + block_format m_fmt; + int m_channel0, m_channel1; + bool m_high_quality; + bool m_from_alpha; + uint32_t m_bc7f_flags; + etc1f::pack_etc1_state* m_pEtc1_pack_state; + }; + + decode_state dec_state; + dec_state.m_orig_height = orig_height; + dec_state.m_src_num_blocks_x = src_num_blocks_x; + dec_state.m_src_num_blocks_y = src_num_blocks_y; + dec_state.m_src_block_width = src_block_width; + dec_state.m_src_block_height = src_block_height; + dec_state.m_dst_num_blocks_x = dst_num_blocks_x; + dec_state.m_dst_num_blocks_y = dst_num_blocks_y; + dec_state.m_pDst_blocks = pDst_blocks; + dec_state.m_output_row_pitch_in_blocks_or_pixels = output_row_pitch_in_blocks_or_pixels; + dec_state.m_output_block_or_pixel_stride_in_bytes = output_block_or_pixel_stride_in_bytes; + dec_state.m_output_rows_in_pixels = 
output_rows_in_pixels; + + dec_state.m_num_src_block_rows_to_buffer = num_src_block_rows_to_buffer; + //dec_state.m_num_dst_block_rows_to_buffer = num_dst_block_rows_to_buffer; + + dec_state.m_pBuffered_rows = &buffered_rows; + dec_state.m_used_srgb_astc_decode_mode = false; // will be set by init from the compressed stream's header + dec_state.m_has_alpha = true; // will be set by init from the compressed stream's header + + dec_state.m_total_src_blocks_unpacked = 0; + dec_state.m_total_src_blocks_partial_unpacked = 0; + dec_state.m_total_blocks_transcoded = 0; + dec_state.m_total_blocks_encoded = 0; + + dec_state.m_fmt = fmt; + dec_state.m_channel0 = channel0; + dec_state.m_channel1 = channel1; + dec_state.m_high_quality = high_quality; + dec_state.m_from_alpha = from_alpha; + dec_state.m_bc7f_flags = bc7f_flags; + dec_state.m_pEtc1_pack_state = &etc1_pack_state; + + auto init_func = [](uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t block_width, uint32_t block_height, bool srgb_decode_profile, float dct_q, bool has_alpha, void* pData) + { + BASISU_NOTE_UNUSED(srgb_decode_profile); + BASISU_NOTE_UNUSED(dct_q); + + if (basisu::g_debug_printf) + basisu::debug_printf("init_func: %u %u %u %u %u %f %u\n", num_blocks_x, num_blocks_y, block_width, block_height, srgb_decode_profile, dct_q, has_alpha); + + decode_state& state = *(decode_state*)pData; + if ((block_width != state.m_src_block_width) || (block_height != state.m_src_block_height)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: header validation failed (3)\n"); + return false; + } + if ((num_blocks_x != state.m_src_num_blocks_x) || (num_blocks_y != state.m_src_num_blocks_y)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: header validation failed (4)\n"); + return false; + } + + state.m_used_srgb_astc_decode_mode = srgb_decode_profile; + state.m_has_alpha = has_alpha; + + return true; + }; + + auto src_block_func = [](uint32_t bx, 
uint32_t by, const astc_helpers::log_astc_block& log_blk, void* pData) + { + decode_state& state = *(decode_state*)pData; + assert((bx < state.m_src_num_blocks_x) && (by < state.m_src_num_blocks_y)); + assert(state.m_num_src_block_rows_to_buffer == 2); // hardcoded for 6x6 + const astc_helpers::decode_mode astc_dec_mode = state.m_used_srgb_astc_decode_mode ? astc_helpers::cDecodeModeSRGB8 : astc_helpers::cDecodeModeLDR8; + + const uint32_t buffered_src_block_row_y = (by & 1); + + memcpy(&(*state.m_pBuffered_rows)(bx, buffered_src_block_row_y), &log_blk, sizeof(log_blk)); + + // Last block on this src row? If not, exit. + if (bx != (state.m_src_num_blocks_x - 1)) + return true; + + // We've written the final src block for this ASTC src row. + // See if we have enough source rows to create full 4x4 destination blocks. + const bool final_src_block_row = (by == (state.m_src_num_blocks_y - 1)); + + if ((buffered_src_block_row_y != (state.m_num_src_block_rows_to_buffer - 1)) && (!final_src_block_row)) + return true; + + // We have a full 1-2 rows of ASTC 6x6 blocks to process to BC7. + + // src/destination image Y coordinate of the top of the buffered rows + const uint32_t buffered_src_pixel_y = ((by / state.m_num_src_block_rows_to_buffer) * state.m_num_src_block_rows_to_buffer) * state.m_src_block_height; + assert((buffered_src_pixel_y & 3) == 0); + + // The total # of valid src block rows we can read. + const uint32_t num_buffered_src_block_rows = buffered_src_block_row_y + 1; + + assert((num_buffered_src_block_rows == state.m_num_src_block_rows_to_buffer) || (final_src_block_row)); + +#if defined(DEBUG) || defined(_DEBUG) + // The maximum number of valid buffer pixel scanlines we can fetch from, taking into account the original texture's actual (unpadded) height. 
+ const uint32_t override_buffer_height = basisu::minimum(state.m_orig_height - buffered_src_pixel_y, num_buffered_src_block_rows * state.m_src_block_height); + assert(override_buffer_height); +#endif + + // total_dst_block_rows_to_emit=really an upper bound for the final row of src ASTC blocks + const uint32_t total_dst_block_rows_to_emit = (num_buffered_src_block_rows * state.m_src_block_height + 3) >> 2; + + color_rgba unpacked_src_blocks[2][2][6 * 6]; // [x][y][pixel] + uint8_t upsampled_src_weights[2][2][6 * 6]; // [x][y][pixel] + color_rgba temp_pixels_16[16]; + + // Process each source ASTC 6x6 block group, 2x2 ASTC blocks at a time (12x12 pixels, or 3x3 BC7 blocks) + for (uint32_t src_bx = 0; src_bx < state.m_src_num_blocks_x; src_bx += 2) + { + bool has_unpacked_src_blocks[2][2] = { }; // [x][y] + + // Grab pointers to the 2x2 src ASTC blocks we'll be transcoding + const astc_helpers::log_astc_block *pSrc_log_blocks[2][2]; // [x][y] + + pSrc_log_blocks[0][0] = &state.m_pBuffered_rows->at(src_bx, 0); + pSrc_log_blocks[1][0] = &state.m_pBuffered_rows->at(basisu::minimum(src_bx + 1, state.m_src_num_blocks_x - 1), 0); + + pSrc_log_blocks[0][1] = &state.m_pBuffered_rows->at(src_bx, basisu::minimum(1, num_buffered_src_block_rows - 1)); + + pSrc_log_blocks[1][1] = &state.m_pBuffered_rows->at( + basisu::minimum(src_bx + 1, state.m_src_num_blocks_x - 1), + basisu::minimum(1, num_buffered_src_block_rows - 1)); + + // From here we can always assume 2x2 src ASTC 6x6 blocks, with ASTC block pointers duplicated at the borders if needed. + + const astc_helpers::log_astc_block* pUL = pSrc_log_blocks[0][0]; + + // First see if all the astc blocks are the same solid color. Likely a common case on some images. 
+ bool all_solid = true; + for (uint32_t y = 0; (y < 2) && all_solid; y++) + { + for (uint32_t x = 0; x < 2; x++) + { + if (!pSrc_log_blocks[x][y]->m_solid_color_flag_ldr) + { + all_solid = false; + break; + } + + if ((pSrc_log_blocks[x][y]->m_solid_color[0] != pUL->m_solid_color[0]) || (pSrc_log_blocks[x][y]->m_solid_color[1] != pUL->m_solid_color[1]) || + (pSrc_log_blocks[x][y]->m_solid_color[2] != pUL->m_solid_color[2]) || (pSrc_log_blocks[x][y]->m_solid_color[3] != pUL->m_solid_color[3])) + { + all_solid = false; + break; + } + } + } + + if (all_solid) + { + // Easy and fast case: All 4 ASTC blocks are solid and the same color, so all 9 BC7 blocks are solid too and the same color. + color_rgba sc; + sc.r = (uint8_t)(pUL->m_solid_color[0] >> 8); + sc.g = (uint8_t)(pUL->m_solid_color[1] >> 8); + sc.b = (uint8_t)(pUL->m_solid_color[2] >> 8); + sc.a = (uint8_t)(pUL->m_solid_color[3] >> 8); + + bc7_block temp_blk; + bc7f::pack_mode5_solid((uint8_t *)&temp_blk, sc); + + for (uint32_t dy = 0; dy < total_dst_block_rows_to_emit; dy++) // up to 3 dst block rows + { + const uint32_t dst_by = (buffered_src_pixel_y >> 2) + dy; + if (dst_by >= state.m_dst_num_blocks_y) + break; + + for (uint32_t dx = 0; dx < 3; dx++) + { + const uint32_t dst_bx = ((src_bx * 6) >> 2) + dx; + if (dst_bx >= state.m_dst_num_blocks_x) + break; + + uint8_t* pDst_block_u8 = (uint8_t*)state.m_pDst_blocks + (dst_by * state.m_output_row_pitch_in_blocks_or_pixels + dst_bx) * state.m_output_block_or_pixel_stride_in_bytes; + + memcpy(pDst_block_u8, &temp_blk, sizeof(bc7_block)); + } + } + + continue; + } + + uint32_t num_hard_src_blocks = 0; + + for (uint32_t y = 0; y < 2; y++) + for (uint32_t x = 0; x < 2; x++) + num_hard_src_blocks += (pSrc_log_blocks[x][y]->m_dual_plane || (pSrc_log_blocks[x][y]->m_num_partitions > 1)); + + if (num_hard_src_blocks == 4) + { + // All src blocks hard, no easy optimizations, so unpack and encode pixels analytically + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x 
= 0; x < 2; x++) + { + bool status = astc_helpers::decode_block_xuastc_ldr(*pSrc_log_blocks[x][y], &unpacked_src_blocks[x][y][0], 6, 6, astc_dec_mode); + if (!status) + { + return false; + } + state.m_total_src_blocks_unpacked++; + } + } + + for (uint32_t dy = 0; dy < total_dst_block_rows_to_emit; dy++) // up to 3 dst block rows + { + const uint32_t dst_by = (buffered_src_pixel_y >> 2) + dy; + if (dst_by >= state.m_dst_num_blocks_y) + break; + + for (uint32_t dx = 0; dx < 3; dx++) + { + const uint32_t dst_bx = ((src_bx * 6) >> 2) + dx; + if (dst_bx >= state.m_dst_num_blocks_x) + break; + + // TODO: Optimize + for (uint32_t y = 0; y < 4; y++) + { + const uint32_t sy = dy * 4 + y; + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t sx = dx * 4 + x; + temp_pixels_16[x + y * 4] = unpacked_src_blocks[sx / 6][sy / 6][(sx % 6) + (sy % 6) * 6]; + } // x + } // y + + uint8_t* pDst_block_u8 = (uint8_t*)state.m_pDst_blocks + (dst_by * state.m_output_row_pitch_in_blocks_or_pixels + dst_bx) * state.m_output_block_or_pixel_stride_in_bytes; + + if (state.m_has_alpha) + bc7f::fast_pack_bc7_auto_rgba(pDst_block_u8, temp_pixels_16, state.m_bc7f_flags); + else + bc7f::fast_pack_bc7_auto_rgb(pDst_block_u8, temp_pixels_16, state.m_bc7f_flags); + + state.m_total_blocks_encoded++; + } // dx + } // dy + + continue; + } + + // One or more of the 4 source blocks is solid or 1 subset. + + // For simplicity: First unpack the first plane weight grids (unless solid) + for (uint32_t y = 0; y < 2; y++) + for (uint32_t x = 0; x < 2; x++) + astc_upsample_grid_weights(*pSrc_log_blocks[x][y], &upsampled_src_weights[x][y][0], 6, 6); + + //const uint32_t ENDPOINT_TOL = 3; + const uint32_t ENDPOINT_TOL = 1; + + // Process each of the 9 destination BC7 blocks. 
+ for (uint32_t dy = 0; dy < total_dst_block_rows_to_emit; dy++) // up to 3 dst block rows + { + const uint32_t dst_by = (buffered_src_pixel_y >> 2) + dy; + if (dst_by >= state.m_dst_num_blocks_y) + break; + + for (uint32_t dx = 0; dx < 3; dx++) + { + // Skip the central block (dx == 1, dy == 1); it is processed last, after the dy/dx loops. + if ((dx == 1) && (dy == 1)) + continue; + + const uint32_t dst_bx = ((src_bx * 6) >> 2) + dx; + if (dst_bx >= state.m_dst_num_blocks_x) + break; + + uint8_t* pDst_block_u8 = (uint8_t*)state.m_pDst_blocks + (dst_by * state.m_output_row_pitch_in_blocks_or_pixels + dst_bx) * state.m_output_block_or_pixel_stride_in_bytes; + + // We're NOT at the center BC7 dst block. + // This BC7 block overlaps only 1 or 2 ASTC blocks: pB0 covers its top-left pixel, pB1 its bottom-right pixel. + const int top_dx = dx * 4, top_dy = dy * 4; + const int bot_dx = top_dx + 3, bot_dy = top_dy + 3; + + const int top_bx = top_dx / 6, top_by = top_dy / 6; + const int bot_bx = bot_dx / 6, bot_by = bot_dy / 6; + + const astc_helpers::log_astc_block* pB0 = pSrc_log_blocks[top_bx][top_by]; + const astc_helpers::log_astc_block* pB1 = pSrc_log_blocks[bot_bx][bot_by]; + + // Note: because of row/col duplication at the edges of images, pB0 could equal pB1 even though we're not at a BC7 corner block.
+ + const bool single_src_block = (top_bx == bot_bx) && (top_by == bot_by); + + bool full_encode_flag = false; + + if (pB0->m_dual_plane || pB1->m_dual_plane || + (pB0->m_num_partitions > 1) || (pB1->m_num_partitions > 1)) + { + // Either block is complex (dual plane or more than one partition), fall back to reencoding + full_encode_flag = true; + } + else if (single_src_block) + { + assert(pB0 == pB1); + + // BC7 block is at a corner, and only overlaps a single ASTC block - output solid or single subset BC7 + if (pB0->m_solid_color_flag_ldr) + { + color_rgba sc; + sc.r = (uint8_t)(pB0->m_solid_color[0] >> 8); + sc.g = (uint8_t)(pB0->m_solid_color[1] >> 8); + sc.b = (uint8_t)(pB0->m_solid_color[2] >> 8); + sc.a = (uint8_t)(pB0->m_solid_color[3] >> 8); + + bc7f::pack_mode5_solid(pDst_block_u8, sc); + } + else + { + // Output mode 6 BC7 + bc7f::pack_from_astc_single_subset(pDst_block_u8, *pB0, &upsampled_src_weights[top_bx][top_by][0], (dx * 4) % 6, (dy * 4) % 6, 6, 6); + } + + state.m_total_blocks_transcoded++; + } + // Below here the BC7 block always overlaps 2 ASTC 6x6 block slots (the 1 block case was just ruled out) + else if (blocks_same_solid_colors(*pB0, *pB1, 0)) + { + // BC7 block overlaps 2 ASTC blocks, both solid and both the same color - output solid BC7 + + color_rgba sc; + sc.r = (uint8_t)(pB0->m_solid_color[0] >> 8); + sc.g = (uint8_t)(pB0->m_solid_color[1] >> 8); + sc.b = (uint8_t)(pB0->m_solid_color[2] >> 8); + sc.a = (uint8_t)(pB0->m_solid_color[3] >> 8); + bc7f::pack_mode5_solid(pDst_block_u8, sc); + + state.m_total_blocks_transcoded++; + } + else if (blocks_same_single_subset_endpoints(*pB0, *pB1, ENDPOINT_TOL)) + { + // BC7 block overlaps 2 single subset ASTC blocks, both have the same or very similar endpoints, output mode 6 BC7 + bc7f::pack_from_astc_to_single_subset_same_endpoints( + pDst_block_u8, + *pB0, &upsampled_src_weights[top_bx][top_by][0], + *pB1, &upsampled_src_weights[bot_bx][bot_by][0], + dx, dy, + 6, 6); + + state.m_total_blocks_transcoded++; + } + else if (!block_has_alpha(*pB0) &&
!block_has_alpha(*pB1)) + { + // BC7 block overlaps 2 ASTC blocks with different endpoints (or solid colors) - output 2 subset mode 1 BC7 + if (!bc7f::pack_from_astc_6x6_to_two_subsets_different_endpoints( + pDst_block_u8, + *pB0, &upsampled_src_weights[top_bx][top_by][0], + *pB1, &upsampled_src_weights[bot_bx][bot_by][0], + dx, dy)) + { + full_encode_flag = true; + } + else + { + state.m_total_blocks_transcoded++; + } + } + else + { + full_encode_flag = true; + } + + if (full_encode_flag) + { + // one or both ASTC blocks are just too complex, unpack and reencode + if (!has_unpacked_src_blocks[top_bx][top_by]) + { + bool status = astc_helpers::decode_block_xuastc_ldr(*pB0, &unpacked_src_blocks[top_bx][top_by][0], 6, 6, astc_dec_mode, &upsampled_src_weights[top_bx][top_by][0]); + if (!status) + { + return false; + } + state.m_total_src_blocks_unpacked++; + has_unpacked_src_blocks[top_bx][top_by] = true; + } + + if (!has_unpacked_src_blocks[bot_bx][bot_by]) + { + bool status = astc_helpers::decode_block_xuastc_ldr(*pB1, &unpacked_src_blocks[bot_bx][bot_by][0], 6, 6, astc_dec_mode, &upsampled_src_weights[bot_bx][bot_by][0]); + if (!status) + { + return false; + } + state.m_total_src_blocks_unpacked++; + has_unpacked_src_blocks[bot_bx][bot_by] = true; + } + + // TODO: Optimize + for (uint32_t y = 0; y < 4; y++) + { + const uint32_t sy = dy * 4 + y; + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t sx = dx * 4 + x; + + assert(has_unpacked_src_blocks[sx / 6][sy / 6]); + + temp_pixels_16[x + y * 4] = unpacked_src_blocks[sx / 6][sy / 6][(sx % 6) + (sy % 6) * 6]; + } // x + } // y + + if (state.m_has_alpha) + bc7f::fast_pack_bc7_auto_rgba(pDst_block_u8, temp_pixels_16, state.m_bc7f_flags); + else + bc7f::fast_pack_bc7_auto_rgb(pDst_block_u8, temp_pixels_16, state.m_bc7f_flags); + + state.m_total_blocks_encoded++; + } + + } // dx + } // dy + + // Now handle the center BC7 block - by this point we may have already decoded 1 or more ASTC 6x6 blocks. 
+ const uint32_t dx = 1, dy = 1; + const uint32_t dst_bx = ((src_bx * 6) >> 2) + dx; + const uint32_t dst_by = (buffered_src_pixel_y >> 2) + dy; + + if ((dst_bx < state.m_dst_num_blocks_x) && (dst_by < state.m_dst_num_blocks_y)) + { + uint8_t* pDst_block_u8 = (uint8_t*)state.m_pDst_blocks + (dst_by * state.m_output_row_pitch_in_blocks_or_pixels + dst_bx) * state.m_output_block_or_pixel_stride_in_bytes; + + bool skip_full_encode = false; + + // the unfortunate middle BC7 block, overlaps all 4 ASTC blocks + // The 4 ASTC blocks cannot be all solid, and cannot be all hard. + if (num_hard_src_blocks == 0) + { + // all blocks are simple, see if we can find a simple case to handle quickly + bool top_same_solid_color = blocks_same_solid_colors(*pSrc_log_blocks[0][0], *pSrc_log_blocks[1][0], 1); + bool bot_same_solid_color = blocks_same_solid_colors(*pSrc_log_blocks[0][1], *pSrc_log_blocks[1][1], 1); + + bool left_same_solid_color = blocks_same_solid_colors(*pSrc_log_blocks[0][0], *pSrc_log_blocks[0][1], 1); + bool right_same_solid_color = blocks_same_solid_colors(*pSrc_log_blocks[1][0], *pSrc_log_blocks[1][1], 1); + + bool top_same_endpoints = blocks_same_single_subset_endpoints(*pSrc_log_blocks[0][0], *pSrc_log_blocks[1][0], ENDPOINT_TOL); + bool bot_same_endpoints = blocks_same_single_subset_endpoints(*pSrc_log_blocks[0][1], *pSrc_log_blocks[1][1], ENDPOINT_TOL); + + bool left_same_endpoints = blocks_same_single_subset_endpoints(*pSrc_log_blocks[0][0], *pSrc_log_blocks[0][1], ENDPOINT_TOL); + bool right_same_endpoints = blocks_same_single_subset_endpoints(*pSrc_log_blocks[1][0], *pSrc_log_blocks[1][1], ENDPOINT_TOL); + + bool top_no_alpha = !block_has_alpha(*pSrc_log_blocks[0][0]) && !block_has_alpha(*pSrc_log_blocks[1][0]); + bool bot_no_alpha = !block_has_alpha(*pSrc_log_blocks[0][1]) && !block_has_alpha(*pSrc_log_blocks[1][1]); + + bool left_no_alpha = !block_has_alpha(*pSrc_log_blocks[0][0]) && !block_has_alpha(*pSrc_log_blocks[0][1]); + bool right_no_alpha = 
!block_has_alpha(*pSrc_log_blocks[1][0]) && !block_has_alpha(*pSrc_log_blocks[1][1]); + + bool top_transcodable = (top_same_solid_color || top_same_endpoints) && top_no_alpha; + bool bot_transcodable = (bot_same_solid_color || bot_same_endpoints) && bot_no_alpha; + + bool left_transcodable = (left_same_solid_color || left_same_endpoints) && left_no_alpha; + bool right_transcodable = (right_same_solid_color || right_same_endpoints) && right_no_alpha; + + if (top_transcodable && bot_transcodable) + { + // BC7 mode 1 + bc7f::pack_astc_6x6_to_two_subsets_middle_block( + pDst_block_u8, + pSrc_log_blocks, upsampled_src_weights, + false); + state.m_total_blocks_transcoded++; + skip_full_encode = true; + } + else if (left_transcodable && right_transcodable) + { + // BC7 mode 1 + bc7f::pack_astc_6x6_to_two_subsets_middle_block( + pDst_block_u8, + pSrc_log_blocks, upsampled_src_weights, + true); + state.m_total_blocks_transcoded++; + skip_full_encode = true; + } + + // TODO: Handle non-exact cases + } + + if (!skip_full_encode) + { + // Center BC7 block scenario is complex, requires full decode+analytical BC7 encode. 
+ if (has_unpacked_src_blocks[0][0] && has_unpacked_src_blocks[1][0] && has_unpacked_src_blocks[0][1] && has_unpacked_src_blocks[1][1]) + { + // We've already decoded all the ASTC 6x6 blocks fully, so grab central pixels + for (uint32_t y = 0; y < 4; y++) + { + const uint32_t sy = dy * 4 + y; + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t sx = dx * 4 + x; + + temp_pixels_16[x + y * 4] = unpacked_src_blocks[sx / 6][sy / 6][(sx % 6) + (sy % 6) * 6]; + } // x + } // y + + if (state.m_has_alpha) + bc7f::fast_pack_bc7_auto_rgba(pDst_block_u8, temp_pixels_16, state.m_bc7f_flags); + else + bc7f::fast_pack_bc7_auto_rgb(pDst_block_u8, temp_pixels_16, state.m_bc7f_flags); + + state.m_total_blocks_encoded++; + } + else + { + for (uint32_t iby = 0; iby < 2; iby++) + { + for (uint32_t ibx = 0; ibx < 2; ibx++) + { + // Each ASTC block supplies one 2x2 quadrant of the central BC7 block; a partial decode of just those pixels is a big saving + uint32_t start_x, start_y, end_x, end_y; + + switch (ibx + iby * 2) + { + case 0: start_x = 4; end_x = 6; start_y = 4; end_y = 6; break; + case 1: start_x = 0; end_x = 2; start_y = 4; end_y = 6; break; + case 2: start_x = 4; end_x = 6; start_y = 0; end_y = 2; break; + default: + case 3: start_x = 0; end_x = 2; start_y = 0; end_y = 2; break; + } + + // See if we've already decoded the block + if (has_unpacked_src_blocks[ibx][iby]) + { + for (uint32_t py = 0; py < 2; py++) + { + const uint32_t sy = start_y + py; + + for (uint32_t px = 0; px < 2; px++) + { + const uint32_t sx = start_x + px; + + temp_pixels_16[(px + ibx * 2) + (py + iby * 2) * 4] = unpacked_src_blocks[ibx][iby][(sx % 6) + (sy % 6) * 6]; + } // px + } // py + } + else + { + // Not fully decoded yet: partial decode into a scratch 6x6 buffer + color_rgba temp_pixels[6 * 6]; + + bool status = astc_helpers::decode_block_xuastc_ldr(*pSrc_log_blocks[ibx][iby], temp_pixels, 6, 6, astc_dec_mode, + &upsampled_src_weights[ibx][iby][0], start_x, start_y, end_x, end_y); + if (!status) + { + return false; + } + state.m_total_src_blocks_partial_unpacked++; + + for
(uint32_t py = 0; py < 2; py++) + { + const uint32_t ey = py + iby * 2; + + for (uint32_t px = 0; px < 2; px++) + { + const uint32_t ex = px + ibx * 2; + + temp_pixels_16[ex + ey * 4] = temp_pixels[(start_x + px) + (start_y + py) * 6]; + } // px + } // py + } + } // ibx + } // iby + + if (state.m_has_alpha) + bc7f::fast_pack_bc7_auto_rgba(pDst_block_u8, temp_pixels_16, state.m_bc7f_flags); + else + bc7f::fast_pack_bc7_auto_rgb(pDst_block_u8, temp_pixels_16, state.m_bc7f_flags); + + state.m_total_blocks_encoded++; + } + + } // if (!skip_full_encode) + + } // if ((dst_bx < state.m_dst_num_blocks_x) && (dst_by < state.m_dst_num_blocks_y)) + + } // src_bx + + return true; + }; + + xuastc_decoded_image decoded_image; + + const bool decomp_flag = decoded_image.decode(pImage_data, image_data_size, init_func, &dec_state, src_block_func, &dec_state); + if (!decomp_flag) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: astc_ldr_t::decompress_image() failed\n"); + return false; + } + + if (basisu::g_debug_printf) + { + basisu::fmt_debug_printf("Total src blocks: {}, Total src blocks unpacked to pixels: {}, total partial unpacks: {}\n", + dec_state.m_src_num_blocks_x * dec_state.m_src_num_blocks_y, + dec_state.m_total_src_blocks_unpacked, dec_state.m_total_src_blocks_partial_unpacked); + + basisu::fmt_debug_printf("Total dst blocks: {}, Total blocks transcoded: {}, encoded: {}\n", + dec_state.m_dst_num_blocks_x * dec_state.m_dst_num_blocks_y, + dec_state.m_total_blocks_transcoded, dec_state.m_total_blocks_encoded); + } + } + else if (((src_block_width == 4) && (src_block_height == 4)) && (!dst_fmt_is_pvrtc1) && (!deblock_filtering)) + { + // src is ASTC LDR 4x4, destination block size must be 4x4, no PVRTC1, no deblocking. Directly pack to target format during transcoding.
+ struct decode_state + { + uint32_t m_src_num_blocks_x; + uint32_t m_src_num_blocks_y; + + void* m_pDst_blocks; + uint32_t m_output_row_pitch_in_blocks_or_pixels; + uint32_t m_output_block_or_pixel_stride_in_bytes; + uint32_t m_output_rows_in_pixels; + + block_format m_fmt; + bool m_used_srgb_astc_decode_mode; + bool m_has_alpha; + + int m_channel0, m_channel1; + bool m_high_quality; + bool m_enable_fast_bc7_transcoding; + bool m_from_alpha; + uint32_t m_bc7f_flags; + etc1f::pack_etc1_state* m_pEtc1_pack_state; + }; + + decode_state dec_state; + dec_state.m_src_num_blocks_x = src_num_blocks_x; + dec_state.m_src_num_blocks_y = src_num_blocks_y; + dec_state.m_pDst_blocks = pDst_blocks; + dec_state.m_output_row_pitch_in_blocks_or_pixels = output_row_pitch_in_blocks_or_pixels; + dec_state.m_output_block_or_pixel_stride_in_bytes = output_block_or_pixel_stride_in_bytes; + dec_state.m_output_rows_in_pixels = output_rows_in_pixels; + dec_state.m_fmt = fmt; + dec_state.m_used_srgb_astc_decode_mode = false; // will be set by init from the compressed stream's header + dec_state.m_has_alpha = true; // will be set by init from the compressed stream's header + + dec_state.m_channel0 = channel0; + dec_state.m_channel1 = channel1; + dec_state.m_high_quality = high_quality; + dec_state.m_enable_fast_bc7_transcoding = enable_fast_bc7_transcoding; + dec_state.m_from_alpha = from_alpha; + dec_state.m_bc7f_flags = bc7f_flags; + dec_state.m_pEtc1_pack_state = &etc1_pack_state; + + auto init_func = [](uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t block_width, uint32_t block_height, bool srgb_decode_profile, float dct_q, bool has_alpha, void* pData) + { + BASISU_NOTE_UNUSED(srgb_decode_profile); + BASISU_NOTE_UNUSED(dct_q); + + if (basisu::g_debug_printf) + basisu::debug_printf("init_func: %u %u %u %u %u %f %u\n", num_blocks_x, num_blocks_y, block_width, block_height, srgb_decode_profile, dct_q, has_alpha); + + decode_state& state = *(decode_state*)pData; + if ((block_width != 
4) || (block_height != 4)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: header validation failed (3)\n"); + return false; + } + if ((num_blocks_x != state.m_src_num_blocks_x) || (num_blocks_y != state.m_src_num_blocks_y)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: header validation failed (4)\n"); + return false; + } + + state.m_used_srgb_astc_decode_mode = srgb_decode_profile; + state.m_has_alpha = has_alpha; + + return true; + }; + + auto src_block_func = [](uint32_t bx, uint32_t by, const astc_helpers::log_astc_block& log_blk, void* pData) + { + decode_state& state = *(decode_state*)pData; + assert((bx < state.m_src_num_blocks_x) && (by < state.m_src_num_blocks_y)); + + uint8_t* pDst_block_u8 = (uint8_t*)state.m_pDst_blocks + (by * state.m_output_row_pitch_in_blocks_or_pixels + bx) * state.m_output_block_or_pixel_stride_in_bytes; + + // Special fast cases for BC7 transcode target given common ASTC configs + if (state.m_fmt == block_format::cBC7) + { + if (log_blk.m_solid_color_flag_ldr) + { + color_rgba sc; + sc.r = (uint8_t)(log_blk.m_solid_color[0] >> 8); + sc.g = (uint8_t)(log_blk.m_solid_color[1] >> 8); + sc.b = (uint8_t)(log_blk.m_solid_color[2] >> 8); + sc.a = (uint8_t)(log_blk.m_solid_color[3] >> 8); + + bc7f::pack_mode5_solid(pDst_block_u8, sc); + return true; + } + else if (!log_blk.m_dual_plane && (log_blk.m_num_partitions == 1) && !state.m_high_quality && state.m_enable_fast_bc7_transcoding) + { + // TODO: This does cost a tiny amount of PSNR (.1-25 dB or so), but is way faster. + bc7f::pack_from_astc_4x4_single_subset(pDst_block_u8, log_blk); + return true; + } + } + + // Fall back to block pixel unpack then analytical encode. + color32 block_pixels[4 * 4]; + bool decode_status = astc_helpers::decode_block_xuastc_ldr(log_blk, block_pixels, 4, 4, state.m_used_srgb_astc_decode_mode ? 
astc_helpers::cDecodeModeSRGB8 : astc_helpers::cDecodeModeLDR8); + if (!decode_status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: astc_helpers::decode_block_xuastc_ldr() failed\n"); + return false; + } + +#if defined(_DEBUG) || defined(DEBUG) + color32 alt_block_pixels[4 * 4]; + if (!astc_helpers::decode_block(log_blk, alt_block_pixels, 4, 4, state.m_used_srgb_astc_decode_mode ? astc_helpers::cDecodeModeSRGB8 : astc_helpers::cDecodeModeLDR8)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: astc_helpers::decode_block() failed\n"); + return false; + } + + for (uint32_t i = 0; i < 16; i++) + { + assert(block_pixels[i][0] == alt_block_pixels[i][0]); + assert(block_pixels[i][1] == alt_block_pixels[i][1]); + assert(block_pixels[i][2] == alt_block_pixels[i][2]); + assert(block_pixels[i][3] == alt_block_pixels[i][3]); + } +#endif + + transcode_4x4_block( + state.m_fmt, + bx, by, + state.m_pDst_blocks, pDst_block_u8, + block_pixels, + state.m_output_block_or_pixel_stride_in_bytes, state.m_output_row_pitch_in_blocks_or_pixels, state.m_output_rows_in_pixels, + state.m_channel0, state.m_channel1, + state.m_high_quality, state.m_from_alpha, + state.m_bc7f_flags, + *state.m_pEtc1_pack_state, + state.m_has_alpha ? 1 : 0); + + return true; + }; + + xuastc_decoded_image decoded_image; + + const bool decomp_flag = decoded_image.decode(pImage_data, image_data_size, init_func, &dec_state, src_block_func, &dec_state); + if (!decomp_flag) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: astc_ldr_t::decompress_image() failed\n"); + return false; + } + } + else if ((deblock_filtering) || (dst_fmt_is_pvrtc1)) + { + // Completely general case. Unpack entire 32bpp image into memory (needed for deblocking and PVRTC1). 
+ assert((dst_fmt_block_width == 4) && (dst_fmt_block_height == 4)); + basisu::vector2D temp_image; + + if (!temp_image.try_resize(src_num_blocks_x * src_block_width, src_num_blocks_y * src_block_height)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: out of memory\n"); + return false; + } + + struct decode_state + { + uint32_t m_src_num_blocks_x; + uint32_t m_src_num_blocks_y; + uint32_t m_src_block_width; + uint32_t m_src_block_height; + + void* m_pDst_blocks; + uint32_t m_output_row_pitch_in_blocks_or_pixels; + uint32_t m_output_block_or_pixel_stride_in_bytes; + uint32_t m_output_rows_in_pixels; + + basisu::vector2D* m_pTemp_image; + + bool m_used_srgb_astc_decode_mode; + bool m_has_alpha; + }; + + decode_state dec_state; + dec_state.m_src_num_blocks_x = src_num_blocks_x; + dec_state.m_src_num_blocks_y = src_num_blocks_y; + dec_state.m_src_block_width = src_block_width; + dec_state.m_src_block_height = src_block_height; + dec_state.m_pDst_blocks = pDst_blocks; + dec_state.m_output_row_pitch_in_blocks_or_pixels = output_row_pitch_in_blocks_or_pixels; + dec_state.m_output_block_or_pixel_stride_in_bytes = output_block_or_pixel_stride_in_bytes; + dec_state.m_output_rows_in_pixels = output_rows_in_pixels; + dec_state.m_pTemp_image = &temp_image; + dec_state.m_used_srgb_astc_decode_mode = false; // will be set by init from the compressed stream's header + dec_state.m_has_alpha = true; // will be set by init from the compressed stream's header + + auto init_func = [](uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t block_width, uint32_t block_height, bool srgb_decode_profile, float dct_q, bool has_alpha, void* pData) + { + BASISU_NOTE_UNUSED(srgb_decode_profile); + BASISU_NOTE_UNUSED(dct_q); + + if (basisu::g_debug_printf) + basisu::debug_printf("init_func: %u %u %u %u %u %f %u\n", num_blocks_x, num_blocks_y, block_width, block_height, srgb_decode_profile, dct_q, has_alpha); + + decode_state& state = *(decode_state*)pData; + if 
((block_width != state.m_src_block_width) || (block_height != state.m_src_block_height)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: header validation failed (3)\n"); + return false; + } + if ((num_blocks_x != state.m_src_num_blocks_x) || (num_blocks_y != state.m_src_num_blocks_y)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: header validation failed (4)\n"); + return false; + } + + state.m_used_srgb_astc_decode_mode = srgb_decode_profile; + state.m_has_alpha = has_alpha; + + return true; + }; + + auto src_block_func = [](uint32_t bx, uint32_t by, const astc_helpers::log_astc_block& log_blk, void* pData) + { + decode_state& state = *(decode_state*)pData; + assert((bx < state.m_src_num_blocks_x) && (by < state.m_src_num_blocks_y)); + + color32 block_pixels[astc_helpers::MAX_BLOCK_PIXELS]; + bool decode_status = astc_helpers::decode_block_xuastc_ldr(log_blk, block_pixels, state.m_src_block_width, state.m_src_block_height, state.m_used_srgb_astc_decode_mode ? astc_helpers::cDecodeModeSRGB8 : astc_helpers::cDecodeModeLDR8); + if (!decode_status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: astc_helpers::decode_block_xuastc_ldr() failed\n"); + return false; + } + +#if defined(_DEBUG) || defined(DEBUG) + // sanity check vs. our vanilla/full-featured ASTC decoder + color32 alt_block_pixels[astc_helpers::MAX_BLOCK_PIXELS]; + if (!astc_helpers::decode_block(log_blk, alt_block_pixels, state.m_src_block_width, state.m_src_block_height, state.m_used_srgb_astc_decode_mode ? 
astc_helpers::cDecodeModeSRGB8 : astc_helpers::cDecodeModeLDR8)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: astc_helpers::decode_block() failed\n"); + return false; + } + + for (uint32_t i = 0; i < state.m_src_block_width * state.m_src_block_height; i++) + { + assert(block_pixels[i][0] == alt_block_pixels[i][0]); + assert(block_pixels[i][1] == alt_block_pixels[i][1]); + assert(block_pixels[i][2] == alt_block_pixels[i][2]); + assert(block_pixels[i][3] == alt_block_pixels[i][3]); + } +#endif + + color32* pSrc_pixels = block_pixels; + color32* pDst_pixels = &(*state.m_pTemp_image)(bx * state.m_src_block_width, by * state.m_src_block_height); + + for (uint32_t y = 0; y < state.m_src_block_height; y++) + { + memcpy(pDst_pixels, pSrc_pixels, state.m_src_block_width * sizeof(color32)); + + pSrc_pixels += state.m_src_block_width; + pDst_pixels += state.m_pTemp_image->get_width(); + } // y + + return true; + }; + + xuastc_decoded_image decoded_image; + + const bool decomp_flag = decoded_image.decode(pImage_data, image_data_size, init_func, &dec_state, src_block_func, &dec_state); + if (!decomp_flag) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: astc_ldr_t::decompress_image() failed\n"); + return false; + } + + if (deblock_filtering) + { + if (!xuastc_deblock_filter( + decoded_image.m_actual_block_width, decoded_image.m_actual_block_height, + temp_image, temp_image, + stronger_deblocking, XUASTC_LDR_DEBLOCK_SKIP_THRESH)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: out of memory\n"); + return false; + } + } + + const uint32_t dst_num_blocks_x = (orig_width + dst_fmt_block_width - 1) / dst_fmt_block_width; + const uint32_t dst_num_blocks_y = (orig_height + dst_fmt_block_height - 1) / dst_fmt_block_height; + + if (dst_fmt_is_pvrtc1) + { + assert((dst_fmt_block_width == 4) && (dst_fmt_block_height == 4)); + + encode_pvrtc1(fmt, pDst_blocks, temp_image, 
dst_num_blocks_x, dst_num_blocks_y, from_alpha); + } + else + { + color32 block_pixels[astc_helpers::MAX_BLOCK_PIXELS]; + + for (uint32_t dst_by = 0; dst_by < dst_num_blocks_y; dst_by++) + { + uint8_t* pDst_block_u8 = (uint8_t*)pDst_blocks + dst_by * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes; + + for (uint32_t dst_bx = 0; dst_bx < dst_num_blocks_x; dst_bx++) + { + temp_image.extract_block_clamped(block_pixels, dst_bx * 4, dst_by * 4, 4, 4); + + transcode_4x4_block( + fmt, + dst_bx, dst_by, + pDst_blocks, pDst_block_u8, + block_pixels, + output_block_or_pixel_stride_in_bytes, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels, + channel0, channel1, + high_quality, from_alpha, + bc7f_flags, + etc1_pack_state, + dec_state.m_has_alpha ? 1 : 0); + + pDst_block_u8 += output_block_or_pixel_stride_in_bytes; + + } // dst_bx + + } // dst_by + + } // if (dst_fmt_is_pvrtc1) + } + else + { + // No PVRTC1/ASTC, no deblocking. Unpack as few source row blocks into memory as possible needed for transcoding to 4x4. Output block size must be 4x4. + assert((dst_fmt_block_width == 4) && (dst_fmt_block_height == 4)); + + // Compute how many source block rows we need to buffer so we have a multiple of 4 scanlines. The max # of scanlines is 20. + uint32_t num_src_block_rows_to_buffer = 1; + while ((num_src_block_rows_to_buffer * src_block_height) & 3) + num_src_block_rows_to_buffer++; + assert((num_src_block_rows_to_buffer >= 1) && (num_src_block_rows_to_buffer <= 4)); + + // Compute how many 4x4 dest blocks fit into these many source rows. 
+ assert(((num_src_block_rows_to_buffer * src_block_height) & 3) == 0); + //const uint32_t num_dst_block_rows_to_buffer = (num_src_block_rows_to_buffer * src_block_height) >> 2; + + const uint32_t dst_num_blocks_x = (orig_width + dst_fmt_block_width - 1) / dst_fmt_block_width; + const uint32_t dst_num_blocks_y = (orig_height + dst_fmt_block_height - 1) / dst_fmt_block_height; + + basisu::vector2D buffered_rows(src_num_blocks_x * src_block_width, num_src_block_rows_to_buffer * src_block_height); + + struct decode_state + { + uint32_t m_orig_height; + + uint32_t m_src_num_blocks_x; + uint32_t m_src_num_blocks_y; + uint32_t m_src_block_width; + uint32_t m_src_block_height; + + uint32_t m_dst_num_blocks_x; + uint32_t m_dst_num_blocks_y; + + void* m_pDst_blocks; + uint32_t m_output_row_pitch_in_blocks_or_pixels; + uint32_t m_output_block_or_pixel_stride_in_bytes; + uint32_t m_output_rows_in_pixels; + + uint32_t m_num_src_block_rows_to_buffer; + //uint32_t m_num_dst_block_rows_to_buffer; + + basisu::vector2D* m_pBuffered_rows; + + bool m_used_srgb_astc_decode_mode; + bool m_has_alpha; + + block_format m_fmt; + int m_channel0, m_channel1; + bool m_high_quality; + bool m_from_alpha; + uint32_t m_bc7f_flags; + etc1f::pack_etc1_state* m_pEtc1_pack_state; + }; + + decode_state dec_state; + dec_state.m_orig_height = orig_height; + dec_state.m_src_num_blocks_x = src_num_blocks_x; + dec_state.m_src_num_blocks_y = src_num_blocks_y; + dec_state.m_src_block_width = src_block_width; + dec_state.m_src_block_height = src_block_height; + dec_state.m_dst_num_blocks_x = dst_num_blocks_x; + dec_state.m_dst_num_blocks_y = dst_num_blocks_y; + dec_state.m_pDst_blocks = pDst_blocks; + dec_state.m_output_row_pitch_in_blocks_or_pixels = output_row_pitch_in_blocks_or_pixels; + dec_state.m_output_block_or_pixel_stride_in_bytes = output_block_or_pixel_stride_in_bytes; + dec_state.m_output_rows_in_pixels = output_rows_in_pixels; + + dec_state.m_num_src_block_rows_to_buffer = 
num_src_block_rows_to_buffer; + //dec_state.m_num_dst_block_rows_to_buffer = num_dst_block_rows_to_buffer; + + dec_state.m_pBuffered_rows = &buffered_rows; + dec_state.m_used_srgb_astc_decode_mode = false; // will be set by init from the compressed stream's header + dec_state.m_has_alpha = true; // will be set by init from the compressed stream's header + + dec_state.m_fmt = fmt; + dec_state.m_channel0 = channel0; + dec_state.m_channel1 = channel1; + dec_state.m_high_quality = high_quality; + dec_state.m_from_alpha = from_alpha; + dec_state.m_bc7f_flags = bc7f_flags; + dec_state.m_pEtc1_pack_state = &etc1_pack_state; + + auto init_func = [](uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t block_width, uint32_t block_height, bool srgb_decode_profile, float dct_q, bool has_alpha, void* pData) + { + BASISU_NOTE_UNUSED(srgb_decode_profile); + BASISU_NOTE_UNUSED(dct_q); + + if (basisu::g_debug_printf) + basisu::debug_printf("init_func: %u %u %u %u %u %f %u\n", num_blocks_x, num_blocks_y, block_width, block_height, srgb_decode_profile, dct_q, has_alpha); + + decode_state& state = *(decode_state*)pData; + if ((block_width != state.m_src_block_width) || (block_height != state.m_src_block_height)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: header validation failed (3)\n"); + return false; + } + if ((num_blocks_x != state.m_src_num_blocks_x) || (num_blocks_y != state.m_src_num_blocks_y)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: header validation failed (4)\n"); + return false; + } + + state.m_used_srgb_astc_decode_mode = srgb_decode_profile; + state.m_has_alpha = has_alpha; + + return true; + }; + + auto src_block_func = [](uint32_t bx, uint32_t by, const astc_helpers::log_astc_block& log_blk, void* pData) + { + decode_state& state = *(decode_state*)pData; + assert((bx < state.m_src_num_blocks_x) && (by < state.m_src_num_blocks_y)); + + // Unpack ASTC block, distribute to temp output 
buffer. + color32 block_pixels[astc_helpers::MAX_BLOCK_PIXELS]; + bool decode_status = astc_helpers::decode_block_xuastc_ldr(log_blk, block_pixels, state.m_src_block_width, state.m_src_block_height, state.m_used_srgb_astc_decode_mode ? astc_helpers::cDecodeModeSRGB8 : astc_helpers::cDecodeModeLDR8); + if (!decode_status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: astc_helpers::decode_block_xuastc_ldr() failed\n"); + return false; + } + +#if defined(_DEBUG) || defined(DEBUG) + color32 alt_block_pixels[astc_helpers::MAX_BLOCK_PIXELS]; + if (!astc_helpers::decode_block(log_blk, alt_block_pixels, state.m_src_block_width, state.m_src_block_height, state.m_used_srgb_astc_decode_mode ? astc_helpers::cDecodeModeSRGB8 : astc_helpers::cDecodeModeLDR8)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: astc_helpers::decode_block() failed\n"); + return false; + } + + for (uint32_t i = 0; i < state.m_src_block_width * state.m_src_block_height; i++) + { + assert(block_pixels[i][0] == alt_block_pixels[i][0]); + assert(block_pixels[i][1] == alt_block_pixels[i][1]); + assert(block_pixels[i][2] == alt_block_pixels[i][2]); + assert(block_pixels[i][3] == alt_block_pixels[i][3]); + } +#endif + + // TODO: For uncompressed outputs, we could write directly to the output buffer, skipping buffering. + const uint32_t buffered_src_block_row_y = (by % state.m_num_src_block_rows_to_buffer); + + color32* pSrc_pixels = block_pixels; + color32* pDst_pixels = &(*state.m_pBuffered_rows)(bx * state.m_src_block_width, buffered_src_block_row_y * state.m_src_block_height); + + for (uint32_t y = 0; y < state.m_src_block_height; y++) + { + memcpy(pDst_pixels, pSrc_pixels, state.m_src_block_width * sizeof(color32)); + + pSrc_pixels += state.m_src_block_width; + pDst_pixels += state.m_pBuffered_rows->get_width(); + } // y + + // Last block on this src row? If not, exit. 
+ if (bx != (state.m_src_num_blocks_x - 1)) + return true; + + // We've written the final src block for this ASTC src row. + // See if we have enough source rows to create full 4x4 destination blocks. + const bool final_src_block_row = (by == (state.m_src_num_blocks_y - 1)); + + if ( (buffered_src_block_row_y != (state.m_num_src_block_rows_to_buffer - 1)) && (!final_src_block_row) ) + return true; + + // src/destination image Y coordinate of the top of the buffered rows + const uint32_t buffered_src_pixel_y = ((by / state.m_num_src_block_rows_to_buffer) * state.m_num_src_block_rows_to_buffer) * state.m_src_block_height; + assert((buffered_src_pixel_y & 3) == 0); + + // The total # of valid src block rows we can read. + const uint32_t num_buffered_src_block_rows = buffered_src_block_row_y + 1; + + assert((num_buffered_src_block_rows == state.m_num_src_block_rows_to_buffer) || (final_src_block_row)); + + // The maximum number of valid buffer scanlines we can fetch from, taking into account the original texture's actual (unpadded) height. + const uint32_t override_buffer_height = basisu::minimum(state.m_orig_height - buffered_src_pixel_y, num_buffered_src_block_rows * state.m_src_block_height); + assert(override_buffer_height); + + // total_dst_block_rows_to_emit=really an upper bound for the final row of src ASTC blocks + const uint32_t total_dst_block_rows_to_emit = (num_buffered_src_block_rows * state.m_src_block_height + 3) >> 2; + + for (uint32_t dst_ofs_by = 0; dst_ofs_by < total_dst_block_rows_to_emit; dst_ofs_by++) + { + const uint32_t dst_by = (buffered_src_pixel_y >> 2) + dst_ofs_by; + if (dst_by >= state.m_dst_num_blocks_y) + break; + + for (uint32_t dst_bx = 0; dst_bx < state.m_dst_num_blocks_x; dst_bx++) + { + // Extract the 4x4 block pixels from our buffered rows, taking into account the actual # of valid scanlines inside the buffer. 
+ state.m_pBuffered_rows->extract_block_clamped(block_pixels, dst_bx * 4, dst_ofs_by * 4, 4, 4, override_buffer_height); + + uint8_t* pDst_block_u8 = (uint8_t*)state.m_pDst_blocks + (dst_by * state.m_output_row_pitch_in_blocks_or_pixels + dst_bx) * state.m_output_block_or_pixel_stride_in_bytes; + + transcode_4x4_block( + state.m_fmt, + dst_bx, dst_by, + state.m_pDst_blocks, pDst_block_u8, + block_pixels, + state.m_output_block_or_pixel_stride_in_bytes, state.m_output_row_pitch_in_blocks_or_pixels, state.m_output_rows_in_pixels, + state.m_channel0, state.m_channel1, + state.m_high_quality, state.m_from_alpha, + state.m_bc7f_flags, + *state.m_pEtc1_pack_state, + state.m_has_alpha ? 1 : 0); + + } // dst_bx + + } // dst_ofs_by + + return true; + }; + + xuastc_decoded_image decoded_image; + + const bool decomp_flag = decoded_image.decode(pImage_data, image_data_size, init_func, &dec_state, src_block_func, &dec_state); + if (!decomp_flag) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: astc_ldr_t::decompress_image() failed\n"); + return false; + } + } + + } // if (basis_tex_format_is_astc_ldr(src_format)) + + return true; +#else + assert(0); + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_slice: XUASTC support disabled\n"); + return false; +#endif // BASISD_SUPPORT_XUASTC +} + +// Container independent transcoding +bool basisu_lowlevel_xuastc_ldr_transcoder::transcode_image( + basis_tex_format src_format, bool use_astc_srgb_decode_profile, + transcoder_texture_format target_format, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + const uint8_t* pCompressed_data, uint32_t compressed_data_length, + uint32_t src_num_blocks_x, uint32_t src_num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, + uint64_t slice_offset, uint32_t slice_length, + uint32_t decode_flags, + bool has_alpha, + bool is_video, + uint32_t output_row_pitch_in_blocks_or_pixels, + 
basisu_transcoder_state* pState, + uint32_t output_rows_in_pixels, + int channel0, int channel1) +{ + BASISU_NOTE_UNUSED(is_video); + BASISU_NOTE_UNUSED(level_index); + +#if BASISD_SUPPORT_XUASTC + + if (((uint64_t)slice_offset + slice_length) > (uint64_t)compressed_data_length) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_image: source data buffer too small\n"); + return false; + } + + if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGB) || (target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA)) + { + if ((!basisu::is_pow2(orig_width)) || (!basisu::is_pow2(orig_height))) + { + // PVRTC1 only supports power of 2 dimensions + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC1 only supports power of 2 dimensions\n"); + return false; + } + } + + const bool transcode_alpha_data_to_opaque_formats = (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0; + const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format); + + if (!basis_validate_output_buffer_size(target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_image: output buffer size too small\n"); + return false; + } + + bool status = false; + + switch (target_format) + { + case transcoder_texture_format::cTFETC1_RGB: + { + status = transcode_slice(src_format, use_astc_srgb_decode_profile, pOutput_blocks, src_num_blocks_x, src_num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC1, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); + + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_image: transcode_slice() to ETC1 failed\n"); + } + break; + } + 
case transcoder_texture_format::cTFETC2_RGBA: + { + status = transcode_slice(src_format, use_astc_srgb_decode_profile, pOutput_blocks, src_num_blocks_x, src_num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC2_RGBA, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_image: transcode_slice() to ETC2 failed\n"); + } + break; + } + case transcoder_texture_format::cTFBC1_RGB: + { + // TODO: ETC1S allows BC1 from alpha channel. That doesn't seem actually useful, though. + status = transcode_slice(src_format, use_astc_srgb_decode_profile, pOutput_blocks, src_num_blocks_x, src_num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC1, + bytes_per_block_or_pixel, true, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_image: transcode_slice() to BC1 failed\n"); + } + break; + } + case transcoder_texture_format::cTFBC3_RGBA: + { + status = transcode_slice(src_format, use_astc_srgb_decode_profile, pOutput_blocks, src_num_blocks_x, src_num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC3, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_image: transcode_slice() to BC3 failed\n"); + } + break; + } + case transcoder_texture_format::cTFBC4_R: + { + status = transcode_slice(src_format, use_astc_srgb_decode_profile, pOutput_blocks, src_num_blocks_x, src_num_blocks_y, pCompressed_data + slice_offset, 
slice_length, block_format::cBC4, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, + ((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 3 : 0, -1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_image: transcode_slice() to BC4 failed\n"); + } + break; + } + case transcoder_texture_format::cTFBC5_RG: + { + status = transcode_slice(src_format, use_astc_srgb_decode_profile, pOutput_blocks, src_num_blocks_x, src_num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC5, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, + 0, 3, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_image: transcode_slice() to BC5 failed\n"); + } + break; + } + case transcoder_texture_format::cTFBC7_RGBA: + case transcoder_texture_format::cTFBC7_ALT: + { + status = transcode_slice(src_format, use_astc_srgb_decode_profile, pOutput_blocks, src_num_blocks_x, src_num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC7, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_image: transcode_slice() to BC7 failed\n"); + } + break; + } + case transcoder_texture_format::cTFPVRTC1_4_RGB: + { + status = transcode_slice(src_format, use_astc_srgb_decode_profile, pOutput_blocks, src_num_blocks_x, src_num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cPVRTC1_4_RGB, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); + + if 
(!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_image: transcode_slice() to PVRTC1_RGB failed\n"); + } + break; + } + case transcoder_texture_format::cTFPVRTC1_4_RGBA: + { + status = transcode_slice(src_format, use_astc_srgb_decode_profile, pOutput_blocks, src_num_blocks_x, src_num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cPVRTC1_4_RGBA, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); + + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_image: transcode_slice() to PVRTC1_RGBA failed\n"); + } + break; + } + case transcoder_texture_format::cTFASTC_LDR_4x4_RGBA: + case transcoder_texture_format::cTFASTC_LDR_5x4_RGBA: + case transcoder_texture_format::cTFASTC_LDR_5x5_RGBA: + case transcoder_texture_format::cTFASTC_LDR_6x5_RGBA: + case transcoder_texture_format::cTFASTC_LDR_6x6_RGBA: + case transcoder_texture_format::cTFASTC_LDR_8x5_RGBA: + case transcoder_texture_format::cTFASTC_LDR_8x6_RGBA: + case transcoder_texture_format::cTFASTC_LDR_10x5_RGBA: + case transcoder_texture_format::cTFASTC_LDR_10x6_RGBA: + case transcoder_texture_format::cTFASTC_LDR_8x8_RGBA: + case transcoder_texture_format::cTFASTC_LDR_10x8_RGBA: + case transcoder_texture_format::cTFASTC_LDR_10x10_RGBA: + case transcoder_texture_format::cTFASTC_LDR_12x10_RGBA: + case transcoder_texture_format::cTFASTC_LDR_12x12_RGBA: + { + status = transcode_slice(src_format, use_astc_srgb_decode_profile, pOutput_blocks, src_num_blocks_x, src_num_blocks_y, pCompressed_data + slice_offset, slice_length, xuastc_get_block_format(target_format), + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags); + if (!status) + { + 
BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_image: transcode_slice() to ASTC 4x4 failed\n"); + } + break; + } + case transcoder_texture_format::cTFATC_RGB: + case transcoder_texture_format::cTFATC_RGBA: + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_image: UASTC LDR 4x4->ATC currently unsupported\n"); + return false; + } + case transcoder_texture_format::cTFFXT1_RGB: + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_image: UASTC LDR 4x4->FXT1 currently unsupported\n"); + return false; + } + case transcoder_texture_format::cTFPVRTC2_4_RGB: + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_image: UASTC LDR 4x4->PVRTC2 currently unsupported\n"); + return false; + } + case transcoder_texture_format::cTFPVRTC2_4_RGBA: + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_image: UASTC LDR 4x4->PVRTC2 currently unsupported\n"); + return false; + } + case transcoder_texture_format::cTFETC2_EAC_R11: + { + status = transcode_slice(src_format, use_astc_srgb_decode_profile, pOutput_blocks, src_num_blocks_x, src_num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC2_EAC_R11, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, + ((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 
3 : 0, -1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_image: transcode_slice() to EAC R11 failed\n"); + } + break; + } + case transcoder_texture_format::cTFETC2_EAC_RG11: + { + status = transcode_slice(src_format, use_astc_srgb_decode_profile, pOutput_blocks, src_num_blocks_x, src_num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC2_EAC_RG11, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, + 0, 3, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_image: transcode_slice() to EAC RG11 failed\n"); + } + break; + } + case transcoder_texture_format::cTFRGBA32: + { + status = transcode_slice(src_format, use_astc_srgb_decode_profile, pOutput_blocks, src_num_blocks_x, src_num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA32, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_image: transcode_slice() to RGBA32 failed\n"); + } + break; + } + case transcoder_texture_format::cTFRGB565: + { + status = transcode_slice(src_format, use_astc_srgb_decode_profile, pOutput_blocks, src_num_blocks_x, src_num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB565, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_image: transcode_slice() to RGB565 failed\n"); + } + break; + } + case transcoder_texture_format::cTFBGR565: + { + status = transcode_slice(src_format, use_astc_srgb_decode_profile, 
pOutput_blocks, src_num_blocks_x, src_num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBGR565, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_image: transcode_slice() to RGB565 failed\n"); + } + break; + } + case transcoder_texture_format::cTFRGBA4444: + { + status = transcode_slice(src_format, use_astc_srgb_decode_profile, pOutput_blocks, src_num_blocks_x, src_num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA4444, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_image: transcode_slice() to RGBA4444 failed\n"); + } + break; + } + default: + { + assert(0); + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_image: Invalid format\n"); + break; + } + } + + return status; +#else + assert(0); + BASISU_DEVEL_ERROR("basisu_lowlevel_xuastc_ldr_transcoder::transcode_image: XUASTC support disabled\n"); + return false; +#endif // BASISD_SUPPORT_XUASTC +} + +} // namespace basist + diff --git a/external/basis_universal/transcoder/basisu_transcoder.h b/external/basis_universal/transcoder/basisu_transcoder.h index dc9329c342..df0e9cd3e8 100644 --- a/external/basis_universal/transcoder/basisu_transcoder.h +++ b/external/basis_universal/transcoder/basisu_transcoder.h @@ -1,5 +1,5 @@ // basisu_transcoder.h -// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2026 Binomial LLC. All Rights Reserved. 
// Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,6 +13,8 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. +// +// Also see basis_tex_format in basisu_file_headers.h (TODO: Perhaps move key definitions into here.) #pragma once // By default KTX2 support is enabled to simplify compilation. This implies the need for the Zstandard library (which we distribute as a single source file in the "zstd" directory) by default. @@ -22,17 +24,11 @@ #define BASISD_SUPPORT_KTX2 1 #endif -// Set BASISD_SUPPORT_KTX2_ZSTD to 0 to disable Zstd usage and KTX2 UASTC Zstd supercompression support +// Set BASISD_SUPPORT_KTX2_ZSTD to 0 to disable Zstd usage and KTX2 UASTC Zstd supercompression support #ifndef BASISD_SUPPORT_KTX2_ZSTD #define BASISD_SUPPORT_KTX2_ZSTD 1 #endif -// Set BASISU_FORCE_DEVEL_MESSAGES to 1 to enable debug printf()'s whenever an error occurs, for easier debugging during development. -#ifndef BASISU_FORCE_DEVEL_MESSAGES - // TODO - disable before checking in - #define BASISU_FORCE_DEVEL_MESSAGES 0 -#endif - #include "basisu_transcoder_internal.h" #include "basisu_transcoder_uastc.h" #include "basisu_file_headers.h" @@ -42,7 +38,7 @@ namespace basist // High-level composite texture formats supported by the transcoder. // Each of these texture formats directly correspond to OpenGL/D3D/Vulkan etc. texture formats. // Notes: - // - If you specify a texture format that supports alpha, but the .basis file doesn't have alpha, the transcoder will automatically output a + // - If you specify a texture format that supports alpha, but the .basis file doesn't have alpha, the transcoder will automatically output a // fully opaque (255) alpha channel. 
// - The PVRTC1 texture formats only support power of 2 dimension .basis files, but this may be relaxed in a future version. // - The PVRTC1 transcoders are real-time encoders, so don't expect the highest quality. We may add a slower encoder with improved quality. @@ -66,13 +62,13 @@ namespace basist cTFPVRTC1_4_RGB = 8, // Opaque only, RGB or alpha if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified, nearly lowest quality of any texture format. cTFPVRTC1_4_RGBA = 9, // Opaque+alpha, most useful for simple opacity maps. If .basis file doesn't have alpha cTFPVRTC1_4_RGB will be used instead. Lowest quality of any supported texture format. - // ASTC (mobile, Intel devices, hopefully all desktop GPU's one day) - cTFASTC_4x4_RGBA = 10, // LDR. Opaque+alpha, ASTC 4x4, alpha channel will be opaque for opaque .basis files. + // ASTC (mobile, some Intel CPU's, hopefully all desktop GPU's one day) + cTFASTC_LDR_4x4_RGBA = 10, // LDR. Opaque+alpha, ASTC 4x4, alpha channel will be opaque for opaque .basis files. // LDR: Transcoder uses RGB/RGBA/L/LA modes, void extent, and up to two ([0,47] and [0,255]) endpoint precisions. // ATC (mobile, Adreno devices, this is a niche format) cTFATC_RGB = 11, // Opaque, RGB or alpha if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified. ATI ATC (GL_ATC_RGB_AMD) - cTFATC_RGBA = 12, // Opaque+alpha, alpha channel will be opaque for opaque .basis files. ATI ATC (GL_ATC_RGBA_INTERPOLATED_ALPHA_AMD) + cTFATC_RGBA = 12, // Opaque+alpha, alpha channel will be opaque for opaque .basis files. ATI ATC (GL_ATC_RGBA_INTERPOLATED_ALPHA_AMD) // FXT1 (desktop, Intel devices, this is a super obscure format) cTFFXT1_RGB = 17, // Opaque only, uses exclusively CC_MIXED blocks. Notable for having a 8x4 block size. GL_3DFX_texture_compression_FXT1 is supported on Intel integrated GPU's (such as HD 630). 
@@ -94,7 +90,7 @@ namespace basist cTFRGB565 = 14, // 16bpp RGB image stored in raster (not block) order in memory, R at bit position 11 cTFBGR565 = 15, // 16bpp RGB image stored in raster (not block) order in memory, R at bit position 0 cTFRGBA4444 = 16, // 16bpp RGBA image stored in raster (not block) order in memory, R at bit position 12, A at bit position 0 - + // Note these uncompressed formats (HALF and 9E5) can only be transcoded to from HDR input files (UASTC HDR 4x4 or ASTC HDR 6x6). cTFRGB_HALF = 24, // 48bpp RGB half (16-bits/component, 3 components) cTFRGBA_HALF = 25, // 64bpp RGBA half (16-bits/component, 4 components) (A will always currently 1.0, UASTC_HDR doesn't support alpha) @@ -102,7 +98,23 @@ namespace basist cTFASTC_HDR_6x6_RGBA = 27, // HDR, RGBA (currently our ASTC HDR 6x6 encodes are only RGB), unsigned - cTFTotalTextureFormats = 28, + + // The remaining LDR ASTC block sizes, excluding 4x4 (which is above). There are 14 total valid ASTC LDR/HDR block sizes. + cTFASTC_LDR_5x4_RGBA = 28, + cTFASTC_LDR_5x5_RGBA = 29, + cTFASTC_LDR_6x5_RGBA = 30, + cTFASTC_LDR_6x6_RGBA = 31, + cTFASTC_LDR_8x5_RGBA = 32, + cTFASTC_LDR_8x6_RGBA = 33, + cTFASTC_LDR_10x5_RGBA = 34, + cTFASTC_LDR_10x6_RGBA = 35, + cTFASTC_LDR_8x8_RGBA = 36, + cTFASTC_LDR_10x8_RGBA = 37, + cTFASTC_LDR_10x10_RGBA = 38, + cTFASTC_LDR_12x10_RGBA = 39, + cTFASTC_LDR_12x12_RGBA = 40, + + cTFTotalTextureFormats = 41, // ----- The following are old/legacy enums for compatibility with code compiled against previous versions cTFETC1 = cTFETC1_RGB, @@ -112,25 +124,30 @@ namespace basist cTFBC4 = cTFBC4_R, cTFBC5 = cTFBC5_RG, - // Previously, the caller had some control over which BC7 mode the transcoder output. We've simplified this due to UASTC, which supports numerous modes. + // Previously, the caller had some control over which BC7 mode the transcoder output. We've simplified this due to UASTC LDR 4x4, which supports numerous modes. 
cTFBC7_M6_RGB = cTFBC7_RGBA, // Opaque only, RGB or alpha if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified. Highest quality of all the non-ETC1 formats. cTFBC7_M5_RGBA = cTFBC7_RGBA, // Opaque+alpha, alpha channel will be opaque for opaque .basis files cTFBC7_M6_OPAQUE_ONLY = cTFBC7_RGBA, cTFBC7_M5 = cTFBC7_RGBA, cTFBC7_ALT = 7, - cTFASTC_4x4 = cTFASTC_4x4_RGBA, + cTFASTC_4x4 = cTFASTC_LDR_4x4_RGBA, cTFATC_RGBA_INTERPOLATED_ALPHA = cTFATC_RGBA, + + cTFASTC_4x4_RGBA = cTFASTC_LDR_4x4_RGBA }; // For compressed texture formats, this returns the # of bytes per block. For uncompressed, it returns the # of bytes per pixel. // NOTE: Previously, this function was called basis_get_bytes_per_block(), and it always returned 16*bytes_per_pixel for uncompressed formats which was confusing. uint32_t basis_get_bytes_per_block_or_pixel(transcoder_texture_format fmt); - // Returns format's name in ASCII + // Returns the transcoder texture format's name in ASCII const char* basis_get_format_name(transcoder_texture_format fmt); + // Returns basis texture format name in ASCII + const char* basis_get_tex_format_name(basis_tex_format fmt); + // Returns block format name in ASCII const char* basis_get_block_format_name(block_format fmt); @@ -143,6 +160,9 @@ namespace basist // Returns true if the format is LDR. inline bool basis_transcoder_format_is_ldr(transcoder_texture_format fmt) { return !basis_transcoder_format_is_hdr(fmt); } + // Returns true if the format is an LDR or HDR ASTC format. + bool basis_is_transcoder_texture_format_astc(transcoder_texture_format fmt); + // Returns the basisu::texture_format corresponding to the specified transcoder_texture_format. basisu::texture_format basis_get_basisu_texture_format(transcoder_texture_format fmt); @@ -156,23 +176,32 @@ namespace basist uint32_t basis_get_uncompressed_bytes_per_pixel(transcoder_texture_format fmt); // Returns the block width for the specified texture format, which is currently either 4 or 8 for FXT1. 
- uint32_t basis_get_block_width(transcoder_texture_format tex_type); + uint32_t basis_get_block_width(transcoder_texture_format fmt); // Returns the block height for the specified texture format, which is currently always 4. - uint32_t basis_get_block_height(transcoder_texture_format tex_type); - - // Returns true if the specified format was enabled at compile time, and is supported for the specific basis/ktx2 texture format (ETC1S, UASTC, or UASTC HDR). + uint32_t basis_get_block_height(transcoder_texture_format fmt); + + // ASTC/XUASTC LDR formats only: Given a basis_tex_format (mode or codec), return the corresponding ASTC basisu::texture_format with the proper block size from 4x4-12x12. + basisu::texture_format basis_get_texture_format_from_xuastc_or_astc_ldr_basis_tex_format(basis_tex_format fmt); + + // For any given basis_tex_format (mode or codec), return the LDR/HDR ASTC transcoder texture format with the proper block size. + transcoder_texture_format basis_get_transcoder_texture_format_from_basis_tex_format(basis_tex_format fmt); + // basis_get_transcoder_texture_format_from_xuastc_or_astc_ldr_basis_tex_format: same as basis_get_transcoder_texture_format_from_basis_tex_format (TODO: remove) + transcoder_texture_format basis_get_transcoder_texture_format_from_xuastc_or_astc_ldr_basis_tex_format(basis_tex_format fmt); + + // Returns true if the specified format was enabled at compile time, and is supported for the specific basis/ktx2 texture format (ETC1S, UASTC, or UASTC HDR, or XUASTC LDR 4x4-12x12). + // For XUASTC the ASTC block size must match the transcoder_texture_format's ASTC block size. bool basis_is_format_supported(transcoder_texture_format tex_type, basis_tex_format fmt = basis_tex_format::cETC1S); // Returns the block width/height for the specified basis texture file format. 
uint32_t basis_tex_format_get_block_width(basis_tex_format fmt); uint32_t basis_tex_format_get_block_height(basis_tex_format fmt); - + bool basis_tex_format_is_hdr(basis_tex_format fmt); inline bool basis_tex_format_is_ldr(basis_tex_format fmt) { return !basis_tex_format_is_hdr(fmt); } - + // Validates that the output buffer is large enough to hold the entire transcoded texture. - // For uncompressed texture formats, most input parameters are in pixels, not blocks. Blocks are 4x4 pixels. + // For uncompressed texture formats, most input parameters are in pixels, not blocks. bool basis_validate_output_buffer_size(transcoder_texture_format target_format, uint32_t output_blocks_buf_size_in_blocks_or_pixels, uint32_t orig_width, uint32_t orig_height, @@ -199,7 +228,7 @@ namespace basist basisu::vector m_block_endpoint_preds[2]; enum { cMaxPrevFrameLevels = 16 }; - basisu::vector m_prev_frame_indices[2][cMaxPrevFrameLevels]; // [alpha_flag][level_index] + basisu::vector m_prev_frame_indices[2][cMaxPrevFrameLevels]; // [alpha_flag][level_index] void clear() { @@ -214,7 +243,46 @@ namespace basist }; // Low-level helper classes that do the actual transcoding. + enum basisu_decode_flags + { + // PVRTC1: decode non-pow2 ETC1S texture level to the next larger power of 2 (not implemented yet, but we're going to support it). Ignored if the slice's dimensions are already a power of 2. + cDecodeFlagsPVRTCDecodeToNextPow2 = 2, + + // When decoding to an opaque texture format, if the basis file has alpha, decode the alpha slice instead of the color slice to the output texture format. + // This is primarily to allow decoding of textures with alpha to multiple ETC1 textures (one for color, another for alpha). + cDecodeFlagsTranscodeAlphaDataToOpaqueFormats = 4, + + // Forbid usage of BC1 3 color blocks (we don't support BC1 punchthrough alpha yet). + // This flag is used internally when decoding to BC3. 
+ cDecodeFlagsBC1ForbidThreeColorBlocks = 8, + // The output buffer contains alpha endpoint/selector indices. + // Used internally when decoding formats like ASTC that require both color and alpha data to be available when transcoding to the output format. + cDecodeFlagsOutputHasAlphaIndices = 16, + + // Enable slower, but higher quality transcoding for some formats. + // For ASTC/XUASTC->BC7, this enables partially analytical encoding vs. fully analytical. + cDecodeFlagsHighQuality = 32, + + // Disable ETC1S->BC7 adaptive chroma filtering, for much faster transcoding to BC7. + cDecodeFlagsNoETC1SChromaFiltering = 64, + + // Disable deblock filtering for XUASTC LDR transcoding to non-ASTC formats. + // For ASTC 8x6 or smaller block sizes, deblocking is always disabled unless you force it on using cDecodeFlagsForceDeblockFiltering. + cDecodeFlagsNoDeblockFiltering = 128, + + // More aggressive deblock filtering (only used when it's enabled) + cDecodeFlagsStrongerDeblockFiltering = 256, + + // Always apply deblocking, even for smaller ASTC block sizes (4x4-8x6). + cDecodeFlagsForceDeblockFiltering = 512, + + // By default XUASTC LDR 4x4, 6x6 and 8x6 are directly transcoded to BC7 without always requiring a full ASTC block unpack and analytical BC7 encode. This is 1.4x up to 3x faster in WASM. + // This trades off some quality. The largest transcoding speed gain is achieved when the source XUASTC data isn't dual plane and only uses 1 subset. Otherwise the actual perf. gain is variable. + // To disable this optimization for all XUASTC block sizes and always use the fallback encoder, specify cDecodeFlagXUASTCLDRDisableFastBC7Transcoding. 
+ cDecodeFlagXUASTCLDRDisableFastBC7Transcoding = 1024 + }; + // ETC1S class basisu_lowlevel_etc1s_transcoder { @@ -279,69 +347,118 @@ namespace basist typedef basisu::vector selector_vec; const selector_vec& get_selectors() const { return m_local_selectors; } - + private: const basisu_lowlevel_etc1s_transcoder* m_pGlobal_codebook; endpoint_vec m_local_endpoints; selector_vec m_local_selectors; - + huffman_decoding_table m_endpoint_pred_model, m_delta_endpoint_model, m_selector_model, m_selector_history_buf_rle_model; uint32_t m_selector_history_buf_size; basisu_transcoder_state m_def_state; }; - - enum basisu_decode_flags + + // UASTC LDR 4x4 + class basisu_lowlevel_uastc_ldr_4x4_transcoder { - // PVRTC1: decode non-pow2 ETC1S texture level to the next larger power of 2 (not implemented yet, but we're going to support it). Ignored if the slice's dimensions are already a power of 2. - cDecodeFlagsPVRTCDecodeToNextPow2 = 2, + friend class basisu_transcoder; - // When decoding to an opaque texture format, if the basis file has alpha, decode the alpha slice instead of the color slice to the output texture format. - // This is primarily to allow decoding of textures with alpha to multiple ETC1 textures (one for color, another for alpha). - cDecodeFlagsTranscodeAlphaDataToOpaqueFormats = 4, + public: + basisu_lowlevel_uastc_ldr_4x4_transcoder(); - // Forbid usage of BC1 3 color blocks (we don't support BC1 punchthrough alpha yet). - // This flag is used internally when decoding to BC3. 
- cDecodeFlagsBC1ForbidThreeColorBlocks = 8, + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0); - // The output buffer contains alpha endpoint/selector indices. - // Used internally when decoding formats like ASTC that require both color and alpha data to be available when transcoding to the output format. - cDecodeFlagsOutputHasAlphaIndices = 16, + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const basis_file_header& header, const basis_slice_desc& slice_desc, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0) + { + return transcode_slice(pDst_blocks, num_blocks_x, num_blocks_y, pImage_data, image_data_size, fmt, + output_block_or_pixel_stride_in_bytes, bc1_allow_threecolor_blocks, (header.m_flags & cBASISHeaderFlagHasAlphaSlices) != 0, slice_desc.m_orig_width, slice_desc.m_orig_height, output_row_pitch_in_blocks_or_pixels, + pState, output_rows_in_pixels, channel0, channel1, decode_flags); + } - cDecodeFlagsHighQuality = 32, + // Container independent transcoding + bool transcode_image( + transcoder_texture_format target_format, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + const uint8_t* pCompressed_data, uint32_t compressed_data_length, + 
uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, + uint64_t slice_offset, uint32_t slice_length, + uint32_t decode_flags = 0, + bool has_alpha = false, + bool is_video = false, + uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, + uint32_t output_rows_in_pixels = 0, + int channel0 = -1, int channel1 = -1); + }; - cDecodeFlagsNoETC1SChromaFiltering = 64 +#if BASISD_SUPPORT_XUASTC + // XUASTC LDR 4x4-12x12 or ASTC LDR 4x4-12x12 + struct xuastc_decoded_image + { + uint32_t m_actual_block_width = 0, m_actual_block_height = 0, m_actual_width = 0, m_actual_height = 0; + bool m_actual_has_alpha = false, m_uses_srgb_astc_decode_mode = false; + + bool decode(const uint8_t* pImage_data, uint32_t image_data_size, + astc_ldr_t::xuastc_decomp_image_init_callback_ptr pInit_callback, void* pInit_callback_data, + astc_ldr_t::xuastc_decomp_image_block_callback_ptr pBlock_callback, void* pBlock_callback_data) + { + const bool decomp_flag = astc_ldr_t::xuastc_ldr_decompress_image(pImage_data, image_data_size, + m_actual_block_width, m_actual_block_height, + m_actual_width, m_actual_height, + m_actual_has_alpha, m_uses_srgb_astc_decode_mode, basisu::g_debug_printf, + pInit_callback, pInit_callback_data, + pBlock_callback, pBlock_callback_data); + + return decomp_flag; + } + + void clear() + { + m_actual_block_width = 0; + m_actual_block_height = 0; + m_actual_width = 0; + m_actual_height = 0; + m_actual_has_alpha = false; + m_uses_srgb_astc_decode_mode = false; + } }; +#endif - // UASTC LDR 4x4 - class basisu_lowlevel_uastc_ldr_4x4_transcoder + // This is both ASTC LDR 4x4-12x12 and XUASTC LDR 4x4-12x12. 
+ class basisu_lowlevel_xuastc_ldr_transcoder { friend class basisu_transcoder; public: - basisu_lowlevel_uastc_ldr_4x4_transcoder(); + basisu_lowlevel_xuastc_ldr_transcoder(); - bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + bool transcode_slice(basis_tex_format src_format, bool use_astc_srgb_decode_profile, void* pDst_blocks, uint32_t src_num_blocks_x, uint32_t src_num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0, basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0); - bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + bool transcode_slice(basis_tex_format src_format, bool use_astc_srgb_decode_profile, void* pDst_blocks, uint32_t src_num_blocks_x, uint32_t src_num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const basis_file_header& header, const basis_slice_desc& slice_desc, uint32_t output_row_pitch_in_blocks_or_pixels = 0, basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0) { - return transcode_slice(pDst_blocks, num_blocks_x, num_blocks_y, pImage_data, image_data_size, fmt, + return transcode_slice(src_format, use_astc_srgb_decode_profile, pDst_blocks, src_num_blocks_x, src_num_blocks_y, pImage_data, image_data_size, fmt, output_block_or_pixel_stride_in_bytes, bc1_allow_threecolor_blocks, (header.m_flags & 
cBASISHeaderFlagHasAlphaSlices) != 0, slice_desc.m_orig_width, slice_desc.m_orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags); } // Container independent transcoding bool transcode_image( + basis_tex_format src_format, bool use_astc_srgb_decode_profile, transcoder_texture_format target_format, void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, const uint8_t* pCompressed_data, uint32_t compressed_data_length, - uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, + uint32_t src_num_blocks_x, uint32_t src_num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, uint64_t slice_offset, uint32_t slice_length, uint32_t decode_flags = 0, bool has_alpha = false, @@ -426,13 +543,13 @@ namespace basist int channel0 = -1, int channel1 = -1); }; - // ASTC HDR 6x6 intermediate - class basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder + // UASTC HDR 6x6 intermediate + class basisu_lowlevel_uastc_hdr_6x6_intermediate_transcoder { friend class basisu_transcoder; public: - basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder(); + basisu_lowlevel_uastc_hdr_6x6_intermediate_transcoder(); bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0, @@ -465,9 +582,11 @@ namespace basist struct basisu_slice_info { + // The image's ACTUAL dimensions in texels. uint32_t m_orig_width; uint32_t m_orig_height; + // The texture's dimensions in texels - always a multiple of the texture's underlying block size (4x4-12x12). 
uint32_t m_width; uint32_t m_height; @@ -497,9 +616,11 @@ namespace basist uint32_t m_image_index; uint32_t m_total_levels; + // The image's ACTUAL dimensions in texels. uint32_t m_orig_width; uint32_t m_orig_height; - + + // The texture's dimensions in texels - always a multiple of the texture's underlying block size (4x4-12x12). uint32_t m_width; uint32_t m_height; @@ -583,12 +704,13 @@ namespace basist uint32_t m_block_height; bool m_y_flipped; // true if the image was Y flipped + bool m_srgb; // true if the image is sRGB, false if linear bool m_etc1s; // true if the file is ETC1S bool m_has_alpha_slices; // true if the texture has alpha slices (for ETC1S: even slices RGB, odd slices alpha) }; // High-level transcoder class which accepts .basis file data and allows the caller to query information about the file and transcode image levels to various texture formats. - // If you're just starting out this is the class you care about. + // If you're just starting out this is the class you care about (or see the KTX2 transcoder below). class basisu_transcoder { basisu_transcoder(basisu_transcoder&); @@ -639,11 +761,11 @@ namespace basist // transcode_image_level() decodes a single mipmap level from the .basis file to any of the supported output texture formats. // It'll first find the slice(s) to transcode, then call transcode_slice() one or two times to decode both the color and alpha texture data (or RG texture data from two slices for BC5). // If the .basis file doesn't have alpha slices, the output alpha blocks will be set to fully opaque (all 255's). - // Currently, to decode to PVRTC1 the basis texture's dimensions in pixels must be a power of 2, due to PVRTC1 format requirements. + // Currently, to decode to PVRTC1 the basis texture's dimensions in pixels must be a power of 2, due to PVRTC1 format requirements. 
// output_blocks_buf_size_in_blocks_or_pixels should be at least the image level's total_blocks (num_blocks_x * num_blocks_y), or the total number of output pixels if fmt==cTFRGBA32 etc. // output_row_pitch_in_blocks_or_pixels: Number of blocks or pixels per row. If 0, the transcoder uses the slice's num_blocks_x or orig_width (NOT num_blocks_x * 4). Ignored for PVRTC1 (due to texture swizzling). // output_rows_in_pixels: Ignored unless fmt is uncompressed (cRGBA32, etc.). The total number of output rows in the output buffer. If 0, the transcoder assumes the slice's orig_height (NOT num_blocks_y * 4). - // Notes: + // Notes: // - basisu_transcoder_init() must have been called first to initialize the transcoder lookup tables before calling this function. // - This method assumes the output texture buffer is readable. In some cases to handle alpha, the transcoder will write temporary data to the output texture in // a first pass, which will be read in a second pass. @@ -682,15 +804,16 @@ namespace basist const basisu_lowlevel_etc1s_transcoder& get_lowlevel_etc1s_decoder() const { return m_lowlevel_etc1s_decoder; } basisu_lowlevel_etc1s_transcoder& get_lowlevel_etc1s_decoder() { return m_lowlevel_etc1s_decoder; } - const basisu_lowlevel_uastc_ldr_4x4_transcoder& get_lowlevel_uastc_decoder() const { return m_lowlevel_uastc_decoder; } - basisu_lowlevel_uastc_ldr_4x4_transcoder& get_lowlevel_uastc_decoder() { return m_lowlevel_uastc_decoder; } + const basisu_lowlevel_uastc_ldr_4x4_transcoder& get_lowlevel_uastc_decoder() const { return m_lowlevel_uastc_ldr_4x4_decoder; } + basisu_lowlevel_uastc_ldr_4x4_transcoder& get_lowlevel_uastc_decoder() { return m_lowlevel_uastc_ldr_4x4_decoder; } private: mutable basisu_lowlevel_etc1s_transcoder m_lowlevel_etc1s_decoder; - mutable basisu_lowlevel_uastc_ldr_4x4_transcoder m_lowlevel_uastc_decoder; + mutable basisu_lowlevel_uastc_ldr_4x4_transcoder m_lowlevel_uastc_ldr_4x4_decoder; + mutable basisu_lowlevel_xuastc_ldr_transcoder 
m_lowlevel_xuastc_ldr_decoder; mutable basisu_lowlevel_uastc_hdr_4x4_transcoder m_lowlevel_uastc_4x4_hdr_decoder; mutable basisu_lowlevel_astc_hdr_6x6_transcoder m_lowlevel_astc_6x6_hdr_decoder; - mutable basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder m_lowlevel_astc_6x6_hdr_intermediate_decoder; + mutable basisu_lowlevel_uastc_hdr_6x6_intermediate_transcoder m_lowlevel_astc_6x6_hdr_intermediate_decoder; bool m_ready_to_transcode; @@ -701,7 +824,7 @@ namespace basist // basisu_transcoder_init() MUST be called before a .basis file can be transcoded. void basisu_transcoder_init(); - + enum debug_flags_t { cDebugFlagVisCRs = 1, @@ -711,10 +834,10 @@ namespace basist uint32_t get_debug_flags(); void set_debug_flags(uint32_t f); - // ------------------------------------------------------------------------------------------------------ + // ------------------------------------------------------------------------------------------------------ // Optional .KTX2 file format support // KTX2 reading optionally requires miniz or Zstd decompressors for supercompressed UASTC files. 
- // ------------------------------------------------------------------------------------------------------ + // ------------------------------------------------------------------------------------------------------ #if BASISD_SUPPORT_KTX2 #pragma pack(push) #pragma pack(1) @@ -764,10 +887,19 @@ namespace basist basisu::packed_uint<4> m_alpha_slice_byte_length; }; - struct ktx2_astc_hdr_6x6_intermediate_image_desc + // The initial v1.6 release (for backwards compatibility only with our older .KTX2 files) + struct ktx2_slice_offset_len_desc_orig { - basisu::packed_uint<4> m_rgb_slice_byte_offset; - basisu::packed_uint<4> m_rgb_slice_byte_length; + basisu::packed_uint<4> m_slice_byte_offset; // byte offset relative to the KTX2 mipmap level + basisu::packed_uint<4> m_slice_byte_length; + }; + + // The Khronos KTX2 spec standard + struct ktx2_slice_offset_len_desc_std + { + basisu::packed_uint<4> m_slice_byte_offset; // byte offset relative to the KTX2 mipmap level + basisu::packed_uint<4> m_slice_byte_length; + basisu::packed_uint<4> m_profile; }; struct ktx2_animdata @@ -779,7 +911,7 @@ namespace basist #pragma pack(pop) const uint32_t KTX2_VK_FORMAT_UNDEFINED = 0; - + // These are standard Vulkan texture VkFormat ID's, see https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkFormat.html const uint32_t KTX2_FORMAT_ASTC_4x4_SFLOAT_BLOCK = 1000066000; const uint32_t KTX2_FORMAT_ASTC_5x4_SFLOAT_BLOCK = 1000066001; @@ -789,12 +921,28 @@ namespace basist const uint32_t KTX2_FORMAT_ASTC_8x5_SFLOAT_BLOCK = 1000066005; const uint32_t KTX2_FORMAT_ASTC_8x6_SFLOAT_BLOCK = 1000066006; + const uint32_t KTX2_FORMAT_ASTC_4x4_UNORM_BLOCK = 157, KTX2_FORMAT_ASTC_4x4_SRGB_BLOCK = 158; + const uint32_t KTX2_FORMAT_ASTC_5x4_UNORM_BLOCK = 159, KTX2_FORMAT_ASTC_5x4_SRGB_BLOCK = 160; + const uint32_t KTX2_FORMAT_ASTC_5x5_UNORM_BLOCK = 161, KTX2_FORMAT_ASTC_5x5_SRGB_BLOCK = 162; + const uint32_t KTX2_FORMAT_ASTC_6x5_UNORM_BLOCK = 163, KTX2_FORMAT_ASTC_6x5_SRGB_BLOCK = 164; + 
const uint32_t KTX2_FORMAT_ASTC_6x6_UNORM_BLOCK = 165, KTX2_FORMAT_ASTC_6x6_SRGB_BLOCK = 166; + const uint32_t KTX2_FORMAT_ASTC_8x5_UNORM_BLOCK = 167, KTX2_FORMAT_ASTC_8x5_SRGB_BLOCK = 168; + const uint32_t KTX2_FORMAT_ASTC_8x6_UNORM_BLOCK = 169, KTX2_FORMAT_ASTC_8x6_SRGB_BLOCK = 170; + const uint32_t KTX2_FORMAT_ASTC_10x5_UNORM_BLOCK = 173, KTX2_FORMAT_ASTC_10x5_SRGB_BLOCK = 174; + const uint32_t KTX2_FORMAT_ASTC_10x6_UNORM_BLOCK = 175, KTX2_FORMAT_ASTC_10x6_SRGB_BLOCK = 176; + const uint32_t KTX2_FORMAT_ASTC_8x8_UNORM_BLOCK = 171, KTX2_FORMAT_ASTC_8x8_SRGB_BLOCK = 172; // note the ASTC block size order is off in the vkFormat definitions + const uint32_t KTX2_FORMAT_ASTC_10x8_UNORM_BLOCK = 177, KTX2_FORMAT_ASTC_10x8_SRGB_BLOCK = 178; + const uint32_t KTX2_FORMAT_ASTC_10x10_UNORM_BLOCK = 179, KTX2_FORMAT_ASTC_10x10_SRGB_BLOCK = 180; + const uint32_t KTX2_FORMAT_ASTC_12x10_UNORM_BLOCK = 181, KTX2_FORMAT_ASTC_12x10_SRGB_BLOCK = 182; + const uint32_t KTX2_FORMAT_ASTC_12x12_UNORM_BLOCK = 183, KTX2_FORMAT_ASTC_12x12_SRGB_BLOCK = 184; + const uint32_t KTX2_KDF_DF_MODEL_ASTC = 162; // 0xA2 const uint32_t KTX2_KDF_DF_MODEL_ETC1S = 163; // 0xA3 const uint32_t KTX2_KDF_DF_MODEL_UASTC_LDR_4X4 = 166; // 0xA6 const uint32_t KTX2_KDF_DF_MODEL_UASTC_HDR_4X4 = 167; // 0xA7 - const uint32_t KTX2_KDF_DF_MODEL_ASTC_HDR_6X6_INTERMEDIATE = 168; // 0xA8, TODO - coordinate with Khronos on this - + const uint32_t KTX2_KDF_DF_MODEL_UASTC_HDR_6X6_INTERMEDIATE = 168; // 0xA8, TODO - coordinate with Khronos on this + const uint32_t KTX2_KDF_DF_MODEL_XUASTC_LDR_INTERMEDIATE = 169; // 0xA9, TODO - coordinate with Khronos on this + const uint32_t KTX2_IMAGE_IS_P_FRAME = 2; const uint32_t KTX2_UASTC_BLOCK_SIZE = 16; // also the block size for UASTC_HDR const uint32_t KTX2_MAX_SUPPORTED_LEVEL_COUNT = 16; // this is an implementation specific constraint and can be increased @@ -806,9 +954,11 @@ namespace basist enum ktx2_supercompression { KTX2_SS_NONE = 0, - KTX2_SS_BASISLZ = 1, + KTX2_SS_BASISLZ 
= 1, // actually ETC1S KTX2_SS_ZSTANDARD = 2, - KTX2_SS_BASIS + KTX2_SS_DEFLATE = 3, // currently unsupported by us + KTX2_SS_UASTC_HDR_6x6I = 4, // UASTC HDR 6x6i (picked by Khronos, in KTX-Software as of 2/19/2026) + KTX2_SS_XUASTC_LDR = 5 // XUASTC LDR 4x4-12x12 (coordinate with Khronos, not in KTX-Software yet as of 2/19/2026) }; extern const uint8_t g_ktx2_file_identifier[12]; @@ -878,12 +1028,12 @@ namespace basist { case KTX2_DF_PRIMARIES_UNSPECIFIED: return "UNSPECIFIED"; case KTX2_DF_PRIMARIES_BT709: return "BT709"; - case KTX2_DF_PRIMARIES_BT601_EBU: return "EBU"; + case KTX2_DF_PRIMARIES_BT601_EBU: return "EBU"; case KTX2_DF_PRIMARIES_BT601_SMPTE: return "SMPTE"; case KTX2_DF_PRIMARIES_BT2020: return "BT2020"; case KTX2_DF_PRIMARIES_CIEXYZ: return "CIEXYZ"; case KTX2_DF_PRIMARIES_ACES: return "ACES"; - case KTX2_DF_PRIMARIES_ACESCC: return "ACESCC"; + case KTX2_DF_PRIMARIES_ACESCC: return "ACESCC"; case KTX2_DF_PRIMARIES_NTSC1953: return "NTSC1953"; case KTX2_DF_PRIMARIES_PAL525: return "PAL525"; case KTX2_DF_PRIMARIES_DISPLAYP3: return "DISPLAYP3"; @@ -891,7 +1041,7 @@ namespace basist default: break; } return "?"; - } + } // Information about a single 2D texture "image" in a KTX2 file. struct ktx2_image_level_info @@ -901,19 +1051,19 @@ namespace basist uint32_t m_layer_index; uint32_t m_face_index; - // The image's actual (or the original source image's) width/height in pixels, which may not be divisible by 4 pixels. + // The image's ACTUAL (or the original source image's) width/height in pixels, which may not be divisible by the block size (4-12 pixels). uint32_t m_orig_width; uint32_t m_orig_height; - // The image's physical width/height, which will always be divisible by 4 pixels. + // The image's physical width/height, which will always be divisible by the format's block size (4-12 pixels). uint32_t m_width; uint32_t m_height; - - // The texture's dimensions in 4x4 or 6x6 texel blocks. + + // The texture's dimensions in 4x4-12x12 texel blocks. 
uint32_t m_num_blocks_x; uint32_t m_num_blocks_y; - // The format's block width/height (currently either 4 or 6). + // The format's block width/height (4-12). uint32_t m_block_width; uint32_t m_block_height; @@ -926,7 +1076,7 @@ namespace basist // true if the image is an I-Frame. Currently, for ETC1S textures, the first frame will always be an I-Frame, and subsequent frames will always be P-Frames. bool m_iframe_flag; }; - + // Thread-specific ETC1S/supercompressed UASTC transcoder state. (If you're not doing multithreading transcoding you can ignore this.) struct ktx2_transcoder_state { @@ -944,9 +1094,9 @@ namespace basist // This class is quite similar to basisu_transcoder. It treats KTX2 files as a simple container for ETC1S/UASTC texture data. // It does not support 1D or 3D textures. - // It only supports 2D and cubemap textures, with or without mipmaps, texture arrays of 2D/cubemap textures, and texture video files. - // It only supports raw non-supercompressed UASTC, ETC1S, UASTC+Zstd, or UASTC+zlib compressed files. - // DFD (Data Format Descriptor) parsing is purposely as simple as possible. + // It only supports 2D and cubemap textures, with or without mipmaps, texture arrays of 2D/cubemap textures, and texture video files. + // It only supports our codec formats: ETC1S, UASTC LDR 4x4, UASTC HDR 4x4, etc. + // DFD (Data Format Descriptor) parsing is purposely as simple as possible. // If you need to know how to interpret the texture channels you'll need to parse the DFD yourself after calling get_dfd(). class ktx2_transcoder { @@ -971,10 +1121,10 @@ namespace basist // Returns the KTX2 level index array. There will be one entry for each mipmap level. Valid after init(). const basisu::vector& get_level_index() const { return m_levels; } - // Returns the texture's width in texels. Always non-zero, might not be divisible by 4. Valid after init(). + // Returns the texture's width in texels. Always non-zero, might not be divisible by the block size. 
Valid after init(). uint32_t get_width() const { return m_header.m_pixel_width; } - // Returns the texture's height in texels. Always non-zero, might not be divisible by 4. Valid after init(). + // Returns the texture's height in texels. Always non-zero, might not be divisible by the block size. Valid after init(). uint32_t get_height() const { return m_header.m_pixel_height; } // Returns the texture's number of mipmap levels. Always returns 1 or higher. Valid after init(). @@ -986,15 +1136,15 @@ namespace basist // Returns 0 or the number of layers in the texture array or texture video. Valid after init(). uint32_t get_layers() const { return m_header.m_layer_count; } - // Returns cETC1S, cUASTC4x4, cUASTC_HDR_4x4, cASTC_HDR_6x6, cASTC_HDR_6x6_INTERMEDIATE. Valid after init(). + // Returns cETC1S, cUASTC4x4, cUASTC_HDR_4x4, cASTC_HDR_6x6, cUASTC_HDR_6x6_INTERMEDIATE, etc. Valid after init(). basist::basis_tex_format get_basis_tex_format() const { return m_format; } // ETC1S LDR 4x4 bool is_etc1s() const { return get_basis_tex_format() == basist::basis_tex_format::cETC1S; } // UASTC LDR 4x4 (only) - bool is_uastc() const { return get_basis_tex_format() == basist::basis_tex_format::cUASTC4x4; } - + bool is_uastc() const { return get_basis_tex_format() == basist::basis_tex_format::cUASTC_LDR_4x4; } + // Is ASTC HDR 4x4 or 6x6 bool is_hdr() const { @@ -1006,18 +1156,26 @@ namespace basist return !is_hdr(); } + // is UASTC HDR 4x4 (which is also standard ASTC HDR 4x4 data) bool is_hdr_4x4() const { return (get_basis_tex_format() == basist::basis_tex_format::cUASTC_HDR_4x4); } + // is ASTC HDR 6x6 or UASTC HDR 6x6 intermediate (only) bool is_hdr_6x6() const { - return (get_basis_tex_format() == basist::basis_tex_format::cASTC_HDR_6x6) || (get_basis_tex_format() == basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE); + return (get_basis_tex_format() == basist::basis_tex_format::cASTC_HDR_6x6) || (get_basis_tex_format() == 
basist::basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE); } + + // is ASTC LDR 4x4-12x12 (only) + bool is_astc_ldr() const { return basis_tex_format_is_astc_ldr(get_basis_tex_format()); } + + // is XUASTC LDR 4x4-12x12 (only) + bool is_xuastc_ldr() const { return basis_tex_format_is_xuastc_ldr(get_basis_tex_format()); } uint32_t get_block_width() const { return basis_tex_format_get_block_width(get_basis_tex_format()); } - uint32_t get_block_height() const { return basis_tex_format_get_block_height(get_basis_tex_format()); } + uint32_t get_block_height() const { return basis_tex_format_get_block_height(get_basis_tex_format()); } // Returns true if the ETC1S file has two planes (typically RGBA, or RRRG), or true if the UASTC file has alpha data. Valid after init(). uint32_t get_has_alpha() const { return m_has_alpha; } @@ -1032,17 +1190,19 @@ namespace basist // Returns the DFD color primary. // We do not validate the color primaries, so the returned value may not be in the ktx2_df_color_primaries enum. ktx2_df_color_primaries get_dfd_color_primaries() const { return m_dfd_color_prims; } - + // Returns KTX2_KHR_DF_TRANSFER_LINEAR or KTX2_KHR_DF_TRANSFER_SRGB. uint32_t get_dfd_transfer_func() const { return m_dfd_transfer_func; } + bool is_srgb() const { return (get_dfd_transfer_func() == KTX2_KHR_DF_TRANSFER_SRGB); } + uint32_t get_dfd_flags() const { return m_dfd_flags; } // Returns 1 (ETC1S/UASTC) or 2 (ETC1S with an internal alpha channel). uint32_t get_dfd_total_samples() const { return m_dfd_samples; } - - // Returns the channel mapping for each DFD "sample". UASTC always has 1 sample, ETC1S can have one or two. - // Note the returned value SHOULD be one of the ktx2_df_channel_id enums, but we don't validate that. + + // Returns the channel mapping for each DFD "sample". UASTC always has 1 sample, ETC1S can have one or two. + // Note the returned value SHOULD be one of the ktx2_df_channel_id enums, but we don't validate that. 
// It's up to the caller to decide what to do if the value isn't in the enum. ktx2_df_channel_id get_dfd_channel_id0() const { return m_dfd_chan0; } ktx2_df_channel_id get_dfd_channel_id1() const { return m_dfd_chan1; } @@ -1050,11 +1210,11 @@ namespace basist // Key value field data. struct key_value { - // The key field is UTF8 and always zero terminated. + // The key field is UTF8 and always zero terminated. // In memory we always append a zero terminator to the key. basisu::uint8_vec m_key; - // The value may be empty. In the KTX2 file it consists of raw bytes which may or may not be zero terminated. + // The value may be empty. In the KTX2 file it consists of raw bytes which may or may not be zero terminated. // In memory we always append a zero terminator to the value. basisu::uint8_vec m_value; @@ -1076,7 +1236,7 @@ namespace basist // Returns the array of ETC1S image descriptors, which is only valid after get_etc1s_image_descs() is called. const basisu::vector& get_etc1s_image_descs() const { return m_etc1s_image_descs; } - const basisu::vector& get_astc_hdr_6x6_intermediate_image_descs() const { return m_astc_6x6_intermediate_image_descs; } + const basisu::vector& get_slice_offset_len_descs() const { return m_slice_offset_len_descs; } // Must have called startTranscoding() first uint32_t get_etc1s_image_descs_image_flags(uint32_t level_index, uint32_t layer_index, uint32_t face_index) const; @@ -1084,21 +1244,21 @@ namespace basist // is_video() is only valid after start_transcoding() is called. // For ETC1S data, if this returns true you must currently transcode the file from first to last frame, in order, without skipping any frames. bool is_video() const { return m_is_video; } - + // Defaults to 0, only non-zero if the key existed in the source KTX2 file. float get_ldr_hdr_upconversion_nit_multiplier() const { return m_ldr_hdr_upconversion_nit_multiplier; } - - // start_transcoding() MUST be called before calling transcode_image(). 
+ + // start_transcoding() MUST be called before calling transcode_image_level(). // This method decompresses the ETC1S global endpoint/selector codebooks, which is not free, so try to avoid calling it excessively. bool start_transcoding(); - + // get_image_level_info() be called after init(), but the m_iframe_flag's won't be valid until start_transcoding() is called. // You can call this method before calling transcode_image_level() to retrieve basic information about the mipmap level's dimensions, etc. bool get_image_level_info(ktx2_image_level_info& level_info, uint32_t level_index, uint32_t layer_index, uint32_t face_index) const; // transcode_image_level() transcodes a single 2D texture or cubemap face from the KTX2 file. // Internally it uses the same low-level transcode API's as basisu_transcoder::transcode_image_level(). - // If the file is UASTC and is supercompressed with Zstandard, and the file is a texture array or cubemap, it's highly recommended that each mipmap level is + // If the file is UASTC and is supercompressed with Zstandard, and the file is a texture array or cubemap, it's highly recommended that each mipmap level is // completely transcoded before switching to another level. Every time the mipmap level is changed all supercompressed level data must be decompressed using Zstandard as a single unit. // Currently ETC1S videos must always be transcoded from first to last frame (or KTX2 "layer"), in order, with no skipping of frames. // By default this method is not thread safe unless you specify a pointer to a user allocated thread-specific transcoder_state struct. 
@@ -1108,7 +1268,7 @@ namespace basist basist::transcoder_texture_format fmt, uint32_t decode_flags = 0, uint32_t output_row_pitch_in_blocks_or_pixels = 0, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, ktx2_transcoder_state *pState = nullptr); - + private: const uint8_t* m_pData; uint32_t m_data_size; @@ -1117,26 +1277,30 @@ namespace basist basisu::vector m_levels; basisu::uint8_vec m_dfd; key_value_vec m_key_values; - + ktx2_etc1s_global_data_header m_etc1s_header; basisu::vector m_etc1s_image_descs; - basisu::vector m_astc_6x6_intermediate_image_descs; + basisu::vector m_slice_offset_len_descs; basist::basis_tex_format m_format; - + uint32_t m_dfd_color_model; ktx2_df_color_primaries m_dfd_color_prims; - uint32_t m_dfd_transfer_func; + + // KTX2_KHR_DF_TRANSFER_LINEAR vs. KTX2_KHR_DF_TRANSFER_SRGB (for XUASTC LDR: which profile was used during encoding) + uint32_t m_dfd_transfer_func; + uint32_t m_dfd_flags; uint32_t m_dfd_samples; ktx2_df_channel_id m_dfd_chan0, m_dfd_chan1; - + basist::basisu_lowlevel_etc1s_transcoder m_etc1s_transcoder; - basist::basisu_lowlevel_uastc_ldr_4x4_transcoder m_uastc_transcoder; + basist::basisu_lowlevel_uastc_ldr_4x4_transcoder m_uastc_ldr_transcoder; + basist::basisu_lowlevel_xuastc_ldr_transcoder m_xuastc_ldr_transcoder; basist::basisu_lowlevel_uastc_hdr_4x4_transcoder m_uastc_hdr_transcoder; basist::basisu_lowlevel_astc_hdr_6x6_transcoder m_astc_hdr_6x6_transcoder; - basist::basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder m_astc_hdr_6x6_intermediate_transcoder; - + basist::basisu_lowlevel_uastc_hdr_6x6_intermediate_transcoder m_astc_hdr_6x6_intermediate_transcoder; + ktx2_transcoder_state m_def_transcoder_state; bool m_has_alpha; @@ -1144,7 +1308,7 @@ namespace basist float m_ldr_hdr_upconversion_nit_multiplier; bool decompress_level_data(uint32_t level_index, basisu::uint8_vec& uncomp_data); - bool read_astc_6x6_hdr_intermediate_global_data(); + bool read_slice_offset_len_global_data(bool 
read_std_structs); bool decompress_etc1s_global_data(); bool read_key_values(); }; @@ -1165,7 +1329,7 @@ namespace basist break; } } - + if (!p) p = key_values.enlarge(1); @@ -1189,3 +1353,4 @@ namespace basist bool basisu_transcoder_supports_ktx2_zstd(); } // namespace basisu + diff --git a/external/basis_universal/transcoder/basisu_transcoder_internal.h b/external/basis_universal/transcoder/basisu_transcoder_internal.h index 5480ba32f6..041d0849ae 100644 --- a/external/basis_universal/transcoder/basisu_transcoder_internal.h +++ b/external/basis_universal/transcoder/basisu_transcoder_internal.h @@ -1,5 +1,5 @@ // basisu_transcoder_internal.h - Universal texture format transcoder library. -// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2026 Binomial LLC. All Rights Reserved. // // Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing // @@ -22,8 +22,11 @@ // v1.50: Added UASTC HDR 4x4 support // v1.60: Added RDO ASTC HDR 6x6 and intermediate support -#define BASISD_LIB_VERSION 160 -#define BASISD_VERSION_STRING "01.60" +// v1.65: Added ASTC LDR 4x4-12x12 and XUASTC LDR 4x4-12x12 (not publically released) +// v2.00: Added unified effort/quality options across all formats, fast direct transcoding of XUASTC 4x4/6x6/8x6 to BC7, adaptive deblocking, ZStd or arithmetic profiles, weight grid DCT +// v2.10: Khronos modifications to KTX2 file format for UASTC HDR 6x6i support for KTX-Software compatiblity (we're also modifying how XUASTC LDR files use KTX2 to be compatible) +#define BASISD_LIB_VERSION 210 +#define BASISD_VERSION_STRING "02.10" #ifdef _DEBUG #define BASISD_BUILD_DEBUG @@ -32,6 +35,7 @@ #endif #include "basisu.h" +#include "basisu_astc_helpers.h" #define BASISD_znew (z = 36969 * (z & 65535) + (z >> 16)) @@ -46,9 +50,9 @@ namespace basist // You probably don't care about these enum's unless you are going pretty low-level and calling the transcoder to decode individual slices. 
enum class block_format { - cETC1, // ETC1S RGB + cETC1, // ETC1S RGB cETC2_RGBA, // full ETC2 EAC RGBA8 block - cBC1, // DXT1 RGB + cBC1, // DXT1 RGB cBC3, // BC4 block followed by a four color BC1 block cBC4, // DXT5A (alpha block only) cBC5, // two BC4 blocks @@ -58,9 +62,9 @@ namespace basist cBC7_M5_COLOR, // RGB BC7 mode 5 color (writes an opaque mode 5 block) cBC7_M5_ALPHA, // alpha portion of BC7 mode 5 (cBC7_M5_COLOR output data must have been written to the output buffer first to set the mode/rot fields etc.) cETC2_EAC_A8, // alpha block of ETC2 EAC (first 8 bytes of the 16-bit ETC2 EAC RGBA format) - cASTC_4x4, // ASTC 4x4 (either color-only or color+alpha). Note that the transcoder always currently assumes sRGB is not enabled when outputting ASTC + cASTC_LDR_4x4, // ASTC LDR 4x4 (either color-only or color+alpha). Note that the transcoder always currently assumes sRGB decode mode is not enabled when outputting ASTC LDR for ETC1S/UASTC LDR 4x4. // data. If you use a sRGB ASTC format you'll get ~1 LSB of additional error, because of the different way ASTC decoders scale 8-bit endpoints to 16-bits during unpacking. - + cATC_RGB, cATC_RGBA_INTERPOLATED_ALPHA, cFXT1_RGB, // Opaque-only, has oddball 8x4 pixel block size @@ -70,16 +74,16 @@ namespace basist cETC2_EAC_R11, cETC2_EAC_RG11, - + cIndices, // Used internally: Write 16-bit endpoint and selector indices directly to output (output block must be at least 32-bits) cRGB32, // Writes RGB components to 32bpp output pixels cRGBA32, // Writes RGB255 components to 32bpp output pixels cA32, // Writes alpha component to 32bpp output pixels - + cRGB565, cBGR565, - + cRGBA4444_COLOR, cRGBA4444_ALPHA, cRGBA4444_COLOR_OPAQUE, @@ -91,12 +95,73 @@ namespace basist cUASTC_4x4, // LDR, universal cUASTC_HDR_4x4, // HDR, transcodes only to 4x4 HDR ASTC, BC6H, or uncompressed cBC6H, + cASTC_HDR_4x4, cASTC_HDR_6x6, + // The remaining ASTC LDR block sizes. 
+ cASTC_LDR_5x4, + cASTC_LDR_5x5, + cASTC_LDR_6x5, + cASTC_LDR_6x6, + cASTC_LDR_8x5, + cASTC_LDR_8x6, + cASTC_LDR_10x5, + cASTC_LDR_10x6, + cASTC_LDR_8x8, + cASTC_LDR_10x8, + cASTC_LDR_10x10, + cASTC_LDR_12x10, + cASTC_LDR_12x12, + cTotalBlockFormats }; + inline bool block_format_is_hdr(block_format fmt) + { + switch (fmt) + { + case block_format::cUASTC_HDR_4x4: + case block_format::cBC6H: + case block_format::cASTC_HDR_4x4: + case block_format::cASTC_HDR_6x6: + return true; + default: + break; + } + + return false; + } + + // LDR or HDR ASTC? + inline bool block_format_is_astc(block_format fmt) + { + switch (fmt) + { + case block_format::cASTC_LDR_4x4: + case block_format::cASTC_LDR_5x4: + case block_format::cASTC_LDR_5x5: + case block_format::cASTC_LDR_6x5: + case block_format::cASTC_LDR_6x6: + case block_format::cASTC_LDR_8x5: + case block_format::cASTC_LDR_8x6: + case block_format::cASTC_LDR_10x5: + case block_format::cASTC_LDR_10x6: + case block_format::cASTC_LDR_8x8: + case block_format::cASTC_LDR_10x8: + case block_format::cASTC_LDR_10x10: + case block_format::cASTC_LDR_12x10: + case block_format::cASTC_LDR_12x12: + case block_format::cASTC_HDR_4x4: + case block_format::cASTC_HDR_6x6: + return true; + default: + break; + } + + return false; + } + inline uint32_t get_block_width(block_format fmt) { switch (fmt) @@ -105,6 +170,21 @@ namespace basist return 8; case block_format::cASTC_HDR_6x6: return 6; + + case block_format::cASTC_LDR_5x4: return 5; + case block_format::cASTC_LDR_5x5: return 5; + case block_format::cASTC_LDR_6x5: return 6; + case block_format::cASTC_LDR_6x6: return 6; + case block_format::cASTC_LDR_8x5: return 8; + case block_format::cASTC_LDR_8x6: return 8; + case block_format::cASTC_LDR_10x5: return 10; + case block_format::cASTC_LDR_10x6: return 10; + case block_format::cASTC_LDR_8x8: return 8; + case block_format::cASTC_LDR_10x8: return 10; + case block_format::cASTC_LDR_10x10: return 10; + case block_format::cASTC_LDR_12x10: return 12; + 
case block_format::cASTC_LDR_12x12: return 12; + default: break; } @@ -117,6 +197,20 @@ namespace basist { case block_format::cASTC_HDR_6x6: return 6; + + case block_format::cASTC_LDR_5x5: return 5; + case block_format::cASTC_LDR_6x5: return 5; + case block_format::cASTC_LDR_6x6: return 6; + case block_format::cASTC_LDR_8x5: return 5; + case block_format::cASTC_LDR_8x6: return 6; + case block_format::cASTC_LDR_10x5: return 5; + case block_format::cASTC_LDR_10x6: return 6; + case block_format::cASTC_LDR_8x8: return 8; + case block_format::cASTC_LDR_10x8: return 8; + case block_format::cASTC_LDR_10x10: return 10; + case block_format::cASTC_LDR_12x10: return 10; + case block_format::cASTC_LDR_12x12: return 12; + default: break; } @@ -140,9 +234,31 @@ namespace basist const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH = 3; const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_BITS = 6; const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL = (1 << SELECTOR_HISTORY_BUF_RLE_COUNT_BITS); - + uint16_t crc16(const void *r, size_t size, uint16_t crc); + uint32_t hash_hsieh(const uint8_t* pBuf, size_t len); + + template + struct bit_hasher + { + inline std::size_t operator()(const Key& k) const + { + return hash_hsieh(reinterpret_cast(&k), sizeof(k)); + } + }; + + struct string_hasher + { + inline std::size_t operator()(const std::string& k) const + { + size_t l = k.size(); + if (!l) + return 0; + return hash_hsieh(reinterpret_cast(k.c_str()), l); + } + }; + class huffman_decoding_table { friend class bitwise_decoder; @@ -260,7 +376,7 @@ namespace basist return false; else if (idx >= (int)m_tree.size()) m_tree.resize(idx + 1); - + if (!m_tree[idx]) { m_tree[idx] = (int16_t)tree_next; @@ -350,7 +466,7 @@ namespace basist void stop() { } - + inline uint32_t peek_bits(uint32_t num_bits) { if (!num_bits) @@ -429,14 +545,14 @@ namespace basist for (;;) { uint32_t k = peek_bits(16); - + uint32_t l = 0; while (k & 1) { l++; k >>= 1; } - + q += l; remove_bits(l); @@ -454,7 +570,7 @@ namespace 
basist const uint32_t chunk_size = 1 << chunk_bits; const uint32_t chunk_mask = chunk_size - 1; - + uint32_t v = 0; uint32_t ofs = 0; @@ -466,7 +582,7 @@ namespace basist if ((s & chunk_size) == 0) break; - + if (ofs >= 32) { assert(0); @@ -482,7 +598,7 @@ namespace basist assert(ct.m_code_sizes.size()); const uint32_t huffman_fast_lookup_size = 1 << fast_lookup_bits; - + while (m_bit_buf_size < 16) { uint32_t c = 0; @@ -493,7 +609,7 @@ namespace basist m_bit_buf_size += 8; assert(m_bit_buf_size <= 32); } - + int code_len; int sym; @@ -603,6 +719,100 @@ namespace basist uint32_t m_bit_buf_size; }; + class simplified_bitwise_decoder + { + public: + simplified_bitwise_decoder() : + m_pBuf(nullptr), + m_pBuf_end(nullptr), + m_bit_buf(0) + { + } + + void clear() + { + m_pBuf = nullptr; + m_pBuf_end = nullptr; + m_bit_buf = 0; + } + + bool init(const uint8_t* pBuf, size_t buf_size) + { + if ((!pBuf) && (buf_size)) + return false; + + m_pBuf = pBuf; + m_pBuf_end = pBuf + buf_size; + m_bit_buf = 1; + return true; + } + + bool init(const basisu::uint8_vec& buf) + { + return init(buf.data(), buf.size()); + } + + // num_bits must be 1, 2, 4 or 8 and codes cannot cross bytes + inline uint32_t get_bits(uint32_t num_bits) + { + assert(m_pBuf); + + if (m_bit_buf <= 1) + m_bit_buf = 256 | ((m_pBuf < m_pBuf_end) ? *m_pBuf++ : 0); + + const uint32_t mask = (1 << num_bits) - 1; + const uint32_t res = m_bit_buf & mask; + m_bit_buf >>= num_bits; + assert(m_bit_buf >= 1); + + return res; + } + + inline uint32_t get_bits1() + { + assert(m_pBuf); + if (m_bit_buf <= 1) + m_bit_buf = 256 | ((m_pBuf < m_pBuf_end) ? *m_pBuf++ : 0); + const uint32_t res = m_bit_buf & 1; + m_bit_buf >>= 1; + assert(m_bit_buf >= 1); + return res; + } + + inline uint32_t get_bits2() + { + assert(m_pBuf); + if (m_bit_buf <= 1) + m_bit_buf = 256 | ((m_pBuf < m_pBuf_end) ? 
*m_pBuf++ : 0); + const uint32_t res = m_bit_buf & 3; + m_bit_buf >>= 2; + assert(m_bit_buf >= 1); + return res; + } + + inline uint32_t get_bits4() + { + assert(m_pBuf); + if (m_bit_buf <= 1) + m_bit_buf = 256 | ((m_pBuf < m_pBuf_end) ? *m_pBuf++ : 0); + const uint32_t res = m_bit_buf & 15; + m_bit_buf >>= 4; + assert(m_bit_buf >= 1); + return res; + } + + // No bitbuffer, can only ever retrieve bytes correctly. + inline uint32_t get_bits8() + { + assert(m_pBuf); + return (m_pBuf < m_pBuf_end) ? *m_pBuf++ : 0; + } + + const uint8_t* m_pBuf; + const uint8_t* m_pBuf_end; + uint32_t m_bit_buf; + }; + inline uint32_t basisd_rand(uint32_t seed) { if (!seed) @@ -684,7 +894,7 @@ namespace basist }; struct decoder_etc_block; - + inline uint8_t clamp255(int32_t i) { return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i); @@ -708,11 +918,12 @@ namespace basist }; uint8_t c[4]; - + uint32_t m; }; - color32() { } + //color32() { } + color32() = default; color32(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); } color32(eNoClamp unused, uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { (void)unused; set_noclamp_rgba(vr, vg, vb, va); } @@ -744,6 +955,172 @@ namespace basist bool operator!= (const endpoint& rhs) const { return !(*this == rhs); } }; + // This duplicates key functionality in the encoder library's color_rgba class. Porting and retesting code that uses it to color32 is impractical. 
+ class color_rgba + { + public: + union + { + uint8_t m_comps[4]; + + struct + { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + }; + }; + + inline color_rgba() + { + static_assert(sizeof(*this) == 4, "sizeof(*this) != 4"); + static_assert(sizeof(*this) == sizeof(color32), "sizeof(*this) != sizeof(basist::color32)"); + } + + inline color_rgba(const color32& other) : + r(other.r), + g(other.g), + b(other.b), + a(other.a) + { + } + + color_rgba& operator= (const basist::color32& rhs) + { + r = rhs.r; + g = rhs.g; + b = rhs.b; + a = rhs.a; + return *this; + } + + inline color_rgba(int y) + { + set(y); + } + + inline color_rgba(int y, int na) + { + set(y, na); + } + + inline color_rgba(int sr, int sg, int sb, int sa) + { + set(sr, sg, sb, sa); + } + + inline color_rgba(eNoClamp, int sr, int sg, int sb, int sa) + { + set_noclamp_rgba((uint8_t)sr, (uint8_t)sg, (uint8_t)sb, (uint8_t)sa); + } + + inline color_rgba& set_noclamp_y(int y) + { + m_comps[0] = (uint8_t)y; + m_comps[1] = (uint8_t)y; + m_comps[2] = (uint8_t)y; + m_comps[3] = (uint8_t)255; + return *this; + } + + inline color_rgba& set_noclamp_rgba(int sr, int sg, int sb, int sa) + { + m_comps[0] = (uint8_t)sr; + m_comps[1] = (uint8_t)sg; + m_comps[2] = (uint8_t)sb; + m_comps[3] = (uint8_t)sa; + return *this; + } + + inline color_rgba& set(int y) + { + m_comps[0] = static_cast(basisu::clamp(y, 0, 255)); + m_comps[1] = m_comps[0]; + m_comps[2] = m_comps[0]; + m_comps[3] = 255; + return *this; + } + + inline color_rgba& set(int y, int na) + { + m_comps[0] = static_cast(basisu::clamp(y, 0, 255)); + m_comps[1] = m_comps[0]; + m_comps[2] = m_comps[0]; + m_comps[3] = static_cast(basisu::clamp(na, 0, 255)); + return *this; + } + + inline color_rgba& set(int sr, int sg, int sb, int sa) + { + m_comps[0] = static_cast(basisu::clamp(sr, 0, 255)); + m_comps[1] = static_cast(basisu::clamp(sg, 0, 255)); + m_comps[2] = static_cast(basisu::clamp(sb, 0, 255)); + m_comps[3] = static_cast(basisu::clamp(sa, 0, 255)); + return 
*this; + } + + inline color_rgba& set_rgb(int sr, int sg, int sb) + { + m_comps[0] = static_cast(basisu::clamp(sr, 0, 255)); + m_comps[1] = static_cast(basisu::clamp(sg, 0, 255)); + m_comps[2] = static_cast(basisu::clamp(sb, 0, 255)); + return *this; + } + + inline color_rgba& set_rgb(const color_rgba& other) + { + r = other.r; + g = other.g; + b = other.b; + return *this; + } + + inline const uint8_t& operator[] (uint32_t index) const { assert(index < 4); return m_comps[index]; } + inline uint8_t& operator[] (uint32_t index) { assert(index < 4); return m_comps[index]; } + + inline void clear() + { + m_comps[0] = 0; + m_comps[1] = 0; + m_comps[2] = 0; + m_comps[3] = 0; + } + + inline bool operator== (const color_rgba& rhs) const + { + if (m_comps[0] != rhs.m_comps[0]) return false; + if (m_comps[1] != rhs.m_comps[1]) return false; + if (m_comps[2] != rhs.m_comps[2]) return false; + if (m_comps[3] != rhs.m_comps[3]) return false; + return true; + } + + inline bool operator!= (const color_rgba& rhs) const + { + return !(*this == rhs); + } + + inline bool operator<(const color_rgba& rhs) const + { + for (int i = 0; i < 4; i++) + { + if (m_comps[i] < rhs.m_comps[i]) + return true; + else if (m_comps[i] != rhs.m_comps[i]) + return false; + } + return false; + } + + inline color32 get_color32() const + { + return color32(r, g, b, a); + } + + inline int get_709_luma() const { return (13938U * m_comps[0] + 46869U * m_comps[1] + 4729U * m_comps[2] + 32768U) >> 16U; } + }; + struct selector { // Plain selectors (2-bits per value) @@ -989,7 +1366,7 @@ namespace basist extern const uint8_t g_bc6h_weight4[16]; extern const int8_t g_bc6h_mode_lookup[32]; - + // Converts b16 to half float inline half_float bc6h_blog16_to_half(uint32_t comp) { @@ -1003,7 +1380,7 @@ namespace basist const uint32_t MAX_BC6H_HALF_FLOAT_AS_UINT = 0x7BFF; // Inverts bc6h_blog16_to_half(). - // Returns the nearest blog16 given a half value. + // Returns the nearest blog16 given a half value. 
inline uint32_t bc6h_half_to_blog16(half_float h) { assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT); @@ -1044,10 +1421,1844 @@ namespace basist }; void pack_bc6h_block(bc6h_block& dst_blk, bc6h_logical_block& log_blk); - + namespace bc7_mode_5_encoder { void encode_bc7_mode_5_block(void* pDst_block, color32* pPixels, bool hq_mode); } -} // namespace basist + namespace astc_6x6_hdr + { + extern uint8_t g_quantize_tables_preserve2[21 - 1][256]; // astc_helpers::TOTAL_ISE_RANGES=21 + extern uint8_t g_quantize_tables_preserve3[21 - 1][256]; + } // namespace astc_6x6_hdr + +#if BASISD_SUPPORT_XUASTC + namespace astc_ldr_t + { + const uint32_t ARITH_HEADER_MARKER = 0x01; + const uint32_t ARITH_HEADER_MARKER_BITS = 5; + + const uint32_t FULL_ZSTD_HEADER_MARKER = 0x01; + const uint32_t FULL_ZSTD_HEADER_MARKER_BITS = 5; + + const uint32_t FINAL_SYNC_MARKER = 0xAF; + const uint32_t FINAL_SYNC_MARKER_BITS = 8; + + const uint32_t cMaxConfigReuseNeighbors = 3; + +#pragma pack(push, 1) + struct xuastc_ldr_arith_header + { + uint8_t m_flags; + basisu::packed_uint<4> m_arith_bytes_len; + basisu::packed_uint<4> m_mean0_bits_len; + basisu::packed_uint<4> m_mean1_bytes_len; + basisu::packed_uint<4> m_run_bytes_len; + basisu::packed_uint<4> m_coeff_bytes_len; + basisu::packed_uint<4> m_sign_bits_len; + basisu::packed_uint<4> m_weight2_bits_len; // 2-bit weights (4 per byte), up to BISE_4_LEVELS + basisu::packed_uint<4> m_weight3_bits_len; // 3-bit weights (2 per byte), up to BISE_8_LEVELS + basisu::packed_uint<4> m_weight4_bits_len; // 4-bit weights (2 per byte), up to BISE_16_LEVELS + basisu::packed_uint<4> m_weight8_bytes_len; // 8-bit weights (1 per byte), up to BISE_32_LEVELS + basisu::packed_uint<4> m_unused; // Future expansion + }; + + struct xuastc_ldr_full_zstd_header + { + uint8_t m_flags; + + // Control + basisu::packed_uint<4> m_raw_bits_len; // uncompressed + basisu::packed_uint<4> m_mode_bytes_len; + basisu::packed_uint<4> m_solid_dpcm_bytes_len; + + // Endpoint DPCM + 
basisu::packed_uint<4> m_endpoint_dpcm_reuse_indices_len; + basisu::packed_uint<4> m_use_bc_bits_len; + basisu::packed_uint<4> m_endpoint_dpcm_3bit_len; + basisu::packed_uint<4> m_endpoint_dpcm_4bit_len; + basisu::packed_uint<4> m_endpoint_dpcm_5bit_len; + basisu::packed_uint<4> m_endpoint_dpcm_6bit_len; + basisu::packed_uint<4> m_endpoint_dpcm_7bit_len; + basisu::packed_uint<4> m_endpoint_dpcm_8bit_len; + + // Weight grid DCT + basisu::packed_uint<4> m_mean0_bits_len; + basisu::packed_uint<4> m_mean1_bytes_len; + basisu::packed_uint<4> m_run_bytes_len; + basisu::packed_uint<4> m_coeff_bytes_len; + basisu::packed_uint<4> m_sign_bits_len; + + // Weight DPCM + basisu::packed_uint<4> m_weight2_bits_len; // 2-bit weights (4 per byte), up to BISE_4_LEVELS + basisu::packed_uint<4> m_weight3_bits_len; // 3-bit weights (4 per byte), up to BISE_8_LEVELS + basisu::packed_uint<4> m_weight4_bits_len; // 4-bit weights (2 per byte), up to BISE_16_LEVELS + basisu::packed_uint<4> m_weight8_bytes_len; // 8-bit weights (1 per byte), up to BISE_32_LEVELS + + basisu::packed_uint<4> m_unused; // Future expansion + }; +#pragma pack(pop) + + const uint32_t DCT_RUN_LEN_EOB_SYM_INDEX = 64; + const uint32_t DCT_MAX_ARITH_COEFF_MAG = 255; + + const uint32_t DCT_MEAN_LEVELS0 = 9, DCT_MEAN_LEVELS1 = 33; + + const uint32_t PART_HASH_BITS = 6u; + const uint32_t PART_HASH_SIZE = 1u << PART_HASH_BITS; + + const uint32_t TM_HASH_BITS = 7u; + const uint32_t TM_HASH_SIZE = 1u << TM_HASH_BITS; + + typedef basisu::vector fvec; + + void init(); + + color_rgba blue_contract_enc(color_rgba orig, bool& did_clamp, int encoded_b); + color_rgba blue_contract_dec(int enc_r, int enc_g, int enc_b, int enc_a); + + struct astc_block_grid_config + { + uint16_t m_block_width, m_block_height; + uint16_t m_grid_width, m_grid_height; + + astc_block_grid_config() {} + + astc_block_grid_config(uint32_t block_width, uint32_t block_height, uint32_t grid_width, uint32_t grid_height) + { + assert((block_width >= 4) && 
(block_width <= 12)); + assert((block_height >= 4) && (block_height <= 12)); + m_block_width = (uint16_t)block_width; + m_block_height = (uint16_t)block_height; + + assert((grid_width >= 2) && (grid_width <= block_width)); + assert((grid_height >= 2) && (grid_height <= block_height)); + m_grid_width = (uint16_t)grid_width; + m_grid_height = (uint16_t)grid_height; + } + + bool operator==(const astc_block_grid_config& other) const + { + return (m_block_width == other.m_block_width) && (m_block_height == other.m_block_height) && + (m_grid_width == other.m_grid_width) && (m_grid_height == other.m_grid_height); + } + }; + + struct astc_block_grid_data + { + float m_weight_gamma; + + // An unfortunate difference of containers, but in memory these matrices are both addressed as [r][c]. + basisu::vector2D m_upsample_matrix; + + basisu::vector m_downsample_matrix; + + astc_block_grid_data() {} + astc_block_grid_data(float weight_gamma) : m_weight_gamma(weight_gamma) {} + }; + + typedef basisu::hash_map > astc_block_grid_data_hash_t; + + void decode_endpoints_ise20(uint32_t cem_index, const uint8_t* pEndpoint_vals, color32& l, color32& h); + void decode_endpoints(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index, color32& l, color32& h, float* pScale = nullptr); + + void decode_endpoints_ise20(uint32_t cem_index, const uint8_t* pEndpoint_vals, color_rgba& l, color_rgba& h); + void decode_endpoints(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index, color_rgba& l, color_rgba& h, float* pScale = nullptr); + + void compute_adjoint_downsample_matrix(basisu::vector& downsample_matrix, uint32_t block_width, uint32_t block_height, uint32_t grid_width, uint32_t grid_height); + void compute_upsample_matrix(basisu::vector2D& upsample_matrix, uint32_t block_width, uint32_t block_height, uint32_t grid_width, uint32_t grid_height); + + class dct2f + { + enum { cMaxSize = 12 }; + + public: + dct2f() : m_rows(0u), m_cols(0u) {} + + 
// call with grid_height/grid_width (INVERTED) + bool init(uint32_t rows, uint32_t cols); + + uint32_t rows() const { return m_rows; } + uint32_t cols() const { return m_cols; } + + void forward(const float* pSrc, float* pDst, fvec& work) const; + + void inverse(const float* pSrc, float* pDst, fvec& work) const; + + // check variants use a less optimized implementation, used for sanity checking + void inverse_check(const float* pSrc, float* pDst, fvec& work) const; + + void forward(const float* pSrc, uint32_t src_stride, + float* pDst, uint32_t dst_stride, fvec& work) const; + + void inverse(const float* pSrc, uint32_t src_stride, + float* pDst, uint32_t dst_stride, fvec& work) const; + + void inverse_check(const float* pSrc, uint32_t src_stride, + float* pDst, uint32_t dst_stride, fvec& work) const; + + private: + uint32_t m_rows, m_cols; + fvec m_c_col; // [u*m_rows + x] + fvec m_c_row; // [v*m_cols + y] + fvec m_a_col; // alpha(u) + fvec m_a_row; // alpha(v) + }; + + struct dct_syms + { + dct_syms() + { + clear(); + } + + void clear() + { + m_dc_sym = 0; + m_num_dc_levels = 0; + m_coeffs.resize(0); + m_max_coeff_mag = 0; + m_max_zigzag_index = 0; + } + + uint32_t m_dc_sym; + uint32_t m_num_dc_levels; + + struct coeff + { + uint16_t m_num_zeros; + int16_t m_coeff; // or INT16_MAX if invalid + + coeff() {} + coeff(uint16_t num_zeros, int16_t coeff) : m_num_zeros(num_zeros), m_coeff(coeff) {} + }; + + basisu::static_vector m_coeffs; + + uint32_t m_max_coeff_mag; + uint32_t m_max_zigzag_index; + }; + + struct grid_dim_key + { + int m_grid_width; + int m_grid_height; + + grid_dim_key() {} + + grid_dim_key(int w, int h) : m_grid_width(w), m_grid_height(h) {} + + bool operator== (const grid_dim_key& rhs) const + { + return (m_grid_width == rhs.m_grid_width) && (m_grid_height == rhs.m_grid_height); + } + }; + + struct grid_dim_value + { + basisu::int_vec m_zigzag; + dct2f m_dct; + }; + + typedef basisu::hash_map > grid_dim_hash_map; + + void 
init_astc_block_grid_data_hash(); + + const astc_block_grid_data* find_astc_block_grid_data(uint32_t block_width, uint32_t block_height, uint32_t grid_width, uint32_t grid_height); + + const float DEADZONE_ALPHA = .5f; + const float SCALED_WEIGHT_BASE_CODING_SCALE = .5f; // typically ~5 bits [0,32], or 3 [0,8] + + struct sample_quant_table_state + { + float m_q, m_sx, m_sy, m_level_scale; + + void init(float q, + uint32_t block_width, uint32_t block_height, + float level_scale) + { + m_q = q; + m_level_scale = level_scale; + + const int Bx = block_width, By = block_height; + + m_sx = (float)8.0f / (float)Bx; + m_sy = (float)8.0f / (float)By; + } + }; + + class grid_weight_dct + { + public: + grid_weight_dct() { } + + void init(uint32_t block_width, uint32_t block_height); + + static uint32_t get_num_weight_dc_levels(uint32_t weight_ise_range) + { + float scaled_weight_coding_scale = SCALED_WEIGHT_BASE_CODING_SCALE; + if (weight_ise_range <= astc_helpers::BISE_8_LEVELS) + scaled_weight_coding_scale = 1.0f / 8.0f; + + return (uint32_t)(64.0f * scaled_weight_coding_scale) + 1; + } + + struct block_stats + { + float m_mean_weight; + uint32_t m_total_coded_acs; + uint32_t m_max_ac_coeff; + }; + + bool decode_block_weights( + float q, uint32_t plane_index, // plane of weights to decode and IDCT from stream + astc_helpers::log_astc_block& log_blk, // must be initialized except for the plane weights which are decoded + basist::bitwise_decoder* pDec, + const astc_block_grid_data* pGrid_data, // grid data for this grid size + block_stats* pS, + fvec& dct_work, // thread local + const dct_syms* pSyms = nullptr) const; + + enum { m_zero_run = 3, m_coeff = 2 }; + + uint32_t m_block_width, m_block_height; + + grid_dim_hash_map m_grid_dim_key_vals; + + // Adaptively compensate for weight level quantization noise being fed into the DCT. 
+ // The more coursely the weight levels are quantized, the more noise injected, and the more noise will be spread between multiple AC coefficients. + // This will cause some previously 0 coefficients to increase in mag, but they're likely noise. So carefully nudge the quant step size to compensate. + static float scale_quant_steps(int Q_astc, float gamma = 0.1f /*.13f*/, float clamp_max = 2.0f) + { + assert(Q_astc >= 2); + float factor = 63.0f / (Q_astc - 1); + // TODO: Approximate powf() + float scaled = powf(factor, gamma); + scaled = basisu::clamp(scaled, 1.0f, clamp_max); + return scaled; + } + + float compute_level_scale(float q, float span_len, float weight_gamma, uint32_t grid_width, uint32_t grid_height, uint32_t weight_ise_range) const; + + int sample_quant_table(sample_quant_table_state& state, uint32_t x, uint32_t y) const; + + void compute_quant_table(float q, + uint32_t grid_width, uint32_t grid_height, + float level_scale, int* dct_quant_tab) const; + + float get_max_span_len(const astc_helpers::log_astc_block& log_blk, uint32_t plane_index) const; + + inline int quantize_deadzone(float d, int L, float alpha, uint32_t x, uint32_t y) const + { + assert((x < m_block_width) && (y < m_block_height)); + + if (((x == 1) && (y == 0)) || + ((x == 0) && (y == 1))) + { + return (int)std::round(d / (float)L); + } + + // L = quant step, alpha in [0,1.2] (typical 0.7–0.85) + if (L <= 0) + return 0; + + float s = fabsf(d); + float tau = alpha * float(L); // half-width of the zero band + + if (s <= tau) + return 0; // inside dead-zone towards zero + + // Quantize the residual outside the dead-zone with mid-tread rounding + float qf = (s - tau) / float(L); + int q = (int)floorf(qf + 0.5f); // ties-nearest + return (d < 0.0f) ? 
-q : q; + } + + inline float dequant_deadzone(int q, int L, float alpha, uint32_t x, uint32_t y) const + { + assert((x < m_block_width) && (y < m_block_height)); + + if (((x == 1) && (y == 0)) || + ((x == 0) && (y == 1))) + { + return (float)q * (float)L; + } + + if (q == 0 || L <= 0) + return 0.0f; + + float tau = alpha * float(L); + float mag = tau + float(abs(q)) * float(L); // center of the (nonzero) bin + return (q < 0) ? -mag : mag; + } + }; + + struct trial_mode + { + uint32_t m_grid_width; + uint32_t m_grid_height; + uint32_t m_cem; + int m_ccs_index; + uint32_t m_endpoint_ise_range; + uint32_t m_weight_ise_range; + uint32_t m_num_parts; + + bool operator==(const trial_mode& other) const + { +#define BU_COMP(a) if (a != other.a) return false; + BU_COMP(m_grid_width); + BU_COMP(m_grid_height); + BU_COMP(m_cem); + BU_COMP(m_ccs_index); + BU_COMP(m_endpoint_ise_range); + BU_COMP(m_weight_ise_range); + BU_COMP(m_num_parts); +#undef BU_COMP + return true; + } + + bool operator<(const trial_mode& rhs) const + { +#define BU_COMP(a) if (a < rhs.a) return true; else if (a > rhs.a) return false; + BU_COMP(m_grid_width); + BU_COMP(m_grid_height); + BU_COMP(m_cem); + BU_COMP(m_ccs_index); + BU_COMP(m_endpoint_ise_range); + BU_COMP(m_weight_ise_range); + BU_COMP(m_num_parts); +#undef BU_COMP + return false; + } + + operator size_t() const + { + size_t h = 0xABC1F419; +#define BU_FIELD(a) do { h ^= hash_hsieh(reinterpret_cast(&a), sizeof(a)); } while(0) + BU_FIELD(m_grid_width); + BU_FIELD(m_grid_height); + BU_FIELD(m_cem); + BU_FIELD(m_ccs_index); + BU_FIELD(m_endpoint_ise_range); + BU_FIELD(m_weight_ise_range); + BU_FIELD(m_num_parts); +#undef BU_FIELD + return h; + } + }; + + // Organize trial modes for faster initial mode triaging. 
+ const uint32_t OTM_NUM_CEMS = 14; // 0-13 (13=highest valid LDR CEM) + const uint32_t OTM_NUM_SUBSETS = 3; // 1-3 + const uint32_t OTM_NUM_CCS = 5; // -1 to 3 + const uint32_t OTM_NUM_GRID_SIZES = 2; // 0=small or 1=large (grid_w>=block_w-1 and grid_h>=block_h-1) + const uint32_t OTM_NUM_GRID_ANISOS = 3; // 0=W=H, 1=W>H, 2=W 0) && (gh > 0)); + assert((bw > 0) && (bh > 0)); + assert((gw <= 12) && (gh <= 12) && (bw <= 12) && (bh <= 12)); + assert((gw <= bw) && (gh <= bh)); + +#if 0 + // Prev. code: + uint32_t grid_aniso = 0; + if (tm.m_grid_width != tm.m_grid_height) // not optimal for non-square block sizes + { + const float grid_x_fract = (float)tm.m_grid_width / (float)block_width; + const float grid_y_fract = (float)tm.m_grid_height / (float)block_height; + if (grid_x_fract >= grid_y_fract) + grid_aniso = 1; + else if (grid_x_fract < grid_y_fract) + grid_aniso = 2; + } +#endif + // Compare gw/bw vs. gh/bh using integer math: + // gw*bh >= gh*bw -> X-dominant (1), else Y-dominant (2) + const uint32_t lhs = gw * bh; + const uint32_t rhs = gh * bw; + + // Equal (isotropic), X=Y + if (lhs == rhs) + return 0; + + // Anisotropic - 1=X, 2=Y + return (lhs >= rhs) ? 
1 : 2; + } + + struct grouped_trial_modes + { + basisu::uint_vec m_tm_groups[OTM_NUM_CEMS][OTM_NUM_SUBSETS][OTM_NUM_CCS][OTM_NUM_GRID_SIZES][OTM_NUM_GRID_ANISOS]; // indices of encoder trial modes in each bucket + + void clear() + { + for (uint32_t cem_iter = 0; cem_iter < OTM_NUM_CEMS; cem_iter++) + for (uint32_t subsets_iter = 0; subsets_iter < OTM_NUM_SUBSETS; subsets_iter++) + for (uint32_t ccs_iter = 0; ccs_iter < OTM_NUM_CCS; ccs_iter++) + for (uint32_t grid_sizes_iter = 0; grid_sizes_iter < OTM_NUM_GRID_SIZES; grid_sizes_iter++) + for (uint32_t grid_anisos_iter = 0; grid_anisos_iter < OTM_NUM_GRID_ANISOS; grid_anisos_iter++) + m_tm_groups[cem_iter][subsets_iter][ccs_iter][grid_sizes_iter][grid_anisos_iter].clear(); + } + + void add(uint32_t block_width, uint32_t block_height, + const trial_mode& tm, uint32_t tm_index) + { + const uint32_t cem_index = tm.m_cem; + assert(cem_index < OTM_NUM_CEMS); + + const uint32_t subset_index = tm.m_num_parts - 1; + assert(subset_index < OTM_NUM_SUBSETS); + + const uint32_t ccs_index = tm.m_ccs_index + 1; + assert(ccs_index < OTM_NUM_CCS); + + const uint32_t grid_size = (tm.m_grid_width >= (block_width - 1)) && (tm.m_grid_height >= (block_height - 1)); + const uint32_t grid_aniso = calc_grid_aniso_val(tm.m_grid_width, tm.m_grid_height, block_width, block_height); + + basisu::uint_vec& v = m_tm_groups[cem_index][subset_index][ccs_index][grid_size][grid_aniso]; + if (!v.capacity()) + v.reserve(64); + + v.push_back(tm_index); + } + + uint32_t count_used_groups() const + { + uint32_t n = 0; + + for (uint32_t cem_iter = 0; cem_iter < OTM_NUM_CEMS; cem_iter++) + for (uint32_t subsets_iter = 0; subsets_iter < OTM_NUM_SUBSETS; subsets_iter++) + for (uint32_t ccs_iter = 0; ccs_iter < OTM_NUM_CCS; ccs_iter++) + for (uint32_t grid_sizes_iter = 0; grid_sizes_iter < OTM_NUM_GRID_SIZES; grid_sizes_iter++) + for (uint32_t grid_anisos_iter = 0; grid_anisos_iter < OTM_NUM_GRID_ANISOS; grid_anisos_iter++) + { + if 
(m_tm_groups[cem_iter][subsets_iter][ccs_iter][grid_sizes_iter][grid_anisos_iter].size()) + n++; + } + return n; + } + }; + + extern grouped_trial_modes g_grouped_encoder_trial_modes[astc_helpers::cTOTAL_BLOCK_SIZES]; + + inline const basisu::uint_vec& get_tm_candidates(const grouped_trial_modes& grouped_enc_trial_modes, + uint32_t cem_index, uint32_t subset_index, uint32_t ccs_index, uint32_t grid_size, uint32_t grid_aniso) + { + assert(cem_index < OTM_NUM_CEMS); + assert(subset_index < OTM_NUM_SUBSETS); + assert(ccs_index < OTM_NUM_CCS); + assert(grid_size < OTM_NUM_GRID_SIZES); + assert(grid_aniso < OTM_NUM_GRID_ANISOS); + + const basisu::uint_vec& modes = grouped_enc_trial_modes.m_tm_groups[cem_index][subset_index][ccs_index][grid_size][grid_aniso]; + return modes; + } + + const uint32_t CFG_PACK_GRID_BITS = 7; + const uint32_t CFG_PACK_CEM_BITS = 3; + const uint32_t CFG_PACK_CCS_BITS = 3; + const uint32_t CFG_PACK_SUBSETS_BITS = 2; + const uint32_t CFG_PACK_WISE_BITS = 4; + const uint32_t CFG_PACK_EISE_BITS = 5; + + extern const int s_unique_ldr_index_to_astc_cem[6]; + + enum class xuastc_mode + { + cMODE_SOLID = 0, + cMODE_RAW = 1, + + // Full cfg, partition ID, and all endpoint value reuse. 
+ cMODE_REUSE_CFG_ENDPOINTS_LEFT = 2, + cMODE_REUSE_CFG_ENDPOINTS_UP = 3, + cMODE_REUSE_CFG_ENDPOINTS_DIAG = 4, + + cMODE_RUN = 5, + + cMODE_TOTAL, + }; + + enum class xuastc_zstd_mode + { + // len=1 bits + cMODE_RAW = 0b0, + + // len=2 bits + cMODE_RUN = 0b01, + + // len=4 bits + cMODE_SOLID = 0b0011, + cMODE_REUSE_CFG_ENDPOINTS_LEFT = 0b0111, + cMODE_REUSE_CFG_ENDPOINTS_UP = 0b1011, + cMODE_REUSE_CFG_ENDPOINTS_DIAG = 0b1111 + }; + + const uint32_t XUASTC_LDR_MODE_BYTE_IS_BASE_OFS_FLAG = 1 << 3; + const uint32_t XUASTC_LDR_MODE_BYTE_PART_HASH_HIT = 1 << 4; + const uint32_t XUASTC_LDR_MODE_BYTE_DPCM_ENDPOINTS_FLAG = 1 << 5; + const uint32_t XUASTC_LDR_MODE_BYTE_TM_HASH_HIT_FLAG = 1 << 6; + const uint32_t XUASTC_LDR_MODE_BYTE_USE_DCT = 1 << 7; + + enum class xuastc_ldr_syntax + { + cFullArith = 0, + cHybridArithZStd = 1, + cFullZStd = 2, + + cTotal + }; + + void create_encoder_trial_modes_table(uint32_t block_width, uint32_t block_height, + basisu::vector& encoder_trial_modes, grouped_trial_modes& grouped_encoder_trial_modes, + bool print_debug_info, bool print_modes); + + extern basisu::vector g_encoder_trial_modes[astc_helpers::cTOTAL_BLOCK_SIZES]; + + inline uint32_t part_hash_index(uint32_t x) + { + // fib hash + return (x * 2654435769u) & (PART_HASH_SIZE - 1); + } + + // Full ZStd syntax only + inline uint32_t tm_hash_index(uint32_t x) + { + // fib hash + return (x * 2654435769u) & (TM_HASH_SIZE - 1); + } + + // TODO: Some fields are unused during transcoding. 
+ struct prev_block_state + { + bool m_was_solid_color; + bool m_used_weight_dct; + bool m_first_endpoint_uses_bc; + bool m_reused_full_cfg; + bool m_used_part_hash; + + int m_tm_index; // -1 if invalid (solid color block) + uint32_t m_base_cem_index; // doesn't include base+ofs + uint32_t m_subset_index, m_ccs_index, m_grid_size, m_grid_aniso; + + prev_block_state() + { + clear(); + } + + void clear() + { + basisu::clear_obj(*this); + } + }; + + struct prev_block_state_full_zstd + { + int m_tm_index; // -1 if invalid (solid color block) + + bool was_solid_color() const { return m_tm_index < 0; } + + prev_block_state_full_zstd() + { + clear(); + } + + void clear() + { + basisu::clear_obj(*this); + } + }; + + inline uint32_t cem_to_ldrcem_index(uint32_t cem) + { + switch (cem) + { + case astc_helpers::CEM_LDR_LUM_DIRECT: return 0; + case astc_helpers::CEM_LDR_LUM_ALPHA_DIRECT: return 1; + case astc_helpers::CEM_LDR_RGB_BASE_SCALE: return 2; + case astc_helpers::CEM_LDR_RGB_DIRECT: return 3; + case astc_helpers::CEM_LDR_RGB_BASE_PLUS_OFFSET: return 4; + case astc_helpers::CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A: return 5; + case astc_helpers::CEM_LDR_RGBA_DIRECT: return 6; + case astc_helpers::CEM_LDR_RGBA_BASE_PLUS_OFFSET: return 7; + default: + assert(0); + break; + } + + return 0; + } + + bool pack_base_offset( + uint32_t cem_index, uint32_t dst_ise_endpoint_range, uint8_t* pPacked_endpoints, + const color_rgba& l, const color_rgba& h, + bool use_blue_contraction, bool auto_disable_blue_contraction_if_clamped, + bool& blue_contraction_clamped_flag, bool& base_ofs_clamped_flag, bool& endpoints_swapped); + + bool convert_endpoints_across_cems( + uint32_t prev_cem, uint32_t prev_endpoint_ise_range, const uint8_t* pPrev_endpoints, + uint32_t dst_cem, uint32_t dst_endpoint_ise_range, uint8_t* pDst_endpoints, + bool always_repack, + bool use_blue_contraction, bool auto_disable_blue_contraction_if_clamped, + bool& blue_contraction_clamped_flag, bool& base_ofs_clamped_flag); + 
+ uint32_t get_total_unique_patterns(uint32_t astc_block_size_index, uint32_t num_parts); + //uint16_t unique_pat_index_to_part_seed(uint32_t astc_block_size_index, uint32_t num_parts, uint32_t unique_pat_index); + + typedef bool (*xuastc_decomp_image_init_callback_ptr)(uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t block_width, uint32_t block_height, bool srgb_decode_profile, float dct_q, bool has_alpha, void* pData); + typedef bool (*xuastc_decomp_image_block_callback_ptr)(uint32_t bx, uint32_t by, const astc_helpers::log_astc_block& log_blk, void* pData); + + bool xuastc_ldr_decompress_image( + const uint8_t* pComp_data, size_t comp_data_size, + uint32_t& astc_block_width, uint32_t& astc_block_height, + uint32_t& actual_width, uint32_t& actual_height, bool& has_alpha, bool& uses_srgb_astc_decode_mode, + bool debug_output, + xuastc_decomp_image_init_callback_ptr pInit_callback, void *pInit_callback_data, + xuastc_decomp_image_block_callback_ptr pBlock_callback, void *pBlock_callback_data); + + } // namespace astc_ldr_t + + namespace arith_fastbits_f32 + { + enum { TABLE_BITS = 8 }; // 256..1024 entries typical (8..10) + enum { TABLE_SIZE = 1 << TABLE_BITS }; + enum { MANT_BITS = 23 }; + enum { FRAC_BITS = (int)MANT_BITS - (int)TABLE_BITS }; + enum { FRAC_MASK = (1u << FRAC_BITS) - 1u }; + + extern bool g_initialized; + extern float g_lut_edge[TABLE_SIZE + 1]; // samples at m = 1 + i/TABLE_SIZE (for linear) + + inline void init() + { + if (g_initialized) + return; + + const float inv_ln2 = 1.4426950408889634f; // 1/ln(2) + + for (int i = 0; i <= TABLE_SIZE; ++i) + { + float m = 1.0f + float(i) / float(TABLE_SIZE); // m in [1,2] + g_lut_edge[i] = logf(m) * inv_ln2; // log2(m) + } + + g_initialized = true; + } + + inline void unpack(float p, int& e_unbiased, uint32_t& mant) + { + // kill any denorms + if (p < FLT_MIN) + p = 0; + + union { float f; uint32_t u; } x; + x.f = p; + e_unbiased = int((x.u >> 23) & 0xFF) - 127; + mant = (x.u & 0x7FFFFFu); // 23-bit 
mantissa + } + + // Returns estimated bits given probability p, approximates -log2f(p). + inline float bits_from_prob_linear(float p) + { + assert((p > 0.0f) && (p <= 1.0f)); + if (!g_initialized) + init(); + + int e; uint32_t mant; + unpack(p, e, mant); + + uint32_t idx = mant >> FRAC_BITS; // 0..TABLE_SIZE-1 + uint32_t frac = mant & FRAC_MASK; // low FRAC_BITS + const float inv_scale = 1.0f / float(1u << FRAC_BITS); + float t = float(frac) * inv_scale; // [0,1) + + float y0 = g_lut_edge[idx]; + float y1 = g_lut_edge[idx + 1]; + float log2m = y0 + t * (y1 - y0); + + return -(float(e) + log2m); + } + + } // namespace arith_fastbits_f32 + + namespace arith + { + // A simple range coder + const uint32_t ArithMaxSyms = 2048; + const uint32_t DMLenShift = 15u; + const uint32_t DMMaxCount = 1u << DMLenShift; + const uint32_t BMLenShift = 13u; + const uint32_t BMMaxCount = 1u << BMLenShift; + const uint32_t ArithMinLen = 1u << 24u; + const uint32_t ArithMaxLen = UINT32_MAX; + const uint32_t ArithMinExpectedDataBufSize = 5; + + class arith_bit_model + { + public: + arith_bit_model() + { + reset(); + } + + void init() + { + reset(); + } + + void reset() + { + m_bit0_count = 1; + m_bit_count = 2; + m_bit0_prob = 1U << (BMLenShift - 1); + m_update_interval = 4; + m_bits_until_update = 4; + } + + float get_price(bool bit) const + { + const float prob_0 = (float)m_bit0_prob / (float)BMMaxCount; + const float prob = bit ? 
(1.0f - prob_0) : prob_0; + const float bits = arith_fastbits_f32::bits_from_prob_linear(prob); + assert(fabs(bits - (-log2f(prob))) < .00125f); // basic sanity check + return bits; + } + + void update() + { + assert(m_bit_count >= 2); + assert(m_bit0_count < m_bit_count); + + if (m_bit_count >= BMMaxCount) + { + assert(m_bit_count && m_bit0_count); + + m_bit_count = (m_bit_count + 1) >> 1; + m_bit0_count = (m_bit0_count + 1) >> 1; + + if (m_bit0_count == m_bit_count) + ++m_bit_count; + + assert(m_bit0_count < m_bit_count); + } + + const uint32_t scale = 0x80000000U / m_bit_count; + m_bit0_prob = (m_bit0_count * scale) >> (31 - BMLenShift); + + m_update_interval = basisu::clamp((5 * m_update_interval) >> 2, 4u, 128); + + m_bits_until_update = m_update_interval; + } + + void print_prices(const char* pDesc) + { + if (pDesc) + printf("arith_data_model bit prices for model %s:\n", pDesc); + for (uint32_t i = 0; i < 2; i++) + printf("%u: %3.3f bits\n", i, get_price(i)); + printf("\n"); + } + + private: + friend class arith_enc; + friend class arith_dec; + + uint32_t m_bit0_prob; // snapshot made at last update + + uint32_t m_bit0_count; // live + uint32_t m_bit_count; // live + + int m_bits_until_update; + uint32_t m_update_interval; + }; + + enum { cARITH_GAMMA_MAX_TAIL_CTX = 4, cARITH_GAMMA_MAX_PREFIX_CTX = 3 }; + struct arith_gamma_contexts + { + arith_bit_model m_ctx_prefix[cARITH_GAMMA_MAX_PREFIX_CTX]; // for unary continue prefix + arith_bit_model m_ctx_tail[cARITH_GAMMA_MAX_TAIL_CTX]; // for binary suffix bits + }; + + class arith_data_model + { + public: + arith_data_model() : + m_num_data_syms(0), + m_total_sym_freq(0), + m_update_interval(0), + m_num_syms_until_next_update(0) + { + } + + arith_data_model(uint32_t num_syms, bool faster_update = false) : + m_num_data_syms(0), + m_total_sym_freq(0), + m_update_interval(0), + m_num_syms_until_next_update(0) + { + init(num_syms, faster_update); + } + + void clear() + { + m_cum_sym_freqs.clear(); + 
m_sym_freqs.clear(); + + m_num_data_syms = 0; + m_total_sym_freq = 0; + m_update_interval = 0; + m_num_syms_until_next_update = 0; + } + + void init(uint32_t num_syms, bool faster_update = false) + { + assert((num_syms >= 2) && (num_syms <= ArithMaxSyms)); + + m_num_data_syms = num_syms; + + m_sym_freqs.resize(num_syms); + m_cum_sym_freqs.resize(num_syms + 1); + + reset(faster_update); + } + + void reset(bool faster_update = false) + { + if (!m_num_data_syms) + return; + + m_sym_freqs.set_all(1); + m_total_sym_freq = m_num_data_syms; + + m_update_interval = m_num_data_syms; + m_num_syms_until_next_update = 0; + + update(false); + + if (faster_update) + { + m_update_interval = basisu::clamp((m_num_data_syms + 7) / 8, 4u, (m_num_data_syms + 6) << 3); + m_num_syms_until_next_update = m_update_interval; + } + } + + void update(bool enc_flag) + { + assert(m_num_data_syms); + BASISU_NOTE_UNUSED(enc_flag); + + if (!m_num_data_syms) + return; + + while (m_total_sym_freq >= DMMaxCount) + { + m_total_sym_freq = 0; + + for (uint32_t n = 0; n < m_num_data_syms; n++) + { + m_sym_freqs[n] = (m_sym_freqs[n] + 1u) >> 1u; + m_total_sym_freq += m_sym_freqs[n]; + } + } + + const uint32_t scale = 0x80000000U / m_total_sym_freq; + + uint32_t sum = 0; + for (uint32_t i = 0; i < m_num_data_syms; ++i) + { + assert(((uint64_t)scale * sum) <= UINT32_MAX); + m_cum_sym_freqs[i] = (scale * sum) >> (31 - DMLenShift); + sum += m_sym_freqs[i]; + } + assert(sum == m_total_sym_freq); + + m_cum_sym_freqs[m_num_data_syms] = DMMaxCount; + + m_update_interval = basisu::clamp((5 * m_update_interval) >> 2, 4u, (m_num_data_syms + 6) << 3); + + m_num_syms_until_next_update = m_update_interval; + } + + float get_price(uint32_t sym_index) const + { + assert(sym_index < m_num_data_syms); + + if (sym_index >= m_num_data_syms) + return 0.0f; + + const float prob = (float)(m_cum_sym_freqs[sym_index + 1] - m_cum_sym_freqs[sym_index]) / (float)DMMaxCount; + + const float bits = 
arith_fastbits_f32::bits_from_prob_linear(prob); + assert(fabs(bits - (-log2f(prob))) < .00125f); // basic sanity check + return bits; + } + + void print_prices(const char* pDesc) + { + if (pDesc) + printf("arith_data_model bit prices for model %s:\n", pDesc); + for (uint32_t i = 0; i < m_num_data_syms; i++) + printf("%u: %3.3f bits\n", i, get_price(i)); + printf("\n"); + } + + uint32_t get_num_data_syms() const { return m_num_data_syms; } + + private: + friend class arith_enc; + friend class arith_dec; + + uint32_t m_num_data_syms; + + basisu::uint_vec m_sym_freqs; // live histogram + uint32_t m_total_sym_freq; // always live vs. m_sym_freqs + + basisu::uint_vec m_cum_sym_freqs; // has 1 extra entry, snapshot from last update + + uint32_t m_update_interval; + int m_num_syms_until_next_update; + + uint32_t get_last_sym_index() const { return m_num_data_syms - 1; } + }; + + class arith_enc + { + public: + arith_enc() + { + clear(); + } + + void clear() + { + m_data_buf.clear(); + + m_base = 0; + m_length = ArithMaxLen; + } + + void init(size_t reserve_size) + { + m_data_buf.reserve(reserve_size); + m_data_buf.resize(0); + + m_base = 0; + m_length = ArithMaxLen; + + // Place 8-bit marker at beginning. + // This virtually always guarantees no backwards carries can be lost at the very beginning of the stream. (Should be impossible with this design.) + // It always pushes out 1 0 byte at the very beginning to absorb future carries. 
+ // Caller does this now, we send a tiny header anyway + //put_bits(0x1, 8); + //assert(m_data_buf[0] != 0xFF); + } + + void put_bit(uint32_t bit) + { + m_length >>= 1; + + if (bit) + { + const uint32_t orig_base = m_base; + + m_base += m_length; + + if (orig_base > m_base) + prop_carry(); + } + + if (m_length < ArithMinLen) + renorm(); + } + + enum { cMaxPutBitsLen = 20 }; + void put_bits(uint32_t val, uint32_t num_bits) + { + assert(num_bits && (num_bits <= cMaxPutBitsLen)); + assert(val < (1u << num_bits)); + + m_length >>= num_bits; + + const uint32_t orig_base = m_base; + + m_base += val * m_length; + + if (orig_base > m_base) + prop_carry(); + + if (m_length < ArithMinLen) + renorm(); + } + + // returns # of bits actually written + inline uint32_t put_truncated_binary(uint32_t v, uint32_t n) + { + assert((n >= 2) && (v < n)); + + uint32_t k = basisu::floor_log2i(n); + uint32_t u = (1 << (k + 1)) - n; + + if (v < u) + { + put_bits(v, k); + return k; + } + + uint32_t x = v + u; + assert((x >> 1) >= u); + + put_bits(x >> 1, k); + put_bits(x & 1, 1); + return k + 1; + } + + static inline uint32_t get_truncated_binary_bits(uint32_t v, uint32_t n) + { + assert((n >= 2) && (v < n)); + + uint32_t k = basisu::floor_log2i(n); + uint32_t u = (1 << (k + 1)) - n; + + if (v < u) + return k; + +#ifdef _DEBUG + uint32_t x = v + u; + assert((x >> 1) >= u); +#endif + + return k + 1; + } + + inline uint32_t put_rice(uint32_t v, uint32_t m) + { + assert(m); + + uint32_t q = v >> m, r = v & ((1 << m) - 1); + + // rice coding sanity check + assert(q <= 64); + + uint32_t total_bits = q; + + // TODO: put_bits the pattern inverted in bit order + while (q) + { + put_bit(1); + q--; + } + + put_bit(0); + + put_bits(r, m); + + total_bits += (m + 1); + + return total_bits; + } + + static inline uint32_t get_rice_price(uint32_t v, uint32_t m) + { + assert(m); + + uint32_t q = v >> m; + + // rice coding sanity check + assert(q <= 64); + + uint32_t total_bits = q + 1 + m; + + return 
total_bits; + } + + inline void put_gamma(uint32_t n, arith_gamma_contexts& ctxs) + { + assert(n); + if (!n) + return; + + const int k = basisu::floor_log2i(n); + if (k > 16) + { + assert(0); + return; + } + + // prefix: k times '1' then a '0' + for (int i = 0; i < k; ++i) + encode(1, ctxs.m_ctx_prefix[basisu::minimum(i, cARITH_GAMMA_MAX_PREFIX_CTX - 1)]); + + encode(0, ctxs.m_ctx_prefix[basisu::minimum(k, cARITH_GAMMA_MAX_PREFIX_CTX - 1)]); + + // suffix: the k low bits of n + for (int i = k - 1; i >= 0; --i) + { + uint32_t bit = (n >> i) & 1u; + encode(bit, ctxs.m_ctx_tail[basisu::minimum(i, cARITH_GAMMA_MAX_TAIL_CTX - 1)]); + } + } + + inline float put_gamma_and_return_price(uint32_t n, arith_gamma_contexts& ctxs) + { + assert(n); + if (!n) + return 0.0f; + + const int k = basisu::floor_log2i(n); + if (k > 16) + { + assert(0); + return 0.0f; + } + + float total_price = 0.0f; + + // prefix: k times '1' then a '0' + for (int i = 0; i < k; ++i) + { + total_price += ctxs.m_ctx_prefix[basisu::minimum(i, cARITH_GAMMA_MAX_PREFIX_CTX - 1)].get_price(1); + encode(1, ctxs.m_ctx_prefix[basisu::minimum(i, cARITH_GAMMA_MAX_PREFIX_CTX - 1)]); + } + + total_price += ctxs.m_ctx_prefix[basisu::minimum(k, cARITH_GAMMA_MAX_PREFIX_CTX - 1)].get_price(0); + encode(0, ctxs.m_ctx_prefix[basisu::minimum(k, cARITH_GAMMA_MAX_PREFIX_CTX - 1)]); + + // suffix: the k low bits of n + for (int i = k - 1; i >= 0; --i) + { + uint32_t bit = (n >> i) & 1u; + total_price += ctxs.m_ctx_tail[basisu::minimum(i, cARITH_GAMMA_MAX_TAIL_CTX - 1)].get_price(bit); + encode(bit, ctxs.m_ctx_tail[basisu::minimum(i, cARITH_GAMMA_MAX_TAIL_CTX - 1)]); + } + + return total_price; + } + + // prediced price, won't be accurate if a binary arith model decides to update in between + inline float get_gamma_price(uint32_t n, const arith_gamma_contexts& ctxs) + { + assert(n); + if (!n) + return 0.0f; + + const int k = basisu::floor_log2i(n); + if (k > 16) + { + assert(0); + return 0.0f; + } + + float total_price = 0.0f; 
+ + // prefix: k times '1' then a '0' + for (int i = 0; i < k; ++i) + total_price += ctxs.m_ctx_prefix[basisu::minimum(i, cARITH_GAMMA_MAX_PREFIX_CTX - 1)].get_price(1); + + total_price += ctxs.m_ctx_prefix[basisu::minimum(k, cARITH_GAMMA_MAX_PREFIX_CTX - 1)].get_price(0); + + // suffix: the k low bits of n + for (int i = k - 1; i >= 0; --i) + { + uint32_t bit = (n >> i) & 1u; + total_price += ctxs.m_ctx_tail[basisu::minimum(i, cARITH_GAMMA_MAX_TAIL_CTX - 1)].get_price(bit); + } + + return total_price; + } + + void encode(uint32_t bit, arith_bit_model& dm) + { + uint32_t x = dm.m_bit0_prob * (m_length >> BMLenShift); + + if (!bit) + { + m_length = x; + ++dm.m_bit0_count; + } + else + { + const uint32_t orig_base = m_base; + m_base += x; + m_length -= x; + + if (orig_base > m_base) + prop_carry(); + } + ++dm.m_bit_count; + + if (m_length < ArithMinLen) + renorm(); + + if (--dm.m_bits_until_update <= 0) + dm.update(); + } + + float encode_and_return_price(uint32_t bit, arith_bit_model& dm) + { + const float price = dm.get_price(bit); + encode(bit, dm); + return price; + } + + void encode(uint32_t sym, arith_data_model& dm) + { + assert(sym < dm.m_num_data_syms); + + const uint32_t orig_base = m_base; + + if (sym == dm.get_last_sym_index()) + { + uint32_t x = dm.m_cum_sym_freqs[sym] * (m_length >> DMLenShift); + m_base += x; + m_length -= x; + } + else + { + m_length >>= DMLenShift; + uint32_t x = dm.m_cum_sym_freqs[sym] * m_length; + m_base += x; + m_length = dm.m_cum_sym_freqs[sym + 1] * m_length - x; + } + + if (orig_base > m_base) + prop_carry(); + + if (m_length < ArithMinLen) + renorm(); + + ++dm.m_sym_freqs[sym]; + ++dm.m_total_sym_freq; + + if (--dm.m_num_syms_until_next_update <= 0) + dm.update(true); + } + + float encode_and_return_price(uint32_t sym, arith_data_model& dm) + { + const float price = dm.get_price(sym); + encode(sym, dm); + return price; + } + + void flush() + { + const uint32_t orig_base = m_base; + + if (m_length <= (2 * ArithMinLen)) + { + 
m_base += ArithMinLen >> 1; + m_length = ArithMinLen >> 9; + } + else + { + m_base += ArithMinLen; + m_length = ArithMinLen >> 1; + } + + if (orig_base > m_base) + prop_carry(); + + renorm(); + + // Pad output to min 5 bytes - quite conservative; we're typically compressing large streams so the overhead shouldn't matter. + if (m_data_buf.size() < ArithMinExpectedDataBufSize) + m_data_buf.resize(ArithMinExpectedDataBufSize); + } + + basisu::uint8_vec& get_data_buf() { return m_data_buf; } + const basisu::uint8_vec& get_data_buf() const { return m_data_buf; } + + private: + basisu::uint8_vec m_data_buf; + uint32_t m_base, m_length; + + inline void prop_carry() + { + int64_t ofs = m_data_buf.size() - 1; + + for (; (ofs >= 0) && (m_data_buf[(size_t)ofs] == 0xFF); --ofs) + m_data_buf[(size_t)ofs] = 0; + + if (ofs >= 0) + ++m_data_buf[(size_t)ofs]; + } + + inline void renorm() + { + assert(m_length < ArithMinLen); + do + { + m_data_buf.push_back((uint8_t)(m_base >> 24u)); + m_base <<= 8u; + m_length <<= 8u; + } while (m_length < ArithMinLen); + } + }; + + class arith_dec + { + public: + arith_dec() + { + clear(); + } + + void clear() + { + m_pData_buf = nullptr; + m_pData_buf_last_byte = nullptr; + m_pData_buf_cur = nullptr; + m_data_buf_size = 0; + + m_value = 0; + m_length = 0; + } + + bool init(const uint8_t* pBuf, size_t buf_size) + { + if (buf_size < ArithMinExpectedDataBufSize) + { + assert(0); + return false; + } + + m_pData_buf = pBuf; + m_pData_buf_last_byte = pBuf + buf_size - 1; + m_pData_buf_cur = m_pData_buf + 4; + m_data_buf_size = buf_size; + + m_value = ((uint32_t)(pBuf[0]) << 24u) | ((uint32_t)(pBuf[1]) << 16u) | ((uint32_t)(pBuf[2]) << 8u) | (uint32_t)(pBuf[3]); + m_length = ArithMaxLen; + + // Check for the 8-bit marker we always place at the beginning of the stream. 
+ //uint32_t marker = get_bits(8); + //if (marker != 0x1) + // return false; + + return true; + } + + uint32_t get_bit() + { + assert(m_data_buf_size); + + m_length >>= 1; + + uint32_t bit = (m_value >= m_length); + + if (bit) + m_value -= m_length; + + if (m_length < ArithMinLen) + renorm(); + + return bit; + } + + enum { cMaxGetBitsLen = 20 }; + + uint32_t get_bits(uint32_t num_bits) + { + assert(m_data_buf_size); + + if ((num_bits < 1) || (num_bits > cMaxGetBitsLen)) + { + assert(0); + return 0; + } + + m_length >>= num_bits; + assert(m_length); + + const uint32_t v = m_value / m_length; + + m_value -= m_length * v; + + if (m_length < ArithMinLen) + renorm(); + + return v; + } + + uint32_t decode_truncated_binary(uint32_t n) + { + assert(n >= 2); + + const uint32_t k = basisu::floor_log2i(n); + const uint32_t u = (1 << (k + 1)) - n; + + uint32_t result = get_bits(k); + + if (result >= u) + result = ((result << 1) | get_bits(1)) - u; + + return result; + } + + uint32_t decode_rice(uint32_t m) + { + assert(m); + + uint32_t q = 0; + for (;;) + { + uint32_t k = get_bit(); + if (!k) + break; + + q++; + if (q > 64) + { + assert(0); + return 0; + } + } + + return (q << m) + get_bits(m); + } + + uint32_t decode_bit(arith_bit_model& dm) + { + assert(m_data_buf_size); + + uint32_t x = dm.m_bit0_prob * (m_length >> BMLenShift); + uint32_t bit = (m_value >= x); + + if (bit == 0) + { + m_length = x; + ++dm.m_bit0_count; + } + else + { + m_value -= x; + m_length -= x; + } + ++dm.m_bit_count; + + if (m_length < ArithMinLen) + renorm(); + + if (--dm.m_bits_until_update <= 0) + dm.update(); + + return bit; + } + + inline uint32_t decode_gamma(arith_gamma_contexts& ctxs) + { + int k = 0; + while (decode_bit(ctxs.m_ctx_prefix[basisu::minimum(k, cARITH_GAMMA_MAX_PREFIX_CTX - 1)])) + { + ++k; + + if (k > 16) + { + // something is very wrong + assert(0); + return 0; + } + } + + int n = 1 << k; + for (int i = k - 1; i >= 0; --i) + { + uint32_t bit = 
decode_bit(ctxs.m_ctx_tail[basisu::minimum(i, cARITH_GAMMA_MAX_TAIL_CTX - 1)]); + n |= (bit << i); + } + + return n; + } + + uint32_t decode_sym(arith_data_model& dm) + { + assert(m_data_buf_size); + assert(dm.m_num_data_syms); + + uint32_t x = 0, y = m_length; + + m_length >>= DMLenShift; + + uint32_t low_idx = 0, hi_idx = dm.m_num_data_syms; + uint32_t mid_idx = hi_idx >> 1; + + do + { + uint32_t z = m_length * dm.m_cum_sym_freqs[mid_idx]; + + if (z > m_value) + { + hi_idx = mid_idx; + y = z; + } + else + { + low_idx = mid_idx; + x = z; + } + mid_idx = (low_idx + hi_idx) >> 1; + + } while (mid_idx != low_idx); + + m_value -= x; + m_length = y - x; + + if (m_length < ArithMinLen) + renorm(); + + ++dm.m_sym_freqs[low_idx]; + ++dm.m_total_sym_freq; + + if (--dm.m_num_syms_until_next_update <= 0) + dm.update(false); + + return low_idx; + } + + private: + const uint8_t* m_pData_buf; + const uint8_t* m_pData_buf_last_byte; + const uint8_t* m_pData_buf_cur; + size_t m_data_buf_size; + + uint32_t m_value, m_length; + + inline void renorm() + { + do + { + const uint32_t next_byte = (m_pData_buf_cur > m_pData_buf_last_byte) ? 0 : *m_pData_buf_cur++; + + m_value = (m_value << 8u) | next_byte; + + } while ((m_length <<= 8u) < ArithMinLen); + } + }; + + } // namespace arith +#endif // BASISD_SUPPORT_XUASTC + +#if BASISD_SUPPORT_XUASTC + namespace bc7u + { + int determine_bc7_mode(const void* pBlock); + int determine_bc7_mode_4_index_mode(const void* pBlock); + int determine_bc7_mode_4_or_5_rotation(const void* pBlock); + bool unpack_bc7_mode6(const void* pBlock_bits, color_rgba* pPixels); + bool unpack_bc7(const void* pBlock, color_rgba* pPixels); + } // namespace bc7u + + namespace bc7f + { + enum + { + // Low-level BC7 encoder configuration flags. 
+ cPackBC7FlagUse2SubsetsRGB = 1, // use mode 1/3 for RGB blocks + cPackBC7FlagUse2SubsetsRGBA = 2, // use mode 7 for RGBA blocks + + cPackBC7FlagUse3SubsetsRGB = 4, // also use mode 0/2, cPackBC7FlagUse2SubsetsRGB MUST be enabled too + + cPackBC7FlagUseDualPlaneRGB = 8, // enable mode 4/5 usage for RGB blocks + cPackBC7FlagUseDualPlaneRGBA = 16, // enable mode 4/5 usage for RGBA blocks + + cPackBC7FlagPBitOpt = 32, // enable to disable usage of fixed p-bits on some modes; slower + cPackBC7FlagPBitOptMode6 = 64, // enable to disable usage of fixed p-bits on mode 6, alpha on fully opaque blocks may be 254 however; slower + + cPackBC7FlagUseTrivialMode6 = 128, // enable trivial fast mode 6 encoder on blocks with very low variances (highly recommended) + + cPackBC7FlagPartiallyAnalyticalRGB = 256, // partially analytical mode for RGB blocks, slower but higher quality, computes actual SSE's on complex blocks to resolve which mode to use vs. predictions + cPackBC7FlagPartiallyAnalyticalRGBA = 512, // partially analytical mode for RGBA blocks, slower but higher quality, computes actual SSE's on complex blocks to resolve which mode to use vs. predictions + + // Non-analytical is really still partially analytical on the mode pairs (0 vs. 2, 1 vs 3, 4 vs. 5). + cPackBC7FlagNonAnalyticalRGB = 1024, // very slow/brute force, totally abuses the encoder, MUST use with cPackBC7FlagPartiallyAnalyticalRGB flag + cPackBC7FlagNonAnalyticalRGBA = 2048, // very slow/brute force, totally abuses the encoder, MUST use with cPackBC7FlagPartiallyAnalyticalRGBA flag + + // Default to use first: + + // Decent analytical BC7 defaults + cPackBC7FlagDefaultFastest = cPackBC7FlagUseTrivialMode6, // very weak particularly on alpha, mode 6 only for RGB/RGBA, + + // Mode 6 with pbits for RGB, Modes 4,5,6 for alpha. 
+ cPackBC7FlagDefaultFaster = cPackBC7FlagPBitOpt | cPackBC7FlagUseDualPlaneRGBA | cPackBC7FlagUseTrivialMode6, + + cPackBC7FlagDefaultFast = cPackBC7FlagUse2SubsetsRGB | cPackBC7FlagUse2SubsetsRGBA | cPackBC7FlagUseDualPlaneRGBA | + cPackBC7FlagPBitOpt | cPackBC7FlagUseTrivialMode6, + + cPackBC7FlagDefault = (cPackBC7FlagUse2SubsetsRGB | cPackBC7FlagUse2SubsetsRGBA | cPackBC7FlagUse3SubsetsRGB) | + (cPackBC7FlagUseDualPlaneRGB | cPackBC7FlagUseDualPlaneRGBA) | + (cPackBC7FlagPBitOpt | cPackBC7FlagPBitOptMode6) | + cPackBC7FlagUseTrivialMode6, + + // Default partially analytical BC7 defaults (slower) + cPackBC7FlagDefaultPartiallyAnalytical = cPackBC7FlagDefault | (cPackBC7FlagPartiallyAnalyticalRGB | cPackBC7FlagPartiallyAnalyticalRGBA), + + // Default non-analytical BC7 defaults (very slow). In reality the encoder is still analytical on the mode pairs, but at the highest level is non-analytical. + cPackBC7FlagDefaultNonAnalytical = (cPackBC7FlagDefaultPartiallyAnalytical | (cPackBC7FlagNonAnalyticalRGB | cPackBC7FlagNonAnalyticalRGBA)) & ~cPackBC7FlagUseTrivialMode6 + }; + + void init(); + + void fast_pack_bc7_rgb_analytical(uint8_t* pBlock, const color_rgba* pPixels, uint32_t flags); + uint32_t fast_pack_bc7_rgb_partial_analytical(uint8_t* pBlock, const color_rgba* pPixels, uint32_t flags); + + void fast_pack_bc7_rgba_analytical(uint8_t* pBlock, const color_rgba* pPixels, uint32_t flags); + uint32_t fast_pack_bc7_rgba_partial_analytical(uint8_t* pBlock, const color_rgba* pPixels, uint32_t flags); + + uint32_t fast_pack_bc7_auto_rgba(uint8_t* pBlock, const color_rgba* pPixels, uint32_t flags); + + void print_perf_stats(); + +#if 0 + // Very basic BC7 mode 6 only to ASTC. 
+ void fast_pack_astc(void* pBlock, const color_rgba* pPixels); +#endif + + uint32_t calc_sse(const uint8_t* pBlock, const color_rgba* pPixels); + + } // namespace bc7f + + namespace etc1f + { + struct pack_etc1_state + { + uint64_t m_prev_solid_block; + //decoder_etc_block m_prev_solid_block; + + int m_prev_solid_r8; + int m_prev_solid_g8; + int m_prev_solid_b8; + + pack_etc1_state() + { + clear(); + } + + void clear() + { + m_prev_solid_r8 = -1; + m_prev_solid_g8 = -1; + m_prev_solid_b8 = -1; + } + }; + + void init(); + + void pack_etc1_solid(uint8_t* pBlock, const color_rgba& color, pack_etc1_state& state, bool init_flag = false); + + void pack_etc1(uint8_t* pBlock, const color_rgba* pPixels, pack_etc1_state& state); + + void pack_etc1_grayscale(uint8_t* pBlock, const uint8_t* pPixels, pack_etc1_state& state); + + } // namespace etc1f +#endif // BASISD_SUPPORT_XUASTC + + // Private/internal XUASTC LDR transcoding helpers + + // XUASTC LDR formats only + enum class transcoder_texture_format; + block_format xuastc_get_block_format(transcoder_texture_format tex_fmt); + +#if BASISD_SUPPORT_XUASTC + // Low-quality, but fast, PVRTC1 RGB/RGBA encoder. Power of 2 texture dimensions required. + // Note: Not yet part of our public API: this API may change! 
+ void encode_pvrtc1( + block_format fmt, void* pDst_blocks, + const basisu::vector2D& temp_image, + uint32_t dst_num_blocks_x, uint32_t dst_num_blocks_y, bool from_alpha); + + void transcode_4x4_block( + block_format fmt, // desired output block format + uint32_t block_x, uint32_t block_y, // 4x4 block being processed + void* pDst_blocks, // base pointer to output buffer/bitmap + uint8_t* pDst_block_u8, // pointer to output block/or first pixel to write + const color32* block_pixels, // pointer to 4x4 (16) 32bpp RGBA pixels + uint32_t output_block_or_pixel_stride_in_bytes, uint32_t output_row_pitch_in_blocks_or_pixels, uint32_t output_rows_in_pixels, // output buffer dimensions + int channel0, int channel1, // channels to process, used by some block formats + bool high_quality, bool from_alpha, // Flags specific to certain block formats + uint32_t bc7f_flags, // Real-time bc7f BC7 encoder flags, see bc7f::cPackBC7FlagDefault etc. + etc1f::pack_etc1_state& etc1_pack_state, // etc1f thread local state + int has_alpha = -1); // has_alpha = -1 unknown, 0=definitely no (a all 255's), 1=potentially yes +#endif // BASISD_SUPPORT_XUASTC + + struct bc7_mode_5 + { + union + { + struct + { + uint64_t m_mode : 6; + uint64_t m_rot : 2; + + uint64_t m_r0 : 7; + uint64_t m_r1 : 7; + uint64_t m_g0 : 7; + uint64_t m_g1 : 7; + uint64_t m_b0 : 7; + uint64_t m_b1 : 7; + uint64_t m_a0 : 8; + uint64_t m_a1_0 : 6; + + } m_lo; + + uint64_t m_lo_bits; + }; + + union + { + struct + { + uint64_t m_a1_1 : 2; + + // bit 2 + uint64_t m_c00 : 1; + uint64_t m_c10 : 2; + uint64_t m_c20 : 2; + uint64_t m_c30 : 2; + + uint64_t m_c01 : 2; + uint64_t m_c11 : 2; + uint64_t m_c21 : 2; + uint64_t m_c31 : 2; + + uint64_t m_c02 : 2; + uint64_t m_c12 : 2; + uint64_t m_c22 : 2; + uint64_t m_c32 : 2; + + uint64_t m_c03 : 2; + uint64_t m_c13 : 2; + uint64_t m_c23 : 2; + uint64_t m_c33 : 2; + + // bit 33 + uint64_t m_a00 : 1; + uint64_t m_a10 : 2; + uint64_t m_a20 : 2; + uint64_t m_a30 : 2; + + uint64_t m_a01 
: 2; + uint64_t m_a11 : 2; + uint64_t m_a21 : 2; + uint64_t m_a31 : 2; + + uint64_t m_a02 : 2; + uint64_t m_a12 : 2; + uint64_t m_a22 : 2; + uint64_t m_a32 : 2; + + uint64_t m_a03 : 2; + uint64_t m_a13 : 2; + uint64_t m_a23 : 2; + uint64_t m_a33 : 2; + + } m_hi; + + uint64_t m_hi_bits; + }; + }; + +} // namespace basist + + + diff --git a/external/basis_universal/webgl/README.md b/external/basis_universal/webgl/README.md index 2d655eb9bc..ddd4baa309 100644 --- a/external/basis_universal/webgl/README.md +++ b/external/basis_universal/webgl/README.md @@ -1,12 +1,44 @@ +# Table of Contents +- [WebGL Examples](#webgl-examples) +- [KTX2 Compression, Transcoding, Display (ktx2_encode_test)](#ktx2-compression-transcoding-display-ktx2_encode_test) +- [Texture Video Sample (video_test)](#texture-video-sample-video_test) +- [Simple Transcoding (texture_test)](#simple-transcoding-texture_test) +- [glTF 3D Model](#gltf-3d-model) +- [Testing and developing locally](#testing-and-developing-locally) + # WebGL Examples -Requires WebAssembly and WebGL support. The WebGL demos are hosted live [here](https://subquantumtech.com/bu_6x6/). +Requires WebAssembly and WebGL support. The WebGL demos are hosted live [here](https://subquantumtech.com/xu/). + +To build the encoder and transcoder WASM libraries using [Emscripten](https://emscripten.org/), see the README.md files in the [webgl/transcoder](https://github.com/BinomialLLC/basis_universal/tree/master/webgl/transcoder) and [webgl/encoder](https://github.com/BinomialLLC/basis_universal/tree/master/webgl/encoder) folders. The JavaScript API wrappers to the C/C++ library are located in [`webgl/transcoder/basis_wrappers.cpp`](https://github.com/BinomialLLC/basis_universal/blob/master/webgl/transcoder/basis_wrappers.cpp). The JavaScript API is a thin wrapper layered above our C++ API (however not our C API).
+ +--- + +## KTX2 Compression, Transcoding, Display (ktx2_encode_test) + +Live demo: [`ktx2_encode_test/index.html`](https://subquantumtech.com/xu/ktx2_encode_test/) + +This demo shows how to use the compressor and transcoder from JavaScript. To use it, select a .PNG file then hit the "Encode!" button. The compressor will dynamically generate a .ktx2 file in memory which will then be immediately transcoded and rendered as a quad with a WebGL pixel shader used to sample the texture using the GPU. Hit the "Download!" button to locally download the generated .ktx2 file. This sample allows the user to toggle on/off all GPU formats the local device supports and see the results in real-time. + +This sample's UI exposes a large fraction of the C++ compression and transcoding API to the user. It runs on desktop and mobile browsers (but note the UI on mobile isn't great). + +To view the compressor's textual debug output, open your browser's developer debug console (under Developer Tools in Chrome) and enable the Debug checkbox before hitting the "Encode!" button. WASM multithreading and WASM64 are optionally supported, and a browser supporting both is recommended. + +![Screenshot showing the encode_test demo](ktx2_encode_test/preview.png) + +--- -To build the encoder and transcoder WASM libraries using Emscripten, see the various README.md files in the 'webgl/transcoder' and 'webgl/encoder' folders. The Javascript API wrappers to the C/C++ library are located in [`webgl/transcoder/basis_wrappers.cpp`](https://github.com/BinomialLLC/basis_universal/blob/master/webgl/transcoder/basis_wrappers.cpp). +## Texture Video Sample (video_test) -## Transcoder (texture_test) +See [this wiki page](https://github.com/BinomialLLC/basis_universal/wiki/Encoding-ETC1S-and-XUASTC-LDR-Texture-Video).
-Live demo: [webgl/texture_test/index.html](https://subquantumtech.com/uastchdr2/texture_test/) +image + +--- + +## Simple Transcoding (texture_test) + +Live demo: [webgl/texture_test/index.html](https://subquantumtech.com/xu/texture_test/) Renders a single texture, using the transcoder (compiled to WASM with emscripten) to generate one of the following compressed texture formats: @@ -17,13 +49,15 @@ Renders a single texture, using the transcoder (compiled to WASM with emscripten * PVRTC 4bpp * BC6H, BC7 -On browsers that don't support any compressed texture format, there's a low-quality fallback code path for opaque LDR textures, and a HDR half float or LDR 32bpp fallback code path for HDR textures. +On browsers that don't support any compressed texture formats, there's a low-quality fallback code path for opaque LDR textures, and a HDR half float or LDR 32bpp fallback code path for HDR textures. ![Screenshot showing a basis texture rendered as a 2D image in a webpage.](texture_test/preview.png) +*Note: This sample doesn't support all ASTC/XUASTC LDR block sizes yet, just 4x4. See the "ktx2_encode_test" or "video_test" samples, which do.* + ## glTF 3D Model -Live demo: [`gltf/index.html`](https://subquantumtech.com/uastchdr2/gltf/) +Live demo: [`gltf/index.html`](https://subquantumtech.com/xu/gltf/) Renders a glTF 3D model with `.basis` texture files, transcoded into one of the following compressed texture formats: @@ -41,17 +75,7 @@ extension that is [currently in development](https://github.com/KhronosGroup/glT ![Screenshot showing a basis texture rendered as the base color texture for a 3D model in a webpage.](gltf/preview.png) -## Compressor (ktx2_encode_test) - -Live demo: [`ktx2_encode_test/index.html'](https://subquantumtech.com/uastchdr2/ktx2_encode_test/) - -This demo shows how to use the compressor from JavaScript. To use it, select a .PNG file then hit the "Encode!" button. 
The compressor will dynamically generate a .ktx2 file in memory which will then be immediately transcoded and displayed. Hit the "Download!" button to locally download the generated .ktx2 file. - -To view the compressor's textual debug output, open your browser's developer debug console (under Developer Tools in Chrome) and enable the Debug checkbox before hitting the "Encode!" button. Multithreading is not currently supported when the compressor is compiled to WebAssembly, so compression will be slower than using the stand-alone command line tool. - -![Screenshot showing the encode_test demo](ktx2_encode_test/preview.png) - -## Testing locally +## Testing and developing locally You can locally host the files under the "webgl" folder. One way is to use [Python to setup a local webserver](https://pythonbasics.org/webserver/) in the 'webgl' directory: @@ -59,4 +83,5 @@ You can locally host the files under the "webgl" folder. One way is to use [Pyth cd webgl python3 -m http.server 8000 ``` -Note: For WASM multithreading to be available and enabled, the server must be properly configured. + +**Note: For WASM multithreading to be available and enabled (which is highly recommended for reasonable compression times), the server [must be properly configured](https://unlimited3d.wordpress.com/2021/12/21/webassembly-and-multi-threading/). See the `webgl/start_webserver.sh` and `webgl/webserver_cross_origin.py` example scripts.** diff --git a/external/basis_universal/webgl/encoder/CMakeLists.txt b/external/basis_universal/webgl/encoder/CMakeLists.txt index 48ada14bf9..e8e5adb141 100644 --- a/external/basis_universal/webgl/encoder/CMakeLists.txt +++ b/external/basis_universal/webgl/encoder/CMakeLists.txt @@ -1,94 +1,136 @@ cmake_minimum_required(VERSION 3.5) - project(basisu_encoder_js) -# The encoder always supports generating KTX2 files, but Zstandard support is optional. If it's disabled, KTX2 UASTC files will always be uncompressed. 
-# If you know you'll never be encoding UASTC+Zstd KTX2 files you can set KTX2_ZSTANDARD to 0 to reduce the size of the compiled encoder. -option(KTX2_ZSTANDARD "KTX2_ZSTANDARD" TRUE) - +# Toggle Zstd support for KTX2 (ON by default). +option(KTX2_ZSTANDARD "Enable KTX2 Zstandard support" TRUE) message("KTX2_ZSTANDARD=${KTX2_ZSTANDARD}") -if (EMSCRIPTEN) +# Only for Emscripten builds. +if(EMSCRIPTEN) set(CMAKE_CXX_STANDARD 17) + # ---------- Pick config once (single-config generators) ---------- + # Supports: Release (default), Debug, SAN + if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Release") + endif() + string(TOUPPER "${CMAKE_BUILD_TYPE}" BUILD_MODE) + + # Per-config compile/link flags + set(CONFIG_CFLAGS "") + set(CONFIG_DEFS "") + set(CONFIG_LINK "") + + if(BUILD_MODE STREQUAL "RELEASE") + set(CONFIG_CFLAGS "-O3") + set(CONFIG_DEFS "NDEBUG") + set(CONFIG_LINK "-O3 -s ASSERTIONS=0") + elseif(BUILD_MODE STREQUAL "DEBUG") + set(CONFIG_CFLAGS "-g -O0") + set(CONFIG_DEFS "DEBUG") + set(CONFIG_LINK "-g -s ASSERTIONS=2") + elseif(BUILD_MODE STREQUAL "SAN") + set(CONFIG_CFLAGS "-g -O1 -fsanitize=undefined -fsanitize=address") + set(CONFIG_DEFS "DEBUG") + set(CONFIG_LINK "-g -s ASSERTIONS=2 -fsanitize=undefined -fsanitize=address") + else() + message(WARNING "Unknown CMAKE_BUILD_TYPE='${CMAKE_BUILD_TYPE}', defaulting to Release-like flags.") + set(CONFIG_CFLAGS "-O3") + set(CONFIG_DEFS "NDEBUG") + set(CONFIG_LINK "-O3 -s ASSERTIONS=0") + endif() + + # ---------- Sources (shared) ---------- set(SRC_LIST ../transcoder/basis_wrappers.cpp ../../transcoder/basisu_transcoder.cpp - ../../encoder/basisu_backend.cpp - ../../encoder/basisu_basis_file.cpp - ../../encoder/basisu_comp.cpp - ../../encoder/basisu_enc.cpp - ../../encoder/basisu_etc.cpp - ../../encoder/basisu_frontend.cpp - ../../encoder/basisu_gpu_texture.cpp - ../../encoder/basisu_pvrtc1_4.cpp - ../../encoder/basisu_resampler.cpp - ../../encoder/basisu_resample_filters.cpp - ../../encoder/basisu_ssim.cpp - 
../../encoder/basisu_uastc_enc.cpp - ../../encoder/basisu_bc7enc.cpp - ../../encoder/basisu_kernels_sse.cpp - ../../encoder/basisu_opencl.cpp - ../../encoder/pvpngreader.cpp - ../../encoder/jpgd.cpp - ../../encoder/3rdparty/android_astc_decomp.cpp - ../../encoder/basisu_uastc_hdr_4x4_enc.cpp - ../../encoder/basisu_astc_hdr_6x6_enc.cpp - ../../encoder/basisu_astc_hdr_common.cpp - ../../encoder/3rdparty/tinyexr.cpp + ../../encoder/basisu_backend.cpp + ../../encoder/basisu_basis_file.cpp + ../../encoder/basisu_comp.cpp + ../../encoder/basisu_enc.cpp + ../../encoder/basisu_etc.cpp + ../../encoder/basisu_frontend.cpp + ../../encoder/basisu_gpu_texture.cpp + ../../encoder/basisu_pvrtc1_4.cpp + ../../encoder/basisu_resampler.cpp + ../../encoder/basisu_resample_filters.cpp + ../../encoder/basisu_ssim.cpp + ../../encoder/basisu_uastc_enc.cpp + ../../encoder/basisu_bc7enc.cpp + ../../encoder/basisu_kernels_sse.cpp + ../../encoder/basisu_opencl.cpp + ../../encoder/pvpngreader.cpp + ../../encoder/jpgd.cpp + ../../encoder/3rdparty/android_astc_decomp.cpp + ../../encoder/basisu_uastc_hdr_4x4_enc.cpp + ../../encoder/basisu_astc_hdr_6x6_enc.cpp + ../../encoder/basisu_astc_hdr_common.cpp + ../../encoder/basisu_astc_ldr_common.cpp + ../../encoder/basisu_astc_ldr_encode.cpp + ../../encoder/3rdparty/tinyexr.cpp ) - - if (KTX2_ZSTANDARD) - set(SRC_LIST ${SRC_LIST} - ../../zstd/zstd.c - ) - set(ZSTD_DEFINITION BASISD_SUPPORT_KTX2_ZSTD=1) + if(KTX2_ZSTANDARD) + list(APPEND SRC_LIST ../../zstd/zstd.c) + set(ZSTD_DEFINITION BASISD_SUPPORT_KTX2_ZSTD=1) else() - set(ZSTD_DEFINITION BASISD_SUPPORT_KTX2_ZSTD=0) + set(ZSTD_DEFINITION BASISD_SUPPORT_KTX2_ZSTD=0) endif() - # No threading version - add_executable(basis_encoder.js ${SRC_LIST}) - - #target_compile_definitions(basis_encoder.js PRIVATE NDEBUG BASISD_SUPPORT_UASTC=1 BASISD_SUPPORT_BC7=1 BASISD_SUPPORT_ATC=0 BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY=0 BASISD_SUPPORT_PVRTC2=0 BASISD_SUPPORT_FXT1=0 BASISD_SUPPORT_ETC2_EAC_RG11=0 
BASISU_SUPPORT_ENCODING=1 BASISU_SUPPORT_SSE=0 ${ZSTD_DEFINITION} ) - #target_compile_options(basis_encoder.js PRIVATE -fno-strict-aliasing -O3) - - #target_compile_definitions(basis_encoder.js PRIVATE DEBUG BASISD_SUPPORT_UASTC=1 BASISD_SUPPORT_BC7=1 BASISD_SUPPORT_ATC=0 BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY=0 BASISD_SUPPORT_PVRTC2=0 BASISD_SUPPORT_FXT1=0 BASISD_SUPPORT_ETC2_EAC_RG11=0 BASISU_SUPPORT_ENCODING=1 BASISU_SUPPORT_SSE=0 ${ZSTD_DEFINITION} ) - #target_compile_options(basis_encoder.js PRIVATE -fno-strict-aliasing -g -O1 -fsanitize=undefined -fsanitize=address) - - # debug options - #target_compile_definitions(basis_encoder.js PRIVATE DEBUG BASISD_SUPPORT_UASTC=1 BASISD_SUPPORT_BC7=1 BASISD_SUPPORT_ATC=0 BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY=0 BASISD_SUPPORT_PVRTC2=0 BASISD_SUPPORT_FXT1=0 BASISD_SUPPORT_ETC2_EAC_RG11=0 BASISU_SUPPORT_ENCODING=1 BASISU_SUPPORT_SSE=0 ${ZSTD_DEFINITION} ) - #target_compile_options(basis_encoder.js PRIVATE -fno-strict-aliasing -g -O0) - - # release options - target_compile_definitions(basis_encoder.js PRIVATE NDEBUG BASISD_SUPPORT_UASTC=1 BASISD_SUPPORT_BC7=1 BASISD_SUPPORT_ATC=0 BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY=0 BASISD_SUPPORT_PVRTC2=0 BASISD_SUPPORT_FXT1=0 BASISD_SUPPORT_ETC2_EAC_RG11=0 BASISU_SUPPORT_ENCODING=1 BASISU_SUPPORT_SSE=0 ${ZSTD_DEFINITION} ) - target_compile_options(basis_encoder.js PRIVATE -fno-strict-aliasing -O3) - - target_include_directories(basis_encoder.js PRIVATE ../../transcoder) - - set_target_properties(basis_encoder.js PROPERTIES - OUTPUT_NAME "basis_encoder" - SUFFIX ".js" - - #LINK_FLAGS "--bind -s INITIAL_MEMORY=536870912 -s ALLOW_MEMORY_GROWTH=1 -s STACK_SIZE=262144 -s MODULARIZE=1 -s EXPORT_NAME=BASIS -s EXPORTED_RUNTIME_METHODS=['HEAP8']") - #LINK_FLAGS "--bind -s INITIAL_MEMORY=536870912 -g -s STACK_SIZE=262144 -s ALLOW_MEMORY_GROWTH=1 -s MODULARIZE=1 -s EXPORT_NAME=BASIS -fsanitize=undefined -fsanitize=address -s EXPORTED_RUNTIME_METHODS=['HEAP8']") - #LINK_FLAGS "--bind -s 
INITIAL_MEMORY=536870912 -g -s STACK_SIZE=262144 -s ALLOW_MEMORY_GROWTH=1 -s MODULARIZE=1 -s EXPORT_NAME=BASIS -s ASSERTIONS=2 -s EXPORTED_RUNTIME_METHODS=['HEAP8']") - LINK_FLAGS "--bind -s ALLOW_MEMORY_GROWTH=1 -s INITIAL_MEMORY=536870912 -s STACK_SIZE=262144 -s MODULARIZE=1 -s EXPORT_NAME=BASIS -s ASSERTIONS=0 -s EXPORTED_RUNTIME_METHODS=['HEAP8']") - - add_executable(basis_encoder_threads.js ${SRC_LIST}) - - # Threaded version - target_compile_definitions(basis_encoder_threads.js PRIVATE NDEBUG BASISD_SUPPORT_UASTC=1 BASISD_SUPPORT_BC7=1 BASISD_SUPPORT_ATC=0 BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY=0 BASISD_SUPPORT_PVRTC2=0 BASISD_SUPPORT_FXT1=0 BASISD_SUPPORT_ETC2_EAC_RG11=0 BASISU_SUPPORT_ENCODING=1 BASISU_SUPPORT_SSE=0 ${ZSTD_DEFINITION} WASM_THREADS_ENABLED=1 ) - target_include_directories(basis_encoder_threads.js PRIVATE ../../transcoder) - - target_compile_options(basis_encoder_threads.js PRIVATE -fno-strict-aliasing -O3 -matomics -mbulk-memory) - - set_target_properties(basis_encoder_threads.js PROPERTIES - OUTPUT_NAME "basis_encoder_threads" - SUFFIX ".js" - #LINK_FLAGS "--bind -s INITIAL_MEMORY=536870912 -s ALLOW_MEMORY_GROWTH=1 -s STACK_SIZE=262144 -s MODULARIZE=1 -s EXPORT_NAME=BASIS -s EXPORTED_RUNTIME_METHODS=['HEAP8']") - #LINK_FLAGS "--bind -s INITIAL_MEMORY=536870912 -g -s STACK_SIZE=262144 -s ALLOW_MEMORY_GROWTH=1 -s MODULARIZE=1 -s EXPORT_NAME=BASIS -fsanitize=undefined -fsanitize=address -s EXPORTED_RUNTIME_METHODS=['HEAP8']") - #LINK_FLAGS "--bind -s INITIAL_MEMORY=536870912 -g -s STACK_SIZE=262144 -s ALLOW_MEMORY_GROWTH=1 -s MODULARIZE=1 -s EXPORT_NAME=BASIS -s ASSERTIONS=2 -s USE_PTHREADS=1 -s PTHREAD_POOL_SIZE=18 -s ENVIRONMENT=web,worker -s EXPORTED_RUNTIME_METHODS=['HEAP8']") - LINK_FLAGS "--bind -s ALLOW_MEMORY_GROWTH=1 -s INITIAL_MEMORY=536870912 -O3 -s STACK_SIZE=262144 -s MODULARIZE=1 -s EXPORT_NAME=BASIS -s ASSERTIONS=0 -s USE_PTHREADS=1 -s PTHREAD_POOL_SIZE=18 -s ENVIRONMENT=web,worker -s EXPORTED_RUNTIME_METHODS=['HEAP8']") + # 
Common preprocessor defines (same as your original) + set(COMMON_DEFS + BASISD_SUPPORT_UASTC=1 + BASISD_SUPPORT_BC7=1 + BASISD_SUPPORT_ATC=0 + BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY=0 + BASISD_SUPPORT_PVRTC2=0 + BASISD_SUPPORT_FXT1=0 + BASISD_SUPPORT_ETC2_EAC_RG11=0 + BASISU_SUPPORT_ENCODING=1 + BASISU_SUPPORT_SSE=0 + BASISD_SUPPORT_XUASTC=1 + ${ZSTD_DEFINITION} + ) + # Base link flags + set(LINK_BASE "--bind -s ALLOW_MEMORY_GROWTH=1 -s INITIAL_MEMORY=536870912 -s STACK_SIZE=2097152 -s MODULARIZE=1 -s EXPORT_NAME=BASIS -s EXPORTED_RUNTIME_METHODS=['HEAP8']") + set(LINK_THREADS "-s USE_PTHREADS=1 -s PTHREAD_POOL_SIZE=18 -s ENVIRONMENT=web,worker") + set(LINK_WASM64 "-s MEMORY64=1 -sWASM_BIGINT=1 --profiling-funcs") + + # Helper to avoid repetition + function(add_encoder target out_name use_threads use_wasm64) + add_executable(${target} ${SRC_LIST}) + set_target_properties(${target} PROPERTIES OUTPUT_NAME "${out_name}" SUFFIX ".js") + target_include_directories(${target} PRIVATE ../../transcoder) + + # Compile defs and options + target_compile_definitions(${target} PRIVATE ${COMMON_DEFS} ${CONFIG_DEFS}) + target_compile_options(${target} PRIVATE -fno-strict-aliasing ${CONFIG_CFLAGS}) + + if(${use_threads}) + target_compile_options(${target} PRIVATE -matomics -mbulk-memory) + target_compile_definitions(${target} PRIVATE WASM_THREADS_ENABLED=1) + endif() + + if(${use_wasm64}) + target_compile_options(${target} PRIVATE -s MEMORY64=1) + endif() + + # Link flags (no generator expressions) + set(_lf "${LINK_BASE} ${CONFIG_LINK}") + + if(${use_threads}) + set(_lf "${_lf} ${LINK_THREADS}") + endif() + + if(${use_wasm64}) + set(_lf "${_lf} ${LINK_WASM64} -s INITIAL_MEMORY=1073741824 -s MAXIMUM_MEMORY=12884901888") + endif() + + set_target_properties(${target} PROPERTIES LINK_FLAGS "${_lf}") + endfunction() + + # The three outputs (names unchanged) + add_encoder(basis_encoder.js "basis_encoder" OFF OFF) # wasm32 + add_encoder(basis_encoder_threads.js "basis_encoder_threads" 
ON OFF) # wasm32 + threads + add_encoder(basis_encoder_threads_wasm64.js "basis_encoder_threads_wasm64" ON ON ) # wasm64 + threads endif() diff --git a/external/basis_universal/webgl/encoder/build/basis_encoder.js b/external/basis_universal/webgl/encoder/build/basis_encoder.js index 845fd0660e..81aeb1d6aa 100644 --- a/external/basis_universal/webgl/encoder/build/basis_encoder.js +++ b/external/basis_universal/webgl/encoder/build/basis_encoder.js @@ -1,6048 +1,2 @@ -// This code implements the `-sMODULARIZE` settings by taking the generated -// JS program code (INNER_JS_CODE) and wrapping it in a factory function. - -// Single threaded MINIMAL_RUNTIME programs do not need access to -// document.currentScript, so a simple export declaration is enough. -var BASIS = (() => { - // When MODULARIZE this JS may be executed later, - // after document.currentScript is gone, so we save it. - // In EXPORT_ES6 mode we can just use 'import.meta.url'. - var _scriptName = globalThis.document?.currentScript?.src; - return async function(moduleArg = {}) { - var moduleRtn; - -// include: shell.js -// include: minimum_runtime_check.js -// end include: minimum_runtime_check.js -// The Module object: Our interface to the outside world. We import -// and export values on it. There are various ways Module can be used: -// 1. Not defined. We create it here -// 2. A function parameter, function(moduleArg) => Promise -// 3. pre-run appended it, var Module = {}; ..generated code.. -// 4. External script tag defines var Module. -// We need to check if Module already exists (e.g. case 3 above). -// Substitution will be replaced with actual code on later stage of the build, -// this way Closure Compiler will not mangle it (e.g. case 4. above). -// Note that if you want to run closure, and also to use Module -// after the generated code, you will need to define var Module = {}; -// before the code. 
Then that object will be used in the code, and you -// can continue to use Module afterwards as well. -var Module = moduleArg; - -// Determine the runtime environment we are in. You can customize this by -// setting the ENVIRONMENT setting at compile time (see settings.js). - -// Attempt to auto-detect the environment -var ENVIRONMENT_IS_WEB = !!globalThis.window; -var ENVIRONMENT_IS_WORKER = !!globalThis.WorkerGlobalScope; -// N.b. Electron.js environment is simultaneously a NODE-environment, but -// also a web environment. -var ENVIRONMENT_IS_NODE = globalThis.process?.versions?.node && globalThis.process?.type != 'renderer'; -var ENVIRONMENT_IS_SHELL = !ENVIRONMENT_IS_WEB && !ENVIRONMENT_IS_NODE && !ENVIRONMENT_IS_WORKER; - -// --pre-jses are emitted after the Module integration code, so that they can -// refer to Module (if they choose; they can also define Module) - - -var arguments_ = []; -var thisProgram = './this.program'; -var quit_ = (status, toThrow) => { - throw toThrow; -}; - -if (typeof __filename != 'undefined') { // Node - _scriptName = __filename; -} else -if (ENVIRONMENT_IS_WORKER) { - _scriptName = self.location.href; -} - -// `/` should be present at the end if `scriptDirectory` is not empty -var scriptDirectory = ''; -function locateFile(path) { - if (Module['locateFile']) { - return Module['locateFile'](path, scriptDirectory); - } - return scriptDirectory + path; -} - -// Hooks that are implemented differently in different runtime environments. -var readAsync, readBinary; - -if (ENVIRONMENT_IS_NODE) { - - // These modules will usually be used on Node.js. Load them eagerly to avoid - // the complexity of lazy-loading. - var fs = require('fs'); - - scriptDirectory = __dirname + '/'; - -// include: node_shell_read.js -readBinary = (filename) => { - // We need to re-wrap `file://` strings to URLs. - filename = isFileURI(filename) ? 
new URL(filename) : filename; - var ret = fs.readFileSync(filename); - return ret; -}; - -readAsync = async (filename, binary = true) => { - // See the comment in the `readBinary` function. - filename = isFileURI(filename) ? new URL(filename) : filename; - var ret = fs.readFileSync(filename, binary ? undefined : 'utf8'); - return ret; -}; -// end include: node_shell_read.js - if (process.argv.length > 1) { - thisProgram = process.argv[1].replace(/\\/g, '/'); - } - - arguments_ = process.argv.slice(2); - - quit_ = (status, toThrow) => { - process.exitCode = status; - throw toThrow; - }; - -} else - -// Note that this includes Node.js workers when relevant (pthreads is enabled). -// Node.js workers are detected as a combination of ENVIRONMENT_IS_WORKER and -// ENVIRONMENT_IS_NODE. -if (ENVIRONMENT_IS_WEB || ENVIRONMENT_IS_WORKER) { - try { - scriptDirectory = new URL('.', _scriptName).href; // includes trailing slash - } catch { - // Must be a `blob:` or `data:` URL (e.g. `blob:http://site.com/etc/etc`), we cannot - // infer anything from them. - } - - { -// include: web_or_worker_shell_read.js -if (ENVIRONMENT_IS_WORKER) { - readBinary = (url) => { - var xhr = new XMLHttpRequest(); - xhr.open('GET', url, false); - xhr.responseType = 'arraybuffer'; - xhr.send(null); - return new Uint8Array(/** @type{!ArrayBuffer} */(xhr.response)); - }; - } - - readAsync = async (url) => { - // Fetch has some additional restrictions over XHR, like it can't be used on a file:// url. - // See https://github.com/github/fetch/pull/92#issuecomment-140665932 - // Cordova or Electron apps are typically loaded from a file:// url. - // So use XHR on webview if URL is a file URL. 
- if (isFileURI(url)) { - return new Promise((resolve, reject) => { - var xhr = new XMLHttpRequest(); - xhr.open('GET', url, true); - xhr.responseType = 'arraybuffer'; - xhr.onload = () => { - if (xhr.status == 200 || (xhr.status == 0 && xhr.response)) { // file URLs can return 0 - resolve(xhr.response); - return; - } - reject(xhr.status); - }; - xhr.onerror = reject; - xhr.send(null); - }); - } - var response = await fetch(url, { credentials: 'same-origin' }); - if (response.ok) { - return response.arrayBuffer(); - } - throw new Error(response.status + ' : ' + response.url); - }; -// end include: web_or_worker_shell_read.js - } -} else -{ -} - -var out = console.log.bind(console); -var err = console.error.bind(console); - -// end include: shell.js - -// include: preamble.js -// === Preamble library stuff === - -// Documentation for the public APIs defined in this file must be updated in: -// site/source/docs/api_reference/preamble.js.rst -// A prebuilt local version of the documentation is available at: -// site/build/text/docs/api_reference/preamble.js.txt -// You can also build docs locally as HTML or other formats in site/ -// An online HTML version (which may be of a different version of Emscripten) -// is up at http://kripken.github.io/emscripten-site/docs/api_reference/preamble.js.html - -var wasmBinary; - -// Wasm globals - -//======================================== -// Runtime essentials -//======================================== - -// whether we are quitting the application. no code should run after this. -// set in exit() and abort() -var ABORT = false; - -// set by exit() and abort(). Passed to 'onExit' handler. -// NOTE: This is also used as the process return code code in shell environments -// but only when noExitRuntime is false. -var EXITSTATUS; - -// In STRICT mode, we only define assert() when ASSERTIONS is set. i.e. we -// don't define it at all in release modes. This matches the behaviour of -// MINIMAL_RUNTIME. 
-// TODO(sbc): Make this the default even without STRICT enabled. -/** @type {function(*, string=)} */ -function assert(condition, text) { - if (!condition) { - // This build was created without ASSERTIONS defined. `assert()` should not - // ever be called in this configuration but in case there are callers in - // the wild leave this simple abort() implementation here for now. - abort(text); - } -} - -/** - * Indicates whether filename is delivered via file protocol (as opposed to http/https) - * @noinline - */ -var isFileURI = (filename) => filename.startsWith('file://'); - -// include: runtime_common.js -// include: runtime_stack_check.js -// end include: runtime_stack_check.js -// include: runtime_exceptions.js -// end include: runtime_exceptions.js -// include: runtime_debug.js -// end include: runtime_debug.js -var readyPromiseResolve, readyPromiseReject; - -// Memory management -var -/** @type {!Int8Array} */ - HEAP8, -/** @type {!Uint8Array} */ - HEAPU8, -/** @type {!Int16Array} */ - HEAP16, -/** @type {!Uint16Array} */ - HEAPU16, -/** @type {!Int32Array} */ - HEAP32, -/** @type {!Uint32Array} */ - HEAPU32, -/** @type {!Float32Array} */ - HEAPF32, -/** @type {!Float64Array} */ - HEAPF64; - -// BigInt64Array type is not correctly defined in closure -var -/** not-@type {!BigInt64Array} */ - HEAP64, -/* BigUint64Array type is not correctly defined in closure -/** not-@type {!BigUint64Array} */ - HEAPU64; - -var runtimeInitialized = false; - - - -function updateMemoryViews() { - var b = wasmMemory.buffer; - Module['HEAP8'] = HEAP8 = new Int8Array(b); - HEAP16 = new Int16Array(b); - HEAPU8 = new Uint8Array(b); - HEAPU16 = new Uint16Array(b); - HEAP32 = new Int32Array(b); - HEAPU32 = new Uint32Array(b); - HEAPF32 = new Float32Array(b); - HEAPF64 = new Float64Array(b); - HEAP64 = new BigInt64Array(b); - HEAPU64 = new BigUint64Array(b); -} - -// include: memoryprofiler.js -// end include: memoryprofiler.js -// end include: runtime_common.js -function preRun() { - 
if (Module['preRun']) { - if (typeof Module['preRun'] == 'function') Module['preRun'] = [Module['preRun']]; - while (Module['preRun'].length) { - addOnPreRun(Module['preRun'].shift()); - } - } - // Begin ATPRERUNS hooks - callRuntimeCallbacks(onPreRuns); - // End ATPRERUNS hooks -} - -function initRuntime() { - runtimeInitialized = true; - - // Begin ATINITS hooks - if (!Module['noFSInit'] && !FS.initialized) FS.init(); -TTY.init(); - // End ATINITS hooks - - wasmExports['__wasm_call_ctors'](); - - // Begin ATPOSTCTORS hooks - FS.ignorePermissions = false; - // End ATPOSTCTORS hooks -} - -function postRun() { - // PThreads reuse the runtime from the main thread. - - if (Module['postRun']) { - if (typeof Module['postRun'] == 'function') Module['postRun'] = [Module['postRun']]; - while (Module['postRun'].length) { - addOnPostRun(Module['postRun'].shift()); - } - } - - // Begin ATPOSTRUNS hooks - callRuntimeCallbacks(onPostRuns); - // End ATPOSTRUNS hooks -} - -/** @param {string|number=} what */ -function abort(what) { - Module['onAbort']?.(what); - - what = 'Aborted(' + what + ')'; - // TODO(sbc): Should we remove printing and leave it up to whoever - // catches the exception? - err(what); - - ABORT = true; - - what += '. Build with -sASSERTIONS for more info.'; - - // Use a wasm runtime error, because a JS error might be seen as a foreign - // exception, which means we'd run destructors on it. We need the error to - // simply make the program stop. - // FIXME This approach does not work in Wasm EH because it currently does not assume - // all RuntimeErrors are from traps; it decides whether a RuntimeError is from - // a trap or not based on a hidden field within the object. So at the moment - // we don't have a way of throwing a wasm trap from JS. TODO Make a JS API that - // allows this in the wasm spec. - - // Suppress closure compiler warning here. 
Closure compiler's builtin extern - // definition for WebAssembly.RuntimeError claims it takes no arguments even - // though it can. - // TODO(https://github.com/google/closure-compiler/pull/3913): Remove if/when upstream closure gets fixed. - /** @suppress {checkTypes} */ - var e = new WebAssembly.RuntimeError(what); - - readyPromiseReject?.(e); - // Throw the error whether or not MODULARIZE is set because abort is used - // in code paths apart from instantiation where an exception is expected - // to be thrown when abort is called. - throw e; -} - -var wasmBinaryFile; - -function findWasmBinary() { - return locateFile('basis_encoder.wasm'); -} - -function getBinarySync(file) { - if (file == wasmBinaryFile && wasmBinary) { - return new Uint8Array(wasmBinary); - } - if (readBinary) { - return readBinary(file); - } - // Throwing a plain string here, even though it not normally adviables since - // this gets turning into an `abort` in instantiateArrayBuffer. - throw 'both async and sync fetching of the wasm failed'; -} - -async function getWasmBinary(binaryFile) { - // If we don't have the binary yet, load it asynchronously using readAsync. - if (!wasmBinary) { - // Fetch the binary using readAsync - try { - var response = await readAsync(binaryFile); - return new Uint8Array(response); - } catch { - // Fall back to getBinarySync below; - } - } - - // Otherwise, getBinarySync should be able to get it synchronously - return getBinarySync(binaryFile); -} - -async function instantiateArrayBuffer(binaryFile, imports) { - try { - var binary = await getWasmBinary(binaryFile); - var instance = await WebAssembly.instantiate(binary, imports); - return instance; - } catch (reason) { - err(`failed to asynchronously prepare wasm: ${reason}`); - - abort(reason); - } -} - -async function instantiateAsync(binary, binaryFile, imports) { - if (!binary - // Don't use streaming for file:// delivered objects in a webview, fetch them synchronously. 
- && !isFileURI(binaryFile) - // Avoid instantiateStreaming() on Node.js environment for now, as while - // Node.js v18.1.0 implements it, it does not have a full fetch() - // implementation yet. - // - // Reference: - // https://github.com/emscripten-core/emscripten/pull/16917 - && !ENVIRONMENT_IS_NODE - ) { - try { - var response = fetch(binaryFile, { credentials: 'same-origin' }); - var instantiationResult = await WebAssembly.instantiateStreaming(response, imports); - return instantiationResult; - } catch (reason) { - // We expect the most common failure cause to be a bad MIME type for the binary, - // in which case falling back to ArrayBuffer instantiation should work. - err(`wasm streaming compile failed: ${reason}`); - err('falling back to ArrayBuffer instantiation'); - // fall back of instantiateArrayBuffer below - }; - } - return instantiateArrayBuffer(binaryFile, imports); -} - -function getWasmImports() { - // prepare imports - var imports = { - 'env': wasmImports, - 'wasi_snapshot_preview1': wasmImports, - }; - return imports; -} - -// Create the wasm instance. -// Receives the wasm imports, returns the exports. -async function createWasm() { - // Load the wasm module and create an instance of using native support in the JS engine. - // handle a generated wasm instance, receiving its exports and - // performing other necessary setup - /** @param {WebAssembly.Module=} module*/ - function receiveInstance(instance, module) { - wasmExports = instance.exports; - - assignWasmExports(wasmExports); - - updateMemoryViews(); - - return wasmExports; - } - - // Prefer streaming instantiation if available. - function receiveInstantiationResult(result) { - // 'result' is a ResultObject object which has both the module and instance. 
- // receiveInstance() will swap in the exports (to Module.asm) so they can be called - // TODO: Due to Closure regression https://github.com/google/closure-compiler/issues/3193, the above line no longer optimizes out down to the following line. - // When the regression is fixed, can restore the above PTHREADS-enabled path. - return receiveInstance(result['instance']); - } - - var info = getWasmImports(); - - // User shell pages can write their own Module.instantiateWasm = function(imports, successCallback) callback - // to manually instantiate the Wasm module themselves. This allows pages to - // run the instantiation parallel to any other async startup actions they are - // performing. - // Also pthreads and wasm workers initialize the wasm instance through this - // path. - if (Module['instantiateWasm']) { - return new Promise((resolve, reject) => { - Module['instantiateWasm'](info, (inst, mod) => { - resolve(receiveInstance(inst, mod)); - }); - }); - } - - wasmBinaryFile ??= findWasmBinary(); - var result = await instantiateAsync(wasmBinary, wasmBinaryFile, info); - var exports = receiveInstantiationResult(result); - return exports; -} - -// end include: preamble.js - -// Begin JS library code - - - class ExitStatus { - name = 'ExitStatus'; - constructor(status) { - this.message = `Program terminated with exit(${status})`; - this.status = status; - } - } - - var callRuntimeCallbacks = (callbacks) => { - while (callbacks.length > 0) { - // Pass the module as the first argument. 
- callbacks.shift()(Module); - } - }; - var onPostRuns = []; - var addOnPostRun = (cb) => onPostRuns.push(cb); - - var onPreRuns = []; - var addOnPreRun = (cb) => onPreRuns.push(cb); - - - - /** - * @param {number} ptr - * @param {string} type - */ - function getValue(ptr, type = 'i8') { - if (type.endsWith('*')) type = '*'; - switch (type) { - case 'i1': return HEAP8[ptr]; - case 'i8': return HEAP8[ptr]; - case 'i16': return HEAP16[((ptr)>>1)]; - case 'i32': return HEAP32[((ptr)>>2)]; - case 'i64': return HEAP64[((ptr)>>3)]; - case 'float': return HEAPF32[((ptr)>>2)]; - case 'double': return HEAPF64[((ptr)>>3)]; - case '*': return HEAPU32[((ptr)>>2)]; - default: abort(`invalid type for getValue: ${type}`); - } - } - - var noExitRuntime = true; - - - /** - * @param {number} ptr - * @param {number} value - * @param {string} type - */ - function setValue(ptr, value, type = 'i8') { - if (type.endsWith('*')) type = '*'; - switch (type) { - case 'i1': HEAP8[ptr] = value; break; - case 'i8': HEAP8[ptr] = value; break; - case 'i16': HEAP16[((ptr)>>1)] = value; break; - case 'i32': HEAP32[((ptr)>>2)] = value; break; - case 'i64': HEAP64[((ptr)>>3)] = BigInt(value); break; - case 'float': HEAPF32[((ptr)>>2)] = value; break; - case 'double': HEAPF64[((ptr)>>3)] = value; break; - case '*': HEAPU32[((ptr)>>2)] = value; break; - default: abort(`invalid type for setValue: ${type}`); - } - } - - var stackRestore = (val) => __emscripten_stack_restore(val); - - var stackSave = () => _emscripten_stack_get_current(); - - - - class ExceptionInfo { - // excPtr - Thrown object pointer to wrap. Metadata pointer is calculated from it. 
- constructor(excPtr) { - this.excPtr = excPtr; - this.ptr = excPtr - 24; - } - - set_type(type) { - HEAPU32[(((this.ptr)+(4))>>2)] = type; - } - - get_type() { - return HEAPU32[(((this.ptr)+(4))>>2)]; - } - - set_destructor(destructor) { - HEAPU32[(((this.ptr)+(8))>>2)] = destructor; - } - - get_destructor() { - return HEAPU32[(((this.ptr)+(8))>>2)]; - } - - set_caught(caught) { - caught = caught ? 1 : 0; - HEAP8[(this.ptr)+(12)] = caught; - } - - get_caught() { - return HEAP8[(this.ptr)+(12)] != 0; - } - - set_rethrown(rethrown) { - rethrown = rethrown ? 1 : 0; - HEAP8[(this.ptr)+(13)] = rethrown; - } - - get_rethrown() { - return HEAP8[(this.ptr)+(13)] != 0; - } - - // Initialize native structure fields. Should be called once after allocated. - init(type, destructor) { - this.set_adjusted_ptr(0); - this.set_type(type); - this.set_destructor(destructor); - } - - set_adjusted_ptr(adjustedPtr) { - HEAPU32[(((this.ptr)+(16))>>2)] = adjustedPtr; - } - - get_adjusted_ptr() { - return HEAPU32[(((this.ptr)+(16))>>2)]; - } - } - - var exceptionLast = 0; - - var uncaughtExceptionCount = 0; - var ___cxa_throw = (ptr, type, destructor) => { - var info = new ExceptionInfo(ptr); - // Initialize ExceptionInfo content after it was allocated in __cxa_allocate_exception. - info.init(type, destructor); - exceptionLast = ptr; - uncaughtExceptionCount++; - throw exceptionLast; - }; - - var syscallGetVarargI = () => { - // the `+` prepended here is necessary to convince the JSCompiler that varargs is indeed a number. 
- var ret = HEAP32[((+SYSCALLS.varargs)>>2)]; - SYSCALLS.varargs += 4; - return ret; - }; - var syscallGetVarargP = syscallGetVarargI; - - - var PATH = { - isAbs:(path) => path.charAt(0) === '/', - splitPath:(filename) => { - var splitPathRe = /^(\/?|)([\s\S]*?)((?:\.{1,2}|[^\/]+?|)(\.[^.\/]*|))(?:[\/]*)$/; - return splitPathRe.exec(filename).slice(1); - }, - normalizeArray:(parts, allowAboveRoot) => { - // if the path tries to go above the root, `up` ends up > 0 - var up = 0; - for (var i = parts.length - 1; i >= 0; i--) { - var last = parts[i]; - if (last === '.') { - parts.splice(i, 1); - } else if (last === '..') { - parts.splice(i, 1); - up++; - } else if (up) { - parts.splice(i, 1); - up--; - } - } - // if the path is allowed to go above the root, restore leading ..s - if (allowAboveRoot) { - for (; up; up--) { - parts.unshift('..'); - } - } - return parts; - }, - normalize:(path) => { - var isAbsolute = PATH.isAbs(path), - trailingSlash = path.slice(-1) === '/'; - // Normalize the path - path = PATH.normalizeArray(path.split('/').filter((p) => !!p), !isAbsolute).join('/'); - if (!path && !isAbsolute) { - path = '.'; - } - if (path && trailingSlash) { - path += '/'; - } - return (isAbsolute ? 
'/' : '') + path; - }, - dirname:(path) => { - var result = PATH.splitPath(path), - root = result[0], - dir = result[1]; - if (!root && !dir) { - // No dirname whatsoever - return '.'; - } - if (dir) { - // It has a dirname, strip trailing slash - dir = dir.slice(0, -1); - } - return root + dir; - }, - basename:(path) => path && path.match(/([^\/]+|\/)\/*$/)[1], - join:(...paths) => PATH.normalize(paths.join('/')), - join2:(l, r) => PATH.normalize(l + '/' + r), - }; - - var initRandomFill = () => { - // This block is not needed on v19+ since crypto.getRandomValues is builtin - if (ENVIRONMENT_IS_NODE) { - var nodeCrypto = require('crypto'); - return (view) => nodeCrypto.randomFillSync(view); - } - - return (view) => crypto.getRandomValues(view); - }; - var randomFill = (view) => { - // Lazily init on the first invocation. - (randomFill = initRandomFill())(view); - }; - - - - var PATH_FS = { - resolve:(...args) => { - var resolvedPath = '', - resolvedAbsolute = false; - for (var i = args.length - 1; i >= -1 && !resolvedAbsolute; i--) { - var path = (i >= 0) ? args[i] : FS.cwd(); - // Skip empty and invalid entries - if (typeof path != 'string') { - throw new TypeError('Arguments to path.resolve must be strings'); - } else if (!path) { - return ''; // an invalid portion invalidates the whole thing - } - resolvedPath = path + '/' + resolvedPath; - resolvedAbsolute = PATH.isAbs(path); - } - // At this point the path should be resolved to a full absolute path, but - // handle relative paths to be safe (might happen when process.cwd() fails) - resolvedPath = PATH.normalizeArray(resolvedPath.split('/').filter((p) => !!p), !resolvedAbsolute).join('/'); - return ((resolvedAbsolute ? 
'/' : '') + resolvedPath) || '.'; - }, - relative:(from, to) => { - from = PATH_FS.resolve(from).slice(1); - to = PATH_FS.resolve(to).slice(1); - function trim(arr) { - var start = 0; - for (; start < arr.length; start++) { - if (arr[start] !== '') break; - } - var end = arr.length - 1; - for (; end >= 0; end--) { - if (arr[end] !== '') break; - } - if (start > end) return []; - return arr.slice(start, end - start + 1); - } - var fromParts = trim(from.split('/')); - var toParts = trim(to.split('/')); - var length = Math.min(fromParts.length, toParts.length); - var samePartsLength = length; - for (var i = 0; i < length; i++) { - if (fromParts[i] !== toParts[i]) { - samePartsLength = i; - break; - } - } - var outputParts = []; - for (var i = samePartsLength; i < fromParts.length; i++) { - outputParts.push('..'); - } - outputParts = outputParts.concat(toParts.slice(samePartsLength)); - return outputParts.join('/'); - }, - }; - - - var UTF8Decoder = globalThis.TextDecoder && new TextDecoder(); - - var findStringEnd = (heapOrArray, idx, maxBytesToRead, ignoreNul) => { - var maxIdx = idx + maxBytesToRead; - if (ignoreNul) return maxIdx; - // TextDecoder needs to know the byte length in advance, it doesn't stop on - // null terminator by itself. - // As a tiny code save trick, compare idx against maxIdx using a negation, - // so that maxBytesToRead=undefined/NaN means Infinity. - while (heapOrArray[idx] && !(idx >= maxIdx)) ++idx; - return idx; - }; - - /** - * Given a pointer 'idx' to a null-terminated UTF8-encoded string in the given - * array that contains uint8 values, returns a copy of that string as a - * Javascript String object. - * heapOrArray is either a regular array, or a JavaScript typed array view. - * @param {number=} idx - * @param {number=} maxBytesToRead - * @param {boolean=} ignoreNul - If true, the function will not stop on a NUL character. 
- * @return {string} - */ - var UTF8ArrayToString = (heapOrArray, idx = 0, maxBytesToRead, ignoreNul) => { - - var endPtr = findStringEnd(heapOrArray, idx, maxBytesToRead, ignoreNul); - - // When using conditional TextDecoder, skip it for short strings as the overhead of the native call is not worth it. - if (endPtr - idx > 16 && heapOrArray.buffer && UTF8Decoder) { - return UTF8Decoder.decode(heapOrArray.subarray(idx, endPtr)); - } - var str = ''; - while (idx < endPtr) { - // For UTF8 byte structure, see: - // http://en.wikipedia.org/wiki/UTF-8#Description - // https://www.ietf.org/rfc/rfc2279.txt - // https://tools.ietf.org/html/rfc3629 - var u0 = heapOrArray[idx++]; - if (!(u0 & 0x80)) { str += String.fromCharCode(u0); continue; } - var u1 = heapOrArray[idx++] & 63; - if ((u0 & 0xE0) == 0xC0) { str += String.fromCharCode(((u0 & 31) << 6) | u1); continue; } - var u2 = heapOrArray[idx++] & 63; - if ((u0 & 0xF0) == 0xE0) { - u0 = ((u0 & 15) << 12) | (u1 << 6) | u2; - } else { - u0 = ((u0 & 7) << 18) | (u1 << 12) | (u2 << 6) | (heapOrArray[idx++] & 63); - } - - if (u0 < 0x10000) { - str += String.fromCharCode(u0); - } else { - var ch = u0 - 0x10000; - str += String.fromCharCode(0xD800 | (ch >> 10), 0xDC00 | (ch & 0x3FF)); - } - } - return str; - }; - - var FS_stdin_getChar_buffer = []; - - var lengthBytesUTF8 = (str) => { - var len = 0; - for (var i = 0; i < str.length; ++i) { - // Gotcha: charCodeAt returns a 16-bit word that is a UTF-16 encoded code - // unit, not a Unicode code point of the character! So decode - // UTF16->UTF32->UTF8. - // See http://unicode.org/faq/utf_bom.html#utf16-3 - var c = str.charCodeAt(i); // possibly a lead surrogate - if (c <= 0x7F) { - len++; - } else if (c <= 0x7FF) { - len += 2; - } else if (c >= 0xD800 && c <= 0xDFFF) { - len += 4; ++i; - } else { - len += 3; - } - } - return len; - }; - - var stringToUTF8Array = (str, heap, outIdx, maxBytesToWrite) => { - // Parameter maxBytesToWrite is not optional. 
Negative values, 0, null, - // undefined and false each don't write out any bytes. - if (!(maxBytesToWrite > 0)) - return 0; - - var startIdx = outIdx; - var endIdx = outIdx + maxBytesToWrite - 1; // -1 for string null terminator. - for (var i = 0; i < str.length; ++i) { - // For UTF8 byte structure, see http://en.wikipedia.org/wiki/UTF-8#Description - // and https://www.ietf.org/rfc/rfc2279.txt - // and https://tools.ietf.org/html/rfc3629 - var u = str.codePointAt(i); - if (u <= 0x7F) { - if (outIdx >= endIdx) break; - heap[outIdx++] = u; - } else if (u <= 0x7FF) { - if (outIdx + 1 >= endIdx) break; - heap[outIdx++] = 0xC0 | (u >> 6); - heap[outIdx++] = 0x80 | (u & 63); - } else if (u <= 0xFFFF) { - if (outIdx + 2 >= endIdx) break; - heap[outIdx++] = 0xE0 | (u >> 12); - heap[outIdx++] = 0x80 | ((u >> 6) & 63); - heap[outIdx++] = 0x80 | (u & 63); - } else { - if (outIdx + 3 >= endIdx) break; - heap[outIdx++] = 0xF0 | (u >> 18); - heap[outIdx++] = 0x80 | ((u >> 12) & 63); - heap[outIdx++] = 0x80 | ((u >> 6) & 63); - heap[outIdx++] = 0x80 | (u & 63); - // Gotcha: if codePoint is over 0xFFFF, it is represented as a surrogate pair in UTF-16. - // We need to manually skip over the second code unit for correct iteration. - i++; - } - } - // Null-terminate the pointer to the buffer. - heap[outIdx] = 0; - return outIdx - startIdx; - }; - /** @type {function(string, boolean=, number=)} */ - var intArrayFromString = (stringy, dontAddNull, length) => { - var len = length > 0 ? 
length : lengthBytesUTF8(stringy)+1; - var u8array = new Array(len); - var numBytesWritten = stringToUTF8Array(stringy, u8array, 0, u8array.length); - if (dontAddNull) u8array.length = numBytesWritten; - return u8array; - }; - var FS_stdin_getChar = () => { - if (!FS_stdin_getChar_buffer.length) { - var result = null; - if (ENVIRONMENT_IS_NODE) { - // we will read data by chunks of BUFSIZE - var BUFSIZE = 256; - var buf = Buffer.alloc(BUFSIZE); - var bytesRead = 0; - - // For some reason we must suppress a closure warning here, even though - // fd definitely exists on process.stdin, and is even the proper way to - // get the fd of stdin, - // https://github.com/nodejs/help/issues/2136#issuecomment-523649904 - // This started to happen after moving this logic out of library_tty.js, - // so it is related to the surrounding code in some unclear manner. - /** @suppress {missingProperties} */ - var fd = process.stdin.fd; - - try { - bytesRead = fs.readSync(fd, buf, 0, BUFSIZE); - } catch(e) { - // Cross-platform differences: on Windows, reading EOF throws an - // exception, but on other OSes, reading EOF returns 0. Uniformize - // behavior by treating the EOF exception to return 0. - if (e.toString().includes('EOF')) bytesRead = 0; - else throw e; - } - - if (bytesRead > 0) { - result = buf.slice(0, bytesRead).toString('utf-8'); - } - } else - if (globalThis.window?.prompt) { - // Browser. - result = window.prompt('Input: '); // returns null on cancel - if (result !== null) { - result += '\n'; - } - } else - {} - if (!result) { - return null; - } - FS_stdin_getChar_buffer = intArrayFromString(result, true); - } - return FS_stdin_getChar_buffer.shift(); - }; - var TTY = { - ttys:[], - init() { - // https://github.com/emscripten-core/emscripten/pull/1555 - // if (ENVIRONMENT_IS_NODE) { - // // currently, FS.init does not distinguish if process.stdin is a file or TTY - // // device, it always assumes it's a TTY device. 
because of this, we're forcing - // // process.stdin to UTF8 encoding to at least make stdin reading compatible - // // with text files until FS.init can be refactored. - // process.stdin.setEncoding('utf8'); - // } - }, - shutdown() { - // https://github.com/emscripten-core/emscripten/pull/1555 - // if (ENVIRONMENT_IS_NODE) { - // // inolen: any idea as to why node -e 'process.stdin.read()' wouldn't exit immediately (with process.stdin being a tty)? - // // isaacs: because now it's reading from the stream, you've expressed interest in it, so that read() kicks off a _read() which creates a ReadReq operation - // // inolen: I thought read() in that case was a synchronous operation that just grabbed some amount of buffered data if it exists? - // // isaacs: it is. but it also triggers a _read() call, which calls readStart() on the handle - // // isaacs: do process.stdin.pause() and i'd think it'd probably close the pending call - // process.stdin.pause(); - // } - }, - register(dev, ops) { - TTY.ttys[dev] = { input: [], output: [], ops: ops }; - FS.registerDevice(dev, TTY.stream_ops); - }, - stream_ops:{ - open(stream) { - var tty = TTY.ttys[stream.node.rdev]; - if (!tty) { - throw new FS.ErrnoError(43); - } - stream.tty = tty; - stream.seekable = false; - }, - close(stream) { - // flush any pending line data - stream.tty.ops.fsync(stream.tty); - }, - fsync(stream) { - stream.tty.ops.fsync(stream.tty); - }, - read(stream, buffer, offset, length, pos /* ignored */) { - if (!stream.tty || !stream.tty.ops.get_char) { - throw new FS.ErrnoError(60); - } - var bytesRead = 0; - for (var i = 0; i < length; i++) { - var result; - try { - result = stream.tty.ops.get_char(stream.tty); - } catch (e) { - throw new FS.ErrnoError(29); - } - if (result === undefined && bytesRead === 0) { - throw new FS.ErrnoError(6); - } - if (result === null || result === undefined) break; - bytesRead++; - buffer[offset+i] = result; - } - if (bytesRead) { - stream.node.atime = Date.now(); - } - 
return bytesRead; - }, - write(stream, buffer, offset, length, pos) { - if (!stream.tty || !stream.tty.ops.put_char) { - throw new FS.ErrnoError(60); - } - try { - for (var i = 0; i < length; i++) { - stream.tty.ops.put_char(stream.tty, buffer[offset+i]); - } - } catch (e) { - throw new FS.ErrnoError(29); - } - if (length) { - stream.node.mtime = stream.node.ctime = Date.now(); - } - return i; - }, - }, - default_tty_ops:{ - get_char(tty) { - return FS_stdin_getChar(); - }, - put_char(tty, val) { - if (val === null || val === 10) { - out(UTF8ArrayToString(tty.output)); - tty.output = []; - } else { - if (val != 0) tty.output.push(val); // val == 0 would cut text output off in the middle. - } - }, - fsync(tty) { - if (tty.output?.length > 0) { - out(UTF8ArrayToString(tty.output)); - tty.output = []; - } - }, - ioctl_tcgets(tty) { - // typical setting - return { - c_iflag: 25856, - c_oflag: 5, - c_cflag: 191, - c_lflag: 35387, - c_cc: [ - 0x03, 0x1c, 0x7f, 0x15, 0x04, 0x00, 0x01, 0x00, 0x11, 0x13, 0x1a, 0x00, - 0x12, 0x0f, 0x17, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - ] - }; - }, - ioctl_tcsets(tty, optional_actions, data) { - // currently just ignore - return 0; - }, - ioctl_tiocgwinsz(tty) { - return [24, 80]; - }, - }, - default_tty1_ops:{ - put_char(tty, val) { - if (val === null || val === 10) { - err(UTF8ArrayToString(tty.output)); - tty.output = []; - } else { - if (val != 0) tty.output.push(val); - } - }, - fsync(tty) { - if (tty.output?.length > 0) { - err(UTF8ArrayToString(tty.output)); - tty.output = []; - } - }, - }, - }; - - - var zeroMemory = (ptr, size) => HEAPU8.fill(0, ptr, ptr + size); - - var alignMemory = (size, alignment) => { - return Math.ceil(size / alignment) * alignment; - }; - var mmapAlloc = (size) => { - size = alignMemory(size, 65536); - var ptr = _emscripten_builtin_memalign(65536, size); - if (ptr) zeroMemory(ptr, size); - return ptr; - }; - var MEMFS = { - 
ops_table:null, - mount(mount) { - return MEMFS.createNode(null, '/', 16895, 0); - }, - createNode(parent, name, mode, dev) { - if (FS.isBlkdev(mode) || FS.isFIFO(mode)) { - // no supported - throw new FS.ErrnoError(63); - } - MEMFS.ops_table ||= { - dir: { - node: { - getattr: MEMFS.node_ops.getattr, - setattr: MEMFS.node_ops.setattr, - lookup: MEMFS.node_ops.lookup, - mknod: MEMFS.node_ops.mknod, - rename: MEMFS.node_ops.rename, - unlink: MEMFS.node_ops.unlink, - rmdir: MEMFS.node_ops.rmdir, - readdir: MEMFS.node_ops.readdir, - symlink: MEMFS.node_ops.symlink - }, - stream: { - llseek: MEMFS.stream_ops.llseek - } - }, - file: { - node: { - getattr: MEMFS.node_ops.getattr, - setattr: MEMFS.node_ops.setattr - }, - stream: { - llseek: MEMFS.stream_ops.llseek, - read: MEMFS.stream_ops.read, - write: MEMFS.stream_ops.write, - mmap: MEMFS.stream_ops.mmap, - msync: MEMFS.stream_ops.msync - } - }, - link: { - node: { - getattr: MEMFS.node_ops.getattr, - setattr: MEMFS.node_ops.setattr, - readlink: MEMFS.node_ops.readlink - }, - stream: {} - }, - chrdev: { - node: { - getattr: MEMFS.node_ops.getattr, - setattr: MEMFS.node_ops.setattr - }, - stream: FS.chrdev_stream_ops - } - }; - var node = FS.createNode(parent, name, mode, dev); - if (FS.isDir(node.mode)) { - node.node_ops = MEMFS.ops_table.dir.node; - node.stream_ops = MEMFS.ops_table.dir.stream; - node.contents = {}; - } else if (FS.isFile(node.mode)) { - node.node_ops = MEMFS.ops_table.file.node; - node.stream_ops = MEMFS.ops_table.file.stream; - node.usedBytes = 0; // The actual number of bytes used in the typed array, as opposed to contents.length which gives the whole capacity. - // When the byte data of the file is populated, this will point to either a typed array, or a normal JS array. Typed arrays are preferred - // for performance, and used by default. 
However, typed arrays are not resizable like normal JS arrays are, so there is a small disk size - // penalty involved for appending file writes that continuously grow a file similar to std::vector capacity vs used -scheme. - node.contents = null; - } else if (FS.isLink(node.mode)) { - node.node_ops = MEMFS.ops_table.link.node; - node.stream_ops = MEMFS.ops_table.link.stream; - } else if (FS.isChrdev(node.mode)) { - node.node_ops = MEMFS.ops_table.chrdev.node; - node.stream_ops = MEMFS.ops_table.chrdev.stream; - } - node.atime = node.mtime = node.ctime = Date.now(); - // add the new node to the parent - if (parent) { - parent.contents[name] = node; - parent.atime = parent.mtime = parent.ctime = node.atime; - } - return node; - }, - getFileDataAsTypedArray(node) { - if (!node.contents) return new Uint8Array(0); - if (node.contents.subarray) return node.contents.subarray(0, node.usedBytes); // Make sure to not return excess unused bytes. - return new Uint8Array(node.contents); - }, - expandFileStorage(node, newCapacity) { - var prevCapacity = node.contents ? node.contents.length : 0; - if (prevCapacity >= newCapacity) return; // No need to expand, the storage was already large enough. - // Don't expand strictly to the given requested limit if it's only a very small increase, but instead geometrically grow capacity. - // For small filesizes (<1MB), perform size*2 geometric increase, but for large sizes, do a much more conservative size*1.125 increase to - // avoid overshooting the allocation cap by a very large margin. - var CAPACITY_DOUBLING_MAX = 1024 * 1024; - newCapacity = Math.max(newCapacity, (prevCapacity * (prevCapacity < CAPACITY_DOUBLING_MAX ? 2.0 : 1.125)) >>> 0); - if (prevCapacity != 0) newCapacity = Math.max(newCapacity, 256); // At minimum allocate 256b for each file when expanding. - var oldContents = node.contents; - node.contents = new Uint8Array(newCapacity); // Allocate new storage. 
- if (node.usedBytes > 0) node.contents.set(oldContents.subarray(0, node.usedBytes), 0); // Copy old data over to the new storage. - }, - resizeFileStorage(node, newSize) { - if (node.usedBytes == newSize) return; - if (newSize == 0) { - node.contents = null; // Fully decommit when requesting a resize to zero. - node.usedBytes = 0; - } else { - var oldContents = node.contents; - node.contents = new Uint8Array(newSize); // Allocate new storage. - if (oldContents) { - node.contents.set(oldContents.subarray(0, Math.min(newSize, node.usedBytes))); // Copy old data over to the new storage. - } - node.usedBytes = newSize; - } - }, - node_ops:{ - getattr(node) { - var attr = {}; - // device numbers reuse inode numbers. - attr.dev = FS.isChrdev(node.mode) ? node.id : 1; - attr.ino = node.id; - attr.mode = node.mode; - attr.nlink = 1; - attr.uid = 0; - attr.gid = 0; - attr.rdev = node.rdev; - if (FS.isDir(node.mode)) { - attr.size = 4096; - } else if (FS.isFile(node.mode)) { - attr.size = node.usedBytes; - } else if (FS.isLink(node.mode)) { - attr.size = node.link.length; - } else { - attr.size = 0; - } - attr.atime = new Date(node.atime); - attr.mtime = new Date(node.mtime); - attr.ctime = new Date(node.ctime); - // NOTE: In our implementation, st_blocks = Math.ceil(st_size/st_blksize), - // but this is not required by the standard. - attr.blksize = 4096; - attr.blocks = Math.ceil(attr.size / attr.blksize); - return attr; - }, - setattr(node, attr) { - for (const key of ["mode", "atime", "mtime", "ctime"]) { - if (attr[key] != null) { - node[key] = attr[key]; - } - } - if (attr.size !== undefined) { - MEMFS.resizeFileStorage(node, attr.size); - } - }, - lookup(parent, name) { - // This error may happen quite a bit. To avoid overhead we reuse it (and - // suffer a lack of stack info). 
- if (!MEMFS.doesNotExistError) { - MEMFS.doesNotExistError = new FS.ErrnoError(44); - /** @suppress {checkTypes} */ - MEMFS.doesNotExistError.stack = ''; - } - throw MEMFS.doesNotExistError; - }, - mknod(parent, name, mode, dev) { - return MEMFS.createNode(parent, name, mode, dev); - }, - rename(old_node, new_dir, new_name) { - var new_node; - try { - new_node = FS.lookupNode(new_dir, new_name); - } catch (e) {} - if (new_node) { - if (FS.isDir(old_node.mode)) { - // if we're overwriting a directory at new_name, make sure it's empty. - for (var i in new_node.contents) { - throw new FS.ErrnoError(55); - } - } - FS.hashRemoveNode(new_node); - } - // do the internal rewiring - delete old_node.parent.contents[old_node.name]; - new_dir.contents[new_name] = old_node; - old_node.name = new_name; - new_dir.ctime = new_dir.mtime = old_node.parent.ctime = old_node.parent.mtime = Date.now(); - }, - unlink(parent, name) { - delete parent.contents[name]; - parent.ctime = parent.mtime = Date.now(); - }, - rmdir(parent, name) { - var node = FS.lookupNode(parent, name); - for (var i in node.contents) { - throw new FS.ErrnoError(55); - } - delete parent.contents[name]; - parent.ctime = parent.mtime = Date.now(); - }, - readdir(node) { - return ['.', '..', ...Object.keys(node.contents)]; - }, - symlink(parent, newname, oldpath) { - var node = MEMFS.createNode(parent, newname, 0o777 | 40960, 0); - node.link = oldpath; - return node; - }, - readlink(node) { - if (!FS.isLink(node.mode)) { - throw new FS.ErrnoError(28); - } - return node.link; - }, - }, - stream_ops:{ - read(stream, buffer, offset, length, position) { - var contents = stream.node.contents; - if (position >= stream.node.usedBytes) return 0; - var size = Math.min(stream.node.usedBytes - position, length); - if (size > 8 && contents.subarray) { // non-trivial, and typed array - buffer.set(contents.subarray(position, position + size), offset); - } else { - for (var i = 0; i < size; i++) buffer[offset + i] = 
contents[position + i]; - } - return size; - }, - write(stream, buffer, offset, length, position, canOwn) { - // If the buffer is located in main memory (HEAP), and if - // memory can grow, we can't hold on to references of the - // memory buffer, as they may get invalidated. That means we - // need to do copy its contents. - if (buffer.buffer === HEAP8.buffer) { - canOwn = false; - } - - if (!length) return 0; - var node = stream.node; - node.mtime = node.ctime = Date.now(); - - if (buffer.subarray && (!node.contents || node.contents.subarray)) { // This write is from a typed array to a typed array? - if (canOwn) { - node.contents = buffer.subarray(offset, offset + length); - node.usedBytes = length; - return length; - } else if (node.usedBytes === 0 && position === 0) { // If this is a simple first write to an empty file, do a fast set since we don't need to care about old data. - node.contents = buffer.slice(offset, offset + length); - node.usedBytes = length; - return length; - } else if (position + length <= node.usedBytes) { // Writing to an already allocated and used subrange of the file? - node.contents.set(buffer.subarray(offset, offset + length), position); - return length; - } - } - - // Appending to an existing file and we need to reallocate, or source data did not come as a typed array. - MEMFS.expandFileStorage(node, position+length); - if (node.contents.subarray && buffer.subarray) { - // Use typed array write which is available. - node.contents.set(buffer.subarray(offset, offset + length), position); - } else { - for (var i = 0; i < length; i++) { - node.contents[position + i] = buffer[offset + i]; // Or fall back to manual write if not. 
- } - } - node.usedBytes = Math.max(node.usedBytes, position + length); - return length; - }, - llseek(stream, offset, whence) { - var position = offset; - if (whence === 1) { - position += stream.position; - } else if (whence === 2) { - if (FS.isFile(stream.node.mode)) { - position += stream.node.usedBytes; - } - } - if (position < 0) { - throw new FS.ErrnoError(28); - } - return position; - }, - mmap(stream, length, position, prot, flags) { - if (!FS.isFile(stream.node.mode)) { - throw new FS.ErrnoError(43); - } - var ptr; - var allocated; - var contents = stream.node.contents; - // Only make a new copy when MAP_PRIVATE is specified. - if (!(flags & 2) && contents && contents.buffer === HEAP8.buffer) { - // We can't emulate MAP_SHARED when the file is not backed by the - // buffer we're mapping to (e.g. the HEAP buffer). - allocated = false; - ptr = contents.byteOffset; - } else { - allocated = true; - ptr = mmapAlloc(length); - if (!ptr) { - throw new FS.ErrnoError(48); - } - if (contents) { - // Try to avoid unnecessary slices. - if (position > 0 || position + length < contents.length) { - if (contents.subarray) { - contents = contents.subarray(position, position + length); - } else { - contents = Array.prototype.slice.call(contents, position, position + length); - } - } - HEAP8.set(contents, ptr); - } - } - return { ptr, allocated }; - }, - msync(stream, buffer, offset, length, mmapFlags) { - MEMFS.stream_ops.write(stream, buffer, 0, length, offset, false); - // should we check if bytesWritten and length are the same? 
- return 0; - }, - }, - }; - - var FS_modeStringToFlags = (str) => { - var flagModes = { - 'r': 0, - 'r+': 2, - 'w': 512 | 64 | 1, - 'w+': 512 | 64 | 2, - 'a': 1024 | 64 | 1, - 'a+': 1024 | 64 | 2, - }; - var flags = flagModes[str]; - if (typeof flags == 'undefined') { - throw new Error(`Unknown file open mode: ${str}`); - } - return flags; - }; - - var FS_getMode = (canRead, canWrite) => { - var mode = 0; - if (canRead) mode |= 292 | 73; - if (canWrite) mode |= 146; - return mode; - }; - - - var asyncLoad = async (url) => { - var arrayBuffer = await readAsync(url); - return new Uint8Array(arrayBuffer); - }; - - - var FS_createDataFile = (...args) => FS.createDataFile(...args); - - var getUniqueRunDependency = (id) => { - return id; - }; - - var runDependencies = 0; - - - var dependenciesFulfilled = null; - var removeRunDependency = (id) => { - runDependencies--; - - Module['monitorRunDependencies']?.(runDependencies); - - if (runDependencies == 0) { - if (dependenciesFulfilled) { - var callback = dependenciesFulfilled; - dependenciesFulfilled = null; - callback(); // can add another dependenciesFulfilled - } - } - }; - var addRunDependency = (id) => { - runDependencies++; - - Module['monitorRunDependencies']?.(runDependencies); - - }; - - - var preloadPlugins = []; - var FS_handledByPreloadPlugin = async (byteArray, fullname) => { - // Ensure plugins are ready. - if (typeof Browser != 'undefined') Browser.init(); - - for (var plugin of preloadPlugins) { - if (plugin['canHandle'](fullname)) { - return plugin['handle'](byteArray, fullname); - } - } - // In no plugin handled this file then return the original/unmodified - // byteArray. - return byteArray; - }; - var FS_preloadFile = async (parent, name, url, canRead, canWrite, dontCreateFile, canOwn, preFinish) => { - // TODO we should allow people to just pass in a complete filename instead - // of parent and name being that we just join them anyways - var fullname = name ? 
PATH_FS.resolve(PATH.join2(parent, name)) : parent; - var dep = getUniqueRunDependency(`cp ${fullname}`); // might have several active requests for the same fullname - addRunDependency(dep); - - try { - var byteArray = url; - if (typeof url == 'string') { - byteArray = await asyncLoad(url); - } - - byteArray = await FS_handledByPreloadPlugin(byteArray, fullname); - preFinish?.(); - if (!dontCreateFile) { - FS_createDataFile(parent, name, byteArray, canRead, canWrite, canOwn); - } - } finally { - removeRunDependency(dep); - } - }; - var FS_createPreloadedFile = (parent, name, url, canRead, canWrite, onload, onerror, dontCreateFile, canOwn, preFinish) => { - FS_preloadFile(parent, name, url, canRead, canWrite, dontCreateFile, canOwn, preFinish).then(onload).catch(onerror); - }; - var FS = { - root:null, - mounts:[], - devices:{ - }, - streams:[], - nextInode:1, - nameTable:null, - currentPath:"/", - initialized:false, - ignorePermissions:true, - filesystems:null, - syncFSRequests:0, - readFiles:{ - }, - ErrnoError:class { - name = 'ErrnoError'; - // We set the `name` property to be able to identify `FS.ErrnoError` - // - the `name` is a standard ECMA-262 property of error objects. Kind of good to have it anyway. - // - when using PROXYFS, an error can come from an underlying FS - // as different FS objects have their own FS.ErrnoError each, - // the test `err instanceof FS.ErrnoError` won't detect an error coming from another filesystem, causing bugs. 
- // we'll use the reliable test `err.name == "ErrnoError"` instead - constructor(errno) { - this.errno = errno; - } - }, - FSStream:class { - shared = {}; - get object() { - return this.node; - } - set object(val) { - this.node = val; - } - get isRead() { - return (this.flags & 2097155) !== 1; - } - get isWrite() { - return (this.flags & 2097155) !== 0; - } - get isAppend() { - return (this.flags & 1024); - } - get flags() { - return this.shared.flags; - } - set flags(val) { - this.shared.flags = val; - } - get position() { - return this.shared.position; - } - set position(val) { - this.shared.position = val; - } - }, - FSNode:class { - node_ops = {}; - stream_ops = {}; - readMode = 292 | 73; - writeMode = 146; - mounted = null; - constructor(parent, name, mode, rdev) { - if (!parent) { - parent = this; // root node sets parent to itself - } - this.parent = parent; - this.mount = parent.mount; - this.id = FS.nextInode++; - this.name = name; - this.mode = mode; - this.rdev = rdev; - this.atime = this.mtime = this.ctime = Date.now(); - } - get read() { - return (this.mode & this.readMode) === this.readMode; - } - set read(val) { - val ? this.mode |= this.readMode : this.mode &= ~this.readMode; - } - get write() { - return (this.mode & this.writeMode) === this.writeMode; - } - set write(val) { - val ? this.mode |= this.writeMode : this.mode &= ~this.writeMode; - } - get isFolder() { - return FS.isDir(this.mode); - } - get isDevice() { - return FS.isChrdev(this.mode); - } - }, - lookupPath(path, opts = {}) { - if (!path) { - throw new FS.ErrnoError(44); - } - opts.follow_mount ??= true - - if (!PATH.isAbs(path)) { - path = FS.cwd() + '/' + path; - } - - // limit max consecutive symlinks to 40 (SYMLOOP_MAX). 
- linkloop: for (var nlinks = 0; nlinks < 40; nlinks++) { - // split the absolute path - var parts = path.split('/').filter((p) => !!p); - - // start at the root - var current = FS.root; - var current_path = '/'; - - for (var i = 0; i < parts.length; i++) { - var islast = (i === parts.length-1); - if (islast && opts.parent) { - // stop resolving - break; - } - - if (parts[i] === '.') { - continue; - } - - if (parts[i] === '..') { - current_path = PATH.dirname(current_path); - if (FS.isRoot(current)) { - path = current_path + '/' + parts.slice(i + 1).join('/'); - // We're making progress here, don't let many consecutive ..'s - // lead to ELOOP - nlinks--; - continue linkloop; - } else { - current = current.parent; - } - continue; - } - - current_path = PATH.join2(current_path, parts[i]); - try { - current = FS.lookupNode(current, parts[i]); - } catch (e) { - // if noent_okay is true, suppress a ENOENT in the last component - // and return an object with an undefined node. This is needed for - // resolving symlinks in the path when creating a file. - if ((e?.errno === 44) && islast && opts.noent_okay) { - return { path: current_path }; - } - throw e; - } - - // jump to the mount's root node if this is a mountpoint - if (FS.isMountpoint(current) && (!islast || opts.follow_mount)) { - current = current.mounted.root; - } - - // by default, lookupPath will not follow a symlink if it is the final path component. - // setting opts.follow = true will override this behavior. 
- if (FS.isLink(current.mode) && (!islast || opts.follow)) { - if (!current.node_ops.readlink) { - throw new FS.ErrnoError(52); - } - var link = current.node_ops.readlink(current); - if (!PATH.isAbs(link)) { - link = PATH.dirname(current_path) + '/' + link; - } - path = link + '/' + parts.slice(i + 1).join('/'); - continue linkloop; - } - } - return { path: current_path, node: current }; - } - throw new FS.ErrnoError(32); - }, - getPath(node) { - var path; - while (true) { - if (FS.isRoot(node)) { - var mount = node.mount.mountpoint; - if (!path) return mount; - return mount[mount.length-1] !== '/' ? `${mount}/${path}` : mount + path; - } - path = path ? `${node.name}/${path}` : node.name; - node = node.parent; - } - }, - hashName(parentid, name) { - var hash = 0; - - for (var i = 0; i < name.length; i++) { - hash = ((hash << 5) - hash + name.charCodeAt(i)) | 0; - } - return ((parentid + hash) >>> 0) % FS.nameTable.length; - }, - hashAddNode(node) { - var hash = FS.hashName(node.parent.id, node.name); - node.name_next = FS.nameTable[hash]; - FS.nameTable[hash] = node; - }, - hashRemoveNode(node) { - var hash = FS.hashName(node.parent.id, node.name); - if (FS.nameTable[hash] === node) { - FS.nameTable[hash] = node.name_next; - } else { - var current = FS.nameTable[hash]; - while (current) { - if (current.name_next === node) { - current.name_next = node.name_next; - break; - } - current = current.name_next; - } - } - }, - lookupNode(parent, name) { - var errCode = FS.mayLookup(parent); - if (errCode) { - throw new FS.ErrnoError(errCode); - } - var hash = FS.hashName(parent.id, name); - for (var node = FS.nameTable[hash]; node; node = node.name_next) { - var nodeName = node.name; - if (node.parent.id === parent.id && nodeName === name) { - return node; - } - } - // if we failed to find it in the cache, call into the VFS - return FS.lookup(parent, name); - }, - createNode(parent, name, mode, rdev) { - var node = new FS.FSNode(parent, name, mode, rdev); - - 
FS.hashAddNode(node); - - return node; - }, - destroyNode(node) { - FS.hashRemoveNode(node); - }, - isRoot(node) { - return node === node.parent; - }, - isMountpoint(node) { - return !!node.mounted; - }, - isFile(mode) { - return (mode & 61440) === 32768; - }, - isDir(mode) { - return (mode & 61440) === 16384; - }, - isLink(mode) { - return (mode & 61440) === 40960; - }, - isChrdev(mode) { - return (mode & 61440) === 8192; - }, - isBlkdev(mode) { - return (mode & 61440) === 24576; - }, - isFIFO(mode) { - return (mode & 61440) === 4096; - }, - isSocket(mode) { - return (mode & 49152) === 49152; - }, - flagsToPermissionString(flag) { - var perms = ['r', 'w', 'rw'][flag & 3]; - if ((flag & 512)) { - perms += 'w'; - } - return perms; - }, - nodePermissions(node, perms) { - if (FS.ignorePermissions) { - return 0; - } - // return 0 if any user, group or owner bits are set. - if (perms.includes('r') && !(node.mode & 292)) { - return 2; - } else if (perms.includes('w') && !(node.mode & 146)) { - return 2; - } else if (perms.includes('x') && !(node.mode & 73)) { - return 2; - } - return 0; - }, - mayLookup(dir) { - if (!FS.isDir(dir.mode)) return 54; - var errCode = FS.nodePermissions(dir, 'x'); - if (errCode) return errCode; - if (!dir.node_ops.lookup) return 2; - return 0; - }, - mayCreate(dir, name) { - if (!FS.isDir(dir.mode)) { - return 54; - } - try { - var node = FS.lookupNode(dir, name); - return 20; - } catch (e) { - } - return FS.nodePermissions(dir, 'wx'); - }, - mayDelete(dir, name, isdir) { - var node; - try { - node = FS.lookupNode(dir, name); - } catch (e) { - return e.errno; - } - var errCode = FS.nodePermissions(dir, 'wx'); - if (errCode) { - return errCode; - } - if (isdir) { - if (!FS.isDir(node.mode)) { - return 54; - } - if (FS.isRoot(node) || FS.getPath(node) === FS.cwd()) { - return 10; - } - } else { - if (FS.isDir(node.mode)) { - return 31; - } - } - return 0; - }, - mayOpen(node, flags) { - if (!node) { - return 44; - } - if (FS.isLink(node.mode)) 
{ - return 32; - } else if (FS.isDir(node.mode)) { - if (FS.flagsToPermissionString(flags) !== 'r' // opening for write - || (flags & (512 | 64))) { // TODO: check for O_SEARCH? (== search for dir only) - return 31; - } - } - return FS.nodePermissions(node, FS.flagsToPermissionString(flags)); - }, - checkOpExists(op, err) { - if (!op) { - throw new FS.ErrnoError(err); - } - return op; - }, - MAX_OPEN_FDS:4096, - nextfd() { - for (var fd = 0; fd <= FS.MAX_OPEN_FDS; fd++) { - if (!FS.streams[fd]) { - return fd; - } - } - throw new FS.ErrnoError(33); - }, - getStreamChecked(fd) { - var stream = FS.getStream(fd); - if (!stream) { - throw new FS.ErrnoError(8); - } - return stream; - }, - getStream:(fd) => FS.streams[fd], - createStream(stream, fd = -1) { - - // clone it, so we can return an instance of FSStream - stream = Object.assign(new FS.FSStream(), stream); - if (fd == -1) { - fd = FS.nextfd(); - } - stream.fd = fd; - FS.streams[fd] = stream; - return stream; - }, - closeStream(fd) { - FS.streams[fd] = null; - }, - dupStream(origStream, fd = -1) { - var stream = FS.createStream(origStream, fd); - stream.stream_ops?.dup?.(stream); - return stream; - }, - doSetAttr(stream, node, attr) { - var setattr = stream?.stream_ops.setattr; - var arg = setattr ? 
stream : node; - setattr ??= node.node_ops.setattr; - FS.checkOpExists(setattr, 63) - setattr(arg, attr); - }, - chrdev_stream_ops:{ - open(stream) { - var device = FS.getDevice(stream.node.rdev); - // override node's stream ops with the device's - stream.stream_ops = device.stream_ops; - // forward the open call - stream.stream_ops.open?.(stream); - }, - llseek() { - throw new FS.ErrnoError(70); - }, - }, - major:(dev) => ((dev) >> 8), - minor:(dev) => ((dev) & 0xff), - makedev:(ma, mi) => ((ma) << 8 | (mi)), - registerDevice(dev, ops) { - FS.devices[dev] = { stream_ops: ops }; - }, - getDevice:(dev) => FS.devices[dev], - getMounts(mount) { - var mounts = []; - var check = [mount]; - - while (check.length) { - var m = check.pop(); - - mounts.push(m); - - check.push(...m.mounts); - } - - return mounts; - }, - syncfs(populate, callback) { - if (typeof populate == 'function') { - callback = populate; - populate = false; - } - - FS.syncFSRequests++; - - if (FS.syncFSRequests > 1) { - err(`warning: ${FS.syncFSRequests} FS.syncfs operations in flight at once, probably just doing extra work`); - } - - var mounts = FS.getMounts(FS.root.mount); - var completed = 0; - - function doCallback(errCode) { - FS.syncFSRequests--; - return callback(errCode); - } - - function done(errCode) { - if (errCode) { - if (!done.errored) { - done.errored = true; - return doCallback(errCode); - } - return; - } - if (++completed >= mounts.length) { - doCallback(null); - } - }; - - // sync all mounts - for (var mount of mounts) { - if (mount.type.syncfs) { - mount.type.syncfs(mount, populate, done); - } else { - done(null); - } - } - }, - mount(type, opts, mountpoint) { - var root = mountpoint === '/'; - var pseudo = !mountpoint; - var node; - - if (root && FS.root) { - throw new FS.ErrnoError(10); - } else if (!root && !pseudo) { - var lookup = FS.lookupPath(mountpoint, { follow_mount: false }); - - mountpoint = lookup.path; // use the absolute path - node = lookup.node; - - if 
(FS.isMountpoint(node)) { - throw new FS.ErrnoError(10); - } - - if (!FS.isDir(node.mode)) { - throw new FS.ErrnoError(54); - } - } - - var mount = { - type, - opts, - mountpoint, - mounts: [] - }; - - // create a root node for the fs - var mountRoot = type.mount(mount); - mountRoot.mount = mount; - mount.root = mountRoot; - - if (root) { - FS.root = mountRoot; - } else if (node) { - // set as a mountpoint - node.mounted = mount; - - // add the new mount to the current mount's children - if (node.mount) { - node.mount.mounts.push(mount); - } - } - - return mountRoot; - }, - unmount(mountpoint) { - var lookup = FS.lookupPath(mountpoint, { follow_mount: false }); - - if (!FS.isMountpoint(lookup.node)) { - throw new FS.ErrnoError(28); - } - - // destroy the nodes for this mount, and all its child mounts - var node = lookup.node; - var mount = node.mounted; - var mounts = FS.getMounts(mount); - - for (var [hash, current] of Object.entries(FS.nameTable)) { - while (current) { - var next = current.name_next; - - if (mounts.includes(current.mount)) { - FS.destroyNode(current); - } - - current = next; - } - } - - // no longer a mountpoint - node.mounted = null; - - // remove this mount from the child mounts - var idx = node.mount.mounts.indexOf(mount); - node.mount.mounts.splice(idx, 1); - }, - lookup(parent, name) { - return parent.node_ops.lookup(parent, name); - }, - mknod(path, mode, dev) { - var lookup = FS.lookupPath(path, { parent: true }); - var parent = lookup.node; - var name = PATH.basename(path); - if (!name) { - throw new FS.ErrnoError(28); - } - if (name === '.' 
|| name === '..') { - throw new FS.ErrnoError(20); - } - var errCode = FS.mayCreate(parent, name); - if (errCode) { - throw new FS.ErrnoError(errCode); - } - if (!parent.node_ops.mknod) { - throw new FS.ErrnoError(63); - } - return parent.node_ops.mknod(parent, name, mode, dev); - }, - statfs(path) { - return FS.statfsNode(FS.lookupPath(path, {follow: true}).node); - }, - statfsStream(stream) { - // We keep a separate statfsStream function because noderawfs overrides - // it. In noderawfs, stream.node is sometimes null. Instead, we need to - // look at stream.path. - return FS.statfsNode(stream.node); - }, - statfsNode(node) { - // NOTE: None of the defaults here are true. We're just returning safe and - // sane values. Currently nodefs and rawfs replace these defaults, - // other file systems leave them alone. - var rtn = { - bsize: 4096, - frsize: 4096, - blocks: 1e6, - bfree: 5e5, - bavail: 5e5, - files: FS.nextInode, - ffree: FS.nextInode - 1, - fsid: 42, - flags: 2, - namelen: 255, - }; - - if (node.node_ops.statfs) { - Object.assign(rtn, node.node_ops.statfs(node.mount.opts.root)); - } - return rtn; - }, - create(path, mode = 0o666) { - mode &= 4095; - mode |= 32768; - return FS.mknod(path, mode, 0); - }, - mkdir(path, mode = 0o777) { - mode &= 511 | 512; - mode |= 16384; - return FS.mknod(path, mode, 0); - }, - mkdirTree(path, mode) { - var dirs = path.split('/'); - var d = ''; - for (var dir of dirs) { - if (!dir) continue; - if (d || PATH.isAbs(path)) d += '/'; - d += dir; - try { - FS.mkdir(d, mode); - } catch(e) { - if (e.errno != 20) throw e; - } - } - }, - mkdev(path, mode, dev) { - if (typeof dev == 'undefined') { - dev = mode; - mode = 0o666; - } - mode |= 8192; - return FS.mknod(path, mode, dev); - }, - symlink(oldpath, newpath) { - if (!PATH_FS.resolve(oldpath)) { - throw new FS.ErrnoError(44); - } - var lookup = FS.lookupPath(newpath, { parent: true }); - var parent = lookup.node; - if (!parent) { - throw new FS.ErrnoError(44); - } - var newname = 
PATH.basename(newpath); - var errCode = FS.mayCreate(parent, newname); - if (errCode) { - throw new FS.ErrnoError(errCode); - } - if (!parent.node_ops.symlink) { - throw new FS.ErrnoError(63); - } - return parent.node_ops.symlink(parent, newname, oldpath); - }, - rename(old_path, new_path) { - var old_dirname = PATH.dirname(old_path); - var new_dirname = PATH.dirname(new_path); - var old_name = PATH.basename(old_path); - var new_name = PATH.basename(new_path); - // parents must exist - var lookup, old_dir, new_dir; - - // let the errors from non existent directories percolate up - lookup = FS.lookupPath(old_path, { parent: true }); - old_dir = lookup.node; - lookup = FS.lookupPath(new_path, { parent: true }); - new_dir = lookup.node; - - if (!old_dir || !new_dir) throw new FS.ErrnoError(44); - // need to be part of the same mount - if (old_dir.mount !== new_dir.mount) { - throw new FS.ErrnoError(75); - } - // source must exist - var old_node = FS.lookupNode(old_dir, old_name); - // old path should not be an ancestor of the new path - var relative = PATH_FS.relative(old_path, new_dirname); - if (relative.charAt(0) !== '.') { - throw new FS.ErrnoError(28); - } - // new path should not be an ancestor of the old path - relative = PATH_FS.relative(new_path, old_dirname); - if (relative.charAt(0) !== '.') { - throw new FS.ErrnoError(55); - } - // see if the new path already exists - var new_node; - try { - new_node = FS.lookupNode(new_dir, new_name); - } catch (e) { - // not fatal - } - // early out if nothing needs to change - if (old_node === new_node) { - return; - } - // we'll need to delete the old entry - var isdir = FS.isDir(old_node.mode); - var errCode = FS.mayDelete(old_dir, old_name, isdir); - if (errCode) { - throw new FS.ErrnoError(errCode); - } - // need delete permissions if we'll be overwriting. - // need create permissions if new doesn't already exist. - errCode = new_node ? 
- FS.mayDelete(new_dir, new_name, isdir) : - FS.mayCreate(new_dir, new_name); - if (errCode) { - throw new FS.ErrnoError(errCode); - } - if (!old_dir.node_ops.rename) { - throw new FS.ErrnoError(63); - } - if (FS.isMountpoint(old_node) || (new_node && FS.isMountpoint(new_node))) { - throw new FS.ErrnoError(10); - } - // if we are going to change the parent, check write permissions - if (new_dir !== old_dir) { - errCode = FS.nodePermissions(old_dir, 'w'); - if (errCode) { - throw new FS.ErrnoError(errCode); - } - } - // remove the node from the lookup hash - FS.hashRemoveNode(old_node); - // do the underlying fs rename - try { - old_dir.node_ops.rename(old_node, new_dir, new_name); - // update old node (we do this here to avoid each backend - // needing to) - old_node.parent = new_dir; - } catch (e) { - throw e; - } finally { - // add the node back to the hash (in case node_ops.rename - // changed its name) - FS.hashAddNode(old_node); - } - }, - rmdir(path) { - var lookup = FS.lookupPath(path, { parent: true }); - var parent = lookup.node; - var name = PATH.basename(path); - var node = FS.lookupNode(parent, name); - var errCode = FS.mayDelete(parent, name, true); - if (errCode) { - throw new FS.ErrnoError(errCode); - } - if (!parent.node_ops.rmdir) { - throw new FS.ErrnoError(63); - } - if (FS.isMountpoint(node)) { - throw new FS.ErrnoError(10); - } - parent.node_ops.rmdir(parent, name); - FS.destroyNode(node); - }, - readdir(path) { - var lookup = FS.lookupPath(path, { follow: true }); - var node = lookup.node; - var readdir = FS.checkOpExists(node.node_ops.readdir, 54); - return readdir(node); - }, - unlink(path) { - var lookup = FS.lookupPath(path, { parent: true }); - var parent = lookup.node; - if (!parent) { - throw new FS.ErrnoError(44); - } - var name = PATH.basename(path); - var node = FS.lookupNode(parent, name); - var errCode = FS.mayDelete(parent, name, false); - if (errCode) { - // According to POSIX, we should map EISDIR to EPERM, but - // we instead 
do what Linux does (and we must, as we use - // the musl linux libc). - throw new FS.ErrnoError(errCode); - } - if (!parent.node_ops.unlink) { - throw new FS.ErrnoError(63); - } - if (FS.isMountpoint(node)) { - throw new FS.ErrnoError(10); - } - parent.node_ops.unlink(parent, name); - FS.destroyNode(node); - }, - readlink(path) { - var lookup = FS.lookupPath(path); - var link = lookup.node; - if (!link) { - throw new FS.ErrnoError(44); - } - if (!link.node_ops.readlink) { - throw new FS.ErrnoError(28); - } - return link.node_ops.readlink(link); - }, - stat(path, dontFollow) { - var lookup = FS.lookupPath(path, { follow: !dontFollow }); - var node = lookup.node; - var getattr = FS.checkOpExists(node.node_ops.getattr, 63); - return getattr(node); - }, - fstat(fd) { - var stream = FS.getStreamChecked(fd); - var node = stream.node; - var getattr = stream.stream_ops.getattr; - var arg = getattr ? stream : node; - getattr ??= node.node_ops.getattr; - FS.checkOpExists(getattr, 63) - return getattr(arg); - }, - lstat(path) { - return FS.stat(path, true); - }, - doChmod(stream, node, mode, dontFollow) { - FS.doSetAttr(stream, node, { - mode: (mode & 4095) | (node.mode & ~4095), - ctime: Date.now(), - dontFollow - }); - }, - chmod(path, mode, dontFollow) { - var node; - if (typeof path == 'string') { - var lookup = FS.lookupPath(path, { follow: !dontFollow }); - node = lookup.node; - } else { - node = path; - } - FS.doChmod(null, node, mode, dontFollow); - }, - lchmod(path, mode) { - FS.chmod(path, mode, true); - }, - fchmod(fd, mode) { - var stream = FS.getStreamChecked(fd); - FS.doChmod(stream, stream.node, mode, false); - }, - doChown(stream, node, dontFollow) { - FS.doSetAttr(stream, node, { - timestamp: Date.now(), - dontFollow - // we ignore the uid / gid for now - }); - }, - chown(path, uid, gid, dontFollow) { - var node; - if (typeof path == 'string') { - var lookup = FS.lookupPath(path, { follow: !dontFollow }); - node = lookup.node; - } else { - node = path; - } - 
FS.doChown(null, node, dontFollow); - }, - lchown(path, uid, gid) { - FS.chown(path, uid, gid, true); - }, - fchown(fd, uid, gid) { - var stream = FS.getStreamChecked(fd); - FS.doChown(stream, stream.node, false); - }, - doTruncate(stream, node, len) { - if (FS.isDir(node.mode)) { - throw new FS.ErrnoError(31); - } - if (!FS.isFile(node.mode)) { - throw new FS.ErrnoError(28); - } - var errCode = FS.nodePermissions(node, 'w'); - if (errCode) { - throw new FS.ErrnoError(errCode); - } - FS.doSetAttr(stream, node, { - size: len, - timestamp: Date.now() - }); - }, - truncate(path, len) { - if (len < 0) { - throw new FS.ErrnoError(28); - } - var node; - if (typeof path == 'string') { - var lookup = FS.lookupPath(path, { follow: true }); - node = lookup.node; - } else { - node = path; - } - FS.doTruncate(null, node, len); - }, - ftruncate(fd, len) { - var stream = FS.getStreamChecked(fd); - if (len < 0 || (stream.flags & 2097155) === 0) { - throw new FS.ErrnoError(28); - } - FS.doTruncate(stream, stream.node, len); - }, - utime(path, atime, mtime) { - var lookup = FS.lookupPath(path, { follow: true }); - var node = lookup.node; - var setattr = FS.checkOpExists(node.node_ops.setattr, 63); - setattr(node, { - atime: atime, - mtime: mtime - }); - }, - open(path, flags, mode = 0o666) { - if (path === "") { - throw new FS.ErrnoError(44); - } - flags = typeof flags == 'string' ? FS_modeStringToFlags(flags) : flags; - if ((flags & 64)) { - mode = (mode & 4095) | 32768; - } else { - mode = 0; - } - var node; - var isDirPath; - if (typeof path == 'object') { - node = path; - } else { - isDirPath = path.endsWith("/"); - // noent_okay makes it so that if the final component of the path - // doesn't exist, lookupPath returns `node: undefined`. `path` will be - // updated to point to the target of all symlinks. 
- var lookup = FS.lookupPath(path, { - follow: !(flags & 131072), - noent_okay: true - }); - node = lookup.node; - path = lookup.path; - } - // perhaps we need to create the node - var created = false; - if ((flags & 64)) { - if (node) { - // if O_CREAT and O_EXCL are set, error out if the node already exists - if ((flags & 128)) { - throw new FS.ErrnoError(20); - } - } else if (isDirPath) { - throw new FS.ErrnoError(31); - } else { - // node doesn't exist, try to create it - // Ignore the permission bits here to ensure we can `open` this new - // file below. We use chmod below the apply the permissions once the - // file is open. - node = FS.mknod(path, mode | 0o777, 0); - created = true; - } - } - if (!node) { - throw new FS.ErrnoError(44); - } - // can't truncate a device - if (FS.isChrdev(node.mode)) { - flags &= ~512; - } - // if asked only for a directory, then this must be one - if ((flags & 65536) && !FS.isDir(node.mode)) { - throw new FS.ErrnoError(54); - } - // check permissions, if this is not a file we just created now (it is ok to - // create and write to a file with read-only permissions; it is read-only - // for later use) - if (!created) { - var errCode = FS.mayOpen(node, flags); - if (errCode) { - throw new FS.ErrnoError(errCode); - } - } - // do truncation if necessary - if ((flags & 512) && !created) { - FS.truncate(node, 0); - } - // we've already handled these, don't pass down to the underlying vfs - flags &= ~(128 | 512 | 131072); - - // register the stream with the filesystem - var stream = FS.createStream({ - node, - path: FS.getPath(node), // we want the absolute path to the node - flags, - seekable: true, - position: 0, - stream_ops: node.stream_ops, - // used by the file family libc calls (fopen, fwrite, ferror, etc.) 
- ungotten: [], - error: false - }); - // call the new stream's open function - if (stream.stream_ops.open) { - stream.stream_ops.open(stream); - } - if (created) { - FS.chmod(node, mode & 0o777); - } - if (Module['logReadFiles'] && !(flags & 1)) { - if (!(path in FS.readFiles)) { - FS.readFiles[path] = 1; - } - } - return stream; - }, - close(stream) { - if (FS.isClosed(stream)) { - throw new FS.ErrnoError(8); - } - if (stream.getdents) stream.getdents = null; // free readdir state - try { - if (stream.stream_ops.close) { - stream.stream_ops.close(stream); - } - } catch (e) { - throw e; - } finally { - FS.closeStream(stream.fd); - } - stream.fd = null; - }, - isClosed(stream) { - return stream.fd === null; - }, - llseek(stream, offset, whence) { - if (FS.isClosed(stream)) { - throw new FS.ErrnoError(8); - } - if (!stream.seekable || !stream.stream_ops.llseek) { - throw new FS.ErrnoError(70); - } - if (whence != 0 && whence != 1 && whence != 2) { - throw new FS.ErrnoError(28); - } - stream.position = stream.stream_ops.llseek(stream, offset, whence); - stream.ungotten = []; - return stream.position; - }, - read(stream, buffer, offset, length, position) { - if (length < 0 || position < 0) { - throw new FS.ErrnoError(28); - } - if (FS.isClosed(stream)) { - throw new FS.ErrnoError(8); - } - if ((stream.flags & 2097155) === 1) { - throw new FS.ErrnoError(8); - } - if (FS.isDir(stream.node.mode)) { - throw new FS.ErrnoError(31); - } - if (!stream.stream_ops.read) { - throw new FS.ErrnoError(28); - } - var seeking = typeof position != 'undefined'; - if (!seeking) { - position = stream.position; - } else if (!stream.seekable) { - throw new FS.ErrnoError(70); - } - var bytesRead = stream.stream_ops.read(stream, buffer, offset, length, position); - if (!seeking) stream.position += bytesRead; - return bytesRead; - }, - write(stream, buffer, offset, length, position, canOwn) { - if (length < 0 || position < 0) { - throw new FS.ErrnoError(28); - } - if (FS.isClosed(stream)) { - 
throw new FS.ErrnoError(8); - } - if ((stream.flags & 2097155) === 0) { - throw new FS.ErrnoError(8); - } - if (FS.isDir(stream.node.mode)) { - throw new FS.ErrnoError(31); - } - if (!stream.stream_ops.write) { - throw new FS.ErrnoError(28); - } - if (stream.seekable && stream.flags & 1024) { - // seek to the end before writing in append mode - FS.llseek(stream, 0, 2); - } - var seeking = typeof position != 'undefined'; - if (!seeking) { - position = stream.position; - } else if (!stream.seekable) { - throw new FS.ErrnoError(70); - } - var bytesWritten = stream.stream_ops.write(stream, buffer, offset, length, position, canOwn); - if (!seeking) stream.position += bytesWritten; - return bytesWritten; - }, - mmap(stream, length, position, prot, flags) { - // User requests writing to file (prot & PROT_WRITE != 0). - // Checking if we have permissions to write to the file unless - // MAP_PRIVATE flag is set. According to POSIX spec it is possible - // to write to file opened in read-only mode with MAP_PRIVATE flag, - // as all modifications will be visible only in the memory of - // the current process. 
- if ((prot & 2) !== 0 - && (flags & 2) === 0 - && (stream.flags & 2097155) !== 2) { - throw new FS.ErrnoError(2); - } - if ((stream.flags & 2097155) === 1) { - throw new FS.ErrnoError(2); - } - if (!stream.stream_ops.mmap) { - throw new FS.ErrnoError(43); - } - if (!length) { - throw new FS.ErrnoError(28); - } - return stream.stream_ops.mmap(stream, length, position, prot, flags); - }, - msync(stream, buffer, offset, length, mmapFlags) { - if (!stream.stream_ops.msync) { - return 0; - } - return stream.stream_ops.msync(stream, buffer, offset, length, mmapFlags); - }, - ioctl(stream, cmd, arg) { - if (!stream.stream_ops.ioctl) { - throw new FS.ErrnoError(59); - } - return stream.stream_ops.ioctl(stream, cmd, arg); - }, - readFile(path, opts = {}) { - opts.flags = opts.flags || 0; - opts.encoding = opts.encoding || 'binary'; - if (opts.encoding !== 'utf8' && opts.encoding !== 'binary') { - abort(`Invalid encoding type "${opts.encoding}"`); - } - var stream = FS.open(path, opts.flags); - var stat = FS.stat(path); - var length = stat.size; - var buf = new Uint8Array(length); - FS.read(stream, buf, 0, length, 0); - if (opts.encoding === 'utf8') { - buf = UTF8ArrayToString(buf); - } - FS.close(stream); - return buf; - }, - writeFile(path, data, opts = {}) { - opts.flags = opts.flags || 577; - var stream = FS.open(path, opts.flags, opts.mode); - if (typeof data == 'string') { - data = new Uint8Array(intArrayFromString(data, true)); - } - if (ArrayBuffer.isView(data)) { - FS.write(stream, data, 0, data.byteLength, undefined, opts.canOwn); - } else { - abort('Unsupported data type'); - } - FS.close(stream); - }, - cwd:() => FS.currentPath, - chdir(path) { - var lookup = FS.lookupPath(path, { follow: true }); - if (lookup.node === null) { - throw new FS.ErrnoError(44); - } - if (!FS.isDir(lookup.node.mode)) { - throw new FS.ErrnoError(54); - } - var errCode = FS.nodePermissions(lookup.node, 'x'); - if (errCode) { - throw new FS.ErrnoError(errCode); - } - FS.currentPath = 
lookup.path; - }, - createDefaultDirectories() { - FS.mkdir('/tmp'); - FS.mkdir('/home'); - FS.mkdir('/home/web_user'); - }, - createDefaultDevices() { - // create /dev - FS.mkdir('/dev'); - // setup /dev/null - FS.registerDevice(FS.makedev(1, 3), { - read: () => 0, - write: (stream, buffer, offset, length, pos) => length, - llseek: () => 0, - }); - FS.mkdev('/dev/null', FS.makedev(1, 3)); - // setup /dev/tty and /dev/tty1 - // stderr needs to print output using err() rather than out() - // so we register a second tty just for it. - TTY.register(FS.makedev(5, 0), TTY.default_tty_ops); - TTY.register(FS.makedev(6, 0), TTY.default_tty1_ops); - FS.mkdev('/dev/tty', FS.makedev(5, 0)); - FS.mkdev('/dev/tty1', FS.makedev(6, 0)); - // setup /dev/[u]random - // use a buffer to avoid overhead of individual crypto calls per byte - var randomBuffer = new Uint8Array(1024), randomLeft = 0; - var randomByte = () => { - if (randomLeft === 0) { - randomFill(randomBuffer); - randomLeft = randomBuffer.byteLength; - } - return randomBuffer[--randomLeft]; - }; - FS.createDevice('/dev', 'random', randomByte); - FS.createDevice('/dev', 'urandom', randomByte); - // we're not going to emulate the actual shm device, - // just create the tmp dirs that reside in it commonly - FS.mkdir('/dev/shm'); - FS.mkdir('/dev/shm/tmp'); - }, - createSpecialDirectories() { - // create /proc/self/fd which allows /proc/self/fd/6 => readlink gives the - // name of the stream for fd 6 (see test_unistd_ttyname) - FS.mkdir('/proc'); - var proc_self = FS.mkdir('/proc/self'); - FS.mkdir('/proc/self/fd'); - FS.mount({ - mount() { - var node = FS.createNode(proc_self, 'fd', 16895, 73); - node.stream_ops = { - llseek: MEMFS.stream_ops.llseek, - }; - node.node_ops = { - lookup(parent, name) { - var fd = +name; - var stream = FS.getStreamChecked(fd); - var ret = { - parent: null, - mount: { mountpoint: 'fake' }, - node_ops: { readlink: () => stream.path }, - id: fd + 1, - }; - ret.parent = ret; // make it look like a 
simple root node - return ret; - }, - readdir() { - return Array.from(FS.streams.entries()) - .filter(([k, v]) => v) - .map(([k, v]) => k.toString()); - } - }; - return node; - } - }, {}, '/proc/self/fd'); - }, - createStandardStreams(input, output, error) { - // TODO deprecate the old functionality of a single - // input / output callback and that utilizes FS.createDevice - // and instead require a unique set of stream ops - - // by default, we symlink the standard streams to the - // default tty devices. however, if the standard streams - // have been overwritten we create a unique device for - // them instead. - if (input) { - FS.createDevice('/dev', 'stdin', input); - } else { - FS.symlink('/dev/tty', '/dev/stdin'); - } - if (output) { - FS.createDevice('/dev', 'stdout', null, output); - } else { - FS.symlink('/dev/tty', '/dev/stdout'); - } - if (error) { - FS.createDevice('/dev', 'stderr', null, error); - } else { - FS.symlink('/dev/tty1', '/dev/stderr'); - } - - // open default streams for the stdin, stdout and stderr devices - var stdin = FS.open('/dev/stdin', 0); - var stdout = FS.open('/dev/stdout', 1); - var stderr = FS.open('/dev/stderr', 1); - }, - staticInit() { - FS.nameTable = new Array(4096); - - FS.mount(MEMFS, {}, '/'); - - FS.createDefaultDirectories(); - FS.createDefaultDevices(); - FS.createSpecialDirectories(); - - FS.filesystems = { - 'MEMFS': MEMFS, - }; - }, - init(input, output, error) { - FS.initialized = true; - - // Allow Module.stdin etc. 
to provide defaults, if none explicitly passed to us here - input ??= Module['stdin']; - output ??= Module['stdout']; - error ??= Module['stderr']; - - FS.createStandardStreams(input, output, error); - }, - quit() { - FS.initialized = false; - // force-flush all streams, so we get musl std streams printed out - // close all of our streams - for (var stream of FS.streams) { - if (stream) { - FS.close(stream); - } - } - }, - findObject(path, dontResolveLastLink) { - var ret = FS.analyzePath(path, dontResolveLastLink); - if (!ret.exists) { - return null; - } - return ret.object; - }, - analyzePath(path, dontResolveLastLink) { - // operate from within the context of the symlink's target - try { - var lookup = FS.lookupPath(path, { follow: !dontResolveLastLink }); - path = lookup.path; - } catch (e) { - } - var ret = { - isRoot: false, exists: false, error: 0, name: null, path: null, object: null, - parentExists: false, parentPath: null, parentObject: null - }; - try { - var lookup = FS.lookupPath(path, { parent: true }); - ret.parentExists = true; - ret.parentPath = lookup.path; - ret.parentObject = lookup.node; - ret.name = PATH.basename(path); - lookup = FS.lookupPath(path, { follow: !dontResolveLastLink }); - ret.exists = true; - ret.path = lookup.path; - ret.object = lookup.node; - ret.name = lookup.node.name; - ret.isRoot = lookup.path === '/'; - } catch (e) { - ret.error = e.errno; - }; - return ret; - }, - createPath(parent, path, canRead, canWrite) { - parent = typeof parent == 'string' ? parent : FS.getPath(parent); - var parts = path.split('/').reverse(); - while (parts.length) { - var part = parts.pop(); - if (!part) continue; - var current = PATH.join2(parent, part); - try { - FS.mkdir(current); - } catch (e) { - if (e.errno != 20) throw e; - } - parent = current; - } - return current; - }, - createFile(parent, name, properties, canRead, canWrite) { - var path = PATH.join2(typeof parent == 'string' ? 
parent : FS.getPath(parent), name); - var mode = FS_getMode(canRead, canWrite); - return FS.create(path, mode); - }, - createDataFile(parent, name, data, canRead, canWrite, canOwn) { - var path = name; - if (parent) { - parent = typeof parent == 'string' ? parent : FS.getPath(parent); - path = name ? PATH.join2(parent, name) : parent; - } - var mode = FS_getMode(canRead, canWrite); - var node = FS.create(path, mode); - if (data) { - if (typeof data == 'string') { - var arr = new Array(data.length); - for (var i = 0, len = data.length; i < len; ++i) arr[i] = data.charCodeAt(i); - data = arr; - } - // make sure we can write to the file - FS.chmod(node, mode | 146); - var stream = FS.open(node, 577); - FS.write(stream, data, 0, data.length, 0, canOwn); - FS.close(stream); - FS.chmod(node, mode); - } - }, - createDevice(parent, name, input, output) { - var path = PATH.join2(typeof parent == 'string' ? parent : FS.getPath(parent), name); - var mode = FS_getMode(!!input, !!output); - FS.createDevice.major ??= 64; - var dev = FS.makedev(FS.createDevice.major++, 0); - // Create a fake device that a set of stream ops to emulate - // the old behavior. 
- FS.registerDevice(dev, { - open(stream) { - stream.seekable = false; - }, - close(stream) { - // flush any pending line data - if (output?.buffer?.length) { - output(10); - } - }, - read(stream, buffer, offset, length, pos /* ignored */) { - var bytesRead = 0; - for (var i = 0; i < length; i++) { - var result; - try { - result = input(); - } catch (e) { - throw new FS.ErrnoError(29); - } - if (result === undefined && bytesRead === 0) { - throw new FS.ErrnoError(6); - } - if (result === null || result === undefined) break; - bytesRead++; - buffer[offset+i] = result; - } - if (bytesRead) { - stream.node.atime = Date.now(); - } - return bytesRead; - }, - write(stream, buffer, offset, length, pos) { - for (var i = 0; i < length; i++) { - try { - output(buffer[offset+i]); - } catch (e) { - throw new FS.ErrnoError(29); - } - } - if (length) { - stream.node.mtime = stream.node.ctime = Date.now(); - } - return i; - } - }); - return FS.mkdev(path, mode, dev); - }, - forceLoadFile(obj) { - if (obj.isDevice || obj.isFolder || obj.link || obj.contents) return true; - if (globalThis.XMLHttpRequest) { - abort("Lazy loading should have been performed (contents set) in createLazyFile, but it was not. Lazy loading only works in web workers. Use --embed-file or --preload-file in emcc on the main thread."); - } else { // Command-line. - try { - obj.contents = readBinary(obj.url); - } catch (e) { - throw new FS.ErrnoError(29); - } - } - }, - createLazyFile(parent, name, url, canRead, canWrite) { - // Lazy chunked Uint8Array (implements get and length from Uint8Array). - // Actual getting is abstracted away for eventual reuse. - class LazyUint8Array { - lengthKnown = false; - chunks = []; // Loaded chunks. 
Index is the chunk number - get(idx) { - if (idx > this.length-1 || idx < 0) { - return undefined; - } - var chunkOffset = idx % this.chunkSize; - var chunkNum = (idx / this.chunkSize)|0; - return this.getter(chunkNum)[chunkOffset]; - } - setDataGetter(getter) { - this.getter = getter; - } - cacheLength() { - // Find length - var xhr = new XMLHttpRequest(); - xhr.open('HEAD', url, false); - xhr.send(null); - if (!(xhr.status >= 200 && xhr.status < 300 || xhr.status === 304)) abort("Couldn't load " + url + ". Status: " + xhr.status); - var datalength = Number(xhr.getResponseHeader("Content-length")); - var header; - var hasByteServing = (header = xhr.getResponseHeader("Accept-Ranges")) && header === "bytes"; - var usesGzip = (header = xhr.getResponseHeader("Content-Encoding")) && header === "gzip"; - - var chunkSize = 1024*1024; // Chunk size in bytes - - if (!hasByteServing) chunkSize = datalength; - - // Function to get a range from the remote URL. - var doXHR = (from, to) => { - if (from > to) abort("invalid range (" + from + ", " + to + ") or no bytes requested!"); - if (to > datalength-1) abort("only " + datalength + " bytes available! programmer error!"); - - // TODO: Use mozResponseArrayBuffer, responseStream, etc. if available. - var xhr = new XMLHttpRequest(); - xhr.open('GET', url, false); - if (datalength !== chunkSize) xhr.setRequestHeader("Range", "bytes=" + from + "-" + to); - - // Some hints to the browser that we want binary data. - xhr.responseType = 'arraybuffer'; - if (xhr.overrideMimeType) { - xhr.overrideMimeType('text/plain; charset=x-user-defined'); - } - - xhr.send(null); - if (!(xhr.status >= 200 && xhr.status < 300 || xhr.status === 304)) abort("Couldn't load " + url + ". 
Status: " + xhr.status); - if (xhr.response !== undefined) { - return new Uint8Array(/** @type{Array} */(xhr.response || [])); - } - return intArrayFromString(xhr.responseText || '', true); - }; - var lazyArray = this; - lazyArray.setDataGetter((chunkNum) => { - var start = chunkNum * chunkSize; - var end = (chunkNum+1) * chunkSize - 1; // including this byte - end = Math.min(end, datalength-1); // if datalength-1 is selected, this is the last block - if (typeof lazyArray.chunks[chunkNum] == 'undefined') { - lazyArray.chunks[chunkNum] = doXHR(start, end); - } - if (typeof lazyArray.chunks[chunkNum] == 'undefined') abort('doXHR failed!'); - return lazyArray.chunks[chunkNum]; - }); - - if (usesGzip || !datalength) { - // if the server uses gzip or doesn't supply the length, we have to download the whole file to get the (uncompressed) length - chunkSize = datalength = 1; // this will force getter(0)/doXHR do download the whole file - datalength = this.getter(0).length; - chunkSize = datalength; - out("LazyFiles on gzip forces download of the whole file when length is accessed"); - } - - this._length = datalength; - this._chunkSize = chunkSize; - this.lengthKnown = true; - } - get length() { - if (!this.lengthKnown) { - this.cacheLength(); - } - return this._length; - } - get chunkSize() { - if (!this.lengthKnown) { - this.cacheLength(); - } - return this._chunkSize; - } - } - - if (globalThis.XMLHttpRequest) { - if (!ENVIRONMENT_IS_WORKER) abort('Cannot do synchronous binary XHRs outside webworkers in modern browsers. Use --embed-file or --preload-file in emcc'); - var lazyArray = new LazyUint8Array(); - var properties = { isDevice: false, contents: lazyArray }; - } else { - var properties = { isDevice: false, url: url }; - } - - var node = FS.createFile(parent, name, properties, canRead, canWrite); - // This is a total hack, but I want to get this lazy file code out of the - // core of MEMFS. 
If we want to keep this lazy file concept I feel it should - // be its own thin LAZYFS proxying calls to MEMFS. - if (properties.contents) { - node.contents = properties.contents; - } else if (properties.url) { - node.contents = null; - node.url = properties.url; - } - // Add a function that defers querying the file size until it is asked the first time. - Object.defineProperties(node, { - usedBytes: { - get: function() { return this.contents.length; } - } - }); - // override each stream op with one that tries to force load the lazy file first - var stream_ops = {}; - for (const [key, fn] of Object.entries(node.stream_ops)) { - stream_ops[key] = (...args) => { - FS.forceLoadFile(node); - return fn(...args); - }; - } - function writeChunks(stream, buffer, offset, length, position) { - var contents = stream.node.contents; - if (position >= contents.length) - return 0; - var size = Math.min(contents.length - position, length); - if (contents.slice) { // normal array - for (var i = 0; i < size; i++) { - buffer[offset + i] = contents[position + i]; - } - } else { - for (var i = 0; i < size; i++) { // LazyUint8Array from sync binary XHR - buffer[offset + i] = contents.get(position + i); - } - } - return size; - } - // use a custom read function - stream_ops.read = (stream, buffer, offset, length, position) => { - FS.forceLoadFile(node); - return writeChunks(stream, buffer, offset, length, position) - }; - // use a custom mmap function - stream_ops.mmap = (stream, length, position, prot, flags) => { - FS.forceLoadFile(node); - var ptr = mmapAlloc(length); - if (!ptr) { - throw new FS.ErrnoError(48); - } - writeChunks(stream, HEAP8, ptr, length, position); - return { ptr, allocated: true }; - }; - node.stream_ops = stream_ops; - return node; - }, - }; - - - /** - * Given a pointer 'ptr' to a null-terminated UTF8-encoded string in the - * emscripten HEAP, returns a copy of that string as a Javascript String object. 
- * - * @param {number} ptr - * @param {number=} maxBytesToRead - An optional length that specifies the - * maximum number of bytes to read. You can omit this parameter to scan the - * string until the first 0 byte. If maxBytesToRead is passed, and the string - * at [ptr, ptr+maxBytesToReadr[ contains a null byte in the middle, then the - * string will cut short at that byte index. - * @param {boolean=} ignoreNul - If true, the function will not stop on a NUL character. - * @return {string} - */ - var UTF8ToString = (ptr, maxBytesToRead, ignoreNul) => { - return ptr ? UTF8ArrayToString(HEAPU8, ptr, maxBytesToRead, ignoreNul) : ''; - }; - var SYSCALLS = { - DEFAULT_POLLMASK:5, - calculateAt(dirfd, path, allowEmpty) { - if (PATH.isAbs(path)) { - return path; - } - // relative path - var dir; - if (dirfd === -100) { - dir = FS.cwd(); - } else { - var dirstream = SYSCALLS.getStreamFromFD(dirfd); - dir = dirstream.path; - } - if (path.length == 0) { - if (!allowEmpty) { - throw new FS.ErrnoError(44);; - } - return dir; - } - return dir + '/' + path; - }, - writeStat(buf, stat) { - HEAPU32[((buf)>>2)] = stat.dev; - HEAPU32[(((buf)+(4))>>2)] = stat.mode; - HEAPU32[(((buf)+(8))>>2)] = stat.nlink; - HEAPU32[(((buf)+(12))>>2)] = stat.uid; - HEAPU32[(((buf)+(16))>>2)] = stat.gid; - HEAPU32[(((buf)+(20))>>2)] = stat.rdev; - HEAP64[(((buf)+(24))>>3)] = BigInt(stat.size); - HEAP32[(((buf)+(32))>>2)] = 4096; - HEAP32[(((buf)+(36))>>2)] = stat.blocks; - var atime = stat.atime.getTime(); - var mtime = stat.mtime.getTime(); - var ctime = stat.ctime.getTime(); - HEAP64[(((buf)+(40))>>3)] = BigInt(Math.floor(atime / 1000)); - HEAPU32[(((buf)+(48))>>2)] = (atime % 1000) * 1000 * 1000; - HEAP64[(((buf)+(56))>>3)] = BigInt(Math.floor(mtime / 1000)); - HEAPU32[(((buf)+(64))>>2)] = (mtime % 1000) * 1000 * 1000; - HEAP64[(((buf)+(72))>>3)] = BigInt(Math.floor(ctime / 1000)); - HEAPU32[(((buf)+(80))>>2)] = (ctime % 1000) * 1000 * 1000; - HEAP64[(((buf)+(88))>>3)] = BigInt(stat.ino); - return 
0; - }, - writeStatFs(buf, stats) { - HEAPU32[(((buf)+(4))>>2)] = stats.bsize; - HEAPU32[(((buf)+(60))>>2)] = stats.bsize; - HEAP64[(((buf)+(8))>>3)] = BigInt(stats.blocks); - HEAP64[(((buf)+(16))>>3)] = BigInt(stats.bfree); - HEAP64[(((buf)+(24))>>3)] = BigInt(stats.bavail); - HEAP64[(((buf)+(32))>>3)] = BigInt(stats.files); - HEAP64[(((buf)+(40))>>3)] = BigInt(stats.ffree); - HEAPU32[(((buf)+(48))>>2)] = stats.fsid; - HEAPU32[(((buf)+(64))>>2)] = stats.flags; // ST_NOSUID - HEAPU32[(((buf)+(56))>>2)] = stats.namelen; - }, - doMsync(addr, stream, len, flags, offset) { - if (!FS.isFile(stream.node.mode)) { - throw new FS.ErrnoError(43); - } - if (flags & 2) { - // MAP_PRIVATE calls need not to be synced back to underlying fs - return 0; - } - var buffer = HEAPU8.slice(addr, addr + len); - FS.msync(stream, buffer, offset, len, flags); - }, - getStreamFromFD(fd) { - var stream = FS.getStreamChecked(fd); - return stream; - }, - varargs:undefined, - getStr(ptr) { - var ret = UTF8ToString(ptr); - return ret; - }, - }; - function ___syscall_fcntl64(fd, cmd, varargs) { - SYSCALLS.varargs = varargs; - try { - - var stream = SYSCALLS.getStreamFromFD(fd); - switch (cmd) { - case 0: { - var arg = syscallGetVarargI(); - if (arg < 0) { - return -28; - } - while (FS.streams[arg]) { - arg++; - } - var newStream; - newStream = FS.dupStream(stream, arg); - return newStream.fd; - } - case 1: - case 2: - return 0; // FD_CLOEXEC makes no sense for a single process. - case 3: - return stream.flags; - case 4: { - var arg = syscallGetVarargI(); - stream.flags |= arg; - return 0; - } - case 12: { - var arg = syscallGetVarargP(); - var offset = 0; - // We're always unlocked. - HEAP16[(((arg)+(offset))>>1)] = 2; - return 0; - } - case 13: - case 14: - // Pretend that the locking is successful. These are process-level locks, - // and Emscripten programs are a single process. If we supported linking a - // filesystem between programs, we'd need to do more here. 
- // See https://github.com/emscripten-core/emscripten/issues/23697 - return 0; - } - return -28; - } catch (e) { - if (typeof FS == 'undefined' || !(e.name === 'ErrnoError')) throw e; - return -e.errno; - } - } - - function ___syscall_fstat64(fd, buf) { - try { - - return SYSCALLS.writeStat(buf, FS.fstat(fd)); - } catch (e) { - if (typeof FS == 'undefined' || !(e.name === 'ErrnoError')) throw e; - return -e.errno; - } - } - - - function ___syscall_ioctl(fd, op, varargs) { - SYSCALLS.varargs = varargs; - try { - - var stream = SYSCALLS.getStreamFromFD(fd); - switch (op) { - case 21509: { - if (!stream.tty) return -59; - return 0; - } - case 21505: { - if (!stream.tty) return -59; - if (stream.tty.ops.ioctl_tcgets) { - var termios = stream.tty.ops.ioctl_tcgets(stream); - var argp = syscallGetVarargP(); - HEAP32[((argp)>>2)] = termios.c_iflag || 0; - HEAP32[(((argp)+(4))>>2)] = termios.c_oflag || 0; - HEAP32[(((argp)+(8))>>2)] = termios.c_cflag || 0; - HEAP32[(((argp)+(12))>>2)] = termios.c_lflag || 0; - for (var i = 0; i < 32; i++) { - HEAP8[(argp + i)+(17)] = termios.c_cc[i] || 0; - } - return 0; - } - return 0; - } - case 21510: - case 21511: - case 21512: { - if (!stream.tty) return -59; - return 0; // no-op, not actually adjusting terminal settings - } - case 21506: - case 21507: - case 21508: { - if (!stream.tty) return -59; - if (stream.tty.ops.ioctl_tcsets) { - var argp = syscallGetVarargP(); - var c_iflag = HEAP32[((argp)>>2)]; - var c_oflag = HEAP32[(((argp)+(4))>>2)]; - var c_cflag = HEAP32[(((argp)+(8))>>2)]; - var c_lflag = HEAP32[(((argp)+(12))>>2)]; - var c_cc = [] - for (var i = 0; i < 32; i++) { - c_cc.push(HEAP8[(argp + i)+(17)]); - } - return stream.tty.ops.ioctl_tcsets(stream.tty, op, { c_iflag, c_oflag, c_cflag, c_lflag, c_cc }); - } - return 0; // no-op, not actually adjusting terminal settings - } - case 21519: { - if (!stream.tty) return -59; - var argp = syscallGetVarargP(); - HEAP32[((argp)>>2)] = 0; - return 0; - } - case 21520: { - if 
(!stream.tty) return -59; - return -28; // not supported - } - case 21537: - case 21531: { - var argp = syscallGetVarargP(); - return FS.ioctl(stream, op, argp); - } - case 21523: { - // TODO: in theory we should write to the winsize struct that gets - // passed in, but for now musl doesn't read anything on it - if (!stream.tty) return -59; - if (stream.tty.ops.ioctl_tiocgwinsz) { - var winsize = stream.tty.ops.ioctl_tiocgwinsz(stream.tty); - var argp = syscallGetVarargP(); - HEAP16[((argp)>>1)] = winsize[0]; - HEAP16[(((argp)+(2))>>1)] = winsize[1]; - } - return 0; - } - case 21524: { - // TODO: technically, this ioctl call should change the window size. - // but, since emscripten doesn't have any concept of a terminal window - // yet, we'll just silently throw it away as we do TIOCGWINSZ - if (!stream.tty) return -59; - return 0; - } - case 21515: { - if (!stream.tty) return -59; - return 0; - } - default: return -28; // not supported - } - } catch (e) { - if (typeof FS == 'undefined' || !(e.name === 'ErrnoError')) throw e; - return -e.errno; - } - } - - function ___syscall_lstat64(path, buf) { - try { - - path = SYSCALLS.getStr(path); - return SYSCALLS.writeStat(buf, FS.lstat(path)); - } catch (e) { - if (typeof FS == 'undefined' || !(e.name === 'ErrnoError')) throw e; - return -e.errno; - } - } - - function ___syscall_newfstatat(dirfd, path, buf, flags) { - try { - - path = SYSCALLS.getStr(path); - var nofollow = flags & 256; - var allowEmpty = flags & 4096; - flags = flags & (~6400); - path = SYSCALLS.calculateAt(dirfd, path, allowEmpty); - return SYSCALLS.writeStat(buf, nofollow ? FS.lstat(path) : FS.stat(path)); - } catch (e) { - if (typeof FS == 'undefined' || !(e.name === 'ErrnoError')) throw e; - return -e.errno; - } - } - - - function ___syscall_openat(dirfd, path, flags, varargs) { - SYSCALLS.varargs = varargs; - try { - - path = SYSCALLS.getStr(path); - path = SYSCALLS.calculateAt(dirfd, path); - var mode = varargs ? 
syscallGetVarargI() : 0; - return FS.open(path, flags, mode).fd; - } catch (e) { - if (typeof FS == 'undefined' || !(e.name === 'ErrnoError')) throw e; - return -e.errno; - } - } - - function ___syscall_stat64(path, buf) { - try { - - path = SYSCALLS.getStr(path); - return SYSCALLS.writeStat(buf, FS.stat(path)); - } catch (e) { - if (typeof FS == 'undefined' || !(e.name === 'ErrnoError')) throw e; - return -e.errno; - } - } - - var __abort_js = () => - abort(''); - - var structRegistrations = { - }; - - var runDestructors = (destructors) => { - while (destructors.length) { - var ptr = destructors.pop(); - var del = destructors.pop(); - del(ptr); - } - }; - - /** @suppress {globalThis} */ - function readPointer(pointer) { - return this.fromWireType(HEAPU32[((pointer)>>2)]); - } - - var awaitingDependencies = { - }; - - var registeredTypes = { - }; - - var typeDependencies = { - }; - - var InternalError = class InternalError extends Error { constructor(message) { super(message); this.name = 'InternalError'; }}; - var throwInternalError = (message) => { throw new InternalError(message); }; - var whenDependentTypesAreResolved = (myTypes, dependentTypes, getTypeConverters) => { - myTypes.forEach((type) => typeDependencies[type] = dependentTypes); - - function onComplete(typeConverters) { - var myTypeConverters = getTypeConverters(typeConverters); - if (myTypeConverters.length !== myTypes.length) { - throwInternalError('Mismatched type converter count'); - } - for (var i = 0; i < myTypes.length; ++i) { - registerType(myTypes[i], myTypeConverters[i]); - } - } - - var typeConverters = new Array(dependentTypes.length); - var unregisteredTypes = []; - var registered = 0; - for (let [i, dt] of dependentTypes.entries()) { - if (registeredTypes.hasOwnProperty(dt)) { - typeConverters[i] = registeredTypes[dt]; - } else { - unregisteredTypes.push(dt); - if (!awaitingDependencies.hasOwnProperty(dt)) { - awaitingDependencies[dt] = []; - } - awaitingDependencies[dt].push(() => { - 
typeConverters[i] = registeredTypes[dt]; - ++registered; - if (registered === unregisteredTypes.length) { - onComplete(typeConverters); - } - }); - } - } - if (0 === unregisteredTypes.length) { - onComplete(typeConverters); - } - }; - var __embind_finalize_value_object = (structType) => { - var reg = structRegistrations[structType]; - delete structRegistrations[structType]; - - var rawConstructor = reg.rawConstructor; - var rawDestructor = reg.rawDestructor; - var fieldRecords = reg.fields; - var fieldTypes = fieldRecords.map((field) => field.getterReturnType). - concat(fieldRecords.map((field) => field.setterArgumentType)); - whenDependentTypesAreResolved([structType], fieldTypes, (fieldTypes) => { - var fields = {}; - for (var [i, field] of fieldRecords.entries()) { - const getterReturnType = fieldTypes[i]; - const getter = field.getter; - const getterContext = field.getterContext; - const setterArgumentType = fieldTypes[i + fieldRecords.length]; - const setter = field.setter; - const setterContext = field.setterContext; - fields[field.fieldName] = { - read: (ptr) => getterReturnType.fromWireType(getter(getterContext, ptr)), - write: (ptr, o) => { - var destructors = []; - setter(setterContext, ptr, setterArgumentType.toWireType(destructors, o)); - runDestructors(destructors); - }, - optional: getterReturnType.optional, - }; - } - - return [{ - name: reg.name, - fromWireType: (ptr) => { - var rv = {}; - for (var i in fields) { - rv[i] = fields[i].read(ptr); - } - rawDestructor(ptr); - return rv; - }, - toWireType: (destructors, o) => { - // todo: Here we have an opportunity for -O3 level "unsafe" optimizations: - // assume all fields are present without checking. 
- for (var fieldName in fields) { - if (!(fieldName in o) && !fields[fieldName].optional) { - throw new TypeError(`Missing field: "${fieldName}"`); - } - } - var ptr = rawConstructor(); - for (fieldName in fields) { - fields[fieldName].write(ptr, o[fieldName]); - } - if (destructors !== null) { - destructors.push(rawDestructor, ptr); - } - return ptr; - }, - readValueFromPointer: readPointer, - destructorFunction: rawDestructor, - }]; - }); - }; - - var AsciiToString = (ptr) => { - var str = ''; - while (1) { - var ch = HEAPU8[ptr++]; - if (!ch) return str; - str += String.fromCharCode(ch); - } - }; - - - - - var BindingError = class BindingError extends Error { constructor(message) { super(message); this.name = 'BindingError'; }}; - var throwBindingError = (message) => { throw new BindingError(message); }; - /** @param {Object=} options */ - function sharedRegisterType(rawType, registeredInstance, options = {}) { - var name = registeredInstance.name; - if (!rawType) { - throwBindingError(`type "${name}" must have a positive integer typeid pointer`); - } - if (registeredTypes.hasOwnProperty(rawType)) { - if (options.ignoreDuplicateRegistrations) { - return; - } else { - throwBindingError(`Cannot register type '${name}' twice`); - } - } - - registeredTypes[rawType] = registeredInstance; - delete typeDependencies[rawType]; - - if (awaitingDependencies.hasOwnProperty(rawType)) { - var callbacks = awaitingDependencies[rawType]; - delete awaitingDependencies[rawType]; - callbacks.forEach((cb) => cb()); - } - } - /** @param {Object=} options */ - function registerType(rawType, registeredInstance, options = {}) { - return sharedRegisterType(rawType, registeredInstance, options); - } - - var integerReadValueFromPointer = (name, width, signed) => { - // integers are quite common, so generate very specialized functions - switch (width) { - case 1: return signed ? - (pointer) => HEAP8[pointer] : - (pointer) => HEAPU8[pointer]; - case 2: return signed ? 
- (pointer) => HEAP16[((pointer)>>1)] : - (pointer) => HEAPU16[((pointer)>>1)] - case 4: return signed ? - (pointer) => HEAP32[((pointer)>>2)] : - (pointer) => HEAPU32[((pointer)>>2)] - case 8: return signed ? - (pointer) => HEAP64[((pointer)>>3)] : - (pointer) => HEAPU64[((pointer)>>3)] - default: - throw new TypeError(`invalid integer width (${width}): ${name}`); - } - }; - /** @suppress {globalThis} */ - var __embind_register_bigint = (primitiveType, name, size, minRange, maxRange) => { - name = AsciiToString(name); - - const isUnsignedType = minRange === 0n; - - let fromWireType = (value) => value; - if (isUnsignedType) { - // uint64 get converted to int64 in ABI, fix them up like we do for 32-bit integers. - const bitSize = size * 8; - fromWireType = (value) => { - return BigInt.asUintN(bitSize, value); - } - maxRange = fromWireType(maxRange); - } - - registerType(primitiveType, { - name, - fromWireType: fromWireType, - toWireType: (destructors, value) => { - if (typeof value == "number") { - value = BigInt(value); - } - return value; - }, - readValueFromPointer: integerReadValueFromPointer(name, size, !isUnsignedType), - destructorFunction: null, // This type does not need a destructor - }); - }; - - - /** @suppress {globalThis} */ - var __embind_register_bool = (rawType, name, trueValue, falseValue) => { - name = AsciiToString(name); - registerType(rawType, { - name, - fromWireType: function(wt) { - // ambiguous emscripten ABI: sometimes return values are - // true or false, and sometimes integers (0 or 1) - return !!wt; - }, - toWireType: function(destructors, o) { - return o ? 
trueValue : falseValue; - }, - readValueFromPointer: function(pointer) { - return this.fromWireType(HEAPU8[pointer]); - }, - destructorFunction: null, // This type does not need a destructor - }); - }; - - - - var shallowCopyInternalPointer = (o) => { - return { - count: o.count, - deleteScheduled: o.deleteScheduled, - preservePointerOnDelete: o.preservePointerOnDelete, - ptr: o.ptr, - ptrType: o.ptrType, - smartPtr: o.smartPtr, - smartPtrType: o.smartPtrType, - }; - }; - - var throwInstanceAlreadyDeleted = (obj) => { - function getInstanceTypeName(handle) { - return handle.$$.ptrType.registeredClass.name; - } - throwBindingError(getInstanceTypeName(obj) + ' instance already deleted'); - }; - - var finalizationRegistry = false; - - var detachFinalizer = (handle) => {}; - - var runDestructor = ($$) => { - if ($$.smartPtr) { - $$.smartPtrType.rawDestructor($$.smartPtr); - } else { - $$.ptrType.registeredClass.rawDestructor($$.ptr); - } - }; - var releaseClassHandle = ($$) => { - $$.count.value -= 1; - var toDelete = 0 === $$.count.value; - if (toDelete) { - runDestructor($$); - } - }; - var attachFinalizer = (handle) => { - if (!globalThis.FinalizationRegistry) { - attachFinalizer = (handle) => handle; - return handle; - } - // If the running environment has a FinalizationRegistry (see - // https://github.com/tc39/proposal-weakrefs), then attach finalizers - // for class handles. We check for the presence of FinalizationRegistry - // at run-time, not build-time. 
- finalizationRegistry = new FinalizationRegistry((info) => { - releaseClassHandle(info.$$); - }); - attachFinalizer = (handle) => { - var $$ = handle.$$; - var hasSmartPtr = !!$$.smartPtr; - if (hasSmartPtr) { - // We should not call the destructor on raw pointers in case other code expects the pointee to live - var info = { $$: $$ }; - finalizationRegistry.register(handle, info, handle); - } - return handle; - }; - detachFinalizer = (handle) => finalizationRegistry.unregister(handle); - return attachFinalizer(handle); - }; - - - - - var deletionQueue = []; - var flushPendingDeletes = () => { - while (deletionQueue.length) { - var obj = deletionQueue.pop(); - obj.$$.deleteScheduled = false; - obj['delete'](); - } - }; - - var delayFunction; - var init_ClassHandle = () => { - let proto = ClassHandle.prototype; - - Object.assign(proto, { - "isAliasOf"(other) { - if (!(this instanceof ClassHandle)) { - return false; - } - if (!(other instanceof ClassHandle)) { - return false; - } - - var leftClass = this.$$.ptrType.registeredClass; - var left = this.$$.ptr; - other.$$ = /** @type {Object} */ (other.$$); - var rightClass = other.$$.ptrType.registeredClass; - var right = other.$$.ptr; - - while (leftClass.baseClass) { - left = leftClass.upcast(left); - leftClass = leftClass.baseClass; - } - - while (rightClass.baseClass) { - right = rightClass.upcast(right); - rightClass = rightClass.baseClass; - } - - return leftClass === rightClass && left === right; - }, - - "clone"() { - if (!this.$$.ptr) { - throwInstanceAlreadyDeleted(this); - } - - if (this.$$.preservePointerOnDelete) { - this.$$.count.value += 1; - return this; - } else { - var clone = attachFinalizer(Object.create(Object.getPrototypeOf(this), { - $$: { - value: shallowCopyInternalPointer(this.$$), - } - })); - - clone.$$.count.value += 1; - clone.$$.deleteScheduled = false; - return clone; - } - }, - - "delete"() { - if (!this.$$.ptr) { - throwInstanceAlreadyDeleted(this); - } - - if (this.$$.deleteScheduled 
&& !this.$$.preservePointerOnDelete) { - throwBindingError('Object already scheduled for deletion'); - } - - detachFinalizer(this); - releaseClassHandle(this.$$); - - if (!this.$$.preservePointerOnDelete) { - this.$$.smartPtr = undefined; - this.$$.ptr = undefined; - } - }, - - "isDeleted"() { - return !this.$$.ptr; - }, - - "deleteLater"() { - if (!this.$$.ptr) { - throwInstanceAlreadyDeleted(this); - } - if (this.$$.deleteScheduled && !this.$$.preservePointerOnDelete) { - throwBindingError('Object already scheduled for deletion'); - } - deletionQueue.push(this); - if (deletionQueue.length === 1 && delayFunction) { - delayFunction(flushPendingDeletes); - } - this.$$.deleteScheduled = true; - return this; - }, - }); - - // Support `using ...` from https://github.com/tc39/proposal-explicit-resource-management. - const symbolDispose = Symbol.dispose; - if (symbolDispose) { - proto[symbolDispose] = proto['delete']; - } - }; - /** @constructor */ - function ClassHandle() { - } - - var createNamedFunction = (name, func) => Object.defineProperty(func, 'name', { value: name }); - - var registeredPointers = { - }; - - var ensureOverloadTable = (proto, methodName, humanName) => { - if (undefined === proto[methodName].overloadTable) { - var prevFunc = proto[methodName]; - // Inject an overload resolver function that routes to the appropriate overload based on the number of arguments. - proto[methodName] = function(...args) { - // TODO This check can be removed in -O3 level "unsafe" optimizations. - if (!proto[methodName].overloadTable.hasOwnProperty(args.length)) { - throwBindingError(`Function '${humanName}' called with an invalid number of arguments (${args.length}) - expects one of (${proto[methodName].overloadTable})!`); - } - return proto[methodName].overloadTable[args.length].apply(this, args); - }; - // Move the previous function into the overload table. 
- proto[methodName].overloadTable = []; - proto[methodName].overloadTable[prevFunc.argCount] = prevFunc; - } - }; - - /** @param {number=} numArguments */ - var exposePublicSymbol = (name, value, numArguments) => { - if (Module.hasOwnProperty(name)) { - if (undefined === numArguments || (undefined !== Module[name].overloadTable && undefined !== Module[name].overloadTable[numArguments])) { - throwBindingError(`Cannot register public name '${name}' twice`); - } - - // We are exposing a function with the same name as an existing function. Create an overload table and a function selector - // that routes between the two. - ensureOverloadTable(Module, name, name); - if (Module[name].overloadTable.hasOwnProperty(numArguments)) { - throwBindingError(`Cannot register multiple overloads of a function with the same number of arguments (${numArguments})!`); - } - // Add the new function into the overload table. - Module[name].overloadTable[numArguments] = value; - } else { - Module[name] = value; - Module[name].argCount = numArguments; - } - }; - - var char_0 = 48; - - var char_9 = 57; - var makeLegalFunctionName = (name) => { - name = name.replace(/[^a-zA-Z0-9_]/g, '$'); - var f = name.charCodeAt(0); - if (f >= char_0 && f <= char_9) { - return `_${name}`; - } - return name; - }; - - - /** @constructor */ - function RegisteredClass(name, - constructor, - instancePrototype, - rawDestructor, - baseClass, - getActualType, - upcast, - downcast) { - this.name = name; - this.constructor = constructor; - this.instancePrototype = instancePrototype; - this.rawDestructor = rawDestructor; - this.baseClass = baseClass; - this.getActualType = getActualType; - this.upcast = upcast; - this.downcast = downcast; - this.pureVirtualFunctions = []; - } - - - var upcastPointer = (ptr, ptrClass, desiredClass) => { - while (ptrClass !== desiredClass) { - if (!ptrClass.upcast) { - throwBindingError(`Expected null or instance of ${desiredClass.name}, got an instance of ${ptrClass.name}`); - } - ptr 
= ptrClass.upcast(ptr); - ptrClass = ptrClass.baseClass; - } - return ptr; - }; - - var embindRepr = (v) => { - if (v === null) { - return 'null'; - } - var t = typeof v; - if (t === 'object' || t === 'array' || t === 'function') { - return v.toString(); - } else { - return '' + v; - } - }; - /** @suppress {globalThis} */ - function constNoSmartPtrRawPointerToWireType(destructors, handle) { - if (handle === null) { - if (this.isReference) { - throwBindingError(`null is not a valid ${this.name}`); - } - return 0; - } - - if (!handle.$$) { - throwBindingError(`Cannot pass "${embindRepr(handle)}" as a ${this.name}`); - } - if (!handle.$$.ptr) { - throwBindingError(`Cannot pass deleted object as a pointer of type ${this.name}`); - } - var handleClass = handle.$$.ptrType.registeredClass; - var ptr = upcastPointer(handle.$$.ptr, handleClass, this.registeredClass); - return ptr; - } - - - /** @suppress {globalThis} */ - function genericPointerToWireType(destructors, handle) { - var ptr; - if (handle === null) { - if (this.isReference) { - throwBindingError(`null is not a valid ${this.name}`); - } - - if (this.isSmartPointer) { - ptr = this.rawConstructor(); - if (destructors !== null) { - destructors.push(this.rawDestructor, ptr); - } - return ptr; - } else { - return 0; - } - } - - if (!handle || !handle.$$) { - throwBindingError(`Cannot pass "${embindRepr(handle)}" as a ${this.name}`); - } - if (!handle.$$.ptr) { - throwBindingError(`Cannot pass deleted object as a pointer of type ${this.name}`); - } - if (!this.isConst && handle.$$.ptrType.isConst) { - throwBindingError(`Cannot convert argument of type ${(handle.$$.smartPtrType ? 
handle.$$.smartPtrType.name : handle.$$.ptrType.name)} to parameter type ${this.name}`); - } - var handleClass = handle.$$.ptrType.registeredClass; - ptr = upcastPointer(handle.$$.ptr, handleClass, this.registeredClass); - - if (this.isSmartPointer) { - // TODO: this is not strictly true - // We could support BY_EMVAL conversions from raw pointers to smart pointers - // because the smart pointer can hold a reference to the handle - if (undefined === handle.$$.smartPtr) { - throwBindingError('Passing raw pointer to smart pointer is illegal'); - } - - switch (this.sharingPolicy) { - case 0: // NONE - // no upcasting - if (handle.$$.smartPtrType === this) { - ptr = handle.$$.smartPtr; - } else { - throwBindingError(`Cannot convert argument of type ${(handle.$$.smartPtrType ? handle.$$.smartPtrType.name : handle.$$.ptrType.name)} to parameter type ${this.name}`); - } - break; - - case 1: // INTRUSIVE - ptr = handle.$$.smartPtr; - break; - - case 2: // BY_EMVAL - if (handle.$$.smartPtrType === this) { - ptr = handle.$$.smartPtr; - } else { - var clonedHandle = handle['clone'](); - ptr = this.rawShare( - ptr, - Emval.toHandle(() => clonedHandle['delete']()) - ); - if (destructors !== null) { - destructors.push(this.rawDestructor, ptr); - } - } - break; - - default: - throwBindingError('Unsupporting sharing policy'); - } - } - return ptr; - } - - - - /** @suppress {globalThis} */ - function nonConstNoSmartPtrRawPointerToWireType(destructors, handle) { - if (handle === null) { - if (this.isReference) { - throwBindingError(`null is not a valid ${this.name}`); - } - return 0; - } - - if (!handle.$$) { - throwBindingError(`Cannot pass "${embindRepr(handle)}" as a ${this.name}`); - } - if (!handle.$$.ptr) { - throwBindingError(`Cannot pass deleted object as a pointer of type ${this.name}`); - } - if (handle.$$.ptrType.isConst) { - throwBindingError(`Cannot convert argument of type ${handle.$$.ptrType.name} to parameter type ${this.name}`); - } - var handleClass = 
handle.$$.ptrType.registeredClass; - var ptr = upcastPointer(handle.$$.ptr, handleClass, this.registeredClass); - return ptr; - } - - - - var downcastPointer = (ptr, ptrClass, desiredClass) => { - if (ptrClass === desiredClass) { - return ptr; - } - if (undefined === desiredClass.baseClass) { - return null; // no conversion - } - - var rv = downcastPointer(ptr, ptrClass, desiredClass.baseClass); - if (rv === null) { - return null; - } - return desiredClass.downcast(rv); - }; - - - var registeredInstances = { - }; - - var getBasestPointer = (class_, ptr) => { - if (ptr === undefined) { - throwBindingError('ptr should not be undefined'); - } - while (class_.baseClass) { - ptr = class_.upcast(ptr); - class_ = class_.baseClass; - } - return ptr; - }; - var getInheritedInstance = (class_, ptr) => { - ptr = getBasestPointer(class_, ptr); - return registeredInstances[ptr]; - }; - - - var makeClassHandle = (prototype, record) => { - if (!record.ptrType || !record.ptr) { - throwInternalError('makeClassHandle requires ptr and ptrType'); - } - var hasSmartPtrType = !!record.smartPtrType; - var hasSmartPtr = !!record.smartPtr; - if (hasSmartPtrType !== hasSmartPtr) { - throwInternalError('Both smartPtrType and smartPtr must be specified'); - } - record.count = { value: 1 }; - return attachFinalizer(Object.create(prototype, { - $$: { - value: record, - writable: true, - }, - })); - }; - /** @suppress {globalThis} */ - function RegisteredPointer_fromWireType(ptr) { - // ptr is a raw pointer (or a raw smartpointer) - - // rawPointer is a maybe-null raw pointer - var rawPointer = this.getPointee(ptr); - if (!rawPointer) { - this.destructor(ptr); - return null; - } - - var registeredInstance = getInheritedInstance(this.registeredClass, rawPointer); - if (undefined !== registeredInstance) { - // JS object has been neutered, time to repopulate it - if (0 === registeredInstance.$$.count.value) { - registeredInstance.$$.ptr = rawPointer; - registeredInstance.$$.smartPtr = ptr; - return 
registeredInstance['clone'](); - } else { - // else, just increment reference count on existing object - // it already has a reference to the smart pointer - var rv = registeredInstance['clone'](); - this.destructor(ptr); - return rv; - } - } - - function makeDefaultHandle() { - if (this.isSmartPointer) { - return makeClassHandle(this.registeredClass.instancePrototype, { - ptrType: this.pointeeType, - ptr: rawPointer, - smartPtrType: this, - smartPtr: ptr, - }); - } else { - return makeClassHandle(this.registeredClass.instancePrototype, { - ptrType: this, - ptr, - }); - } - } - - var actualType = this.registeredClass.getActualType(rawPointer); - var registeredPointerRecord = registeredPointers[actualType]; - if (!registeredPointerRecord) { - return makeDefaultHandle.call(this); - } - - var toType; - if (this.isConst) { - toType = registeredPointerRecord.constPointerType; - } else { - toType = registeredPointerRecord.pointerType; - } - var dp = downcastPointer( - rawPointer, - this.registeredClass, - toType.registeredClass); - if (dp === null) { - return makeDefaultHandle.call(this); - } - if (this.isSmartPointer) { - return makeClassHandle(toType.registeredClass.instancePrototype, { - ptrType: toType, - ptr: dp, - smartPtrType: this, - smartPtr: ptr, - }); - } else { - return makeClassHandle(toType.registeredClass.instancePrototype, { - ptrType: toType, - ptr: dp, - }); - } - } - var init_RegisteredPointer = () => { - Object.assign(RegisteredPointer.prototype, { - getPointee(ptr) { - if (this.rawGetPointee) { - ptr = this.rawGetPointee(ptr); - } - return ptr; - }, - destructor(ptr) { - this.rawDestructor?.(ptr); - }, - readValueFromPointer: readPointer, - fromWireType: RegisteredPointer_fromWireType, - }); - }; - /** @constructor - @param {*=} pointeeType, - @param {*=} sharingPolicy, - @param {*=} rawGetPointee, - @param {*=} rawConstructor, - @param {*=} rawShare, - @param {*=} rawDestructor, - */ - function RegisteredPointer( - name, - registeredClass, - 
isReference, - isConst, - - // smart pointer properties - isSmartPointer, - pointeeType, - sharingPolicy, - rawGetPointee, - rawConstructor, - rawShare, - rawDestructor - ) { - this.name = name; - this.registeredClass = registeredClass; - this.isReference = isReference; - this.isConst = isConst; - - // smart pointer properties - this.isSmartPointer = isSmartPointer; - this.pointeeType = pointeeType; - this.sharingPolicy = sharingPolicy; - this.rawGetPointee = rawGetPointee; - this.rawConstructor = rawConstructor; - this.rawShare = rawShare; - this.rawDestructor = rawDestructor; - - if (!isSmartPointer && registeredClass.baseClass === undefined) { - if (isConst) { - this.toWireType = constNoSmartPtrRawPointerToWireType; - this.destructorFunction = null; - } else { - this.toWireType = nonConstNoSmartPtrRawPointerToWireType; - this.destructorFunction = null; - } - } else { - this.toWireType = genericPointerToWireType; - // Here we must leave this.destructorFunction undefined, since whether genericPointerToWireType returns - // a pointer that needs to be freed up is runtime-dependent, and cannot be evaluated at registration time. - // TODO: Create an alternative mechanism that allows removing the use of var destructors = []; array in - // craftInvokerFunction altogether. - } - } - - /** @param {number=} numArguments */ - var replacePublicSymbol = (name, value, numArguments) => { - if (!Module.hasOwnProperty(name)) { - throwInternalError('Replacing nonexistent public symbol'); - } - // If there's an overload table for this symbol, replace the symbol in the overload table instead. 
- if (undefined !== Module[name].overloadTable && undefined !== numArguments) { - Module[name].overloadTable[numArguments] = value; - } else { - Module[name] = value; - Module[name].argCount = numArguments; - } - }; - - - - var wasmTableMirror = []; - - - var getWasmTableEntry = (funcPtr) => { - var func = wasmTableMirror[funcPtr]; - if (!func) { - /** @suppress {checkTypes} */ - wasmTableMirror[funcPtr] = func = wasmTable.get(funcPtr); - } - return func; - }; - var embind__requireFunction = (signature, rawFunction, isAsync = false) => { - - signature = AsciiToString(signature); - - function makeDynCaller() { - var rtn = getWasmTableEntry(rawFunction); - return rtn; - } - - var fp = makeDynCaller(); - if (typeof fp != 'function') { - throwBindingError(`unknown function pointer with signature ${signature}: ${rawFunction}`); - } - return fp; - }; - - - - class UnboundTypeError extends Error {} - - - - var getTypeName = (type) => { - var ptr = ___getTypeName(type); - var rv = AsciiToString(ptr); - _free(ptr); - return rv; - }; - var throwUnboundTypeError = (message, types) => { - var unboundTypes = []; - var seen = {}; - function visit(type) { - if (seen[type]) { - return; - } - if (registeredTypes[type]) { - return; - } - if (typeDependencies[type]) { - typeDependencies[type].forEach(visit); - return; - } - unboundTypes.push(type); - seen[type] = true; - } - types.forEach(visit); - - throw new UnboundTypeError(`${message}: ` + unboundTypes.map(getTypeName).join([', '])); - }; - - var __embind_register_class = (rawType, - rawPointerType, - rawConstPointerType, - baseClassRawType, - getActualTypeSignature, - getActualType, - upcastSignature, - upcast, - downcastSignature, - downcast, - name, - destructorSignature, - rawDestructor) => { - name = AsciiToString(name); - getActualType = embind__requireFunction(getActualTypeSignature, getActualType); - upcast &&= embind__requireFunction(upcastSignature, upcast); - downcast &&= embind__requireFunction(downcastSignature, 
downcast); - rawDestructor = embind__requireFunction(destructorSignature, rawDestructor); - var legalFunctionName = makeLegalFunctionName(name); - - exposePublicSymbol(legalFunctionName, function() { - // this code cannot run if baseClassRawType is zero - throwUnboundTypeError(`Cannot construct ${name} due to unbound types`, [baseClassRawType]); - }); - - whenDependentTypesAreResolved( - [rawType, rawPointerType, rawConstPointerType], - baseClassRawType ? [baseClassRawType] : [], - (base) => { - base = base[0]; - - var baseClass; - var basePrototype; - if (baseClassRawType) { - baseClass = base.registeredClass; - basePrototype = baseClass.instancePrototype; - } else { - basePrototype = ClassHandle.prototype; - } - - var constructor = createNamedFunction(name, function(...args) { - if (Object.getPrototypeOf(this) !== instancePrototype) { - throw new BindingError(`Use 'new' to construct ${name}`); - } - if (undefined === registeredClass.constructor_body) { - throw new BindingError(`${name} has no accessible constructor`); - } - var body = registeredClass.constructor_body[args.length]; - if (undefined === body) { - throw new BindingError(`Tried to invoke ctor of ${name} with invalid number of parameters (${args.length}) - expected (${Object.keys(registeredClass.constructor_body).toString()}) parameters instead!`); - } - return body.apply(this, args); - }); - - var instancePrototype = Object.create(basePrototype, { - constructor: { value: constructor }, - }); - - constructor.prototype = instancePrototype; - - var registeredClass = new RegisteredClass(name, - constructor, - instancePrototype, - rawDestructor, - baseClass, - getActualType, - upcast, - downcast); - - if (registeredClass.baseClass) { - // Keep track of class hierarchy. Used to allow sub-classes to inherit class functions. 
- registeredClass.baseClass.__derivedClasses ??= []; - - registeredClass.baseClass.__derivedClasses.push(registeredClass); - } - - var referenceConverter = new RegisteredPointer(name, - registeredClass, - true, - false, - false); - - var pointerConverter = new RegisteredPointer(name + '*', - registeredClass, - false, - false, - false); - - var constPointerConverter = new RegisteredPointer(name + ' const*', - registeredClass, - false, - true, - false); - - registeredPointers[rawType] = { - pointerType: pointerConverter, - constPointerType: constPointerConverter - }; - - replacePublicSymbol(legalFunctionName, constructor); - - return [referenceConverter, pointerConverter, constPointerConverter]; - } - ); - }; - - var heap32VectorToArray = (count, firstElement) => { - var array = []; - for (var i = 0; i < count; i++) { - // TODO(https://github.com/emscripten-core/emscripten/issues/17310): - // Find a way to hoist the `>> 2` or `>> 3` out of this loop. - array.push(HEAPU32[(((firstElement)+(i * 4))>>2)]); - } - return array; - }; - - - - - - - function usesDestructorStack(argTypes) { - // Skip return value at index 0 - it's not deleted here. 
- for (var i = 1; i < argTypes.length; ++i) { - // The type does not define a destructor function - must use dynamic stack - if (argTypes[i] !== null && argTypes[i].destructorFunction === undefined) { - return true; - } - } - return false; - } - - function createJsInvoker(argTypes, isClassMethodFunc, returns, isAsync) { - var needsDestructorStack = usesDestructorStack(argTypes); - var argCount = argTypes.length - 2; - var argsList = []; - var argsListWired = ['fn']; - if (isClassMethodFunc) { - argsListWired.push('thisWired'); - } - for (var i = 0; i < argCount; ++i) { - argsList.push(`arg${i}`) - argsListWired.push(`arg${i}Wired`) - } - argsList = argsList.join(',') - argsListWired = argsListWired.join(',') - - var invokerFnBody = `return function (${argsList}) {\n`; - - if (needsDestructorStack) { - invokerFnBody += "var destructors = [];\n"; - } - - var dtorStack = needsDestructorStack ? "destructors" : "null"; - var args1 = ["humanName", "throwBindingError", "invoker", "fn", "runDestructors", "fromRetWire", "toClassParamWire"]; - - if (isClassMethodFunc) { - invokerFnBody += `var thisWired = toClassParamWire(${dtorStack}, this);\n`; - } - - for (var i = 0; i < argCount; ++i) { - var argName = `toArg${i}Wire`; - invokerFnBody += `var arg${i}Wired = ${argName}(${dtorStack}, arg${i});\n`; - args1.push(argName); - } - - invokerFnBody += (returns || isAsync ? "var rv = ":"") + `invoker(${argsListWired});\n`; - - var returnVal = returns ? "rv" : ""; - - if (needsDestructorStack) { - invokerFnBody += "runDestructors(destructors);\n"; - } else { - for (var i = isClassMethodFunc?1:2; i < argTypes.length; ++i) { // Skip return value at index 0 - it's not deleted here. Also skip class type if not a method. - var paramName = (i === 1 ? 
"thisWired" : ("arg"+(i - 2)+"Wired")); - if (argTypes[i].destructorFunction !== null) { - invokerFnBody += `${paramName}_dtor(${paramName});\n`; - args1.push(`${paramName}_dtor`); - } - } - } - - if (returns) { - invokerFnBody += "var ret = fromRetWire(rv);\n" + - "return ret;\n"; - } else { - } - - invokerFnBody += "}\n"; - - return new Function(args1, invokerFnBody); - } - function craftInvokerFunction(humanName, argTypes, classType, cppInvokerFunc, cppTargetFunc, /** boolean= */ isAsync) { - // humanName: a human-readable string name for the function to be generated. - // argTypes: An array that contains the embind type objects for all types in the function signature. - // argTypes[0] is the type object for the function return value. - // argTypes[1] is the type object for function this object/class type, or null if not crafting an invoker for a class method. - // argTypes[2...] are the actual function parameters. - // classType: The embind type object for the class to be bound, or null if this is not a method of a class. - // cppInvokerFunc: JS Function object to the C++-side function that interops into C++ code. - // cppTargetFunc: Function pointer (an integer to FUNCTION_TABLE) to the target C++ function the cppInvokerFunc will end up calling. - // isAsync: Optional. If true, returns an async function. Async bindings are only supported with JSPI. - var argCount = argTypes.length; - - if (argCount < 2) { - throwBindingError("argTypes array size mismatch! Must at least get return value and 'this' types!"); - } - - var isClassMethodFunc = (argTypes[1] !== null && classType !== null); - - // Free functions with signature "void function()" do not need an invoker that marshalls between wire types. - // TODO: This omits argument count check - enable only at -O3 or similar. 
- // if (ENABLE_UNSAFE_OPTS && argCount == 2 && argTypes[0].name == "void" && !isClassMethodFunc) { - // return FUNCTION_TABLE[fn]; - // } - - // Determine if we need to use a dynamic stack to store the destructors for the function parameters. - // TODO: Remove this completely once all function invokers are being dynamically generated. - var needsDestructorStack = usesDestructorStack(argTypes); - - var returns = !argTypes[0].isVoid; - - var expectedArgCount = argCount - 2; - // Builld the arguments that will be passed into the closure around the invoker - // function. - var retType = argTypes[0]; - var instType = argTypes[1]; - var closureArgs = [humanName, throwBindingError, cppInvokerFunc, cppTargetFunc, runDestructors, retType.fromWireType.bind(retType), instType?.toWireType.bind(instType)]; - for (var i = 2; i < argCount; ++i) { - var argType = argTypes[i]; - closureArgs.push(argType.toWireType.bind(argType)); - } - if (!needsDestructorStack) { - // Skip return value at index 0 - it's not deleted here. Also skip class type if not a method. 
- for (var i = isClassMethodFunc?1:2; i < argTypes.length; ++i) { - if (argTypes[i].destructorFunction !== null) { - closureArgs.push(argTypes[i].destructorFunction); - } - } - } - - let invokerFactory = createJsInvoker(argTypes, isClassMethodFunc, returns, isAsync); - var invokerFn = invokerFactory(...closureArgs); - return createNamedFunction(humanName, invokerFn); - } - var __embind_register_class_constructor = ( - rawClassType, - argCount, - rawArgTypesAddr, - invokerSignature, - invoker, - rawConstructor - ) => { - var rawArgTypes = heap32VectorToArray(argCount, rawArgTypesAddr); - invoker = embind__requireFunction(invokerSignature, invoker); - var args = [rawConstructor]; - var destructors = []; - - whenDependentTypesAreResolved([], [rawClassType], (classType) => { - classType = classType[0]; - var humanName = `constructor ${classType.name}`; - - if (undefined === classType.registeredClass.constructor_body) { - classType.registeredClass.constructor_body = []; - } - if (undefined !== classType.registeredClass.constructor_body[argCount - 1]) { - throw new BindingError(`Cannot register multiple constructors with identical number of parameters (${argCount-1}) for class '${classType.name}'! Overload resolution is currently only performed using the parameter count, not actual type info!`); - } - classType.registeredClass.constructor_body[argCount - 1] = () => { - throwUnboundTypeError(`Cannot construct ${classType.name} due to unbound types`, rawArgTypes); - }; - - whenDependentTypesAreResolved([], rawArgTypes, (argTypes) => { - // Insert empty slot for context type (argTypes[1]). 
- argTypes.splice(1, 0, null); - classType.registeredClass.constructor_body[argCount - 1] = craftInvokerFunction(humanName, argTypes, null, invoker, rawConstructor); - return []; - }); - return []; - }); - }; - - - - - - - - var getFunctionName = (signature) => { - signature = signature.trim(); - const argsIndex = signature.indexOf("("); - if (argsIndex === -1) return signature; - return signature.slice(0, argsIndex); - }; - var __embind_register_class_function = (rawClassType, - methodName, - argCount, - rawArgTypesAddr, // [ReturnType, ThisType, Args...] - invokerSignature, - rawInvoker, - context, - isPureVirtual, - isAsync, - isNonnullReturn) => { - var rawArgTypes = heap32VectorToArray(argCount, rawArgTypesAddr); - methodName = AsciiToString(methodName); - methodName = getFunctionName(methodName); - rawInvoker = embind__requireFunction(invokerSignature, rawInvoker, isAsync); - - whenDependentTypesAreResolved([], [rawClassType], (classType) => { - classType = classType[0]; - var humanName = `${classType.name}.${methodName}`; - - if (methodName.startsWith("@@")) { - methodName = Symbol[methodName.substring(2)]; - } - - if (isPureVirtual) { - classType.registeredClass.pureVirtualFunctions.push(methodName); - } - - function unboundTypesHandler() { - throwUnboundTypeError(`Cannot call ${humanName} due to unbound types`, rawArgTypes); - } - - var proto = classType.registeredClass.instancePrototype; - var method = proto[methodName]; - if (undefined === method || (undefined === method.overloadTable && method.className !== classType.name && method.argCount === argCount - 2)) { - // This is the first overload to be registered, OR we are replacing a - // function in the base class with a function in the derived class. - unboundTypesHandler.argCount = argCount - 2; - unboundTypesHandler.className = classType.name; - proto[methodName] = unboundTypesHandler; - } else { - // There was an existing function with the same name registered. 
Set up - // a function overload routing table. - ensureOverloadTable(proto, methodName, humanName); - proto[methodName].overloadTable[argCount - 2] = unboundTypesHandler; - } - - whenDependentTypesAreResolved([], rawArgTypes, (argTypes) => { - var memberFunction = craftInvokerFunction(humanName, argTypes, classType, rawInvoker, context, isAsync); - - // Replace the initial unbound-handler-stub function with the - // appropriate member function, now that all types are resolved. If - // multiple overloads are registered for this function, the function - // goes into an overload table. - if (undefined === proto[methodName].overloadTable) { - // Set argCount in case an overload is registered later - memberFunction.argCount = argCount - 2; - proto[methodName] = memberFunction; - } else { - proto[methodName].overloadTable[argCount - 2] = memberFunction; - } - - return []; - }); - return []; - }); - }; - - - var __embind_register_constant = (name, type, value) => { - name = AsciiToString(name); - whenDependentTypesAreResolved([], [type], (type) => { - type = type[0]; - Module[name] = type.fromWireType(value); - return []; - }); - }; - - - var emval_freelist = []; - - var emval_handles = [0,1,,1,null,1,true,1,false,1]; - var __emval_decref = (handle) => { - if (handle > 9 && 0 === --emval_handles[handle + 1]) { - emval_handles[handle] = undefined; - emval_freelist.push(handle); - } - }; - - - - var Emval = { - toValue:(handle) => { - if (!handle) { - throwBindingError(`Cannot use deleted val. 
handle = ${handle}`); - } - return emval_handles[handle]; - }, - toHandle:(value) => { - switch (value) { - case undefined: return 2; - case null: return 4; - case true: return 6; - case false: return 8; - default:{ - const handle = emval_freelist.pop() || emval_handles.length; - emval_handles[handle] = value; - emval_handles[handle + 1] = 1; - return handle; - } - } - }, - }; - - var EmValType = { - name: 'emscripten::val', - fromWireType: (handle) => { - var rv = Emval.toValue(handle); - __emval_decref(handle); - return rv; - }, - toWireType: (destructors, value) => Emval.toHandle(value), - readValueFromPointer: readPointer, - destructorFunction: null, // This type does not need a destructor - - // TODO: do we need a deleteObject here? write a test where - // emval is passed into JS via an interface - }; - var __embind_register_emval = (rawType) => registerType(rawType, EmValType); - - - var enumReadValueFromPointer = (name, width, signed) => { - switch (width) { - case 1: return signed ? - function(pointer) { return this.fromWireType(HEAP8[pointer]) } : - function(pointer) { return this.fromWireType(HEAPU8[pointer]) }; - case 2: return signed ? - function(pointer) { return this.fromWireType(HEAP16[((pointer)>>1)]) } : - function(pointer) { return this.fromWireType(HEAPU16[((pointer)>>1)]) }; - case 4: return signed ? 
- function(pointer) { return this.fromWireType(HEAP32[((pointer)>>2)]) } : - function(pointer) { return this.fromWireType(HEAPU32[((pointer)>>2)]) }; - default: - throw new TypeError(`invalid integer width (${width}): ${name}`); - } - }; - - - /** @suppress {globalThis} */ - var __embind_register_enum = (rawType, name, size, isSigned) => { - name = AsciiToString(name); - - function ctor() {} - ctor.values = {}; - - registerType(rawType, { - name, - constructor: ctor, - fromWireType: function(c) { - return this.constructor.values[c]; - }, - toWireType: (destructors, c) => c.value, - readValueFromPointer: enumReadValueFromPointer(name, size, isSigned), - destructorFunction: null, - }); - exposePublicSymbol(name, ctor); - }; - - - - - - var requireRegisteredType = (rawType, humanName) => { - var impl = registeredTypes[rawType]; - if (undefined === impl) { - throwBindingError(`${humanName} has unknown type ${getTypeName(rawType)}`); - } - return impl; - }; - var __embind_register_enum_value = (rawEnumType, name, enumValue) => { - var enumType = requireRegisteredType(rawEnumType, 'enum'); - name = AsciiToString(name); - - var Enum = enumType.constructor; - - var Value = Object.create(enumType.constructor.prototype, { - value: {value: enumValue}, - constructor: {value: createNamedFunction(`${enumType.name}_${name}`, function() {})}, - }); - Enum.values[enumValue] = Value; - Enum[name] = Value; - }; - - var floatReadValueFromPointer = (name, width) => { - switch (width) { - case 4: return function(pointer) { - return this.fromWireType(HEAPF32[((pointer)>>2)]); - }; - case 8: return function(pointer) { - return this.fromWireType(HEAPF64[((pointer)>>3)]); - }; - default: - throw new TypeError(`invalid float width (${width}): ${name}`); - } - }; - - - var __embind_register_float = (rawType, name, size) => { - name = AsciiToString(name); - registerType(rawType, { - name, - fromWireType: (value) => value, - toWireType: (destructors, value) => { - // The VM will perform JS to 
Wasm value conversion, according to the spec: - // https://www.w3.org/TR/wasm-js-api-1/#towebassemblyvalue - return value; - }, - readValueFromPointer: floatReadValueFromPointer(name, size), - destructorFunction: null, // This type does not need a destructor - }); - }; - - - - - - - - - - var __embind_register_function = (name, argCount, rawArgTypesAddr, signature, rawInvoker, fn, isAsync, isNonnullReturn) => { - var argTypes = heap32VectorToArray(argCount, rawArgTypesAddr); - name = AsciiToString(name); - name = getFunctionName(name); - - rawInvoker = embind__requireFunction(signature, rawInvoker, isAsync); - - exposePublicSymbol(name, function() { - throwUnboundTypeError(`Cannot call ${name} due to unbound types`, argTypes); - }, argCount - 1); - - whenDependentTypesAreResolved([], argTypes, (argTypes) => { - var invokerArgsArray = [argTypes[0] /* return value */, null /* no class 'this'*/].concat(argTypes.slice(1) /* actual params */); - replacePublicSymbol(name, craftInvokerFunction(name, invokerArgsArray, null /* no class 'this'*/, rawInvoker, fn, isAsync), argCount - 1); - return []; - }); - }; - - - - /** @suppress {globalThis} */ - var __embind_register_integer = (primitiveType, name, size, minRange, maxRange) => { - name = AsciiToString(name); - - const isUnsignedType = minRange === 0; - - let fromWireType = (value) => value; - if (isUnsignedType) { - var bitshift = 32 - 8*size; - fromWireType = (value) => (value << bitshift) >>> bitshift; - maxRange = fromWireType(maxRange); - } - - registerType(primitiveType, { - name, - fromWireType: fromWireType, - toWireType: (destructors, value) => { - // The VM will perform JS to Wasm value conversion, according to the spec: - // https://www.w3.org/TR/wasm-js-api-1/#towebassemblyvalue - return value; - }, - readValueFromPointer: integerReadValueFromPointer(name, size, minRange !== 0), - destructorFunction: null, // This type does not need a destructor - }); - }; - - - var __embind_register_memory_view = (rawType, 
dataTypeIndex, name) => { - var typeMapping = [ - Int8Array, - Uint8Array, - Int16Array, - Uint16Array, - Int32Array, - Uint32Array, - Float32Array, - Float64Array, - BigInt64Array, - BigUint64Array, - ]; - - var TA = typeMapping[dataTypeIndex]; - - function decodeMemoryView(handle) { - var size = HEAPU32[((handle)>>2)]; - var data = HEAPU32[(((handle)+(4))>>2)]; - return new TA(HEAP8.buffer, data, size); - } - - name = AsciiToString(name); - registerType(rawType, { - name, - fromWireType: decodeMemoryView, - readValueFromPointer: decodeMemoryView, - }, { - ignoreDuplicateRegistrations: true, - }); - }; - - - - - - var stringToUTF8 = (str, outPtr, maxBytesToWrite) => { - return stringToUTF8Array(str, HEAPU8, outPtr, maxBytesToWrite); - }; - - - - - var __embind_register_std_string = (rawType, name) => { - name = AsciiToString(name); - var stdStringIsUTF8 = true; - - registerType(rawType, { - name, - // For some method names we use string keys here since they are part of - // the public/external API and/or used by the runtime-generated code. 
- fromWireType(value) { - var length = HEAPU32[((value)>>2)]; - var payload = value + 4; - - var str; - if (stdStringIsUTF8) { - str = UTF8ToString(payload, length, true); - } else { - str = ''; - for (var i = 0; i < length; ++i) { - str += String.fromCharCode(HEAPU8[payload + i]); - } - } - - _free(value); - - return str; - }, - toWireType(destructors, value) { - if (value instanceof ArrayBuffer) { - value = new Uint8Array(value); - } - - var length; - var valueIsOfTypeString = (typeof value == 'string'); - - // We accept `string` or array views with single byte elements - if (!(valueIsOfTypeString || (ArrayBuffer.isView(value) && value.BYTES_PER_ELEMENT == 1))) { - throwBindingError('Cannot pass non-string to std::string'); - } - if (stdStringIsUTF8 && valueIsOfTypeString) { - length = lengthBytesUTF8(value); - } else { - length = value.length; - } - - // assumes POINTER_SIZE alignment - var base = _malloc(4 + length + 1); - var ptr = base + 4; - HEAPU32[((base)>>2)] = length; - if (valueIsOfTypeString) { - if (stdStringIsUTF8) { - stringToUTF8(value, ptr, length + 1); - } else { - for (var i = 0; i < length; ++i) { - var charCode = value.charCodeAt(i); - if (charCode > 255) { - _free(base); - throwBindingError('String has UTF-16 code units that do not fit in 8 bits'); - } - HEAPU8[ptr + i] = charCode; - } - } - } else { - HEAPU8.set(value, ptr); - } - - if (destructors !== null) { - destructors.push(_free, base); - } - return base; - }, - readValueFromPointer: readPointer, - destructorFunction(ptr) { - _free(ptr); - }, - }); - }; - - - - - var UTF16Decoder = globalThis.TextDecoder ? new TextDecoder('utf-16le') : undefined;; - - var UTF16ToString = (ptr, maxBytesToRead, ignoreNul) => { - var idx = ((ptr)>>1); - var endIdx = findStringEnd(HEAPU16, idx, maxBytesToRead / 2, ignoreNul); - - // When using conditional TextDecoder, skip it for short strings as the overhead of the native call is not worth it. 
- if (endIdx - idx > 16 && UTF16Decoder) - return UTF16Decoder.decode(HEAPU16.subarray(idx, endIdx)); - - // Fallback: decode without UTF16Decoder - var str = ''; - - // If maxBytesToRead is not passed explicitly, it will be undefined, and the - // for-loop's condition will always evaluate to true. The loop is then - // terminated on the first null char. - for (var i = idx; i < endIdx; ++i) { - var codeUnit = HEAPU16[i]; - // fromCharCode constructs a character from a UTF-16 code unit, so we can - // pass the UTF16 string right through. - str += String.fromCharCode(codeUnit); - } - - return str; - }; - - var stringToUTF16 = (str, outPtr, maxBytesToWrite) => { - // Backwards compatibility: if max bytes is not specified, assume unsafe unbounded write is allowed. - maxBytesToWrite ??= 0x7FFFFFFF; - if (maxBytesToWrite < 2) return 0; - maxBytesToWrite -= 2; // Null terminator. - var startPtr = outPtr; - var numCharsToWrite = (maxBytesToWrite < str.length*2) ? (maxBytesToWrite / 2) : str.length; - for (var i = 0; i < numCharsToWrite; ++i) { - // charCodeAt returns a UTF-16 encoded code unit, so it can be directly written to the HEAP. - var codeUnit = str.charCodeAt(i); // possibly a lead surrogate - HEAP16[((outPtr)>>1)] = codeUnit; - outPtr += 2; - } - // Null-terminate the pointer to the HEAP. - HEAP16[((outPtr)>>1)] = 0; - return outPtr - startPtr; - }; - - var lengthBytesUTF16 = (str) => str.length*2; - - var UTF32ToString = (ptr, maxBytesToRead, ignoreNul) => { - var str = ''; - var startIdx = ((ptr)>>2); - // If maxBytesToRead is not passed explicitly, it will be undefined, and this - // will always evaluate to true. This saves on code size. 
- for (var i = 0; !(i >= maxBytesToRead / 4); i++) { - var utf32 = HEAPU32[startIdx + i]; - if (!utf32 && !ignoreNul) break; - str += String.fromCodePoint(utf32); - } - return str; - }; - - var stringToUTF32 = (str, outPtr, maxBytesToWrite) => { - // Backwards compatibility: if max bytes is not specified, assume unsafe unbounded write is allowed. - maxBytesToWrite ??= 0x7FFFFFFF; - if (maxBytesToWrite < 4) return 0; - var startPtr = outPtr; - var endPtr = startPtr + maxBytesToWrite - 4; - for (var i = 0; i < str.length; ++i) { - var codePoint = str.codePointAt(i); - // Gotcha: if codePoint is over 0xFFFF, it is represented as a surrogate pair in UTF-16. - // We need to manually skip over the second code unit for correct iteration. - if (codePoint > 0xFFFF) { - i++; - } - HEAP32[((outPtr)>>2)] = codePoint; - outPtr += 4; - if (outPtr + 4 > endPtr) break; - } - // Null-terminate the pointer to the HEAP. - HEAP32[((outPtr)>>2)] = 0; - return outPtr - startPtr; - }; - - var lengthBytesUTF32 = (str) => { - var len = 0; - for (var i = 0; i < str.length; ++i) { - var codePoint = str.codePointAt(i); - // Gotcha: if codePoint is over 0xFFFF, it is represented as a surrogate pair in UTF-16. - // We need to manually skip over the second code unit for correct iteration. 
- if (codePoint > 0xFFFF) { - i++; - } - len += 4; - } - - return len; - }; - var __embind_register_std_wstring = (rawType, charSize, name) => { - name = AsciiToString(name); - var decodeString, encodeString, lengthBytesUTF; - if (charSize === 2) { - decodeString = UTF16ToString; - encodeString = stringToUTF16; - lengthBytesUTF = lengthBytesUTF16; - } else { - decodeString = UTF32ToString; - encodeString = stringToUTF32; - lengthBytesUTF = lengthBytesUTF32; - } - registerType(rawType, { - name, - fromWireType: (value) => { - // Code mostly taken from _embind_register_std_string fromWireType - var length = HEAPU32[((value)>>2)]; - var str = decodeString(value + 4, length * charSize, true); - - _free(value); - - return str; - }, - toWireType: (destructors, value) => { - if (!(typeof value == 'string')) { - throwBindingError(`Cannot pass non-string to C++ string type ${name}`); - } - - // assumes POINTER_SIZE alignment - var length = lengthBytesUTF(value); - var ptr = _malloc(4 + length + charSize); - HEAPU32[((ptr)>>2)] = length / charSize; - - encodeString(value, ptr + 4, length + charSize); - - if (destructors !== null) { - destructors.push(_free, ptr); - } - return ptr; - }, - readValueFromPointer: readPointer, - destructorFunction(ptr) { - _free(ptr); - } - }); - }; - - - - var __embind_register_value_object = ( - rawType, - name, - constructorSignature, - rawConstructor, - destructorSignature, - rawDestructor - ) => { - structRegistrations[rawType] = { - name: AsciiToString(name), - rawConstructor: embind__requireFunction(constructorSignature, rawConstructor), - rawDestructor: embind__requireFunction(destructorSignature, rawDestructor), - fields: [], - }; - }; - - - - var __embind_register_value_object_field = ( - structType, - fieldName, - getterReturnType, - getterSignature, - getter, - getterContext, - setterArgumentType, - setterSignature, - setter, - setterContext - ) => { - structRegistrations[structType].fields.push({ - fieldName: 
AsciiToString(fieldName), - getterReturnType, - getter: embind__requireFunction(getterSignature, getter), - getterContext, - setterArgumentType, - setter: embind__requireFunction(setterSignature, setter), - setterContext, - }); - }; - - - var __embind_register_void = (rawType, name) => { - name = AsciiToString(name); - registerType(rawType, { - isVoid: true, // void return values can be optimized out sometimes - name, - fromWireType: () => undefined, - // TODO: assert if anything else is given? - toWireType: (destructors, o) => undefined, - }); - }; - - var __emscripten_throw_longjmp = () => { - throw Infinity; - }; - - var emval_methodCallers = []; - var emval_addMethodCaller = (caller) => { - var id = emval_methodCallers.length; - emval_methodCallers.push(caller); - return id; - }; - - var emval_lookupTypes = (argCount, argTypes) => { - var a = new Array(argCount); - for (var i = 0; i < argCount; ++i) { - a[i] = requireRegisteredType(HEAPU32[(((argTypes)+(i*4))>>2)], - `parameter ${i}`); - } - return a; - }; - - - var emval_returnValue = (toReturnWire, destructorsRef, handle) => { - var destructors = []; - var result = toReturnWire(destructors, handle); - if (destructors.length) { - // void, primitives and any other types w/o destructors don't need to allocate a handle - HEAPU32[((destructorsRef)>>2)] = Emval.toHandle(destructors); - } - return result; - }; - - - var emval_symbols = { - }; - - var getStringOrSymbol = (address) => { - var symbol = emval_symbols[address]; - if (symbol === undefined) { - return AsciiToString(address); - } - return symbol; - }; - var __emval_create_invoker = (argCount, argTypesPtr, kind) => { - var GenericWireTypeSize = 8; - - var [retType, ...argTypes] = emval_lookupTypes(argCount, argTypesPtr); - var toReturnWire = retType.toWireType.bind(retType); - var argFromPtr = argTypes.map(type => type.readValueFromPointer.bind(type)); - argCount--; // remove the extracted return type - - var captures = {'toValue': Emval.toValue}; - var args 
= argFromPtr.map((argFromPtr, i) => { - var captureName = `argFromPtr${i}`; - captures[captureName] = argFromPtr; - return `${captureName}(args${i ? '+' + i * GenericWireTypeSize : ''})`; - }); - var functionBody; - switch (kind){ - case 0: - functionBody = 'toValue(handle)'; - break; - case 2: - functionBody = 'new (toValue(handle))'; - break; - case 3: - functionBody = ''; - break; - case 1: - captures['getStringOrSymbol'] = getStringOrSymbol; - functionBody = 'toValue(handle)[getStringOrSymbol(methodName)]'; - break; - } - functionBody += `(${args})`; - if (!retType.isVoid) { - captures['toReturnWire'] = toReturnWire; - captures['emval_returnValue'] = emval_returnValue; - functionBody = `return emval_returnValue(toReturnWire, destructorsRef, ${functionBody})`; - } - functionBody = `return function (handle, methodName, destructorsRef, args) { - ${functionBody} - }`; - - var invokerFunction = new Function(Object.keys(captures), functionBody)(...Object.values(captures)); - var functionName = `methodCaller<(${argTypes.map(t => t.name)}) => ${retType.name}>`; - return emval_addMethodCaller(createNamedFunction(functionName, invokerFunction)); - }; - - - - var __emval_get_global = (name) => { - if (!name) { - return Emval.toHandle(globalThis); - } - name = getStringOrSymbol(name); - return Emval.toHandle(globalThis[name]); - }; - - - var __emval_get_module_property = (name) => { - name = getStringOrSymbol(name); - return Emval.toHandle(Module[name]); - }; - - var __emval_get_property = (handle, key) => { - handle = Emval.toValue(handle); - key = Emval.toValue(key); - return Emval.toHandle(handle[key]); - }; - - var __emval_incref = (handle) => { - if (handle > 9) { - emval_handles[handle + 1] += 1; - } - }; - - - - var __emval_invoke = (caller, handle, methodName, destructorsRef, args) => { - return emval_methodCallers[caller](handle, methodName, destructorsRef, args); - }; - - - var __emval_new_cstring = (v) => Emval.toHandle(getStringOrSymbol(v)); - - - - var 
__emval_run_destructors = (handle) => { - var destructors = Emval.toValue(handle); - runDestructors(destructors); - __emval_decref(handle); - }; - - - - - - - var INT53_MAX = 9007199254740992; - - var INT53_MIN = -9007199254740992; - var bigintToI53Checked = (num) => (num < INT53_MIN || num > INT53_MAX) ? NaN : Number(num); - function __mmap_js(len, prot, flags, fd, offset, allocated, addr) { - offset = bigintToI53Checked(offset); - - - try { - - var stream = SYSCALLS.getStreamFromFD(fd); - var res = FS.mmap(stream, len, offset, prot, flags); - var ptr = res.ptr; - HEAP32[((allocated)>>2)] = res.allocated; - HEAPU32[((addr)>>2)] = ptr; - return 0; - } catch (e) { - if (typeof FS == 'undefined' || !(e.name === 'ErrnoError')) throw e; - return -e.errno; - } - ; - } - - - function __munmap_js(addr, len, prot, flags, fd, offset) { - offset = bigintToI53Checked(offset); - - - try { - - var stream = SYSCALLS.getStreamFromFD(fd); - if (prot & 2) { - SYSCALLS.doMsync(addr, stream, len, flags, offset); - } - } catch (e) { - if (typeof FS == 'undefined' || !(e.name === 'ErrnoError')) throw e; - return -e.errno; - } - ; - } - - var __tzset_js = (timezone, daylight, std_name, dst_name) => { - // TODO: Use (malleable) environment variables instead of system settings. - var currentYear = new Date().getFullYear(); - var winter = new Date(currentYear, 0, 1); - var summer = new Date(currentYear, 6, 1); - var winterOffset = winter.getTimezoneOffset(); - var summerOffset = summer.getTimezoneOffset(); - - // Local standard timezone offset. Local standard time is not adjusted for - // daylight savings. This code uses the fact that getTimezoneOffset returns - // a greater value during Standard Time versus Daylight Saving Time (DST). - // Thus it determines the expected output during Standard Time, and it - // compares whether the output of the given date the same (Standard) or less - // (DST). 
- var stdTimezoneOffset = Math.max(winterOffset, summerOffset); - - // timezone is specified as seconds west of UTC ("The external variable - // `timezone` shall be set to the difference, in seconds, between - // Coordinated Universal Time (UTC) and local standard time."), the same - // as returned by stdTimezoneOffset. - // See http://pubs.opengroup.org/onlinepubs/009695399/functions/tzset.html - HEAPU32[((timezone)>>2)] = stdTimezoneOffset * 60; - - HEAP32[((daylight)>>2)] = Number(winterOffset != summerOffset); - - var extractZone = (timezoneOffset) => { - // Why inverse sign? - // Read here https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Date/getTimezoneOffset - var sign = timezoneOffset >= 0 ? "-" : "+"; - - var absOffset = Math.abs(timezoneOffset) - var hours = String(Math.floor(absOffset / 60)).padStart(2, "0"); - var minutes = String(absOffset % 60).padStart(2, "0"); - - return `UTC${sign}${hours}${minutes}`; - } - - var winterName = extractZone(winterOffset); - var summerName = extractZone(summerOffset); - if (summerOffset < winterOffset) { - // Northern hemisphere - stringToUTF8(winterName, std_name, 17); - stringToUTF8(summerName, dst_name, 17); - } else { - stringToUTF8(winterName, dst_name, 17); - stringToUTF8(summerName, std_name, 17); - } - }; - - var _emscripten_get_now = () => performance.now(); - - var _emscripten_date_now = () => Date.now(); - - var nowIsMonotonic = 1; - - var checkWasiClock = (clock_id) => clock_id >= 0 && clock_id <= 3; - - function _clock_time_get(clk_id, ignored_precision, ptime) { - ignored_precision = bigintToI53Checked(ignored_precision); - - - if (!checkWasiClock(clk_id)) { - return 28; - } - var now; - // all wasi clocks but realtime are monotonic - if (clk_id === 0) { - now = _emscripten_date_now(); - } else if (nowIsMonotonic) { - now = _emscripten_get_now(); - } else { - return 52; - } - // "now" is in ms, and wasi times are in ns. 
- var nsec = Math.round(now * 1000 * 1000); - HEAP64[((ptime)>>3)] = BigInt(nsec); - return 0; - ; - } - - - var getHeapMax = () => - // Stay one Wasm page short of 4GB: while e.g. Chrome is able to allocate - // full 4GB Wasm memories, the size will wrap back to 0 bytes in Wasm side - // for any code that deals with heap sizes, which would require special - // casing all heap size related code to treat 0 specially. - 2147483648; - var _emscripten_get_heap_max = () => getHeapMax(); - - - - - var growMemory = (size) => { - var oldHeapSize = wasmMemory.buffer.byteLength; - var pages = ((size - oldHeapSize + 65535) / 65536) | 0; - try { - // round size grow request up to wasm page size (fixed 64KB per spec) - wasmMemory.grow(pages); // .grow() takes a delta compared to the previous size - updateMemoryViews(); - return 1 /*success*/; - } catch(e) { - } - // implicit 0 return to save code size (caller will cast "undefined" into 0 - // anyhow) - }; - var _emscripten_resize_heap = (requestedSize) => { - var oldSize = HEAPU8.length; - // With CAN_ADDRESS_2GB or MEMORY64, pointers are already unsigned. - requestedSize >>>= 0; - // With multithreaded builds, races can happen (another thread might increase the size - // in between), so return a failure, and let the caller retry. - - // Memory resize rules: - // 1. Always increase heap size to at least the requested size, rounded up - // to next page multiple. - // 2a. If MEMORY_GROWTH_LINEAR_STEP == -1, excessively resize the heap - // geometrically: increase the heap size according to - // MEMORY_GROWTH_GEOMETRIC_STEP factor (default +20%), At most - // overreserve by MEMORY_GROWTH_GEOMETRIC_CAP bytes (default 96MB). - // 2b. If MEMORY_GROWTH_LINEAR_STEP != -1, excessively resize the heap - // linearly: increase the heap size by at least - // MEMORY_GROWTH_LINEAR_STEP bytes. - // 3. Max size for the heap is capped at 2048MB-WASM_PAGE_SIZE, or by - // MAXIMUM_MEMORY, or by ASAN limit, depending on which is smallest - // 4. 
If we were unable to allocate as much memory, it may be due to - // over-eager decision to excessively reserve due to (3) above. - // Hence if an allocation fails, cut down on the amount of excess - // growth, in an attempt to succeed to perform a smaller allocation. - - // A limit is set for how much we can grow. We should not exceed that - // (the wasm binary specifies it, so if we tried, we'd fail anyhow). - var maxHeapSize = getHeapMax(); - if (requestedSize > maxHeapSize) { - return false; - } - - // Loop through potential heap size increases. If we attempt a too eager - // reservation that fails, cut down on the attempted size and reserve a - // smaller bump instead. (max 3 times, chosen somewhat arbitrarily) - for (var cutDown = 1; cutDown <= 4; cutDown *= 2) { - var overGrownHeapSize = oldSize * (1 + 0.2 / cutDown); // ensure geometric growth - // but limit overreserving (default to capping at +96MB overgrowth at most) - overGrownHeapSize = Math.min(overGrownHeapSize, requestedSize + 100663296 ); - - var newSize = Math.min(maxHeapSize, alignMemory(Math.max(requestedSize, overGrownHeapSize), 65536)); - - var replacement = growMemory(newSize); - if (replacement) { - - return true; - } - } - return false; - }; - - var ENV = { - }; - - var getExecutableName = () => thisProgram || './this.program'; - var getEnvStrings = () => { - if (!getEnvStrings.strings) { - // Default values. - // Browser language detection #8751 - var lang = ((typeof navigator == 'object' && navigator.language) || 'C').replace('-', '_') + '.UTF-8'; - var env = { - 'USER': 'web_user', - 'LOGNAME': 'web_user', - 'PATH': '/', - 'PWD': '/', - 'HOME': '/home/web_user', - 'LANG': lang, - '_': getExecutableName() - }; - // Apply the user-provided values, if any. - for (var x in ENV) { - // x is a key in ENV; if ENV[x] is undefined, that means it was - // explicitly set to be so. We allow user code to do that to - // force variables with default values to remain unset. 
- if (ENV[x] === undefined) delete env[x]; - else env[x] = ENV[x]; - } - var strings = []; - for (var x in env) { - strings.push(`${x}=${env[x]}`); - } - getEnvStrings.strings = strings; - } - return getEnvStrings.strings; - }; - - var _environ_get = (__environ, environ_buf) => { - var bufSize = 0; - var envp = 0; - for (var string of getEnvStrings()) { - var ptr = environ_buf + bufSize; - HEAPU32[(((__environ)+(envp))>>2)] = ptr; - bufSize += stringToUTF8(string, ptr, Infinity) + 1; - envp += 4; - } - return 0; - }; - - - var _environ_sizes_get = (penviron_count, penviron_buf_size) => { - var strings = getEnvStrings(); - HEAPU32[((penviron_count)>>2)] = strings.length; - var bufSize = 0; - for (var string of strings) { - bufSize += lengthBytesUTF8(string) + 1; - } - HEAPU32[((penviron_buf_size)>>2)] = bufSize; - return 0; - }; - - function _fd_close(fd) { - try { - - var stream = SYSCALLS.getStreamFromFD(fd); - FS.close(stream); - return 0; - } catch (e) { - if (typeof FS == 'undefined' || !(e.name === 'ErrnoError')) throw e; - return e.errno; - } - } - - /** @param {number=} offset */ - var doReadv = (stream, iov, iovcnt, offset) => { - var ret = 0; - for (var i = 0; i < iovcnt; i++) { - var ptr = HEAPU32[((iov)>>2)]; - var len = HEAPU32[(((iov)+(4))>>2)]; - iov += 8; - var curr = FS.read(stream, HEAP8, ptr, len, offset); - if (curr < 0) return -1; - ret += curr; - if (curr < len) break; // nothing more to read - if (typeof offset != 'undefined') { - offset += curr; - } - } - return ret; - }; - - function _fd_read(fd, iov, iovcnt, pnum) { - try { - - var stream = SYSCALLS.getStreamFromFD(fd); - var num = doReadv(stream, iov, iovcnt); - HEAPU32[((pnum)>>2)] = num; - return 0; - } catch (e) { - if (typeof FS == 'undefined' || !(e.name === 'ErrnoError')) throw e; - return e.errno; - } - } - - - function _fd_seek(fd, offset, whence, newOffset) { - offset = bigintToI53Checked(offset); - - - try { - - if (isNaN(offset)) return 61; - var stream = 
SYSCALLS.getStreamFromFD(fd); - FS.llseek(stream, offset, whence); - HEAP64[((newOffset)>>3)] = BigInt(stream.position); - if (stream.getdents && offset === 0 && whence === 0) stream.getdents = null; // reset readdir state - return 0; - } catch (e) { - if (typeof FS == 'undefined' || !(e.name === 'ErrnoError')) throw e; - return e.errno; - } - ; - } - - /** @param {number=} offset */ - var doWritev = (stream, iov, iovcnt, offset) => { - var ret = 0; - for (var i = 0; i < iovcnt; i++) { - var ptr = HEAPU32[((iov)>>2)]; - var len = HEAPU32[(((iov)+(4))>>2)]; - iov += 8; - var curr = FS.write(stream, HEAP8, ptr, len, offset); - if (curr < 0) return -1; - ret += curr; - if (curr < len) { - // No more space to write. - break; - } - if (typeof offset != 'undefined') { - offset += curr; - } - } - return ret; - }; - - function _fd_write(fd, iov, iovcnt, pnum) { - try { - - var stream = SYSCALLS.getStreamFromFD(fd); - var num = doWritev(stream, iov, iovcnt); - HEAPU32[((pnum)>>2)] = num; - return 0; - } catch (e) { - if (typeof FS == 'undefined' || !(e.name === 'ErrnoError')) throw e; - return e.errno; - } - } - - - FS.createPreloadedFile = FS_createPreloadedFile; - FS.preloadFile = FS_preloadFile; - FS.staticInit();; -init_ClassHandle(); -init_RegisteredPointer(); -// End JS library code - -// include: postlibrary.js -// This file is included after the automatically-generated JS library code -// but before the wasm module is created. 
- -{ - - // Begin ATMODULES hooks - if (Module['noExitRuntime']) noExitRuntime = Module['noExitRuntime']; -if (Module['preloadPlugins']) preloadPlugins = Module['preloadPlugins']; -if (Module['print']) out = Module['print']; -if (Module['printErr']) err = Module['printErr']; -if (Module['wasmBinary']) wasmBinary = Module['wasmBinary']; - // End ATMODULES hooks - - if (Module['arguments']) arguments_ = Module['arguments']; - if (Module['thisProgram']) thisProgram = Module['thisProgram']; - - if (Module['preInit']) { - if (typeof Module['preInit'] == 'function') Module['preInit'] = [Module['preInit']]; - while (Module['preInit'].length > 0) { - Module['preInit'].shift()(); - } - } -} - -// Begin runtime exports - // End runtime exports - // Begin JS library exports - // End JS library exports - -// end include: postlibrary.js - - -// Imports from the Wasm binary. -var ___getTypeName, - _free, - _malloc, - _emscripten_builtin_memalign, - _setThrew, - __emscripten_stack_restore, - __emscripten_stack_alloc, - _emscripten_stack_get_current, - memory, - __indirect_function_table, - wasmMemory, - wasmTable; - - -function assignWasmExports(wasmExports) { - ___getTypeName = wasmExports['__getTypeName']; - _free = wasmExports['free']; - _malloc = wasmExports['malloc']; - _emscripten_builtin_memalign = wasmExports['emscripten_builtin_memalign']; - _setThrew = wasmExports['setThrew']; - __emscripten_stack_restore = wasmExports['_emscripten_stack_restore']; - __emscripten_stack_alloc = wasmExports['_emscripten_stack_alloc']; - _emscripten_stack_get_current = wasmExports['emscripten_stack_get_current']; - memory = wasmMemory = wasmExports['memory']; - __indirect_function_table = wasmTable = wasmExports['__indirect_function_table']; -} - -var wasmImports = { - /** @export */ - __cxa_throw: ___cxa_throw, - /** @export */ - __syscall_fcntl64: ___syscall_fcntl64, - /** @export */ - __syscall_fstat64: ___syscall_fstat64, - /** @export */ - __syscall_ioctl: ___syscall_ioctl, - /** 
@export */ - __syscall_lstat64: ___syscall_lstat64, - /** @export */ - __syscall_newfstatat: ___syscall_newfstatat, - /** @export */ - __syscall_openat: ___syscall_openat, - /** @export */ - __syscall_stat64: ___syscall_stat64, - /** @export */ - _abort_js: __abort_js, - /** @export */ - _embind_finalize_value_object: __embind_finalize_value_object, - /** @export */ - _embind_register_bigint: __embind_register_bigint, - /** @export */ - _embind_register_bool: __embind_register_bool, - /** @export */ - _embind_register_class: __embind_register_class, - /** @export */ - _embind_register_class_constructor: __embind_register_class_constructor, - /** @export */ - _embind_register_class_function: __embind_register_class_function, - /** @export */ - _embind_register_constant: __embind_register_constant, - /** @export */ - _embind_register_emval: __embind_register_emval, - /** @export */ - _embind_register_enum: __embind_register_enum, - /** @export */ - _embind_register_enum_value: __embind_register_enum_value, - /** @export */ - _embind_register_float: __embind_register_float, - /** @export */ - _embind_register_function: __embind_register_function, - /** @export */ - _embind_register_integer: __embind_register_integer, - /** @export */ - _embind_register_memory_view: __embind_register_memory_view, - /** @export */ - _embind_register_std_string: __embind_register_std_string, - /** @export */ - _embind_register_std_wstring: __embind_register_std_wstring, - /** @export */ - _embind_register_value_object: __embind_register_value_object, - /** @export */ - _embind_register_value_object_field: __embind_register_value_object_field, - /** @export */ - _embind_register_void: __embind_register_void, - /** @export */ - _emscripten_throw_longjmp: __emscripten_throw_longjmp, - /** @export */ - _emval_create_invoker: __emval_create_invoker, - /** @export */ - _emval_decref: __emval_decref, - /** @export */ - _emval_get_global: __emval_get_global, - /** @export */ - 
_emval_get_module_property: __emval_get_module_property, - /** @export */ - _emval_get_property: __emval_get_property, - /** @export */ - _emval_incref: __emval_incref, - /** @export */ - _emval_invoke: __emval_invoke, - /** @export */ - _emval_new_cstring: __emval_new_cstring, - /** @export */ - _emval_run_destructors: __emval_run_destructors, - /** @export */ - _mmap_js: __mmap_js, - /** @export */ - _munmap_js: __munmap_js, - /** @export */ - _tzset_js: __tzset_js, - /** @export */ - clock_time_get: _clock_time_get, - /** @export */ - emscripten_date_now: _emscripten_date_now, - /** @export */ - emscripten_get_heap_max: _emscripten_get_heap_max, - /** @export */ - emscripten_get_now: _emscripten_get_now, - /** @export */ - emscripten_resize_heap: _emscripten_resize_heap, - /** @export */ - environ_get: _environ_get, - /** @export */ - environ_sizes_get: _environ_sizes_get, - /** @export */ - fd_close: _fd_close, - /** @export */ - fd_read: _fd_read, - /** @export */ - fd_seek: _fd_seek, - /** @export */ - fd_write: _fd_write, - /** @export */ - invoke_ii, - /** @export */ - invoke_vi, - /** @export */ - invoke_vii, - /** @export */ - invoke_viii -}; - -function invoke_vi(index,a1) { - var sp = stackSave(); - try { - getWasmTableEntry(index)(a1); - } catch(e) { - stackRestore(sp); - if (e !== e+0) throw e; - _setThrew(1, 0); - } -} - -function invoke_viii(index,a1,a2,a3) { - var sp = stackSave(); - try { - getWasmTableEntry(index)(a1,a2,a3); - } catch(e) { - stackRestore(sp); - if (e !== e+0) throw e; - _setThrew(1, 0); - } -} - -function invoke_ii(index,a1) { - var sp = stackSave(); - try { - return getWasmTableEntry(index)(a1); - } catch(e) { - stackRestore(sp); - if (e !== e+0) throw e; - _setThrew(1, 0); - } -} - -function invoke_vii(index,a1,a2) { - var sp = stackSave(); - try { - getWasmTableEntry(index)(a1,a2); - } catch(e) { - stackRestore(sp); - if (e !== e+0) throw e; - _setThrew(1, 0); - } -} - - -// include: postamble.js -// === Auto-generated 
postamble setup entry stuff === - -function run() { - - if (runDependencies > 0) { - dependenciesFulfilled = run; - return; - } - - preRun(); - - // a preRun added a dependency, run will be called later - if (runDependencies > 0) { - dependenciesFulfilled = run; - return; - } - - function doRun() { - // run may have just been called through dependencies being fulfilled just in this very frame, - // or while the async setStatus time below was happening - Module['calledRun'] = true; - - if (ABORT) return; - - initRuntime(); - - readyPromiseResolve?.(Module); - Module['onRuntimeInitialized']?.(); - - postRun(); - } - - if (Module['setStatus']) { - Module['setStatus']('Running...'); - setTimeout(() => { - setTimeout(() => Module['setStatus'](''), 1); - doRun(); - }, 1); - } else - { - doRun(); - } -} - -var wasmExports; - -// In modularize mode the generated code is within a factory function so we -// can use await here (since it's not top-level-await). -wasmExports = await (createWasm()); - -run(); - -// end include: postamble.js - -// include: postamble_modularize.js -// In MODULARIZE mode we wrap the generated code in a factory function -// and return either the Module itself, or a promise of the module. -// -// We assign to the `moduleRtn` global here and configure closure to see -// this as and extern so it won't get minified. - -if (runtimeInitialized) { - moduleRtn = Module; -} else { - // Set up the promise that indicates the Module is initialized - moduleRtn = new Promise((resolve, reject) => { - readyPromiseResolve = resolve; - readyPromiseReject = reject; - }); -} - -// end include: postamble_modularize.js - - - - return moduleRtn; - }; -})(); - -// Export using a UMD style export, or ES6 exports if selected -if (typeof exports === 'object' && typeof module === 'object') { - module.exports = BASIS; - // This default export looks redundant, but it allows TS to import this - // commonjs style module. 
- module.exports.default = BASIS; -} else if (typeof define === 'function' && define['amd']) - define([], () => BASIS); +var BASIS=(()=>{var _scriptName=globalThis.document?.currentScript?.src;return async function(moduleArg={}){var moduleRtn;var Module=moduleArg;var ENVIRONMENT_IS_WEB=!!globalThis.window;var ENVIRONMENT_IS_WORKER=!!globalThis.WorkerGlobalScope;var ENVIRONMENT_IS_NODE=globalThis.process?.versions?.node&&globalThis.process?.type!="renderer";var arguments_=[];var thisProgram="./this.program";var quit_=(status,toThrow)=>{throw toThrow};if(typeof __filename!="undefined"){_scriptName=__filename}else if(ENVIRONMENT_IS_WORKER){_scriptName=self.location.href}var scriptDirectory="";function locateFile(path){if(Module["locateFile"]){return Module["locateFile"](path,scriptDirectory)}return scriptDirectory+path}var readAsync,readBinary;if(ENVIRONMENT_IS_NODE){var fs=require("fs");scriptDirectory=__dirname+"/";readBinary=filename=>{filename=isFileURI(filename)?new URL(filename):filename;var ret=fs.readFileSync(filename);return ret};readAsync=async(filename,binary=true)=>{filename=isFileURI(filename)?new URL(filename):filename;var ret=fs.readFileSync(filename,binary?undefined:"utf8");return ret};if(process.argv.length>1){thisProgram=process.argv[1].replace(/\\/g,"/")}arguments_=process.argv.slice(2);quit_=(status,toThrow)=>{process.exitCode=status;throw toThrow}}else if(ENVIRONMENT_IS_WEB||ENVIRONMENT_IS_WORKER){try{scriptDirectory=new URL(".",_scriptName).href}catch{}{if(ENVIRONMENT_IS_WORKER){readBinary=url=>{var xhr=new XMLHttpRequest;xhr.open("GET",url,false);xhr.responseType="arraybuffer";xhr.send(null);return new Uint8Array(xhr.response)}}readAsync=async url=>{if(isFileURI(url)){return new Promise((resolve,reject)=>{var xhr=new XMLHttpRequest;xhr.open("GET",url,true);xhr.responseType="arraybuffer";xhr.onload=()=>{if(xhr.status==200||xhr.status==0&&xhr.response){resolve(xhr.response);return}reject(xhr.status)};xhr.onerror=reject;xhr.send(null)})}var 
response=await fetch(url,{credentials:"same-origin"});if(response.ok){return response.arrayBuffer()}throw new Error(response.status+" : "+response.url)}}}else{}var out=console.log.bind(console);var err=console.error.bind(console);var wasmBinary;var ABORT=false;var isFileURI=filename=>filename.startsWith("file://");var readyPromiseResolve,readyPromiseReject;var HEAP8,HEAPU8,HEAP16,HEAPU16,HEAP32,HEAPU32,HEAPF32,HEAPF64;var HEAP64,HEAPU64;var runtimeInitialized=false;function updateMemoryViews(){var b=wasmMemory.buffer;Module["HEAP8"]=HEAP8=new Int8Array(b);HEAP16=new Int16Array(b);HEAPU8=new Uint8Array(b);HEAPU16=new Uint16Array(b);HEAP32=new Int32Array(b);HEAPU32=new Uint32Array(b);HEAPF32=new Float32Array(b);HEAPF64=new Float64Array(b);HEAP64=new BigInt64Array(b);HEAPU64=new BigUint64Array(b)}function preRun(){if(Module["preRun"]){if(typeof Module["preRun"]=="function")Module["preRun"]=[Module["preRun"]];while(Module["preRun"].length){addOnPreRun(Module["preRun"].shift())}}callRuntimeCallbacks(onPreRuns)}function initRuntime(){runtimeInitialized=true;if(!Module["noFSInit"]&&!FS.initialized)FS.init();TTY.init();wasmExports["$"]();FS.ignorePermissions=false}function postRun(){if(Module["postRun"]){if(typeof Module["postRun"]=="function")Module["postRun"]=[Module["postRun"]];while(Module["postRun"].length){addOnPostRun(Module["postRun"].shift())}}callRuntimeCallbacks(onPostRuns)}function abort(what){Module["onAbort"]?.(what);what="Aborted("+what+")";err(what);ABORT=true;what+=". 
Build with -sASSERTIONS for more info.";var e=new WebAssembly.RuntimeError(what);readyPromiseReject?.(e);throw e}var wasmBinaryFile;function findWasmBinary(){return locateFile("basis_encoder.wasm")}function getBinarySync(file){if(file==wasmBinaryFile&&wasmBinary){return new Uint8Array(wasmBinary)}if(readBinary){return readBinary(file)}throw"both async and sync fetching of the wasm failed"}async function getWasmBinary(binaryFile){if(!wasmBinary){try{var response=await readAsync(binaryFile);return new Uint8Array(response)}catch{}}return getBinarySync(binaryFile)}async function instantiateArrayBuffer(binaryFile,imports){try{var binary=await getWasmBinary(binaryFile);var instance=await WebAssembly.instantiate(binary,imports);return instance}catch(reason){err(`failed to asynchronously prepare wasm: ${reason}`);abort(reason)}}async function instantiateAsync(binary,binaryFile,imports){if(!binary&&!isFileURI(binaryFile)&&!ENVIRONMENT_IS_NODE){try{var response=fetch(binaryFile,{credentials:"same-origin"});var instantiationResult=await WebAssembly.instantiateStreaming(response,imports);return instantiationResult}catch(reason){err(`wasm streaming compile failed: ${reason}`);err("falling back to ArrayBuffer instantiation")}}return instantiateArrayBuffer(binaryFile,imports)}function getWasmImports(){var imports={a:wasmImports};return imports}async function createWasm(){function receiveInstance(instance,module){wasmExports=instance.exports;assignWasmExports(wasmExports);updateMemoryViews();return wasmExports}function receiveInstantiationResult(result){return receiveInstance(result["instance"])}var info=getWasmImports();if(Module["instantiateWasm"]){return new Promise((resolve,reject)=>{Module["instantiateWasm"](info,(inst,mod)=>{resolve(receiveInstance(inst,mod))})})}wasmBinaryFile??=findWasmBinary();var result=await instantiateAsync(wasmBinary,wasmBinaryFile,info);var exports=receiveInstantiationResult(result);return exports}class 
ExitStatus{name="ExitStatus";constructor(status){this.message=`Program terminated with exit(${status})`;this.status=status}}var callRuntimeCallbacks=callbacks=>{while(callbacks.length>0){callbacks.shift()(Module)}};var onPostRuns=[];var addOnPostRun=cb=>onPostRuns.push(cb);var onPreRuns=[];var addOnPreRun=cb=>onPreRuns.push(cb);var noExitRuntime=true;var stackRestore=val=>__emscripten_stack_restore(val);var stackSave=()=>_emscripten_stack_get_current();class ExceptionInfo{constructor(excPtr){this.excPtr=excPtr;this.ptr=excPtr-24}set_type(type){HEAPU32[this.ptr+4>>2]=type}get_type(){return HEAPU32[this.ptr+4>>2]}set_destructor(destructor){HEAPU32[this.ptr+8>>2]=destructor}get_destructor(){return HEAPU32[this.ptr+8>>2]}set_caught(caught){caught=caught?1:0;HEAP8[this.ptr+12]=caught}get_caught(){return HEAP8[this.ptr+12]!=0}set_rethrown(rethrown){rethrown=rethrown?1:0;HEAP8[this.ptr+13]=rethrown}get_rethrown(){return HEAP8[this.ptr+13]!=0}init(type,destructor){this.set_adjusted_ptr(0);this.set_type(type);this.set_destructor(destructor)}set_adjusted_ptr(adjustedPtr){HEAPU32[this.ptr+16>>2]=adjustedPtr}get_adjusted_ptr(){return HEAPU32[this.ptr+16>>2]}}var exceptionLast=0;var uncaughtExceptionCount=0;var ___cxa_throw=(ptr,type,destructor)=>{var info=new ExceptionInfo(ptr);info.init(type,destructor);exceptionLast=ptr;uncaughtExceptionCount++;throw exceptionLast};var syscallGetVarargI=()=>{var ret=HEAP32[+SYSCALLS.varargs>>2];SYSCALLS.varargs+=4;return ret};var syscallGetVarargP=syscallGetVarargI;var PATH={isAbs:path=>path.charAt(0)==="/",splitPath:filename=>{var splitPathRe=/^(\/?|)([\s\S]*?)((?:\.{1,2}|[^\/]+?|)(\.[^.\/]*|))(?:[\/]*)$/;return splitPathRe.exec(filename).slice(1)},normalizeArray:(parts,allowAboveRoot)=>{var up=0;for(var i=parts.length-1;i>=0;i--){var last=parts[i];if(last==="."){parts.splice(i,1)}else if(last===".."){parts.splice(i,1);up++}else if(up){parts.splice(i,1);up--}}if(allowAboveRoot){for(;up;up--){parts.unshift("..")}}return 
parts},normalize:path=>{var isAbsolute=PATH.isAbs(path),trailingSlash=path.slice(-1)==="/";path=PATH.normalizeArray(path.split("/").filter(p=>!!p),!isAbsolute).join("/");if(!path&&!isAbsolute){path="."}if(path&&trailingSlash){path+="/"}return(isAbsolute?"/":"")+path},dirname:path=>{var result=PATH.splitPath(path),root=result[0],dir=result[1];if(!root&&!dir){return"."}if(dir){dir=dir.slice(0,-1)}return root+dir},basename:path=>path&&path.match(/([^\/]+|\/)\/*$/)[1],join:(...paths)=>PATH.normalize(paths.join("/")),join2:(l,r)=>PATH.normalize(l+"/"+r)};var initRandomFill=()=>{if(ENVIRONMENT_IS_NODE){var nodeCrypto=require("crypto");return view=>nodeCrypto.randomFillSync(view)}return view=>crypto.getRandomValues(view)};var randomFill=view=>{(randomFill=initRandomFill())(view)};var PATH_FS={resolve:(...args)=>{var resolvedPath="",resolvedAbsolute=false;for(var i=args.length-1;i>=-1&&!resolvedAbsolute;i--){var path=i>=0?args[i]:FS.cwd();if(typeof path!="string"){throw new TypeError("Arguments to path.resolve must be strings")}else if(!path){return""}resolvedPath=path+"/"+resolvedPath;resolvedAbsolute=PATH.isAbs(path)}resolvedPath=PATH.normalizeArray(resolvedPath.split("/").filter(p=>!!p),!resolvedAbsolute).join("/");return(resolvedAbsolute?"/":"")+resolvedPath||"."},relative:(from,to)=>{from=PATH_FS.resolve(from).slice(1);to=PATH_FS.resolve(to).slice(1);function trim(arr){var start=0;for(;start=0;end--){if(arr[end]!=="")break}if(start>end)return[];return arr.slice(start,end-start+1)}var fromParts=trim(from.split("/"));var toParts=trim(to.split("/"));var length=Math.min(fromParts.length,toParts.length);var samePartsLength=length;for(var i=0;i{var maxIdx=idx+maxBytesToRead;if(ignoreNul)return maxIdx;while(heapOrArray[idx]&&!(idx>=maxIdx))++idx;return idx};var UTF8ArrayToString=(heapOrArray,idx=0,maxBytesToRead,ignoreNul)=>{var endPtr=findStringEnd(heapOrArray,idx,maxBytesToRead,ignoreNul);if(endPtr-idx>16&&heapOrArray.buffer&&UTF8Decoder){return 
UTF8Decoder.decode(heapOrArray.subarray(idx,endPtr))}var str="";while(idx>10,56320|ch&1023)}}return str};var FS_stdin_getChar_buffer=[];var lengthBytesUTF8=str=>{var len=0;for(var i=0;i=55296&&c<=57343){len+=4;++i}else{len+=3}}return len};var stringToUTF8Array=(str,heap,outIdx,maxBytesToWrite)=>{if(!(maxBytesToWrite>0))return 0;var startIdx=outIdx;var endIdx=outIdx+maxBytesToWrite-1;for(var i=0;i=endIdx)break;heap[outIdx++]=u}else if(u<=2047){if(outIdx+1>=endIdx)break;heap[outIdx++]=192|u>>6;heap[outIdx++]=128|u&63}else if(u<=65535){if(outIdx+2>=endIdx)break;heap[outIdx++]=224|u>>12;heap[outIdx++]=128|u>>6&63;heap[outIdx++]=128|u&63}else{if(outIdx+3>=endIdx)break;heap[outIdx++]=240|u>>18;heap[outIdx++]=128|u>>12&63;heap[outIdx++]=128|u>>6&63;heap[outIdx++]=128|u&63;i++}}heap[outIdx]=0;return outIdx-startIdx};var intArrayFromString=(stringy,dontAddNull,length)=>{var len=length>0?length:lengthBytesUTF8(stringy)+1;var u8array=new Array(len);var numBytesWritten=stringToUTF8Array(stringy,u8array,0,u8array.length);if(dontAddNull)u8array.length=numBytesWritten;return u8array};var FS_stdin_getChar=()=>{if(!FS_stdin_getChar_buffer.length){var result=null;if(ENVIRONMENT_IS_NODE){var BUFSIZE=256;var buf=Buffer.alloc(BUFSIZE);var bytesRead=0;var fd=process.stdin.fd;try{bytesRead=fs.readSync(fd,buf,0,BUFSIZE)}catch(e){if(e.toString().includes("EOF"))bytesRead=0;else throw e}if(bytesRead>0){result=buf.slice(0,bytesRead).toString("utf-8")}}else if(globalThis.window?.prompt){result=window.prompt("Input: ");if(result!==null){result+="\n"}}else{}if(!result){return null}FS_stdin_getChar_buffer=intArrayFromString(result,true)}return FS_stdin_getChar_buffer.shift()};var TTY={ttys:[],init(){},shutdown(){},register(dev,ops){TTY.ttys[dev]={input:[],output:[],ops};FS.registerDevice(dev,TTY.stream_ops)},stream_ops:{open(stream){var tty=TTY.ttys[stream.node.rdev];if(!tty){throw new 
FS.ErrnoError(43)}stream.tty=tty;stream.seekable=false},close(stream){stream.tty.ops.fsync(stream.tty)},fsync(stream){stream.tty.ops.fsync(stream.tty)},read(stream,buffer,offset,length,pos){if(!stream.tty||!stream.tty.ops.get_char){throw new FS.ErrnoError(60)}var bytesRead=0;for(var i=0;i0){out(UTF8ArrayToString(tty.output));tty.output=[]}},ioctl_tcgets(tty){return{c_iflag:25856,c_oflag:5,c_cflag:191,c_lflag:35387,c_cc:[3,28,127,21,4,0,1,0,17,19,26,0,18,15,23,22,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}},ioctl_tcsets(tty,optional_actions,data){return 0},ioctl_tiocgwinsz(tty){return[24,80]}},default_tty1_ops:{put_char(tty,val){if(val===null||val===10){err(UTF8ArrayToString(tty.output));tty.output=[]}else{if(val!=0)tty.output.push(val)}},fsync(tty){if(tty.output?.length>0){err(UTF8ArrayToString(tty.output));tty.output=[]}}}};var zeroMemory=(ptr,size)=>HEAPU8.fill(0,ptr,ptr+size);var alignMemory=(size,alignment)=>Math.ceil(size/alignment)*alignment;var mmapAlloc=size=>{size=alignMemory(size,65536);var ptr=_emscripten_builtin_memalign(65536,size);if(ptr)zeroMemory(ptr,size);return ptr};var MEMFS={ops_table:null,mount(mount){return MEMFS.createNode(null,"/",16895,0)},createNode(parent,name,mode,dev){if(FS.isBlkdev(mode)||FS.isFIFO(mode)){throw new 
FS.ErrnoError(63)}MEMFS.ops_table||={dir:{node:{getattr:MEMFS.node_ops.getattr,setattr:MEMFS.node_ops.setattr,lookup:MEMFS.node_ops.lookup,mknod:MEMFS.node_ops.mknod,rename:MEMFS.node_ops.rename,unlink:MEMFS.node_ops.unlink,rmdir:MEMFS.node_ops.rmdir,readdir:MEMFS.node_ops.readdir,symlink:MEMFS.node_ops.symlink},stream:{llseek:MEMFS.stream_ops.llseek}},file:{node:{getattr:MEMFS.node_ops.getattr,setattr:MEMFS.node_ops.setattr},stream:{llseek:MEMFS.stream_ops.llseek,read:MEMFS.stream_ops.read,write:MEMFS.stream_ops.write,mmap:MEMFS.stream_ops.mmap,msync:MEMFS.stream_ops.msync}},link:{node:{getattr:MEMFS.node_ops.getattr,setattr:MEMFS.node_ops.setattr,readlink:MEMFS.node_ops.readlink},stream:{}},chrdev:{node:{getattr:MEMFS.node_ops.getattr,setattr:MEMFS.node_ops.setattr},stream:FS.chrdev_stream_ops}};var node=FS.createNode(parent,name,mode,dev);if(FS.isDir(node.mode)){node.node_ops=MEMFS.ops_table.dir.node;node.stream_ops=MEMFS.ops_table.dir.stream;node.contents={}}else if(FS.isFile(node.mode)){node.node_ops=MEMFS.ops_table.file.node;node.stream_ops=MEMFS.ops_table.file.stream;node.usedBytes=0;node.contents=null}else if(FS.isLink(node.mode)){node.node_ops=MEMFS.ops_table.link.node;node.stream_ops=MEMFS.ops_table.link.stream}else if(FS.isChrdev(node.mode)){node.node_ops=MEMFS.ops_table.chrdev.node;node.stream_ops=MEMFS.ops_table.chrdev.stream}node.atime=node.mtime=node.ctime=Date.now();if(parent){parent.contents[name]=node;parent.atime=parent.mtime=parent.ctime=node.atime}return node},getFileDataAsTypedArray(node){if(!node.contents)return new Uint8Array(0);if(node.contents.subarray)return node.contents.subarray(0,node.usedBytes);return new Uint8Array(node.contents)},expandFileStorage(node,newCapacity){var prevCapacity=node.contents?node.contents.length:0;if(prevCapacity>=newCapacity)return;var CAPACITY_DOUBLING_MAX=1024*1024;newCapacity=Math.max(newCapacity,prevCapacity*(prevCapacity>>0);if(prevCapacity!=0)newCapacity=Math.max(newCapacity,256);var 
oldContents=node.contents;node.contents=new Uint8Array(newCapacity);if(node.usedBytes>0)node.contents.set(oldContents.subarray(0,node.usedBytes),0)},resizeFileStorage(node,newSize){if(node.usedBytes==newSize)return;if(newSize==0){node.contents=null;node.usedBytes=0}else{var oldContents=node.contents;node.contents=new Uint8Array(newSize);if(oldContents){node.contents.set(oldContents.subarray(0,Math.min(newSize,node.usedBytes)))}node.usedBytes=newSize}},node_ops:{getattr(node){var attr={};attr.dev=FS.isChrdev(node.mode)?node.id:1;attr.ino=node.id;attr.mode=node.mode;attr.nlink=1;attr.uid=0;attr.gid=0;attr.rdev=node.rdev;if(FS.isDir(node.mode)){attr.size=4096}else if(FS.isFile(node.mode)){attr.size=node.usedBytes}else if(FS.isLink(node.mode)){attr.size=node.link.length}else{attr.size=0}attr.atime=new Date(node.atime);attr.mtime=new Date(node.mtime);attr.ctime=new Date(node.ctime);attr.blksize=4096;attr.blocks=Math.ceil(attr.size/attr.blksize);return attr},setattr(node,attr){for(const key of["mode","atime","mtime","ctime"]){if(attr[key]!=null){node[key]=attr[key]}}if(attr.size!==undefined){MEMFS.resizeFileStorage(node,attr.size)}},lookup(parent,name){if(!MEMFS.doesNotExistError){MEMFS.doesNotExistError=new FS.ErrnoError(44);MEMFS.doesNotExistError.stack=""}throw MEMFS.doesNotExistError},mknod(parent,name,mode,dev){return MEMFS.createNode(parent,name,mode,dev)},rename(old_node,new_dir,new_name){var new_node;try{new_node=FS.lookupNode(new_dir,new_name)}catch(e){}if(new_node){if(FS.isDir(old_node.mode)){for(var i in new_node.contents){throw new FS.ErrnoError(55)}}FS.hashRemoveNode(new_node)}delete old_node.parent.contents[old_node.name];new_dir.contents[new_name]=old_node;old_node.name=new_name;new_dir.ctime=new_dir.mtime=old_node.parent.ctime=old_node.parent.mtime=Date.now()},unlink(parent,name){delete parent.contents[name];parent.ctime=parent.mtime=Date.now()},rmdir(parent,name){var node=FS.lookupNode(parent,name);for(var i in node.contents){throw new 
FS.ErrnoError(55)}delete parent.contents[name];parent.ctime=parent.mtime=Date.now()},readdir(node){return[".","..",...Object.keys(node.contents)]},symlink(parent,newname,oldpath){var node=MEMFS.createNode(parent,newname,511|40960,0);node.link=oldpath;return node},readlink(node){if(!FS.isLink(node.mode)){throw new FS.ErrnoError(28)}return node.link}},stream_ops:{read(stream,buffer,offset,length,position){var contents=stream.node.contents;if(position>=stream.node.usedBytes)return 0;var size=Math.min(stream.node.usedBytes-position,length);if(size>8&&contents.subarray){buffer.set(contents.subarray(position,position+size),offset)}else{for(var i=0;i0||position+length{var flagModes={r:0,"r+":2,w:512|64|1,"w+":512|64|2,a:1024|64|1,"a+":1024|64|2};var flags=flagModes[str];if(typeof flags=="undefined"){throw new Error(`Unknown file open mode: ${str}`)}return flags};var FS_getMode=(canRead,canWrite)=>{var mode=0;if(canRead)mode|=292|73;if(canWrite)mode|=146;return mode};var asyncLoad=async url=>{var arrayBuffer=await readAsync(url);return new Uint8Array(arrayBuffer)};var FS_createDataFile=(...args)=>FS.createDataFile(...args);var getUniqueRunDependency=id=>id;var runDependencies=0;var dependenciesFulfilled=null;var removeRunDependency=id=>{runDependencies--;Module["monitorRunDependencies"]?.(runDependencies);if(runDependencies==0){if(dependenciesFulfilled){var callback=dependenciesFulfilled;dependenciesFulfilled=null;callback()}}};var addRunDependency=id=>{runDependencies++;Module["monitorRunDependencies"]?.(runDependencies)};var preloadPlugins=[];var FS_handledByPreloadPlugin=async(byteArray,fullname)=>{if(typeof Browser!="undefined")Browser.init();for(var plugin of preloadPlugins){if(plugin["canHandle"](fullname)){return plugin["handle"](byteArray,fullname)}}return byteArray};var FS_preloadFile=async(parent,name,url,canRead,canWrite,dontCreateFile,canOwn,preFinish)=>{var fullname=name?PATH_FS.resolve(PATH.join2(parent,name)):parent;var dep=getUniqueRunDependency(`cp 
${fullname}`);addRunDependency(dep);try{var byteArray=url;if(typeof url=="string"){byteArray=await asyncLoad(url)}byteArray=await FS_handledByPreloadPlugin(byteArray,fullname);preFinish?.();if(!dontCreateFile){FS_createDataFile(parent,name,byteArray,canRead,canWrite,canOwn)}}finally{removeRunDependency(dep)}};var FS_createPreloadedFile=(parent,name,url,canRead,canWrite,onload,onerror,dontCreateFile,canOwn,preFinish)=>{FS_preloadFile(parent,name,url,canRead,canWrite,dontCreateFile,canOwn,preFinish).then(onload).catch(onerror)};var FS={root:null,mounts:[],devices:{},streams:[],nextInode:1,nameTable:null,currentPath:"/",initialized:false,ignorePermissions:true,filesystems:null,syncFSRequests:0,readFiles:{},ErrnoError:class{name="ErrnoError";constructor(errno){this.errno=errno}},FSStream:class{shared={};get object(){return this.node}set object(val){this.node=val}get isRead(){return(this.flags&2097155)!==1}get isWrite(){return(this.flags&2097155)!==0}get isAppend(){return this.flags&1024}get flags(){return this.shared.flags}set flags(val){this.shared.flags=val}get position(){return this.shared.position}set position(val){this.shared.position=val}},FSNode:class{node_ops={};stream_ops={};readMode=292|73;writeMode=146;mounted=null;constructor(parent,name,mode,rdev){if(!parent){parent=this}this.parent=parent;this.mount=parent.mount;this.id=FS.nextInode++;this.name=name;this.mode=mode;this.rdev=rdev;this.atime=this.mtime=this.ctime=Date.now()}get read(){return(this.mode&this.readMode)===this.readMode}set read(val){val?this.mode|=this.readMode:this.mode&=~this.readMode}get write(){return(this.mode&this.writeMode)===this.writeMode}set write(val){val?this.mode|=this.writeMode:this.mode&=~this.writeMode}get isFolder(){return FS.isDir(this.mode)}get isDevice(){return FS.isChrdev(this.mode)}},lookupPath(path,opts={}){if(!path){throw new FS.ErrnoError(44)}opts.follow_mount??=true;if(!PATH.isAbs(path)){path=FS.cwd()+"/"+path}linkloop:for(var nlinks=0;nlinks<40;nlinks++){var 
parts=path.split("/").filter(p=>!!p);var current=FS.root;var current_path="/";for(var i=0;i>>0)%FS.nameTable.length},hashAddNode(node){var hash=FS.hashName(node.parent.id,node.name);node.name_next=FS.nameTable[hash];FS.nameTable[hash]=node},hashRemoveNode(node){var hash=FS.hashName(node.parent.id,node.name);if(FS.nameTable[hash]===node){FS.nameTable[hash]=node.name_next}else{var current=FS.nameTable[hash];while(current){if(current.name_next===node){current.name_next=node.name_next;break}current=current.name_next}}},lookupNode(parent,name){var errCode=FS.mayLookup(parent);if(errCode){throw new FS.ErrnoError(errCode)}var hash=FS.hashName(parent.id,name);for(var node=FS.nameTable[hash];node;node=node.name_next){var nodeName=node.name;if(node.parent.id===parent.id&&nodeName===name){return node}}return FS.lookup(parent,name)},createNode(parent,name,mode,rdev){var node=new FS.FSNode(parent,name,mode,rdev);FS.hashAddNode(node);return node},destroyNode(node){FS.hashRemoveNode(node)},isRoot(node){return node===node.parent},isMountpoint(node){return!!node.mounted},isFile(mode){return(mode&61440)===32768},isDir(mode){return(mode&61440)===16384},isLink(mode){return(mode&61440)===40960},isChrdev(mode){return(mode&61440)===8192},isBlkdev(mode){return(mode&61440)===24576},isFIFO(mode){return(mode&61440)===4096},isSocket(mode){return(mode&49152)===49152},flagsToPermissionString(flag){var perms=["r","w","rw"][flag&3];if(flag&512){perms+="w"}return perms},nodePermissions(node,perms){if(FS.ignorePermissions){return 0}if(perms.includes("r")&&!(node.mode&292)){return 2}else if(perms.includes("w")&&!(node.mode&146)){return 2}else if(perms.includes("x")&&!(node.mode&73)){return 2}return 0},mayLookup(dir){if(!FS.isDir(dir.mode))return 54;var errCode=FS.nodePermissions(dir,"x");if(errCode)return errCode;if(!dir.node_ops.lookup)return 2;return 0},mayCreate(dir,name){if(!FS.isDir(dir.mode)){return 54}try{var node=FS.lookupNode(dir,name);return 20}catch(e){}return 
FS.nodePermissions(dir,"wx")},mayDelete(dir,name,isdir){var node;try{node=FS.lookupNode(dir,name)}catch(e){return e.errno}var errCode=FS.nodePermissions(dir,"wx");if(errCode){return errCode}if(isdir){if(!FS.isDir(node.mode)){return 54}if(FS.isRoot(node)||FS.getPath(node)===FS.cwd()){return 10}}else{if(FS.isDir(node.mode)){return 31}}return 0},mayOpen(node,flags){if(!node){return 44}if(FS.isLink(node.mode)){return 32}else if(FS.isDir(node.mode)){if(FS.flagsToPermissionString(flags)!=="r"||flags&(512|64)){return 31}}return FS.nodePermissions(node,FS.flagsToPermissionString(flags))},checkOpExists(op,err){if(!op){throw new FS.ErrnoError(err)}return op},MAX_OPEN_FDS:4096,nextfd(){for(var fd=0;fd<=FS.MAX_OPEN_FDS;fd++){if(!FS.streams[fd]){return fd}}throw new FS.ErrnoError(33)},getStreamChecked(fd){var stream=FS.getStream(fd);if(!stream){throw new FS.ErrnoError(8)}return stream},getStream:fd=>FS.streams[fd],createStream(stream,fd=-1){stream=Object.assign(new FS.FSStream,stream);if(fd==-1){fd=FS.nextfd()}stream.fd=fd;FS.streams[fd]=stream;return stream},closeStream(fd){FS.streams[fd]=null},dupStream(origStream,fd=-1){var stream=FS.createStream(origStream,fd);stream.stream_ops?.dup?.(stream);return stream},doSetAttr(stream,node,attr){var setattr=stream?.stream_ops.setattr;var arg=setattr?stream:node;setattr??=node.node_ops.setattr;FS.checkOpExists(setattr,63);setattr(arg,attr)},chrdev_stream_ops:{open(stream){var device=FS.getDevice(stream.node.rdev);stream.stream_ops=device.stream_ops;stream.stream_ops.open?.(stream)},llseek(){throw new FS.ErrnoError(70)}},major:dev=>dev>>8,minor:dev=>dev&255,makedev:(ma,mi)=>ma<<8|mi,registerDevice(dev,ops){FS.devices[dev]={stream_ops:ops}},getDevice:dev=>FS.devices[dev],getMounts(mount){var mounts=[];var check=[mount];while(check.length){var m=check.pop();mounts.push(m);check.push(...m.mounts)}return mounts},syncfs(populate,callback){if(typeof 
populate=="function"){callback=populate;populate=false}FS.syncFSRequests++;if(FS.syncFSRequests>1){err(`warning: ${FS.syncFSRequests} FS.syncfs operations in flight at once, probably just doing extra work`)}var mounts=FS.getMounts(FS.root.mount);var completed=0;function doCallback(errCode){FS.syncFSRequests--;return callback(errCode)}function done(errCode){if(errCode){if(!done.errored){done.errored=true;return doCallback(errCode)}return}if(++completed>=mounts.length){doCallback(null)}}for(var mount of mounts){if(mount.type.syncfs){mount.type.syncfs(mount,populate,done)}else{done(null)}}},mount(type,opts,mountpoint){var root=mountpoint==="/";var pseudo=!mountpoint;var node;if(root&&FS.root){throw new FS.ErrnoError(10)}else if(!root&&!pseudo){var lookup=FS.lookupPath(mountpoint,{follow_mount:false});mountpoint=lookup.path;node=lookup.node;if(FS.isMountpoint(node)){throw new FS.ErrnoError(10)}if(!FS.isDir(node.mode)){throw new FS.ErrnoError(54)}}var mount={type,opts,mountpoint,mounts:[]};var mountRoot=type.mount(mount);mountRoot.mount=mount;mount.root=mountRoot;if(root){FS.root=mountRoot}else if(node){node.mounted=mount;if(node.mount){node.mount.mounts.push(mount)}}return mountRoot},unmount(mountpoint){var lookup=FS.lookupPath(mountpoint,{follow_mount:false});if(!FS.isMountpoint(lookup.node)){throw new FS.ErrnoError(28)}var node=lookup.node;var mount=node.mounted;var mounts=FS.getMounts(mount);for(var[hash,current]of Object.entries(FS.nameTable)){while(current){var next=current.name_next;if(mounts.includes(current.mount)){FS.destroyNode(current)}current=next}}node.mounted=null;var idx=node.mount.mounts.indexOf(mount);node.mount.mounts.splice(idx,1)},lookup(parent,name){return parent.node_ops.lookup(parent,name)},mknod(path,mode,dev){var lookup=FS.lookupPath(path,{parent:true});var parent=lookup.node;var name=PATH.basename(path);if(!name){throw new FS.ErrnoError(28)}if(name==="."||name===".."){throw new FS.ErrnoError(20)}var 
errCode=FS.mayCreate(parent,name);if(errCode){throw new FS.ErrnoError(errCode)}if(!parent.node_ops.mknod){throw new FS.ErrnoError(63)}return parent.node_ops.mknod(parent,name,mode,dev)},statfs(path){return FS.statfsNode(FS.lookupPath(path,{follow:true}).node)},statfsStream(stream){return FS.statfsNode(stream.node)},statfsNode(node){var rtn={bsize:4096,frsize:4096,blocks:1e6,bfree:5e5,bavail:5e5,files:FS.nextInode,ffree:FS.nextInode-1,fsid:42,flags:2,namelen:255};if(node.node_ops.statfs){Object.assign(rtn,node.node_ops.statfs(node.mount.opts.root))}return rtn},create(path,mode=438){mode&=4095;mode|=32768;return FS.mknod(path,mode,0)},mkdir(path,mode=511){mode&=511|512;mode|=16384;return FS.mknod(path,mode,0)},mkdirTree(path,mode){var dirs=path.split("/");var d="";for(var dir of dirs){if(!dir)continue;if(d||PATH.isAbs(path))d+="/";d+=dir;try{FS.mkdir(d,mode)}catch(e){if(e.errno!=20)throw e}}},mkdev(path,mode,dev){if(typeof dev=="undefined"){dev=mode;mode=438}mode|=8192;return FS.mknod(path,mode,dev)},symlink(oldpath,newpath){if(!PATH_FS.resolve(oldpath)){throw new FS.ErrnoError(44)}var lookup=FS.lookupPath(newpath,{parent:true});var parent=lookup.node;if(!parent){throw new FS.ErrnoError(44)}var newname=PATH.basename(newpath);var errCode=FS.mayCreate(parent,newname);if(errCode){throw new FS.ErrnoError(errCode)}if(!parent.node_ops.symlink){throw new FS.ErrnoError(63)}return parent.node_ops.symlink(parent,newname,oldpath)},rename(old_path,new_path){var old_dirname=PATH.dirname(old_path);var new_dirname=PATH.dirname(new_path);var old_name=PATH.basename(old_path);var new_name=PATH.basename(new_path);var lookup,old_dir,new_dir;lookup=FS.lookupPath(old_path,{parent:true});old_dir=lookup.node;lookup=FS.lookupPath(new_path,{parent:true});new_dir=lookup.node;if(!old_dir||!new_dir)throw new FS.ErrnoError(44);if(old_dir.mount!==new_dir.mount){throw new FS.ErrnoError(75)}var old_node=FS.lookupNode(old_dir,old_name);var 
relative=PATH_FS.relative(old_path,new_dirname);if(relative.charAt(0)!=="."){throw new FS.ErrnoError(28)}relative=PATH_FS.relative(new_path,old_dirname);if(relative.charAt(0)!=="."){throw new FS.ErrnoError(55)}var new_node;try{new_node=FS.lookupNode(new_dir,new_name)}catch(e){}if(old_node===new_node){return}var isdir=FS.isDir(old_node.mode);var errCode=FS.mayDelete(old_dir,old_name,isdir);if(errCode){throw new FS.ErrnoError(errCode)}errCode=new_node?FS.mayDelete(new_dir,new_name,isdir):FS.mayCreate(new_dir,new_name);if(errCode){throw new FS.ErrnoError(errCode)}if(!old_dir.node_ops.rename){throw new FS.ErrnoError(63)}if(FS.isMountpoint(old_node)||new_node&&FS.isMountpoint(new_node)){throw new FS.ErrnoError(10)}if(new_dir!==old_dir){errCode=FS.nodePermissions(old_dir,"w");if(errCode){throw new FS.ErrnoError(errCode)}}FS.hashRemoveNode(old_node);try{old_dir.node_ops.rename(old_node,new_dir,new_name);old_node.parent=new_dir}catch(e){throw e}finally{FS.hashAddNode(old_node)}},rmdir(path){var lookup=FS.lookupPath(path,{parent:true});var parent=lookup.node;var name=PATH.basename(path);var node=FS.lookupNode(parent,name);var errCode=FS.mayDelete(parent,name,true);if(errCode){throw new FS.ErrnoError(errCode)}if(!parent.node_ops.rmdir){throw new FS.ErrnoError(63)}if(FS.isMountpoint(node)){throw new FS.ErrnoError(10)}parent.node_ops.rmdir(parent,name);FS.destroyNode(node)},readdir(path){var lookup=FS.lookupPath(path,{follow:true});var node=lookup.node;var readdir=FS.checkOpExists(node.node_ops.readdir,54);return readdir(node)},unlink(path){var lookup=FS.lookupPath(path,{parent:true});var parent=lookup.node;if(!parent){throw new FS.ErrnoError(44)}var name=PATH.basename(path);var node=FS.lookupNode(parent,name);var errCode=FS.mayDelete(parent,name,false);if(errCode){throw new FS.ErrnoError(errCode)}if(!parent.node_ops.unlink){throw new FS.ErrnoError(63)}if(FS.isMountpoint(node)){throw new 
FS.ErrnoError(10)}parent.node_ops.unlink(parent,name);FS.destroyNode(node)},readlink(path){var lookup=FS.lookupPath(path);var link=lookup.node;if(!link){throw new FS.ErrnoError(44)}if(!link.node_ops.readlink){throw new FS.ErrnoError(28)}return link.node_ops.readlink(link)},stat(path,dontFollow){var lookup=FS.lookupPath(path,{follow:!dontFollow});var node=lookup.node;var getattr=FS.checkOpExists(node.node_ops.getattr,63);return getattr(node)},fstat(fd){var stream=FS.getStreamChecked(fd);var node=stream.node;var getattr=stream.stream_ops.getattr;var arg=getattr?stream:node;getattr??=node.node_ops.getattr;FS.checkOpExists(getattr,63);return getattr(arg)},lstat(path){return FS.stat(path,true)},doChmod(stream,node,mode,dontFollow){FS.doSetAttr(stream,node,{mode:mode&4095|node.mode&~4095,ctime:Date.now(),dontFollow})},chmod(path,mode,dontFollow){var node;if(typeof path=="string"){var lookup=FS.lookupPath(path,{follow:!dontFollow});node=lookup.node}else{node=path}FS.doChmod(null,node,mode,dontFollow)},lchmod(path,mode){FS.chmod(path,mode,true)},fchmod(fd,mode){var stream=FS.getStreamChecked(fd);FS.doChmod(stream,stream.node,mode,false)},doChown(stream,node,dontFollow){FS.doSetAttr(stream,node,{timestamp:Date.now(),dontFollow})},chown(path,uid,gid,dontFollow){var node;if(typeof path=="string"){var lookup=FS.lookupPath(path,{follow:!dontFollow});node=lookup.node}else{node=path}FS.doChown(null,node,dontFollow)},lchown(path,uid,gid){FS.chown(path,uid,gid,true)},fchown(fd,uid,gid){var stream=FS.getStreamChecked(fd);FS.doChown(stream,stream.node,false)},doTruncate(stream,node,len){if(FS.isDir(node.mode)){throw new FS.ErrnoError(31)}if(!FS.isFile(node.mode)){throw new FS.ErrnoError(28)}var errCode=FS.nodePermissions(node,"w");if(errCode){throw new FS.ErrnoError(errCode)}FS.doSetAttr(stream,node,{size:len,timestamp:Date.now()})},truncate(path,len){if(len<0){throw new FS.ErrnoError(28)}var node;if(typeof path=="string"){var 
lookup=FS.lookupPath(path,{follow:true});node=lookup.node}else{node=path}FS.doTruncate(null,node,len)},ftruncate(fd,len){var stream=FS.getStreamChecked(fd);if(len<0||(stream.flags&2097155)===0){throw new FS.ErrnoError(28)}FS.doTruncate(stream,stream.node,len)},utime(path,atime,mtime){var lookup=FS.lookupPath(path,{follow:true});var node=lookup.node;var setattr=FS.checkOpExists(node.node_ops.setattr,63);setattr(node,{atime,mtime})},open(path,flags,mode=438){if(path===""){throw new FS.ErrnoError(44)}flags=typeof flags=="string"?FS_modeStringToFlags(flags):flags;if(flags&64){mode=mode&4095|32768}else{mode=0}var node;var isDirPath;if(typeof path=="object"){node=path}else{isDirPath=path.endsWith("/");var lookup=FS.lookupPath(path,{follow:!(flags&131072),noent_okay:true});node=lookup.node;path=lookup.path}var created=false;if(flags&64){if(node){if(flags&128){throw new FS.ErrnoError(20)}}else if(isDirPath){throw new FS.ErrnoError(31)}else{node=FS.mknod(path,mode|511,0);created=true}}if(!node){throw new FS.ErrnoError(44)}if(FS.isChrdev(node.mode)){flags&=~512}if(flags&65536&&!FS.isDir(node.mode)){throw new FS.ErrnoError(54)}if(!created){var errCode=FS.mayOpen(node,flags);if(errCode){throw new FS.ErrnoError(errCode)}}if(flags&512&&!created){FS.truncate(node,0)}flags&=~(128|512|131072);var stream=FS.createStream({node,path:FS.getPath(node),flags,seekable:true,position:0,stream_ops:node.stream_ops,ungotten:[],error:false});if(stream.stream_ops.open){stream.stream_ops.open(stream)}if(created){FS.chmod(node,mode&511)}if(Module["logReadFiles"]&&!(flags&1)){if(!(path in FS.readFiles)){FS.readFiles[path]=1}}return stream},close(stream){if(FS.isClosed(stream)){throw new FS.ErrnoError(8)}if(stream.getdents)stream.getdents=null;try{if(stream.stream_ops.close){stream.stream_ops.close(stream)}}catch(e){throw e}finally{FS.closeStream(stream.fd)}stream.fd=null},isClosed(stream){return stream.fd===null},llseek(stream,offset,whence){if(FS.isClosed(stream)){throw new 
FS.ErrnoError(8)}if(!stream.seekable||!stream.stream_ops.llseek){throw new FS.ErrnoError(70)}if(whence!=0&&whence!=1&&whence!=2){throw new FS.ErrnoError(28)}stream.position=stream.stream_ops.llseek(stream,offset,whence);stream.ungotten=[];return stream.position},read(stream,buffer,offset,length,position){if(length<0||position<0){throw new FS.ErrnoError(28)}if(FS.isClosed(stream)){throw new FS.ErrnoError(8)}if((stream.flags&2097155)===1){throw new FS.ErrnoError(8)}if(FS.isDir(stream.node.mode)){throw new FS.ErrnoError(31)}if(!stream.stream_ops.read){throw new FS.ErrnoError(28)}var seeking=typeof position!="undefined";if(!seeking){position=stream.position}else if(!stream.seekable){throw new FS.ErrnoError(70)}var bytesRead=stream.stream_ops.read(stream,buffer,offset,length,position);if(!seeking)stream.position+=bytesRead;return bytesRead},write(stream,buffer,offset,length,position,canOwn){if(length<0||position<0){throw new FS.ErrnoError(28)}if(FS.isClosed(stream)){throw new FS.ErrnoError(8)}if((stream.flags&2097155)===0){throw new FS.ErrnoError(8)}if(FS.isDir(stream.node.mode)){throw new FS.ErrnoError(31)}if(!stream.stream_ops.write){throw new FS.ErrnoError(28)}if(stream.seekable&&stream.flags&1024){FS.llseek(stream,0,2)}var seeking=typeof position!="undefined";if(!seeking){position=stream.position}else if(!stream.seekable){throw new FS.ErrnoError(70)}var bytesWritten=stream.stream_ops.write(stream,buffer,offset,length,position,canOwn);if(!seeking)stream.position+=bytesWritten;return bytesWritten},mmap(stream,length,position,prot,flags){if((prot&2)!==0&&(flags&2)===0&&(stream.flags&2097155)!==2){throw new FS.ErrnoError(2)}if((stream.flags&2097155)===1){throw new FS.ErrnoError(2)}if(!stream.stream_ops.mmap){throw new FS.ErrnoError(43)}if(!length){throw new FS.ErrnoError(28)}return stream.stream_ops.mmap(stream,length,position,prot,flags)},msync(stream,buffer,offset,length,mmapFlags){if(!stream.stream_ops.msync){return 0}return 
stream.stream_ops.msync(stream,buffer,offset,length,mmapFlags)},ioctl(stream,cmd,arg){if(!stream.stream_ops.ioctl){throw new FS.ErrnoError(59)}return stream.stream_ops.ioctl(stream,cmd,arg)},readFile(path,opts={}){opts.flags=opts.flags||0;opts.encoding=opts.encoding||"binary";if(opts.encoding!=="utf8"&&opts.encoding!=="binary"){abort(`Invalid encoding type "${opts.encoding}"`)}var stream=FS.open(path,opts.flags);var stat=FS.stat(path);var length=stat.size;var buf=new Uint8Array(length);FS.read(stream,buf,0,length,0);if(opts.encoding==="utf8"){buf=UTF8ArrayToString(buf)}FS.close(stream);return buf},writeFile(path,data,opts={}){opts.flags=opts.flags||577;var stream=FS.open(path,opts.flags,opts.mode);if(typeof data=="string"){data=new Uint8Array(intArrayFromString(data,true))}if(ArrayBuffer.isView(data)){FS.write(stream,data,0,data.byteLength,undefined,opts.canOwn)}else{abort("Unsupported data type")}FS.close(stream)},cwd:()=>FS.currentPath,chdir(path){var lookup=FS.lookupPath(path,{follow:true});if(lookup.node===null){throw new FS.ErrnoError(44)}if(!FS.isDir(lookup.node.mode)){throw new FS.ErrnoError(54)}var errCode=FS.nodePermissions(lookup.node,"x");if(errCode){throw new FS.ErrnoError(errCode)}FS.currentPath=lookup.path},createDefaultDirectories(){FS.mkdir("/tmp");FS.mkdir("/home");FS.mkdir("/home/web_user")},createDefaultDevices(){FS.mkdir("/dev");FS.registerDevice(FS.makedev(1,3),{read:()=>0,write:(stream,buffer,offset,length,pos)=>length,llseek:()=>0});FS.mkdev("/dev/null",FS.makedev(1,3));TTY.register(FS.makedev(5,0),TTY.default_tty_ops);TTY.register(FS.makedev(6,0),TTY.default_tty1_ops);FS.mkdev("/dev/tty",FS.makedev(5,0));FS.mkdev("/dev/tty1",FS.makedev(6,0));var randomBuffer=new Uint8Array(1024),randomLeft=0;var randomByte=()=>{if(randomLeft===0){randomFill(randomBuffer);randomLeft=randomBuffer.byteLength}return 
randomBuffer[--randomLeft]};FS.createDevice("/dev","random",randomByte);FS.createDevice("/dev","urandom",randomByte);FS.mkdir("/dev/shm");FS.mkdir("/dev/shm/tmp")},createSpecialDirectories(){FS.mkdir("/proc");var proc_self=FS.mkdir("/proc/self");FS.mkdir("/proc/self/fd");FS.mount({mount(){var node=FS.createNode(proc_self,"fd",16895,73);node.stream_ops={llseek:MEMFS.stream_ops.llseek};node.node_ops={lookup(parent,name){var fd=+name;var stream=FS.getStreamChecked(fd);var ret={parent:null,mount:{mountpoint:"fake"},node_ops:{readlink:()=>stream.path},id:fd+1};ret.parent=ret;return ret},readdir(){return Array.from(FS.streams.entries()).filter(([k,v])=>v).map(([k,v])=>k.toString())}};return node}},{},"/proc/self/fd")},createStandardStreams(input,output,error){if(input){FS.createDevice("/dev","stdin",input)}else{FS.symlink("/dev/tty","/dev/stdin")}if(output){FS.createDevice("/dev","stdout",null,output)}else{FS.symlink("/dev/tty","/dev/stdout")}if(error){FS.createDevice("/dev","stderr",null,error)}else{FS.symlink("/dev/tty1","/dev/stderr")}var stdin=FS.open("/dev/stdin",0);var stdout=FS.open("/dev/stdout",1);var stderr=FS.open("/dev/stderr",1)},staticInit(){FS.nameTable=new Array(4096);FS.mount(MEMFS,{},"/");FS.createDefaultDirectories();FS.createDefaultDevices();FS.createSpecialDirectories();FS.filesystems={MEMFS}},init(input,output,error){FS.initialized=true;input??=Module["stdin"];output??=Module["stdout"];error??=Module["stderr"];FS.createStandardStreams(input,output,error)},quit(){FS.initialized=false;for(var stream of FS.streams){if(stream){FS.close(stream)}}},findObject(path,dontResolveLastLink){var ret=FS.analyzePath(path,dontResolveLastLink);if(!ret.exists){return null}return ret.object},analyzePath(path,dontResolveLastLink){try{var lookup=FS.lookupPath(path,{follow:!dontResolveLastLink});path=lookup.path}catch(e){}var ret={isRoot:false,exists:false,error:0,name:null,path:null,object:null,parentExists:false,parentPath:null,parentObject:null};try{var 
lookup=FS.lookupPath(path,{parent:true});ret.parentExists=true;ret.parentPath=lookup.path;ret.parentObject=lookup.node;ret.name=PATH.basename(path);lookup=FS.lookupPath(path,{follow:!dontResolveLastLink});ret.exists=true;ret.path=lookup.path;ret.object=lookup.node;ret.name=lookup.node.name;ret.isRoot=lookup.path==="/"}catch(e){ret.error=e.errno}return ret},createPath(parent,path,canRead,canWrite){parent=typeof parent=="string"?parent:FS.getPath(parent);var parts=path.split("/").reverse();while(parts.length){var part=parts.pop();if(!part)continue;var current=PATH.join2(parent,part);try{FS.mkdir(current)}catch(e){if(e.errno!=20)throw e}parent=current}return current},createFile(parent,name,properties,canRead,canWrite){var path=PATH.join2(typeof parent=="string"?parent:FS.getPath(parent),name);var mode=FS_getMode(canRead,canWrite);return FS.create(path,mode)},createDataFile(parent,name,data,canRead,canWrite,canOwn){var path=name;if(parent){parent=typeof parent=="string"?parent:FS.getPath(parent);path=name?PATH.join2(parent,name):parent}var mode=FS_getMode(canRead,canWrite);var node=FS.create(path,mode);if(data){if(typeof data=="string"){var arr=new Array(data.length);for(var i=0,len=data.length;ithis.length-1||idx<0){return undefined}var chunkOffset=idx%this.chunkSize;var chunkNum=idx/this.chunkSize|0;return this.getter(chunkNum)[chunkOffset]}setDataGetter(getter){this.getter=getter}cacheLength(){var xhr=new XMLHttpRequest;xhr.open("HEAD",url,false);xhr.send(null);if(!(xhr.status>=200&&xhr.status<300||xhr.status===304))abort("Couldn't load "+url+". 
Status: "+xhr.status);var datalength=Number(xhr.getResponseHeader("Content-length"));var header;var hasByteServing=(header=xhr.getResponseHeader("Accept-Ranges"))&&header==="bytes";var usesGzip=(header=xhr.getResponseHeader("Content-Encoding"))&&header==="gzip";var chunkSize=1024*1024;if(!hasByteServing)chunkSize=datalength;var doXHR=(from,to)=>{if(from>to)abort("invalid range ("+from+", "+to+") or no bytes requested!");if(to>datalength-1)abort("only "+datalength+" bytes available! programmer error!");var xhr=new XMLHttpRequest;xhr.open("GET",url,false);if(datalength!==chunkSize)xhr.setRequestHeader("Range","bytes="+from+"-"+to);xhr.responseType="arraybuffer";if(xhr.overrideMimeType){xhr.overrideMimeType("text/plain; charset=x-user-defined")}xhr.send(null);if(!(xhr.status>=200&&xhr.status<300||xhr.status===304))abort("Couldn't load "+url+". Status: "+xhr.status);if(xhr.response!==undefined){return new Uint8Array(xhr.response||[])}return intArrayFromString(xhr.responseText||"",true)};var lazyArray=this;lazyArray.setDataGetter(chunkNum=>{var start=chunkNum*chunkSize;var end=(chunkNum+1)*chunkSize-1;end=Math.min(end,datalength-1);if(typeof lazyArray.chunks[chunkNum]=="undefined"){lazyArray.chunks[chunkNum]=doXHR(start,end)}if(typeof lazyArray.chunks[chunkNum]=="undefined")abort("doXHR failed!");return lazyArray.chunks[chunkNum]});if(usesGzip||!datalength){chunkSize=datalength=1;datalength=this.getter(0).length;chunkSize=datalength;out("LazyFiles on gzip forces download of the whole file when length is accessed")}this._length=datalength;this._chunkSize=chunkSize;this.lengthKnown=true}get length(){if(!this.lengthKnown){this.cacheLength()}return this._length}get chunkSize(){if(!this.lengthKnown){this.cacheLength()}return this._chunkSize}}if(globalThis.XMLHttpRequest){if(!ENVIRONMENT_IS_WORKER)abort("Cannot do synchronous binary XHRs outside webworkers in modern browsers. 
Use --embed-file or --preload-file in emcc");var lazyArray=new LazyUint8Array;var properties={isDevice:false,contents:lazyArray}}else{var properties={isDevice:false,url}}var node=FS.createFile(parent,name,properties,canRead,canWrite);if(properties.contents){node.contents=properties.contents}else if(properties.url){node.contents=null;node.url=properties.url}Object.defineProperties(node,{usedBytes:{get:function(){return this.contents.length}}});var stream_ops={};for(const[key,fn]of Object.entries(node.stream_ops)){stream_ops[key]=(...args)=>{FS.forceLoadFile(node);return fn(...args)}}function writeChunks(stream,buffer,offset,length,position){var contents=stream.node.contents;if(position>=contents.length)return 0;var size=Math.min(contents.length-position,length);if(contents.slice){for(var i=0;i{FS.forceLoadFile(node);return writeChunks(stream,buffer,offset,length,position)};stream_ops.mmap=(stream,length,position,prot,flags)=>{FS.forceLoadFile(node);var ptr=mmapAlloc(length);if(!ptr){throw new FS.ErrnoError(48)}writeChunks(stream,HEAP8,ptr,length,position);return{ptr,allocated:true}};node.stream_ops=stream_ops;return node}};var UTF8ToString=(ptr,maxBytesToRead,ignoreNul)=>ptr?UTF8ArrayToString(HEAPU8,ptr,maxBytesToRead,ignoreNul):"";var SYSCALLS={DEFAULT_POLLMASK:5,calculateAt(dirfd,path,allowEmpty){if(PATH.isAbs(path)){return path}var dir;if(dirfd===-100){dir=FS.cwd()}else{var dirstream=SYSCALLS.getStreamFromFD(dirfd);dir=dirstream.path}if(path.length==0){if(!allowEmpty){throw new FS.ErrnoError(44)}return dir}return dir+"/"+path},writeStat(buf,stat){HEAPU32[buf>>2]=stat.dev;HEAPU32[buf+4>>2]=stat.mode;HEAPU32[buf+8>>2]=stat.nlink;HEAPU32[buf+12>>2]=stat.uid;HEAPU32[buf+16>>2]=stat.gid;HEAPU32[buf+20>>2]=stat.rdev;HEAP64[buf+24>>3]=BigInt(stat.size);HEAP32[buf+32>>2]=4096;HEAP32[buf+36>>2]=stat.blocks;var atime=stat.atime.getTime();var mtime=stat.mtime.getTime();var 
ctime=stat.ctime.getTime();HEAP64[buf+40>>3]=BigInt(Math.floor(atime/1e3));HEAPU32[buf+48>>2]=atime%1e3*1e3*1e3;HEAP64[buf+56>>3]=BigInt(Math.floor(mtime/1e3));HEAPU32[buf+64>>2]=mtime%1e3*1e3*1e3;HEAP64[buf+72>>3]=BigInt(Math.floor(ctime/1e3));HEAPU32[buf+80>>2]=ctime%1e3*1e3*1e3;HEAP64[buf+88>>3]=BigInt(stat.ino);return 0},writeStatFs(buf,stats){HEAPU32[buf+4>>2]=stats.bsize;HEAPU32[buf+60>>2]=stats.bsize;HEAP64[buf+8>>3]=BigInt(stats.blocks);HEAP64[buf+16>>3]=BigInt(stats.bfree);HEAP64[buf+24>>3]=BigInt(stats.bavail);HEAP64[buf+32>>3]=BigInt(stats.files);HEAP64[buf+40>>3]=BigInt(stats.ffree);HEAPU32[buf+48>>2]=stats.fsid;HEAPU32[buf+64>>2]=stats.flags;HEAPU32[buf+56>>2]=stats.namelen},doMsync(addr,stream,len,flags,offset){if(!FS.isFile(stream.node.mode)){throw new FS.ErrnoError(43)}if(flags&2){return 0}var buffer=HEAPU8.slice(addr,addr+len);FS.msync(stream,buffer,offset,len,flags)},getStreamFromFD(fd){var stream=FS.getStreamChecked(fd);return stream},varargs:undefined,getStr(ptr){var ret=UTF8ToString(ptr);return ret}};function ___syscall_fcntl64(fd,cmd,varargs){SYSCALLS.varargs=varargs;try{var stream=SYSCALLS.getStreamFromFD(fd);switch(cmd){case 0:{var arg=syscallGetVarargI();if(arg<0){return-28}while(FS.streams[arg]){arg++}var newStream;newStream=FS.dupStream(stream,arg);return newStream.fd}case 1:case 2:return 0;case 3:return stream.flags;case 4:{var arg=syscallGetVarargI();stream.flags|=arg;return 0}case 12:{var arg=syscallGetVarargP();var offset=0;HEAP16[arg+offset>>1]=2;return 0}case 13:case 14:return 0}return-28}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}function ___syscall_fstat64(fd,buf){try{return SYSCALLS.writeStat(buf,FS.fstat(fd))}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}function ___syscall_ioctl(fd,op,varargs){SYSCALLS.varargs=varargs;try{var stream=SYSCALLS.getStreamFromFD(fd);switch(op){case 21509:{if(!stream.tty)return-59;return 0}case 
21505:{if(!stream.tty)return-59;if(stream.tty.ops.ioctl_tcgets){var termios=stream.tty.ops.ioctl_tcgets(stream);var argp=syscallGetVarargP();HEAP32[argp>>2]=termios.c_iflag||0;HEAP32[argp+4>>2]=termios.c_oflag||0;HEAP32[argp+8>>2]=termios.c_cflag||0;HEAP32[argp+12>>2]=termios.c_lflag||0;for(var i=0;i<32;i++){HEAP8[argp+i+17]=termios.c_cc[i]||0}return 0}return 0}case 21510:case 21511:case 21512:{if(!stream.tty)return-59;return 0}case 21506:case 21507:case 21508:{if(!stream.tty)return-59;if(stream.tty.ops.ioctl_tcsets){var argp=syscallGetVarargP();var c_iflag=HEAP32[argp>>2];var c_oflag=HEAP32[argp+4>>2];var c_cflag=HEAP32[argp+8>>2];var c_lflag=HEAP32[argp+12>>2];var c_cc=[];for(var i=0;i<32;i++){c_cc.push(HEAP8[argp+i+17])}return stream.tty.ops.ioctl_tcsets(stream.tty,op,{c_iflag,c_oflag,c_cflag,c_lflag,c_cc})}return 0}case 21519:{if(!stream.tty)return-59;var argp=syscallGetVarargP();HEAP32[argp>>2]=0;return 0}case 21520:{if(!stream.tty)return-59;return-28}case 21537:case 21531:{var argp=syscallGetVarargP();return FS.ioctl(stream,op,argp)}case 21523:{if(!stream.tty)return-59;if(stream.tty.ops.ioctl_tiocgwinsz){var winsize=stream.tty.ops.ioctl_tiocgwinsz(stream.tty);var argp=syscallGetVarargP();HEAP16[argp>>1]=winsize[0];HEAP16[argp+2>>1]=winsize[1]}return 0}case 21524:{if(!stream.tty)return-59;return 0}case 21515:{if(!stream.tty)return-59;return 0}default:return-28}}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}function ___syscall_newfstatat(dirfd,path,buf,flags){try{path=SYSCALLS.getStr(path);var nofollow=flags&256;var allowEmpty=flags&4096;flags=flags&~6400;path=SYSCALLS.calculateAt(dirfd,path,allowEmpty);return SYSCALLS.writeStat(buf,nofollow?FS.lstat(path):FS.stat(path))}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}function ___syscall_openat(dirfd,path,flags,varargs){SYSCALLS.varargs=varargs;try{path=SYSCALLS.getStr(path);path=SYSCALLS.calculateAt(dirfd,path);var 
mode=varargs?syscallGetVarargI():0;return FS.open(path,flags,mode).fd}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}var __abort_js=()=>abort("");var structRegistrations={};var runDestructors=destructors=>{while(destructors.length){var ptr=destructors.pop();var del=destructors.pop();del(ptr)}};function readPointer(pointer){return this.fromWireType(HEAPU32[pointer>>2])}var awaitingDependencies={};var registeredTypes={};var typeDependencies={};var InternalError=class InternalError extends Error{constructor(message){super(message);this.name="InternalError"}};var throwInternalError=message=>{throw new InternalError(message)};var whenDependentTypesAreResolved=(myTypes,dependentTypes,getTypeConverters)=>{myTypes.forEach(type=>typeDependencies[type]=dependentTypes);function onComplete(typeConverters){var myTypeConverters=getTypeConverters(typeConverters);if(myTypeConverters.length!==myTypes.length){throwInternalError("Mismatched type converter count")}for(var i=0;i{typeConverters[i]=registeredTypes[dt];++registered;if(registered===unregisteredTypes.length){onComplete(typeConverters)}})}}if(0===unregisteredTypes.length){onComplete(typeConverters)}};var __embind_finalize_value_object=structType=>{var reg=structRegistrations[structType];delete structRegistrations[structType];var rawConstructor=reg.rawConstructor;var rawDestructor=reg.rawDestructor;var fieldRecords=reg.fields;var fieldTypes=fieldRecords.map(field=>field.getterReturnType).concat(fieldRecords.map(field=>field.setterArgumentType));whenDependentTypesAreResolved([structType],fieldTypes,fieldTypes=>{var fields={};for(var[i,field]of fieldRecords.entries()){const getterReturnType=fieldTypes[i];const getter=field.getter;const getterContext=field.getterContext;const setterArgumentType=fieldTypes[i+fieldRecords.length];const setter=field.setter;const 
setterContext=field.setterContext;fields[field.fieldName]={read:ptr=>getterReturnType.fromWireType(getter(getterContext,ptr)),write:(ptr,o)=>{var destructors=[];setter(setterContext,ptr,setterArgumentType.toWireType(destructors,o));runDestructors(destructors)},optional:getterReturnType.optional}}return[{name:reg.name,fromWireType:ptr=>{var rv={};for(var i in fields){rv[i]=fields[i].read(ptr)}rawDestructor(ptr);return rv},toWireType:(destructors,o)=>{for(var fieldName in fields){if(!(fieldName in o)&&!fields[fieldName].optional){throw new TypeError(`Missing field: "${fieldName}"`)}}var ptr=rawConstructor();for(fieldName in fields){fields[fieldName].write(ptr,o[fieldName])}if(destructors!==null){destructors.push(rawDestructor,ptr)}return ptr},readValueFromPointer:readPointer,destructorFunction:rawDestructor}]})};var AsciiToString=ptr=>{var str="";while(1){var ch=HEAPU8[ptr++];if(!ch)return str;str+=String.fromCharCode(ch)}};var BindingError=class BindingError extends Error{constructor(message){super(message);this.name="BindingError"}};var throwBindingError=message=>{throw new BindingError(message)};function sharedRegisterType(rawType,registeredInstance,options={}){var name=registeredInstance.name;if(!rawType){throwBindingError(`type "${name}" must have a positive integer typeid pointer`)}if(registeredTypes.hasOwnProperty(rawType)){if(options.ignoreDuplicateRegistrations){return}else{throwBindingError(`Cannot register type '${name}' twice`)}}registeredTypes[rawType]=registeredInstance;delete typeDependencies[rawType];if(awaitingDependencies.hasOwnProperty(rawType)){var callbacks=awaitingDependencies[rawType];delete awaitingDependencies[rawType];callbacks.forEach(cb=>cb())}}function registerType(rawType,registeredInstance,options={}){return sharedRegisterType(rawType,registeredInstance,options)}var integerReadValueFromPointer=(name,width,signed)=>{switch(width){case 1:return signed?pointer=>HEAP8[pointer]:pointer=>HEAPU8[pointer];case 2:return 
signed?pointer=>HEAP16[pointer>>1]:pointer=>HEAPU16[pointer>>1];case 4:return signed?pointer=>HEAP32[pointer>>2]:pointer=>HEAPU32[pointer>>2];case 8:return signed?pointer=>HEAP64[pointer>>3]:pointer=>HEAPU64[pointer>>3];default:throw new TypeError(`invalid integer width (${width}): ${name}`)}};var __embind_register_bigint=(primitiveType,name,size,minRange,maxRange)=>{name=AsciiToString(name);const isUnsignedType=minRange===0n;let fromWireType=value=>value;if(isUnsignedType){const bitSize=size*8;fromWireType=value=>BigInt.asUintN(bitSize,value);maxRange=fromWireType(maxRange)}registerType(primitiveType,{name,fromWireType,toWireType:(destructors,value)=>{if(typeof value=="number"){value=BigInt(value)}return value},readValueFromPointer:integerReadValueFromPointer(name,size,!isUnsignedType),destructorFunction:null})};var __embind_register_bool=(rawType,name,trueValue,falseValue)=>{name=AsciiToString(name);registerType(rawType,{name,fromWireType:function(wt){return!!wt},toWireType:function(destructors,o){return o?trueValue:falseValue},readValueFromPointer:function(pointer){return this.fromWireType(HEAPU8[pointer])},destructorFunction:null})};var shallowCopyInternalPointer=o=>({count:o.count,deleteScheduled:o.deleteScheduled,preservePointerOnDelete:o.preservePointerOnDelete,ptr:o.ptr,ptrType:o.ptrType,smartPtr:o.smartPtr,smartPtrType:o.smartPtrType});var throwInstanceAlreadyDeleted=obj=>{function getInstanceTypeName(handle){return handle.$$.ptrType.registeredClass.name}throwBindingError(getInstanceTypeName(obj)+" instance already deleted")};var finalizationRegistry=false;var detachFinalizer=handle=>{};var runDestructor=$$=>{if($$.smartPtr){$$.smartPtrType.rawDestructor($$.smartPtr)}else{$$.ptrType.registeredClass.rawDestructor($$.ptr)}};var releaseClassHandle=$$=>{$$.count.value-=1;var toDelete=0===$$.count.value;if(toDelete){runDestructor($$)}};var attachFinalizer=handle=>{if(!globalThis.FinalizationRegistry){attachFinalizer=handle=>handle;return 
handle}finalizationRegistry=new FinalizationRegistry(info=>{releaseClassHandle(info.$$)});attachFinalizer=handle=>{var $$=handle.$$;var hasSmartPtr=!!$$.smartPtr;if(hasSmartPtr){var info={$$};finalizationRegistry.register(handle,info,handle)}return handle};detachFinalizer=handle=>finalizationRegistry.unregister(handle);return attachFinalizer(handle)};var deletionQueue=[];var flushPendingDeletes=()=>{while(deletionQueue.length){var obj=deletionQueue.pop();obj.$$.deleteScheduled=false;obj["delete"]()}};var delayFunction;var init_ClassHandle=()=>{let proto=ClassHandle.prototype;Object.assign(proto,{isAliasOf(other){if(!(this instanceof ClassHandle)){return false}if(!(other instanceof ClassHandle)){return false}var leftClass=this.$$.ptrType.registeredClass;var left=this.$$.ptr;other.$$=other.$$;var rightClass=other.$$.ptrType.registeredClass;var right=other.$$.ptr;while(leftClass.baseClass){left=leftClass.upcast(left);leftClass=leftClass.baseClass}while(rightClass.baseClass){right=rightClass.upcast(right);rightClass=rightClass.baseClass}return leftClass===rightClass&&left===right},clone(){if(!this.$$.ptr){throwInstanceAlreadyDeleted(this)}if(this.$$.preservePointerOnDelete){this.$$.count.value+=1;return this}else{var clone=attachFinalizer(Object.create(Object.getPrototypeOf(this),{$$:{value:shallowCopyInternalPointer(this.$$)}}));clone.$$.count.value+=1;clone.$$.deleteScheduled=false;return clone}},delete(){if(!this.$$.ptr){throwInstanceAlreadyDeleted(this)}if(this.$$.deleteScheduled&&!this.$$.preservePointerOnDelete){throwBindingError("Object already scheduled for deletion")}detachFinalizer(this);releaseClassHandle(this.$$);if(!this.$$.preservePointerOnDelete){this.$$.smartPtr=undefined;this.$$.ptr=undefined}},isDeleted(){return!this.$$.ptr},deleteLater(){if(!this.$$.ptr){throwInstanceAlreadyDeleted(this)}if(this.$$.deleteScheduled&&!this.$$.preservePointerOnDelete){throwBindingError("Object already scheduled for 
deletion")}deletionQueue.push(this);if(deletionQueue.length===1&&delayFunction){delayFunction(flushPendingDeletes)}this.$$.deleteScheduled=true;return this}});const symbolDispose=Symbol.dispose;if(symbolDispose){proto[symbolDispose]=proto["delete"]}};function ClassHandle(){}var createNamedFunction=(name,func)=>Object.defineProperty(func,"name",{value:name});var registeredPointers={};var ensureOverloadTable=(proto,methodName,humanName)=>{if(undefined===proto[methodName].overloadTable){var prevFunc=proto[methodName];proto[methodName]=function(...args){if(!proto[methodName].overloadTable.hasOwnProperty(args.length)){throwBindingError(`Function '${humanName}' called with an invalid number of arguments (${args.length}) - expects one of (${proto[methodName].overloadTable})!`)}return proto[methodName].overloadTable[args.length].apply(this,args)};proto[methodName].overloadTable=[];proto[methodName].overloadTable[prevFunc.argCount]=prevFunc}};var exposePublicSymbol=(name,value,numArguments)=>{if(Module.hasOwnProperty(name)){if(undefined===numArguments||undefined!==Module[name].overloadTable&&undefined!==Module[name].overloadTable[numArguments]){throwBindingError(`Cannot register public name '${name}' twice`)}ensureOverloadTable(Module,name,name);if(Module[name].overloadTable.hasOwnProperty(numArguments)){throwBindingError(`Cannot register multiple overloads of a function with the same number of arguments (${numArguments})!`)}Module[name].overloadTable[numArguments]=value}else{Module[name]=value;Module[name].argCount=numArguments}};var char_0=48;var char_9=57;var makeLegalFunctionName=name=>{name=name.replace(/[^a-zA-Z0-9_]/g,"$");var f=name.charCodeAt(0);if(f>=char_0&&f<=char_9){return`_${name}`}return name};function 
RegisteredClass(name,constructor,instancePrototype,rawDestructor,baseClass,getActualType,upcast,downcast){this.name=name;this.constructor=constructor;this.instancePrototype=instancePrototype;this.rawDestructor=rawDestructor;this.baseClass=baseClass;this.getActualType=getActualType;this.upcast=upcast;this.downcast=downcast;this.pureVirtualFunctions=[]}var upcastPointer=(ptr,ptrClass,desiredClass)=>{while(ptrClass!==desiredClass){if(!ptrClass.upcast){throwBindingError(`Expected null or instance of ${desiredClass.name}, got an instance of ${ptrClass.name}`)}ptr=ptrClass.upcast(ptr);ptrClass=ptrClass.baseClass}return ptr};var embindRepr=v=>{if(v===null){return"null"}var t=typeof v;if(t==="object"||t==="array"||t==="function"){return v.toString()}else{return""+v}};function constNoSmartPtrRawPointerToWireType(destructors,handle){if(handle===null){if(this.isReference){throwBindingError(`null is not a valid ${this.name}`)}return 0}if(!handle.$$){throwBindingError(`Cannot pass "${embindRepr(handle)}" as a ${this.name}`)}if(!handle.$$.ptr){throwBindingError(`Cannot pass deleted object as a pointer of type ${this.name}`)}var handleClass=handle.$$.ptrType.registeredClass;var ptr=upcastPointer(handle.$$.ptr,handleClass,this.registeredClass);return ptr}function genericPointerToWireType(destructors,handle){var ptr;if(handle===null){if(this.isReference){throwBindingError(`null is not a valid ${this.name}`)}if(this.isSmartPointer){ptr=this.rawConstructor();if(destructors!==null){destructors.push(this.rawDestructor,ptr)}return ptr}else{return 0}}if(!handle||!handle.$$){throwBindingError(`Cannot pass "${embindRepr(handle)}" as a ${this.name}`)}if(!handle.$$.ptr){throwBindingError(`Cannot pass deleted object as a pointer of type ${this.name}`)}if(!this.isConst&&handle.$$.ptrType.isConst){throwBindingError(`Cannot convert argument of type ${handle.$$.smartPtrType?handle.$$.smartPtrType.name:handle.$$.ptrType.name} to parameter type ${this.name}`)}var 
handleClass=handle.$$.ptrType.registeredClass;ptr=upcastPointer(handle.$$.ptr,handleClass,this.registeredClass);if(this.isSmartPointer){if(undefined===handle.$$.smartPtr){throwBindingError("Passing raw pointer to smart pointer is illegal")}switch(this.sharingPolicy){case 0:if(handle.$$.smartPtrType===this){ptr=handle.$$.smartPtr}else{throwBindingError(`Cannot convert argument of type ${handle.$$.smartPtrType?handle.$$.smartPtrType.name:handle.$$.ptrType.name} to parameter type ${this.name}`)}break;case 1:ptr=handle.$$.smartPtr;break;case 2:if(handle.$$.smartPtrType===this){ptr=handle.$$.smartPtr}else{var clonedHandle=handle["clone"]();ptr=this.rawShare(ptr,Emval.toHandle(()=>clonedHandle["delete"]()));if(destructors!==null){destructors.push(this.rawDestructor,ptr)}}break;default:throwBindingError("Unsupporting sharing policy")}}return ptr}function nonConstNoSmartPtrRawPointerToWireType(destructors,handle){if(handle===null){if(this.isReference){throwBindingError(`null is not a valid ${this.name}`)}return 0}if(!handle.$$){throwBindingError(`Cannot pass "${embindRepr(handle)}" as a ${this.name}`)}if(!handle.$$.ptr){throwBindingError(`Cannot pass deleted object as a pointer of type ${this.name}`)}if(handle.$$.ptrType.isConst){throwBindingError(`Cannot convert argument of type ${handle.$$.ptrType.name} to parameter type ${this.name}`)}var handleClass=handle.$$.ptrType.registeredClass;var ptr=upcastPointer(handle.$$.ptr,handleClass,this.registeredClass);return ptr}var downcastPointer=(ptr,ptrClass,desiredClass)=>{if(ptrClass===desiredClass){return ptr}if(undefined===desiredClass.baseClass){return null}var rv=downcastPointer(ptr,ptrClass,desiredClass.baseClass);if(rv===null){return null}return desiredClass.downcast(rv)};var registeredInstances={};var getBasestPointer=(class_,ptr)=>{if(ptr===undefined){throwBindingError("ptr should not be undefined")}while(class_.baseClass){ptr=class_.upcast(ptr);class_=class_.baseClass}return ptr};var 
getInheritedInstance=(class_,ptr)=>{ptr=getBasestPointer(class_,ptr);return registeredInstances[ptr]};var makeClassHandle=(prototype,record)=>{if(!record.ptrType||!record.ptr){throwInternalError("makeClassHandle requires ptr and ptrType")}var hasSmartPtrType=!!record.smartPtrType;var hasSmartPtr=!!record.smartPtr;if(hasSmartPtrType!==hasSmartPtr){throwInternalError("Both smartPtrType and smartPtr must be specified")}record.count={value:1};return attachFinalizer(Object.create(prototype,{$$:{value:record,writable:true}}))};function RegisteredPointer_fromWireType(ptr){var rawPointer=this.getPointee(ptr);if(!rawPointer){this.destructor(ptr);return null}var registeredInstance=getInheritedInstance(this.registeredClass,rawPointer);if(undefined!==registeredInstance){if(0===registeredInstance.$$.count.value){registeredInstance.$$.ptr=rawPointer;registeredInstance.$$.smartPtr=ptr;return registeredInstance["clone"]()}else{var rv=registeredInstance["clone"]();this.destructor(ptr);return rv}}function makeDefaultHandle(){if(this.isSmartPointer){return makeClassHandle(this.registeredClass.instancePrototype,{ptrType:this.pointeeType,ptr:rawPointer,smartPtrType:this,smartPtr:ptr})}else{return makeClassHandle(this.registeredClass.instancePrototype,{ptrType:this,ptr})}}var actualType=this.registeredClass.getActualType(rawPointer);var registeredPointerRecord=registeredPointers[actualType];if(!registeredPointerRecord){return makeDefaultHandle.call(this)}var toType;if(this.isConst){toType=registeredPointerRecord.constPointerType}else{toType=registeredPointerRecord.pointerType}var dp=downcastPointer(rawPointer,this.registeredClass,toType.registeredClass);if(dp===null){return makeDefaultHandle.call(this)}if(this.isSmartPointer){return makeClassHandle(toType.registeredClass.instancePrototype,{ptrType:toType,ptr:dp,smartPtrType:this,smartPtr:ptr})}else{return makeClassHandle(toType.registeredClass.instancePrototype,{ptrType:toType,ptr:dp})}}var 
init_RegisteredPointer=()=>{Object.assign(RegisteredPointer.prototype,{getPointee(ptr){if(this.rawGetPointee){ptr=this.rawGetPointee(ptr)}return ptr},destructor(ptr){this.rawDestructor?.(ptr)},readValueFromPointer:readPointer,fromWireType:RegisteredPointer_fromWireType})};function RegisteredPointer(name,registeredClass,isReference,isConst,isSmartPointer,pointeeType,sharingPolicy,rawGetPointee,rawConstructor,rawShare,rawDestructor){this.name=name;this.registeredClass=registeredClass;this.isReference=isReference;this.isConst=isConst;this.isSmartPointer=isSmartPointer;this.pointeeType=pointeeType;this.sharingPolicy=sharingPolicy;this.rawGetPointee=rawGetPointee;this.rawConstructor=rawConstructor;this.rawShare=rawShare;this.rawDestructor=rawDestructor;if(!isSmartPointer&®isteredClass.baseClass===undefined){if(isConst){this.toWireType=constNoSmartPtrRawPointerToWireType;this.destructorFunction=null}else{this.toWireType=nonConstNoSmartPtrRawPointerToWireType;this.destructorFunction=null}}else{this.toWireType=genericPointerToWireType}}var replacePublicSymbol=(name,value,numArguments)=>{if(!Module.hasOwnProperty(name)){throwInternalError("Replacing nonexistent public symbol")}if(undefined!==Module[name].overloadTable&&undefined!==numArguments){Module[name].overloadTable[numArguments]=value}else{Module[name]=value;Module[name].argCount=numArguments}};var wasmTableMirror=[];var getWasmTableEntry=funcPtr=>{var func=wasmTableMirror[funcPtr];if(!func){wasmTableMirror[funcPtr]=func=wasmTable.get(funcPtr)}return func};var embind__requireFunction=(signature,rawFunction,isAsync=false)=>{signature=AsciiToString(signature);function makeDynCaller(){var rtn=getWasmTableEntry(rawFunction);return rtn}var fp=makeDynCaller();if(typeof fp!="function"){throwBindingError(`unknown function pointer with signature ${signature}: ${rawFunction}`)}return fp};class UnboundTypeError extends Error{}var getTypeName=type=>{var ptr=___getTypeName(type);var rv=AsciiToString(ptr);_free(ptr);return rv};var 
throwUnboundTypeError=(message,types)=>{var unboundTypes=[];var seen={};function visit(type){if(seen[type]){return}if(registeredTypes[type]){return}if(typeDependencies[type]){typeDependencies[type].forEach(visit);return}unboundTypes.push(type);seen[type]=true}types.forEach(visit);throw new UnboundTypeError(`${message}: `+unboundTypes.map(getTypeName).join([", "]))};var __embind_register_class=(rawType,rawPointerType,rawConstPointerType,baseClassRawType,getActualTypeSignature,getActualType,upcastSignature,upcast,downcastSignature,downcast,name,destructorSignature,rawDestructor)=>{name=AsciiToString(name);getActualType=embind__requireFunction(getActualTypeSignature,getActualType);upcast&&=embind__requireFunction(upcastSignature,upcast);downcast&&=embind__requireFunction(downcastSignature,downcast);rawDestructor=embind__requireFunction(destructorSignature,rawDestructor);var legalFunctionName=makeLegalFunctionName(name);exposePublicSymbol(legalFunctionName,function(){throwUnboundTypeError(`Cannot construct ${name} due to unbound types`,[baseClassRawType])});whenDependentTypesAreResolved([rawType,rawPointerType,rawConstPointerType],baseClassRawType?[baseClassRawType]:[],base=>{base=base[0];var baseClass;var basePrototype;if(baseClassRawType){baseClass=base.registeredClass;basePrototype=baseClass.instancePrototype}else{basePrototype=ClassHandle.prototype}var constructor=createNamedFunction(name,function(...args){if(Object.getPrototypeOf(this)!==instancePrototype){throw new BindingError(`Use 'new' to construct ${name}`)}if(undefined===registeredClass.constructor_body){throw new BindingError(`${name} has no accessible constructor`)}var body=registeredClass.constructor_body[args.length];if(undefined===body){throw new BindingError(`Tried to invoke ctor of ${name} with invalid number of parameters (${args.length}) - expected (${Object.keys(registeredClass.constructor_body).toString()}) parameters instead!`)}return body.apply(this,args)});var 
instancePrototype=Object.create(basePrototype,{constructor:{value:constructor}});constructor.prototype=instancePrototype;var registeredClass=new RegisteredClass(name,constructor,instancePrototype,rawDestructor,baseClass,getActualType,upcast,downcast);if(registeredClass.baseClass){registeredClass.baseClass.__derivedClasses??=[];registeredClass.baseClass.__derivedClasses.push(registeredClass)}var referenceConverter=new RegisteredPointer(name,registeredClass,true,false,false);var pointerConverter=new RegisteredPointer(name+"*",registeredClass,false,false,false);var constPointerConverter=new RegisteredPointer(name+" const*",registeredClass,false,true,false);registeredPointers[rawType]={pointerType:pointerConverter,constPointerType:constPointerConverter};replacePublicSymbol(legalFunctionName,constructor);return[referenceConverter,pointerConverter,constPointerConverter]})};var heap32VectorToArray=(count,firstElement)=>{var array=[];for(var i=0;i>2])}return array};function usesDestructorStack(argTypes){for(var i=1;i{var rawArgTypes=heap32VectorToArray(argCount,rawArgTypesAddr);invoker=embind__requireFunction(invokerSignature,invoker);whenDependentTypesAreResolved([],[rawClassType],classType=>{classType=classType[0];var humanName=`constructor ${classType.name}`;if(undefined===classType.registeredClass.constructor_body){classType.registeredClass.constructor_body=[]}if(undefined!==classType.registeredClass.constructor_body[argCount-1]){throw new BindingError(`Cannot register multiple constructors with identical number of parameters (${argCount-1}) for class '${classType.name}'! 
Overload resolution is currently only performed using the parameter count, not actual type info!`)}classType.registeredClass.constructor_body[argCount-1]=()=>{throwUnboundTypeError(`Cannot construct ${classType.name} due to unbound types`,rawArgTypes)};whenDependentTypesAreResolved([],rawArgTypes,argTypes=>{argTypes.splice(1,0,null);classType.registeredClass.constructor_body[argCount-1]=craftInvokerFunction(humanName,argTypes,null,invoker,rawConstructor);return[]});return[]})};var getFunctionName=signature=>{signature=signature.trim();const argsIndex=signature.indexOf("(");if(argsIndex===-1)return signature;return signature.slice(0,argsIndex)};var __embind_register_class_function=(rawClassType,methodName,argCount,rawArgTypesAddr,invokerSignature,rawInvoker,context,isPureVirtual,isAsync,isNonnullReturn)=>{var rawArgTypes=heap32VectorToArray(argCount,rawArgTypesAddr);methodName=AsciiToString(methodName);methodName=getFunctionName(methodName);rawInvoker=embind__requireFunction(invokerSignature,rawInvoker,isAsync);whenDependentTypesAreResolved([],[rawClassType],classType=>{classType=classType[0];var humanName=`${classType.name}.${methodName}`;if(methodName.startsWith("@@")){methodName=Symbol[methodName.substring(2)]}if(isPureVirtual){classType.registeredClass.pureVirtualFunctions.push(methodName)}function unboundTypesHandler(){throwUnboundTypeError(`Cannot call ${humanName} due to unbound types`,rawArgTypes)}var proto=classType.registeredClass.instancePrototype;var method=proto[methodName];if(undefined===method||undefined===method.overloadTable&&method.className!==classType.name&&method.argCount===argCount-2){unboundTypesHandler.argCount=argCount-2;unboundTypesHandler.className=classType.name;proto[methodName]=unboundTypesHandler}else{ensureOverloadTable(proto,methodName,humanName);proto[methodName].overloadTable[argCount-2]=unboundTypesHandler}whenDependentTypesAreResolved([],rawArgTypes,argTypes=>{var 
memberFunction=craftInvokerFunction(humanName,argTypes,classType,rawInvoker,context,isAsync);if(undefined===proto[methodName].overloadTable){memberFunction.argCount=argCount-2;proto[methodName]=memberFunction}else{proto[methodName].overloadTable[argCount-2]=memberFunction}return[]});return[]})};var __embind_register_constant=(name,type,value)=>{name=AsciiToString(name);whenDependentTypesAreResolved([],[type],type=>{type=type[0];Module[name]=type.fromWireType(value);return[]})};var emval_freelist=[];var emval_handles=[0,1,,1,null,1,true,1,false,1];var __emval_decref=handle=>{if(handle>9&&0===--emval_handles[handle+1]){emval_handles[handle]=undefined;emval_freelist.push(handle)}};var Emval={toValue:handle=>{if(!handle){throwBindingError(`Cannot use deleted val. handle = ${handle}`)}return emval_handles[handle]},toHandle:value=>{switch(value){case undefined:return 2;case null:return 4;case true:return 6;case false:return 8;default:{const handle=emval_freelist.pop()||emval_handles.length;emval_handles[handle]=value;emval_handles[handle+1]=1;return handle}}}};var EmValType={name:"emscripten::val",fromWireType:handle=>{var rv=Emval.toValue(handle);__emval_decref(handle);return rv},toWireType:(destructors,value)=>Emval.toHandle(value),readValueFromPointer:readPointer,destructorFunction:null};var __embind_register_emval=rawType=>registerType(rawType,EmValType);var enumReadValueFromPointer=(name,width,signed)=>{switch(width){case 1:return signed?function(pointer){return this.fromWireType(HEAP8[pointer])}:function(pointer){return this.fromWireType(HEAPU8[pointer])};case 2:return signed?function(pointer){return this.fromWireType(HEAP16[pointer>>1])}:function(pointer){return this.fromWireType(HEAPU16[pointer>>1])};case 4:return signed?function(pointer){return this.fromWireType(HEAP32[pointer>>2])}:function(pointer){return this.fromWireType(HEAPU32[pointer>>2])};default:throw new TypeError(`invalid integer width (${width}): ${name}`)}};var 
__embind_register_enum=(rawType,name,size,isSigned)=>{name=AsciiToString(name);function ctor(){}ctor.values={};registerType(rawType,{name,constructor:ctor,fromWireType:function(c){return this.constructor.values[c]},toWireType:(destructors,c)=>c.value,readValueFromPointer:enumReadValueFromPointer(name,size,isSigned),destructorFunction:null});exposePublicSymbol(name,ctor)};var requireRegisteredType=(rawType,humanName)=>{var impl=registeredTypes[rawType];if(undefined===impl){throwBindingError(`${humanName} has unknown type ${getTypeName(rawType)}`)}return impl};var __embind_register_enum_value=(rawEnumType,name,enumValue)=>{var enumType=requireRegisteredType(rawEnumType,"enum");name=AsciiToString(name);var Enum=enumType.constructor;var Value=Object.create(enumType.constructor.prototype,{value:{value:enumValue},constructor:{value:createNamedFunction(`${enumType.name}_${name}`,function(){})}});Enum.values[enumValue]=Value;Enum[name]=Value};var floatReadValueFromPointer=(name,width)=>{switch(width){case 4:return function(pointer){return this.fromWireType(HEAPF32[pointer>>2])};case 8:return function(pointer){return this.fromWireType(HEAPF64[pointer>>3])};default:throw new TypeError(`invalid float width (${width}): ${name}`)}};var __embind_register_float=(rawType,name,size)=>{name=AsciiToString(name);registerType(rawType,{name,fromWireType:value=>value,toWireType:(destructors,value)=>value,readValueFromPointer:floatReadValueFromPointer(name,size),destructorFunction:null})};var __embind_register_function=(name,argCount,rawArgTypesAddr,signature,rawInvoker,fn,isAsync,isNonnullReturn)=>{var argTypes=heap32VectorToArray(argCount,rawArgTypesAddr);name=AsciiToString(name);name=getFunctionName(name);rawInvoker=embind__requireFunction(signature,rawInvoker,isAsync);exposePublicSymbol(name,function(){throwUnboundTypeError(`Cannot call ${name} due to unbound types`,argTypes)},argCount-1);whenDependentTypesAreResolved([],argTypes,argTypes=>{var 
invokerArgsArray=[argTypes[0],null].concat(argTypes.slice(1));replacePublicSymbol(name,craftInvokerFunction(name,invokerArgsArray,null,rawInvoker,fn,isAsync),argCount-1);return[]})};var __embind_register_integer=(primitiveType,name,size,minRange,maxRange)=>{name=AsciiToString(name);const isUnsignedType=minRange===0;let fromWireType=value=>value;if(isUnsignedType){var bitshift=32-8*size;fromWireType=value=>value<>>bitshift;maxRange=fromWireType(maxRange)}registerType(primitiveType,{name,fromWireType,toWireType:(destructors,value)=>value,readValueFromPointer:integerReadValueFromPointer(name,size,minRange!==0),destructorFunction:null})};var __embind_register_memory_view=(rawType,dataTypeIndex,name)=>{var typeMapping=[Int8Array,Uint8Array,Int16Array,Uint16Array,Int32Array,Uint32Array,Float32Array,Float64Array,BigInt64Array,BigUint64Array];var TA=typeMapping[dataTypeIndex];function decodeMemoryView(handle){var size=HEAPU32[handle>>2];var data=HEAPU32[handle+4>>2];return new TA(HEAP8.buffer,data,size)}name=AsciiToString(name);registerType(rawType,{name,fromWireType:decodeMemoryView,readValueFromPointer:decodeMemoryView},{ignoreDuplicateRegistrations:true})};var stringToUTF8=(str,outPtr,maxBytesToWrite)=>stringToUTF8Array(str,HEAPU8,outPtr,maxBytesToWrite);var __embind_register_std_string=(rawType,name)=>{name=AsciiToString(name);var stdStringIsUTF8=true;registerType(rawType,{name,fromWireType(value){var length=HEAPU32[value>>2];var payload=value+4;var str;if(stdStringIsUTF8){str=UTF8ToString(payload,length,true)}else{str="";for(var i=0;i>2]=length;if(valueIsOfTypeString){if(stdStringIsUTF8){stringToUTF8(value,ptr,length+1)}else{for(var i=0;i255){_free(base);throwBindingError("String has UTF-16 code units that do not fit in 8 bits")}HEAPU8[ptr+i]=charCode}}}else{HEAPU8.set(value,ptr)}if(destructors!==null){destructors.push(_free,base)}return base},readValueFromPointer:readPointer,destructorFunction(ptr){_free(ptr)}})};var UTF16Decoder=globalThis.TextDecoder?new 
TextDecoder("utf-16le"):undefined;var UTF16ToString=(ptr,maxBytesToRead,ignoreNul)=>{var idx=ptr>>1;var endIdx=findStringEnd(HEAPU16,idx,maxBytesToRead/2,ignoreNul);if(endIdx-idx>16&&UTF16Decoder)return UTF16Decoder.decode(HEAPU16.subarray(idx,endIdx));var str="";for(var i=idx;i{maxBytesToWrite??=2147483647;if(maxBytesToWrite<2)return 0;maxBytesToWrite-=2;var startPtr=outPtr;var numCharsToWrite=maxBytesToWrite>1]=codeUnit;outPtr+=2}HEAP16[outPtr>>1]=0;return outPtr-startPtr};var lengthBytesUTF16=str=>str.length*2;var UTF32ToString=(ptr,maxBytesToRead,ignoreNul)=>{var str="";var startIdx=ptr>>2;for(var i=0;!(i>=maxBytesToRead/4);i++){var utf32=HEAPU32[startIdx+i];if(!utf32&&!ignoreNul)break;str+=String.fromCodePoint(utf32)}return str};var stringToUTF32=(str,outPtr,maxBytesToWrite)=>{maxBytesToWrite??=2147483647;if(maxBytesToWrite<4)return 0;var startPtr=outPtr;var endPtr=startPtr+maxBytesToWrite-4;for(var i=0;i65535){i++}HEAP32[outPtr>>2]=codePoint;outPtr+=4;if(outPtr+4>endPtr)break}HEAP32[outPtr>>2]=0;return outPtr-startPtr};var lengthBytesUTF32=str=>{var len=0;for(var i=0;i65535){i++}len+=4}return len};var __embind_register_std_wstring=(rawType,charSize,name)=>{name=AsciiToString(name);var decodeString,encodeString,lengthBytesUTF;if(charSize===2){decodeString=UTF16ToString;encodeString=stringToUTF16;lengthBytesUTF=lengthBytesUTF16}else{decodeString=UTF32ToString;encodeString=stringToUTF32;lengthBytesUTF=lengthBytesUTF32}registerType(rawType,{name,fromWireType:value=>{var length=HEAPU32[value>>2];var str=decodeString(value+4,length*charSize,true);_free(value);return str},toWireType:(destructors,value)=>{if(!(typeof value=="string")){throwBindingError(`Cannot pass non-string to C++ string type ${name}`)}var length=lengthBytesUTF(value);var ptr=_malloc(4+length+charSize);HEAPU32[ptr>>2]=length/charSize;encodeString(value,ptr+4,length+charSize);if(destructors!==null){destructors.push(_free,ptr)}return 
ptr},readValueFromPointer:readPointer,destructorFunction(ptr){_free(ptr)}})};var __embind_register_value_object=(rawType,name,constructorSignature,rawConstructor,destructorSignature,rawDestructor)=>{structRegistrations[rawType]={name:AsciiToString(name),rawConstructor:embind__requireFunction(constructorSignature,rawConstructor),rawDestructor:embind__requireFunction(destructorSignature,rawDestructor),fields:[]}};var __embind_register_value_object_field=(structType,fieldName,getterReturnType,getterSignature,getter,getterContext,setterArgumentType,setterSignature,setter,setterContext)=>{structRegistrations[structType].fields.push({fieldName:AsciiToString(fieldName),getterReturnType,getter:embind__requireFunction(getterSignature,getter),getterContext,setterArgumentType,setter:embind__requireFunction(setterSignature,setter),setterContext})};var __embind_register_void=(rawType,name)=>{name=AsciiToString(name);registerType(rawType,{isVoid:true,name,fromWireType:()=>undefined,toWireType:(destructors,o)=>undefined})};var __emscripten_throw_longjmp=()=>{throw Infinity};var emval_methodCallers=[];var emval_addMethodCaller=caller=>{var id=emval_methodCallers.length;emval_methodCallers.push(caller);return id};var emval_lookupTypes=(argCount,argTypes)=>{var a=new Array(argCount);for(var i=0;i>2],`parameter ${i}`)}return a};var emval_returnValue=(toReturnWire,destructorsRef,handle)=>{var destructors=[];var result=toReturnWire(destructors,handle);if(destructors.length){HEAPU32[destructorsRef>>2]=Emval.toHandle(destructors)}return result};var emval_symbols={};var getStringOrSymbol=address=>{var symbol=emval_symbols[address];if(symbol===undefined){return AsciiToString(address)}return symbol};var __emval_create_invoker=(argCount,argTypesPtr,kind)=>{var GenericWireTypeSize=8;var[retType,...argTypes]=emval_lookupTypes(argCount,argTypesPtr);var toReturnWire=retType.toWireType.bind(retType);var argFromPtr=argTypes.map(type=>type.readValueFromPointer.bind(type));argCount--;var 
captures={toValue:Emval.toValue};var args=argFromPtr.map((argFromPtr,i)=>{var captureName=`argFromPtr${i}`;captures[captureName]=argFromPtr;return`${captureName}(args${i?"+"+i*GenericWireTypeSize:""})`});var functionBody;switch(kind){case 0:functionBody="toValue(handle)";break;case 2:functionBody="new (toValue(handle))";break;case 3:functionBody="";break;case 1:captures["getStringOrSymbol"]=getStringOrSymbol;functionBody="toValue(handle)[getStringOrSymbol(methodName)]";break}functionBody+=`(${args})`;if(!retType.isVoid){captures["toReturnWire"]=toReturnWire;captures["emval_returnValue"]=emval_returnValue;functionBody=`return emval_returnValue(toReturnWire, destructorsRef, ${functionBody})`}functionBody=`return function (handle, methodName, destructorsRef, args) {\n ${functionBody}\n }`;var invokerFunction=new Function(Object.keys(captures),functionBody)(...Object.values(captures));var functionName=`methodCaller<(${argTypes.map(t=>t.name)}) => ${retType.name}>`;return emval_addMethodCaller(createNamedFunction(functionName,invokerFunction))};var __emval_get_property=(handle,key)=>{handle=Emval.toValue(handle);key=Emval.toValue(key);return Emval.toHandle(handle[key])};var __emval_incref=handle=>{if(handle>9){emval_handles[handle+1]+=1}};var __emval_invoke=(caller,handle,methodName,destructorsRef,args)=>emval_methodCallers[caller](handle,methodName,destructorsRef,args);var __emval_new_cstring=v=>Emval.toHandle(getStringOrSymbol(v));var __emval_run_destructors=handle=>{var destructors=Emval.toValue(handle);runDestructors(destructors);__emval_decref(handle)};var INT53_MAX=9007199254740992;var INT53_MIN=-9007199254740992;var bigintToI53Checked=num=>numINT53_MAX?NaN:Number(num);function __mmap_js(len,prot,flags,fd,offset,allocated,addr){offset=bigintToI53Checked(offset);try{var stream=SYSCALLS.getStreamFromFD(fd);var res=FS.mmap(stream,len,offset,prot,flags);var ptr=res.ptr;HEAP32[allocated>>2]=res.allocated;HEAPU32[addr>>2]=ptr;return 0}catch(e){if(typeof 
FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}function __munmap_js(addr,len,prot,flags,fd,offset){offset=bigintToI53Checked(offset);try{var stream=SYSCALLS.getStreamFromFD(fd);if(prot&2){SYSCALLS.doMsync(addr,stream,len,flags,offset)}}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}var __tzset_js=(timezone,daylight,std_name,dst_name)=>{var currentYear=(new Date).getFullYear();var winter=new Date(currentYear,0,1);var summer=new Date(currentYear,6,1);var winterOffset=winter.getTimezoneOffset();var summerOffset=summer.getTimezoneOffset();var stdTimezoneOffset=Math.max(winterOffset,summerOffset);HEAPU32[timezone>>2]=stdTimezoneOffset*60;HEAP32[daylight>>2]=Number(winterOffset!=summerOffset);var extractZone=timezoneOffset=>{var sign=timezoneOffset>=0?"-":"+";var absOffset=Math.abs(timezoneOffset);var hours=String(Math.floor(absOffset/60)).padStart(2,"0");var minutes=String(absOffset%60).padStart(2,"0");return`UTC${sign}${hours}${minutes}`};var winterName=extractZone(winterOffset);var summerName=extractZone(summerOffset);if(summerOffsetperformance.now();var _emscripten_date_now=()=>Date.now();var nowIsMonotonic=1;var checkWasiClock=clock_id=>clock_id>=0&&clock_id<=3;function _clock_time_get(clk_id,ignored_precision,ptime){ignored_precision=bigintToI53Checked(ignored_precision);if(!checkWasiClock(clk_id)){return 28}var now;if(clk_id===0){now=_emscripten_date_now()}else if(nowIsMonotonic){now=_emscripten_get_now()}else{return 52}var nsec=Math.round(now*1e3*1e3);HEAP64[ptime>>3]=BigInt(nsec);return 0}var getHeapMax=()=>2147483648;var _emscripten_get_heap_max=()=>getHeapMax();var growMemory=size=>{var oldHeapSize=wasmMemory.buffer.byteLength;var pages=(size-oldHeapSize+65535)/65536|0;try{wasmMemory.grow(pages);updateMemoryViews();return 1}catch(e){}};var _emscripten_resize_heap=requestedSize=>{var oldSize=HEAPU8.length;requestedSize>>>=0;var maxHeapSize=getHeapMax();if(requestedSize>maxHeapSize){return false}for(var 
cutDown=1;cutDown<=4;cutDown*=2){var overGrownHeapSize=oldSize*(1+.2/cutDown);overGrownHeapSize=Math.min(overGrownHeapSize,requestedSize+100663296);var newSize=Math.min(maxHeapSize,alignMemory(Math.max(requestedSize,overGrownHeapSize),65536));var replacement=growMemory(newSize);if(replacement){return true}}return false};var ENV={};var getExecutableName=()=>thisProgram||"./this.program";var getEnvStrings=()=>{if(!getEnvStrings.strings){var lang=(typeof navigator=="object"&&navigator.language||"C").replace("-","_")+".UTF-8";var env={USER:"web_user",LOGNAME:"web_user",PATH:"/",PWD:"/",HOME:"/home/web_user",LANG:lang,_:getExecutableName()};for(var x in ENV){if(ENV[x]===undefined)delete env[x];else env[x]=ENV[x]}var strings=[];for(var x in env){strings.push(`${x}=${env[x]}`)}getEnvStrings.strings=strings}return getEnvStrings.strings};var _environ_get=(__environ,environ_buf)=>{var bufSize=0;var envp=0;for(var string of getEnvStrings()){var ptr=environ_buf+bufSize;HEAPU32[__environ+envp>>2]=ptr;bufSize+=stringToUTF8(string,ptr,Infinity)+1;envp+=4}return 0};var _environ_sizes_get=(penviron_count,penviron_buf_size)=>{var strings=getEnvStrings();HEAPU32[penviron_count>>2]=strings.length;var bufSize=0;for(var string of strings){bufSize+=lengthBytesUTF8(string)+1}HEAPU32[penviron_buf_size>>2]=bufSize;return 0};function _fd_close(fd){try{var stream=SYSCALLS.getStreamFromFD(fd);FS.close(stream);return 0}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return e.errno}}var doReadv=(stream,iov,iovcnt,offset)=>{var ret=0;for(var i=0;i>2];var len=HEAPU32[iov+4>>2];iov+=8;var curr=FS.read(stream,HEAP8,ptr,len,offset);if(curr<0)return-1;ret+=curr;if(curr>2]=num;return 0}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return e.errno}}function _fd_seek(fd,offset,whence,newOffset){offset=bigintToI53Checked(offset);try{if(isNaN(offset))return 61;var 
stream=SYSCALLS.getStreamFromFD(fd);FS.llseek(stream,offset,whence);HEAP64[newOffset>>3]=BigInt(stream.position);if(stream.getdents&&offset===0&&whence===0)stream.getdents=null;return 0}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return e.errno}}var doWritev=(stream,iov,iovcnt,offset)=>{var ret=0;for(var i=0;i>2];var len=HEAPU32[iov+4>>2];iov+=8;var curr=FS.write(stream,HEAP8,ptr,len,offset);if(curr<0)return-1;ret+=curr;if(curr>2]=num;return 0}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return e.errno}}FS.createPreloadedFile=FS_createPreloadedFile;FS.preloadFile=FS_preloadFile;FS.staticInit();init_ClassHandle();init_RegisteredPointer();{if(Module["noExitRuntime"])noExitRuntime=Module["noExitRuntime"];if(Module["preloadPlugins"])preloadPlugins=Module["preloadPlugins"];if(Module["print"])out=Module["print"];if(Module["printErr"])err=Module["printErr"];if(Module["wasmBinary"])wasmBinary=Module["wasmBinary"];if(Module["arguments"])arguments_=Module["arguments"];if(Module["thisProgram"])thisProgram=Module["thisProgram"];if(Module["preInit"]){if(typeof Module["preInit"]=="function")Module["preInit"]=[Module["preInit"]];while(Module["preInit"].length>0){Module["preInit"].shift()()}}}var ___getTypeName,_free,_malloc,_emscripten_builtin_memalign,_setThrew,__emscripten_stack_restore,_emscripten_stack_get_current,memory,__indirect_function_table,wasmMemory,wasmTable;function assignWasmExports(wasmExports){___getTypeName=wasmExports["aa"];_free=wasmExports["ba"];_malloc=wasmExports["da"];_emscripten_builtin_memalign=wasmExports["ea"];_setThrew=wasmExports["fa"];__emscripten_stack_restore=wasmExports["ga"];_emscripten_stack_get_current=wasmExports["ha"];memory=wasmMemory=wasmExports["_"];__indirect_function_table=wasmTable=wasmExports["ca"]}var 
wasmImports={s:___cxa_throw,z:___syscall_fcntl64,N:___syscall_fstat64,P:___syscall_ioctl,M:___syscall_newfstatat,A:___syscall_openat,S:__abort_js,r:__embind_finalize_value_object,C:__embind_register_bigint,Y:__embind_register_bool,u:__embind_register_class,t:__embind_register_class_constructor,a:__embind_register_class_function,d:__embind_register_constant,W:__embind_register_emval,l:__embind_register_enum,b:__embind_register_enum_value,B:__embind_register_float,f:__embind_register_function,m:__embind_register_integer,j:__embind_register_memory_view,X:__embind_register_std_string,w:__embind_register_std_wstring,p:__embind_register_value_object,c:__embind_register_value_object_field,Z:__embind_register_void,H:__emscripten_throw_longjmp,i:__emval_create_invoker,e:__emval_decref,n:__emval_get_property,k:__emval_incref,h:__emval_invoke,o:__emval_new_cstring,g:__emval_run_destructors,J:__mmap_js,K:__munmap_js,E:__tzset_js,R:_clock_time_get,Q:_emscripten_date_now,D:_emscripten_get_heap_max,x:_emscripten_get_now,I:_emscripten_resize_heap,F:_environ_get,G:_environ_sizes_get,v:_fd_close,O:_fd_read,L:_fd_seek,y:_fd_write,U:invoke_ii,q:invoke_vi,T:invoke_vii,V:invoke_viii};function invoke_vi(index,a1){var sp=stackSave();try{getWasmTableEntry(index)(a1)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_viii(index,a1,a2,a3){var sp=stackSave();try{getWasmTableEntry(index)(a1,a2,a3)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_ii(index,a1){var sp=stackSave();try{return getWasmTableEntry(index)(a1)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_vii(index,a1,a2){var sp=stackSave();try{getWasmTableEntry(index)(a1,a2)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function run(){if(runDependencies>0){dependenciesFulfilled=run;return}preRun();if(runDependencies>0){dependenciesFulfilled=run;return}function 
doRun(){Module["calledRun"]=true;if(ABORT)return;initRuntime();readyPromiseResolve?.(Module);Module["onRuntimeInitialized"]?.();postRun()}if(Module["setStatus"]){Module["setStatus"]("Running...");setTimeout(()=>{setTimeout(()=>Module["setStatus"](""),1);doRun()},1)}else{doRun()}}var wasmExports;wasmExports=await (createWasm());run();if(runtimeInitialized){moduleRtn=Module}else{moduleRtn=new Promise((resolve,reject)=>{readyPromiseResolve=resolve;readyPromiseReject=reject})} +;return moduleRtn}})();if(typeof exports==="object"&&typeof module==="object"){module.exports=BASIS;module.exports.default=BASIS}else if(typeof define==="function"&&define["amd"])define([],()=>BASIS); diff --git a/external/basis_universal/webgl/encoder/build/basis_encoder.wasm b/external/basis_universal/webgl/encoder/build/basis_encoder.wasm index 9e62a1bb6c..c77127dcc3 100644 Binary files a/external/basis_universal/webgl/encoder/build/basis_encoder.wasm and b/external/basis_universal/webgl/encoder/build/basis_encoder.wasm differ diff --git a/external/basis_universal/webgl/encoder/build/basis_encoder_threads.js b/external/basis_universal/webgl/encoder/build/basis_encoder_threads.js new file mode 100644 index 0000000000..0d4a5da28e --- /dev/null +++ b/external/basis_universal/webgl/encoder/build/basis_encoder_threads.js @@ -0,0 +1,2 @@ +var BASIS=(()=>{var _scriptName=globalThis.document?.currentScript?.src;return async function(moduleArg={}){var moduleRtn;var Module=moduleArg;var ENVIRONMENT_IS_WEB=!!globalThis.window;var ENVIRONMENT_IS_WORKER=!!globalThis.WorkerGlobalScope;var ENVIRONMENT_IS_NODE=globalThis.process?.versions?.node&&globalThis.process?.type!="renderer";var ENVIRONMENT_IS_PTHREAD=ENVIRONMENT_IS_WORKER&&self.name?.startsWith("em-pthread");var arguments_=[];var thisProgram="./this.program";var quit_=(status,toThrow)=>{throw toThrow};if(ENVIRONMENT_IS_WORKER){_scriptName=self.location.href}var scriptDirectory="";function locateFile(path){if(Module["locateFile"]){return 
Module["locateFile"](path,scriptDirectory)}return scriptDirectory+path}var readAsync,readBinary;if(ENVIRONMENT_IS_WEB||ENVIRONMENT_IS_WORKER){try{scriptDirectory=new URL(".",_scriptName).href}catch{}{if(ENVIRONMENT_IS_WORKER){readBinary=url=>{var xhr=new XMLHttpRequest;xhr.open("GET",url,false);xhr.responseType="arraybuffer";xhr.send(null);return new Uint8Array(xhr.response)}}readAsync=async url=>{var response=await fetch(url,{credentials:"same-origin"});if(response.ok){return response.arrayBuffer()}throw new Error(response.status+" : "+response.url)}}}else{}var out=console.log.bind(console);var err=console.error.bind(console);var wasmBinary;var wasmModule;var ABORT=false;var EXITSTATUS;function growMemViews(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}}var readyPromiseResolve,readyPromiseReject;var startWorker;if(ENVIRONMENT_IS_PTHREAD){var initializedJS=false;self.onunhandledrejection=e=>{throw e.reason||e};function handleMessage(e){try{var msgData=e["data"];var cmd=msgData.cmd;if(cmd==="load"){let messageQueue=[];self.onmessage=e=>messageQueue.push(e);startWorker=()=>{postMessage({cmd:"loaded"});for(let msg of messageQueue){handleMessage(msg)}self.onmessage=handleMessage};for(const handler of msgData.handlers){if(!Module[handler]||Module[handler].proxy){Module[handler]=(...args)=>{postMessage({cmd:"callHandler",handler,args})};if(handler=="print")out=Module[handler];if(handler=="printErr")err=Module[handler]}}wasmMemory=msgData.wasmMemory;updateMemoryViews();wasmModule=msgData.wasmModule;createWasm();run()}else if(cmd==="run"){establishStackSpace(msgData.pthread_ptr);__emscripten_thread_init(msgData.pthread_ptr,0,0,1,0,0);PThread.threadInitTLS();__emscripten_thread_mailbox_await(msgData.pthread_ptr);if(!initializedJS){__embind_initialize_bindings();initializedJS=true}try{invokeEntryPoint(msgData.start_routine,msgData.arg)}catch(ex){if(ex!="unwind"){throw ex}}}else if(msgData.target==="setimmediate"){}else 
if(cmd==="checkMailbox"){if(initializedJS){checkMailbox()}}else if(cmd){err(`worker: received unknown command ${cmd}`);err(msgData)}}catch(ex){__emscripten_thread_crashed();throw ex}}self.onmessage=handleMessage}var HEAP8,HEAPU8,HEAP16,HEAPU16,HEAP32,HEAPU32,HEAPF32,HEAPF64;var HEAP64,HEAPU64;var runtimeInitialized=false;function updateMemoryViews(){var b=wasmMemory.buffer;Module["HEAP8"]=HEAP8=new Int8Array(b);HEAP16=new Int16Array(b);HEAPU8=new Uint8Array(b);HEAPU16=new Uint16Array(b);HEAP32=new Int32Array(b);HEAPU32=new Uint32Array(b);HEAPF32=new Float32Array(b);HEAPF64=new Float64Array(b);HEAP64=new BigInt64Array(b);HEAPU64=new BigUint64Array(b)}function initMemory(){if(ENVIRONMENT_IS_PTHREAD){return}if(Module["wasmMemory"]){wasmMemory=Module["wasmMemory"]}else{var INITIAL_MEMORY=Module["INITIAL_MEMORY"]||536870912;wasmMemory=new WebAssembly.Memory({initial:INITIAL_MEMORY/65536,maximum:32768,shared:true})}updateMemoryViews()}function preRun(){if(Module["preRun"]){if(typeof Module["preRun"]=="function")Module["preRun"]=[Module["preRun"]];while(Module["preRun"].length){addOnPreRun(Module["preRun"].shift())}}callRuntimeCallbacks(onPreRuns)}function initRuntime(){runtimeInitialized=true;if(ENVIRONMENT_IS_PTHREAD)return startWorker();if(!Module["noFSInit"]&&!FS.initialized)FS.init();TTY.init();wasmExports["la"]();FS.ignorePermissions=false}function postRun(){if(ENVIRONMENT_IS_PTHREAD){return}if(Module["postRun"]){if(typeof Module["postRun"]=="function")Module["postRun"]=[Module["postRun"]];while(Module["postRun"].length){addOnPostRun(Module["postRun"].shift())}}callRuntimeCallbacks(onPostRuns)}function abort(what){Module["onAbort"]?.(what);what="Aborted("+what+")";err(what);ABORT=true;what+=". 
Build with -sASSERTIONS for more info.";var e=new WebAssembly.RuntimeError(what);readyPromiseReject?.(e);throw e}var wasmBinaryFile;function findWasmBinary(){return locateFile("basis_encoder_threads.wasm")}function getBinarySync(file){if(file==wasmBinaryFile&&wasmBinary){return new Uint8Array(wasmBinary)}if(readBinary){return readBinary(file)}throw"both async and sync fetching of the wasm failed"}async function getWasmBinary(binaryFile){if(!wasmBinary){try{var response=await readAsync(binaryFile);return new Uint8Array(response)}catch{}}return getBinarySync(binaryFile)}async function instantiateArrayBuffer(binaryFile,imports){try{var binary=await getWasmBinary(binaryFile);var instance=await WebAssembly.instantiate(binary,imports);return instance}catch(reason){err(`failed to asynchronously prepare wasm: ${reason}`);abort(reason)}}async function instantiateAsync(binary,binaryFile,imports){if(!binary){try{var response=fetch(binaryFile,{credentials:"same-origin"});var instantiationResult=await WebAssembly.instantiateStreaming(response,imports);return instantiationResult}catch(reason){err(`wasm streaming compile failed: ${reason}`);err("falling back to ArrayBuffer instantiation")}}return instantiateArrayBuffer(binaryFile,imports)}function getWasmImports(){assignWasmImports();var imports={a:wasmImports};return imports}async function createWasm(){function receiveInstance(instance,module){wasmExports=instance.exports;registerTLSInit(wasmExports["sa"]);assignWasmExports(wasmExports);wasmModule=module;return wasmExports}function receiveInstantiationResult(result){return receiveInstance(result["instance"],result["module"])}var info=getWasmImports();if(Module["instantiateWasm"]){return new Promise((resolve,reject)=>{Module["instantiateWasm"](info,(inst,mod)=>{resolve(receiveInstance(inst,mod))})})}if(ENVIRONMENT_IS_PTHREAD){var instance=new WebAssembly.Instance(wasmModule,getWasmImports());return receiveInstance(instance,wasmModule)}wasmBinaryFile??=findWasmBinary();var 
result=await instantiateAsync(wasmBinary,wasmBinaryFile,info);var exports=receiveInstantiationResult(result);return exports}class ExitStatus{name="ExitStatus";constructor(status){this.message=`Program terminated with exit(${status})`;this.status=status}}var terminateWorker=worker=>{worker.terminate();worker.onmessage=e=>{}};var cleanupThread=pthread_ptr=>{var worker=PThread.pthreads[pthread_ptr];PThread.returnWorkerToPool(worker)};var callRuntimeCallbacks=callbacks=>{while(callbacks.length>0){callbacks.shift()(Module)}};var onPreRuns=[];var addOnPreRun=cb=>onPreRuns.push(cb);var runDependencies=0;var dependenciesFulfilled=null;var removeRunDependency=id=>{runDependencies--;Module["monitorRunDependencies"]?.(runDependencies);if(runDependencies==0){if(dependenciesFulfilled){var callback=dependenciesFulfilled;dependenciesFulfilled=null;callback()}}};var addRunDependency=id=>{runDependencies++;Module["monitorRunDependencies"]?.(runDependencies)};var spawnThread=threadParams=>{var worker=PThread.getNewWorker();if(!worker){return 6}PThread.runningWorkers.push(worker);PThread.pthreads[threadParams.pthread_ptr]=worker;worker.pthread_ptr=threadParams.pthread_ptr;var msg={cmd:"run",start_routine:threadParams.startRoutine,arg:threadParams.arg,pthread_ptr:threadParams.pthread_ptr};worker.postMessage(msg,threadParams.transferList);return 0};var runtimeKeepaliveCounter=0;var keepRuntimeAlive=()=>noExitRuntime||runtimeKeepaliveCounter>0;var stackSave=()=>_emscripten_stack_get_current();var stackRestore=val=>__emscripten_stack_restore(val);var stackAlloc=sz=>__emscripten_stack_alloc(sz);var proxyToMainThread=(funcIndex,emAsmAddr,sync,...callArgs)=>{var serializedNumCallArgs=callArgs.length*2;var sp=stackSave();var args=stackAlloc(serializedNumCallArgs*8);var b=args>>3;for(var i=0;i{EXITSTATUS=status;if(ENVIRONMENT_IS_PTHREAD){exitOnMainThread(status);throw"unwind"}_proc_exit(status)};var _exit=exitJS;var 
PThread={unusedWorkers:[],runningWorkers:[],tlsInitFunctions:[],pthreads:{},init(){if(!ENVIRONMENT_IS_PTHREAD){PThread.initMainThread()}},initMainThread(){var pthreadPoolSize=18;while(pthreadPoolSize--){PThread.allocateUnusedWorker()}addOnPreRun(async()=>{var pthreadPoolReady=PThread.loadWasmModuleToAllWorkers();addRunDependency("loading-workers");await pthreadPoolReady;removeRunDependency("loading-workers")})},terminateAllThreads:()=>{for(var worker of PThread.runningWorkers){terminateWorker(worker)}for(var worker of PThread.unusedWorkers){terminateWorker(worker)}PThread.unusedWorkers=[];PThread.runningWorkers=[];PThread.pthreads={}},returnWorkerToPool:worker=>{var pthread_ptr=worker.pthread_ptr;delete PThread.pthreads[pthread_ptr];PThread.unusedWorkers.push(worker);PThread.runningWorkers.splice(PThread.runningWorkers.indexOf(worker),1);worker.pthread_ptr=0;__emscripten_thread_free_data(pthread_ptr)},threadInitTLS(){PThread.tlsInitFunctions.forEach(f=>f())},loadWasmModuleToWorker:worker=>new Promise(onFinishedLoading=>{worker.onmessage=e=>{var d=e["data"];var cmd=d.cmd;if(d.targetThread&&d.targetThread!=_pthread_self()){var targetWorker=PThread.pthreads[d.targetThread];if(targetWorker){targetWorker.postMessage(d,d.transferList)}else{err(`Internal error! 
Worker sent a message "${cmd}" to target pthread ${d.targetThread}, but that thread no longer exists!`)}return}if(cmd==="checkMailbox"){checkMailbox()}else if(cmd==="spawnThread"){spawnThread(d)}else if(cmd==="cleanupThread"){callUserCallback(()=>cleanupThread(d.thread))}else if(cmd==="loaded"){worker.loaded=true;onFinishedLoading(worker)}else if(d.target==="setimmediate"){worker.postMessage(d)}else if(cmd==="callHandler"){Module[d.handler](...d.args)}else if(cmd){err(`worker sent an unknown command ${cmd}`)}};worker.onerror=e=>{var message="worker sent an error!";err(`${message} ${e.filename}:${e.lineno}: ${e.message}`);throw e};var handlers=[];var knownHandlers=["onExit","onAbort","print","printErr"];for(var handler of knownHandlers){if(Module.propertyIsEnumerable(handler)){handlers.push(handler)}}worker.postMessage({cmd:"load",handlers,wasmMemory,wasmModule})}),async loadWasmModuleToAllWorkers(){if(ENVIRONMENT_IS_PTHREAD){return}let pthreadPoolReady=Promise.all(PThread.unusedWorkers.map(PThread.loadWasmModuleToWorker));return pthreadPoolReady},allocateUnusedWorker(){var worker;var pthreadMainJs=_scriptName;if(Module["mainScriptUrlOrBlob"]){pthreadMainJs=Module["mainScriptUrlOrBlob"];if(typeof pthreadMainJs!="string"){pthreadMainJs=URL.createObjectURL(pthreadMainJs)}}worker=new Worker(pthreadMainJs,{name:"em-pthread"});PThread.unusedWorkers.push(worker)},getNewWorker(){if(PThread.unusedWorkers.length==0){PThread.allocateUnusedWorker();PThread.loadWasmModuleToWorker(PThread.unusedWorkers[0])}return PThread.unusedWorkers.pop()}};var onPostRuns=[];var addOnPostRun=cb=>onPostRuns.push(cb);function establishStackSpace(pthread_ptr){var stackHigh=(growMemViews(),HEAPU32)[pthread_ptr+52>>2];var stackSize=(growMemViews(),HEAPU32)[pthread_ptr+56>>2];var stackLow=stackHigh-stackSize;_emscripten_stack_set_limits(stackHigh,stackLow);stackRestore(stackHigh)}var wasmTableMirror=[];var getWasmTableEntry=funcPtr=>{var 
func=wasmTableMirror[funcPtr];if(!func){wasmTableMirror[funcPtr]=func=wasmTable.get(funcPtr)}return func};var invokeEntryPoint=(ptr,arg)=>{runtimeKeepaliveCounter=0;noExitRuntime=0;var result=getWasmTableEntry(ptr)(arg);function finish(result){if(keepRuntimeAlive()){EXITSTATUS=result;return}__emscripten_thread_exit(result)}finish(result)};var noExitRuntime=true;var registerTLSInit=tlsInitFunc=>PThread.tlsInitFunctions.push(tlsInitFunc);var wasmMemory;class ExceptionInfo{constructor(excPtr){this.excPtr=excPtr;this.ptr=excPtr-24}set_type(type){(growMemViews(),HEAPU32)[this.ptr+4>>2]=type}get_type(){return(growMemViews(),HEAPU32)[this.ptr+4>>2]}set_destructor(destructor){(growMemViews(),HEAPU32)[this.ptr+8>>2]=destructor}get_destructor(){return(growMemViews(),HEAPU32)[this.ptr+8>>2]}set_caught(caught){caught=caught?1:0;(growMemViews(),HEAP8)[this.ptr+12]=caught}get_caught(){return(growMemViews(),HEAP8)[this.ptr+12]!=0}set_rethrown(rethrown){rethrown=rethrown?1:0;(growMemViews(),HEAP8)[this.ptr+13]=rethrown}get_rethrown(){return(growMemViews(),HEAP8)[this.ptr+13]!=0}init(type,destructor){this.set_adjusted_ptr(0);this.set_type(type);this.set_destructor(destructor)}set_adjusted_ptr(adjustedPtr){(growMemViews(),HEAPU32)[this.ptr+16>>2]=adjustedPtr}get_adjusted_ptr(){return(growMemViews(),HEAPU32)[this.ptr+16>>2]}}var exceptionLast=0;var uncaughtExceptionCount=0;var ___cxa_throw=(ptr,type,destructor)=>{var info=new ExceptionInfo(ptr);info.init(type,destructor);exceptionLast=ptr;uncaughtExceptionCount++;throw exceptionLast};function pthreadCreateProxied(pthread_ptr,attr,startRoutine,arg){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(2,0,1,pthread_ptr,attr,startRoutine,arg);return ___pthread_create_js(pthread_ptr,attr,startRoutine,arg)}var _emscripten_has_threading_support=()=>!!globalThis.SharedArrayBuffer;var ___pthread_create_js=(pthread_ptr,attr,startRoutine,arg)=>{if(!_emscripten_has_threading_support()){return 6}var transferList=[];var 
error=0;if(ENVIRONMENT_IS_PTHREAD&&(transferList.length===0||error)){return pthreadCreateProxied(pthread_ptr,attr,startRoutine,arg)}if(error)return error;var threadParams={startRoutine,pthread_ptr,arg,transferList};if(ENVIRONMENT_IS_PTHREAD){threadParams.cmd="spawnThread";postMessage(threadParams,transferList);return 0}return spawnThread(threadParams)};var syscallGetVarargI=()=>{var ret=(growMemViews(),HEAP32)[+SYSCALLS.varargs>>2];SYSCALLS.varargs+=4;return ret};var syscallGetVarargP=syscallGetVarargI;var PATH={isAbs:path=>path.charAt(0)==="/",splitPath:filename=>{var splitPathRe=/^(\/?|)([\s\S]*?)((?:\.{1,2}|[^\/]+?|)(\.[^.\/]*|))(?:[\/]*)$/;return splitPathRe.exec(filename).slice(1)},normalizeArray:(parts,allowAboveRoot)=>{var up=0;for(var i=parts.length-1;i>=0;i--){var last=parts[i];if(last==="."){parts.splice(i,1)}else if(last===".."){parts.splice(i,1);up++}else if(up){parts.splice(i,1);up--}}if(allowAboveRoot){for(;up;up--){parts.unshift("..")}}return parts},normalize:path=>{var isAbsolute=PATH.isAbs(path),trailingSlash=path.slice(-1)==="/";path=PATH.normalizeArray(path.split("/").filter(p=>!!p),!isAbsolute).join("/");if(!path&&!isAbsolute){path="."}if(path&&trailingSlash){path+="/"}return(isAbsolute?"/":"")+path},dirname:path=>{var result=PATH.splitPath(path),root=result[0],dir=result[1];if(!root&&!dir){return"."}if(dir){dir=dir.slice(0,-1)}return root+dir},basename:path=>path&&path.match(/([^\/]+|\/)\/*$/)[1],join:(...paths)=>PATH.normalize(paths.join("/")),join2:(l,r)=>PATH.normalize(l+"/"+r)};var initRandomFill=()=>view=>view.set(crypto.getRandomValues(new Uint8Array(view.byteLength)));var randomFill=view=>{(randomFill=initRandomFill())(view)};var PATH_FS={resolve:(...args)=>{var resolvedPath="",resolvedAbsolute=false;for(var i=args.length-1;i>=-1&&!resolvedAbsolute;i--){var path=i>=0?args[i]:FS.cwd();if(typeof path!="string"){throw new TypeError("Arguments to path.resolve must be strings")}else 
if(!path){return""}resolvedPath=path+"/"+resolvedPath;resolvedAbsolute=PATH.isAbs(path)}resolvedPath=PATH.normalizeArray(resolvedPath.split("/").filter(p=>!!p),!resolvedAbsolute).join("/");return(resolvedAbsolute?"/":"")+resolvedPath||"."},relative:(from,to)=>{from=PATH_FS.resolve(from).slice(1);to=PATH_FS.resolve(to).slice(1);function trim(arr){var start=0;for(;start=0;end--){if(arr[end]!=="")break}if(start>end)return[];return arr.slice(start,end-start+1)}var fromParts=trim(from.split("/"));var toParts=trim(to.split("/"));var length=Math.min(fromParts.length,toParts.length);var samePartsLength=length;for(var i=0;i{var maxIdx=idx+maxBytesToRead;if(ignoreNul)return maxIdx;while(heapOrArray[idx]&&!(idx>=maxIdx))++idx;return idx};var UTF8ArrayToString=(heapOrArray,idx=0,maxBytesToRead,ignoreNul)=>{var endPtr=findStringEnd(heapOrArray,idx,maxBytesToRead,ignoreNul);if(endPtr-idx>16&&heapOrArray.buffer&&UTF8Decoder){return UTF8Decoder.decode(heapOrArray.buffer instanceof ArrayBuffer?heapOrArray.subarray(idx,endPtr):heapOrArray.slice(idx,endPtr))}var str="";while(idx>10,56320|ch&1023)}}return str};var FS_stdin_getChar_buffer=[];var lengthBytesUTF8=str=>{var len=0;for(var i=0;i=55296&&c<=57343){len+=4;++i}else{len+=3}}return len};var stringToUTF8Array=(str,heap,outIdx,maxBytesToWrite)=>{if(!(maxBytesToWrite>0))return 0;var startIdx=outIdx;var endIdx=outIdx+maxBytesToWrite-1;for(var i=0;i=endIdx)break;heap[outIdx++]=u}else if(u<=2047){if(outIdx+1>=endIdx)break;heap[outIdx++]=192|u>>6;heap[outIdx++]=128|u&63}else if(u<=65535){if(outIdx+2>=endIdx)break;heap[outIdx++]=224|u>>12;heap[outIdx++]=128|u>>6&63;heap[outIdx++]=128|u&63}else{if(outIdx+3>=endIdx)break;heap[outIdx++]=240|u>>18;heap[outIdx++]=128|u>>12&63;heap[outIdx++]=128|u>>6&63;heap[outIdx++]=128|u&63;i++}}heap[outIdx]=0;return outIdx-startIdx};var intArrayFromString=(stringy,dontAddNull,length)=>{var len=length>0?length:lengthBytesUTF8(stringy)+1;var u8array=new Array(len);var 
numBytesWritten=stringToUTF8Array(stringy,u8array,0,u8array.length);if(dontAddNull)u8array.length=numBytesWritten;return u8array};var FS_stdin_getChar=()=>{if(!FS_stdin_getChar_buffer.length){var result=null;if(globalThis.window?.prompt){result=window.prompt("Input: ");if(result!==null){result+="\n"}}else{}if(!result){return null}FS_stdin_getChar_buffer=intArrayFromString(result,true)}return FS_stdin_getChar_buffer.shift()};var TTY={ttys:[],init(){},shutdown(){},register(dev,ops){TTY.ttys[dev]={input:[],output:[],ops};FS.registerDevice(dev,TTY.stream_ops)},stream_ops:{open(stream){var tty=TTY.ttys[stream.node.rdev];if(!tty){throw new FS.ErrnoError(43)}stream.tty=tty;stream.seekable=false},close(stream){stream.tty.ops.fsync(stream.tty)},fsync(stream){stream.tty.ops.fsync(stream.tty)},read(stream,buffer,offset,length,pos){if(!stream.tty||!stream.tty.ops.get_char){throw new FS.ErrnoError(60)}var bytesRead=0;for(var i=0;i0){out(UTF8ArrayToString(tty.output));tty.output=[]}},ioctl_tcgets(tty){return{c_iflag:25856,c_oflag:5,c_cflag:191,c_lflag:35387,c_cc:[3,28,127,21,4,0,1,0,17,19,26,0,18,15,23,22,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}},ioctl_tcsets(tty,optional_actions,data){return 0},ioctl_tiocgwinsz(tty){return[24,80]}},default_tty1_ops:{put_char(tty,val){if(val===null||val===10){err(UTF8ArrayToString(tty.output));tty.output=[]}else{if(val!=0)tty.output.push(val)}},fsync(tty){if(tty.output?.length>0){err(UTF8ArrayToString(tty.output));tty.output=[]}}}};var zeroMemory=(ptr,size)=>(growMemViews(),HEAPU8).fill(0,ptr,ptr+size);var alignMemory=(size,alignment)=>Math.ceil(size/alignment)*alignment;var mmapAlloc=size=>{size=alignMemory(size,65536);var ptr=_emscripten_builtin_memalign(65536,size);if(ptr)zeroMemory(ptr,size);return ptr};var MEMFS={ops_table:null,mount(mount){return MEMFS.createNode(null,"/",16895,0)},createNode(parent,name,mode,dev){if(FS.isBlkdev(mode)||FS.isFIFO(mode)){throw new 
FS.ErrnoError(63)}MEMFS.ops_table||={dir:{node:{getattr:MEMFS.node_ops.getattr,setattr:MEMFS.node_ops.setattr,lookup:MEMFS.node_ops.lookup,mknod:MEMFS.node_ops.mknod,rename:MEMFS.node_ops.rename,unlink:MEMFS.node_ops.unlink,rmdir:MEMFS.node_ops.rmdir,readdir:MEMFS.node_ops.readdir,symlink:MEMFS.node_ops.symlink},stream:{llseek:MEMFS.stream_ops.llseek}},file:{node:{getattr:MEMFS.node_ops.getattr,setattr:MEMFS.node_ops.setattr},stream:{llseek:MEMFS.stream_ops.llseek,read:MEMFS.stream_ops.read,write:MEMFS.stream_ops.write,mmap:MEMFS.stream_ops.mmap,msync:MEMFS.stream_ops.msync}},link:{node:{getattr:MEMFS.node_ops.getattr,setattr:MEMFS.node_ops.setattr,readlink:MEMFS.node_ops.readlink},stream:{}},chrdev:{node:{getattr:MEMFS.node_ops.getattr,setattr:MEMFS.node_ops.setattr},stream:FS.chrdev_stream_ops}};var node=FS.createNode(parent,name,mode,dev);if(FS.isDir(node.mode)){node.node_ops=MEMFS.ops_table.dir.node;node.stream_ops=MEMFS.ops_table.dir.stream;node.contents={}}else if(FS.isFile(node.mode)){node.node_ops=MEMFS.ops_table.file.node;node.stream_ops=MEMFS.ops_table.file.stream;node.usedBytes=0;node.contents=null}else if(FS.isLink(node.mode)){node.node_ops=MEMFS.ops_table.link.node;node.stream_ops=MEMFS.ops_table.link.stream}else if(FS.isChrdev(node.mode)){node.node_ops=MEMFS.ops_table.chrdev.node;node.stream_ops=MEMFS.ops_table.chrdev.stream}node.atime=node.mtime=node.ctime=Date.now();if(parent){parent.contents[name]=node;parent.atime=parent.mtime=parent.ctime=node.atime}return node},getFileDataAsTypedArray(node){if(!node.contents)return new Uint8Array(0);if(node.contents.subarray)return node.contents.subarray(0,node.usedBytes);return new Uint8Array(node.contents)},expandFileStorage(node,newCapacity){var prevCapacity=node.contents?node.contents.length:0;if(prevCapacity>=newCapacity)return;var CAPACITY_DOUBLING_MAX=1024*1024;newCapacity=Math.max(newCapacity,prevCapacity*(prevCapacity>>0);if(prevCapacity!=0)newCapacity=Math.max(newCapacity,256);var 
oldContents=node.contents;node.contents=new Uint8Array(newCapacity);if(node.usedBytes>0)node.contents.set(oldContents.subarray(0,node.usedBytes),0)},resizeFileStorage(node,newSize){if(node.usedBytes==newSize)return;if(newSize==0){node.contents=null;node.usedBytes=0}else{var oldContents=node.contents;node.contents=new Uint8Array(newSize);if(oldContents){node.contents.set(oldContents.subarray(0,Math.min(newSize,node.usedBytes)))}node.usedBytes=newSize}},node_ops:{getattr(node){var attr={};attr.dev=FS.isChrdev(node.mode)?node.id:1;attr.ino=node.id;attr.mode=node.mode;attr.nlink=1;attr.uid=0;attr.gid=0;attr.rdev=node.rdev;if(FS.isDir(node.mode)){attr.size=4096}else if(FS.isFile(node.mode)){attr.size=node.usedBytes}else if(FS.isLink(node.mode)){attr.size=node.link.length}else{attr.size=0}attr.atime=new Date(node.atime);attr.mtime=new Date(node.mtime);attr.ctime=new Date(node.ctime);attr.blksize=4096;attr.blocks=Math.ceil(attr.size/attr.blksize);return attr},setattr(node,attr){for(const key of["mode","atime","mtime","ctime"]){if(attr[key]!=null){node[key]=attr[key]}}if(attr.size!==undefined){MEMFS.resizeFileStorage(node,attr.size)}},lookup(parent,name){if(!MEMFS.doesNotExistError){MEMFS.doesNotExistError=new FS.ErrnoError(44);MEMFS.doesNotExistError.stack=""}throw MEMFS.doesNotExistError},mknod(parent,name,mode,dev){return MEMFS.createNode(parent,name,mode,dev)},rename(old_node,new_dir,new_name){var new_node;try{new_node=FS.lookupNode(new_dir,new_name)}catch(e){}if(new_node){if(FS.isDir(old_node.mode)){for(var i in new_node.contents){throw new FS.ErrnoError(55)}}FS.hashRemoveNode(new_node)}delete old_node.parent.contents[old_node.name];new_dir.contents[new_name]=old_node;old_node.name=new_name;new_dir.ctime=new_dir.mtime=old_node.parent.ctime=old_node.parent.mtime=Date.now()},unlink(parent,name){delete parent.contents[name];parent.ctime=parent.mtime=Date.now()},rmdir(parent,name){var node=FS.lookupNode(parent,name);for(var i in node.contents){throw new 
FS.ErrnoError(55)}delete parent.contents[name];parent.ctime=parent.mtime=Date.now()},readdir(node){return[".","..",...Object.keys(node.contents)]},symlink(parent,newname,oldpath){var node=MEMFS.createNode(parent,newname,511|40960,0);node.link=oldpath;return node},readlink(node){if(!FS.isLink(node.mode)){throw new FS.ErrnoError(28)}return node.link}},stream_ops:{read(stream,buffer,offset,length,position){var contents=stream.node.contents;if(position>=stream.node.usedBytes)return 0;var size=Math.min(stream.node.usedBytes-position,length);if(size>8&&contents.subarray){buffer.set(contents.subarray(position,position+size),offset)}else{for(var i=0;i0||position+length{var flagModes={r:0,"r+":2,w:512|64|1,"w+":512|64|2,a:1024|64|1,"a+":1024|64|2};var flags=flagModes[str];if(typeof flags=="undefined"){throw new Error(`Unknown file open mode: ${str}`)}return flags};var FS_getMode=(canRead,canWrite)=>{var mode=0;if(canRead)mode|=292|73;if(canWrite)mode|=146;return mode};var asyncLoad=async url=>{var arrayBuffer=await readAsync(url);return new Uint8Array(arrayBuffer)};var FS_createDataFile=(...args)=>FS.createDataFile(...args);var getUniqueRunDependency=id=>id;var preloadPlugins=[];var FS_handledByPreloadPlugin=async(byteArray,fullname)=>{if(typeof Browser!="undefined")Browser.init();for(var plugin of preloadPlugins){if(plugin["canHandle"](fullname)){return plugin["handle"](byteArray,fullname)}}return byteArray};var FS_preloadFile=async(parent,name,url,canRead,canWrite,dontCreateFile,canOwn,preFinish)=>{var fullname=name?PATH_FS.resolve(PATH.join2(parent,name)):parent;var dep=getUniqueRunDependency(`cp ${fullname}`);addRunDependency(dep);try{var byteArray=url;if(typeof url=="string"){byteArray=await asyncLoad(url)}byteArray=await FS_handledByPreloadPlugin(byteArray,fullname);preFinish?.();if(!dontCreateFile){FS_createDataFile(parent,name,byteArray,canRead,canWrite,canOwn)}}finally{removeRunDependency(dep)}};var 
FS_createPreloadedFile=(parent,name,url,canRead,canWrite,onload,onerror,dontCreateFile,canOwn,preFinish)=>{FS_preloadFile(parent,name,url,canRead,canWrite,dontCreateFile,canOwn,preFinish).then(onload).catch(onerror)};var FS={root:null,mounts:[],devices:{},streams:[],nextInode:1,nameTable:null,currentPath:"/",initialized:false,ignorePermissions:true,filesystems:null,syncFSRequests:0,readFiles:{},ErrnoError:class{name="ErrnoError";constructor(errno){this.errno=errno}},FSStream:class{shared={};get object(){return this.node}set object(val){this.node=val}get isRead(){return(this.flags&2097155)!==1}get isWrite(){return(this.flags&2097155)!==0}get isAppend(){return this.flags&1024}get flags(){return this.shared.flags}set flags(val){this.shared.flags=val}get position(){return this.shared.position}set position(val){this.shared.position=val}},FSNode:class{node_ops={};stream_ops={};readMode=292|73;writeMode=146;mounted=null;constructor(parent,name,mode,rdev){if(!parent){parent=this}this.parent=parent;this.mount=parent.mount;this.id=FS.nextInode++;this.name=name;this.mode=mode;this.rdev=rdev;this.atime=this.mtime=this.ctime=Date.now()}get read(){return(this.mode&this.readMode)===this.readMode}set read(val){val?this.mode|=this.readMode:this.mode&=~this.readMode}get write(){return(this.mode&this.writeMode)===this.writeMode}set write(val){val?this.mode|=this.writeMode:this.mode&=~this.writeMode}get isFolder(){return FS.isDir(this.mode)}get isDevice(){return FS.isChrdev(this.mode)}},lookupPath(path,opts={}){if(!path){throw new FS.ErrnoError(44)}opts.follow_mount??=true;if(!PATH.isAbs(path)){path=FS.cwd()+"/"+path}linkloop:for(var nlinks=0;nlinks<40;nlinks++){var parts=path.split("/").filter(p=>!!p);var current=FS.root;var current_path="/";for(var i=0;i>>0)%FS.nameTable.length},hashAddNode(node){var hash=FS.hashName(node.parent.id,node.name);node.name_next=FS.nameTable[hash];FS.nameTable[hash]=node},hashRemoveNode(node){var 
hash=FS.hashName(node.parent.id,node.name);if(FS.nameTable[hash]===node){FS.nameTable[hash]=node.name_next}else{var current=FS.nameTable[hash];while(current){if(current.name_next===node){current.name_next=node.name_next;break}current=current.name_next}}},lookupNode(parent,name){var errCode=FS.mayLookup(parent);if(errCode){throw new FS.ErrnoError(errCode)}var hash=FS.hashName(parent.id,name);for(var node=FS.nameTable[hash];node;node=node.name_next){var nodeName=node.name;if(node.parent.id===parent.id&&nodeName===name){return node}}return FS.lookup(parent,name)},createNode(parent,name,mode,rdev){var node=new FS.FSNode(parent,name,mode,rdev);FS.hashAddNode(node);return node},destroyNode(node){FS.hashRemoveNode(node)},isRoot(node){return node===node.parent},isMountpoint(node){return!!node.mounted},isFile(mode){return(mode&61440)===32768},isDir(mode){return(mode&61440)===16384},isLink(mode){return(mode&61440)===40960},isChrdev(mode){return(mode&61440)===8192},isBlkdev(mode){return(mode&61440)===24576},isFIFO(mode){return(mode&61440)===4096},isSocket(mode){return(mode&49152)===49152},flagsToPermissionString(flag){var perms=["r","w","rw"][flag&3];if(flag&512){perms+="w"}return perms},nodePermissions(node,perms){if(FS.ignorePermissions){return 0}if(perms.includes("r")&&!(node.mode&292)){return 2}else if(perms.includes("w")&&!(node.mode&146)){return 2}else if(perms.includes("x")&&!(node.mode&73)){return 2}return 0},mayLookup(dir){if(!FS.isDir(dir.mode))return 54;var errCode=FS.nodePermissions(dir,"x");if(errCode)return errCode;if(!dir.node_ops.lookup)return 2;return 0},mayCreate(dir,name){if(!FS.isDir(dir.mode)){return 54}try{var node=FS.lookupNode(dir,name);return 20}catch(e){}return FS.nodePermissions(dir,"wx")},mayDelete(dir,name,isdir){var node;try{node=FS.lookupNode(dir,name)}catch(e){return e.errno}var errCode=FS.nodePermissions(dir,"wx");if(errCode){return errCode}if(isdir){if(!FS.isDir(node.mode)){return 54}if(FS.isRoot(node)||FS.getPath(node)===FS.cwd()){return 
10}}else{if(FS.isDir(node.mode)){return 31}}return 0},mayOpen(node,flags){if(!node){return 44}if(FS.isLink(node.mode)){return 32}else if(FS.isDir(node.mode)){if(FS.flagsToPermissionString(flags)!=="r"||flags&(512|64)){return 31}}return FS.nodePermissions(node,FS.flagsToPermissionString(flags))},checkOpExists(op,err){if(!op){throw new FS.ErrnoError(err)}return op},MAX_OPEN_FDS:4096,nextfd(){for(var fd=0;fd<=FS.MAX_OPEN_FDS;fd++){if(!FS.streams[fd]){return fd}}throw new FS.ErrnoError(33)},getStreamChecked(fd){var stream=FS.getStream(fd);if(!stream){throw new FS.ErrnoError(8)}return stream},getStream:fd=>FS.streams[fd],createStream(stream,fd=-1){stream=Object.assign(new FS.FSStream,stream);if(fd==-1){fd=FS.nextfd()}stream.fd=fd;FS.streams[fd]=stream;return stream},closeStream(fd){FS.streams[fd]=null},dupStream(origStream,fd=-1){var stream=FS.createStream(origStream,fd);stream.stream_ops?.dup?.(stream);return stream},doSetAttr(stream,node,attr){var setattr=stream?.stream_ops.setattr;var arg=setattr?stream:node;setattr??=node.node_ops.setattr;FS.checkOpExists(setattr,63);setattr(arg,attr)},chrdev_stream_ops:{open(stream){var device=FS.getDevice(stream.node.rdev);stream.stream_ops=device.stream_ops;stream.stream_ops.open?.(stream)},llseek(){throw new FS.ErrnoError(70)}},major:dev=>dev>>8,minor:dev=>dev&255,makedev:(ma,mi)=>ma<<8|mi,registerDevice(dev,ops){FS.devices[dev]={stream_ops:ops}},getDevice:dev=>FS.devices[dev],getMounts(mount){var mounts=[];var check=[mount];while(check.length){var m=check.pop();mounts.push(m);check.push(...m.mounts)}return mounts},syncfs(populate,callback){if(typeof populate=="function"){callback=populate;populate=false}FS.syncFSRequests++;if(FS.syncFSRequests>1){err(`warning: ${FS.syncFSRequests} FS.syncfs operations in flight at once, probably just doing extra work`)}var mounts=FS.getMounts(FS.root.mount);var completed=0;function doCallback(errCode){FS.syncFSRequests--;return callback(errCode)}function 
done(errCode){if(errCode){if(!done.errored){done.errored=true;return doCallback(errCode)}return}if(++completed>=mounts.length){doCallback(null)}}for(var mount of mounts){if(mount.type.syncfs){mount.type.syncfs(mount,populate,done)}else{done(null)}}},mount(type,opts,mountpoint){var root=mountpoint==="/";var pseudo=!mountpoint;var node;if(root&&FS.root){throw new FS.ErrnoError(10)}else if(!root&&!pseudo){var lookup=FS.lookupPath(mountpoint,{follow_mount:false});mountpoint=lookup.path;node=lookup.node;if(FS.isMountpoint(node)){throw new FS.ErrnoError(10)}if(!FS.isDir(node.mode)){throw new FS.ErrnoError(54)}}var mount={type,opts,mountpoint,mounts:[]};var mountRoot=type.mount(mount);mountRoot.mount=mount;mount.root=mountRoot;if(root){FS.root=mountRoot}else if(node){node.mounted=mount;if(node.mount){node.mount.mounts.push(mount)}}return mountRoot},unmount(mountpoint){var lookup=FS.lookupPath(mountpoint,{follow_mount:false});if(!FS.isMountpoint(lookup.node)){throw new FS.ErrnoError(28)}var node=lookup.node;var mount=node.mounted;var mounts=FS.getMounts(mount);for(var[hash,current]of Object.entries(FS.nameTable)){while(current){var next=current.name_next;if(mounts.includes(current.mount)){FS.destroyNode(current)}current=next}}node.mounted=null;var idx=node.mount.mounts.indexOf(mount);node.mount.mounts.splice(idx,1)},lookup(parent,name){return parent.node_ops.lookup(parent,name)},mknod(path,mode,dev){var lookup=FS.lookupPath(path,{parent:true});var parent=lookup.node;var name=PATH.basename(path);if(!name){throw new FS.ErrnoError(28)}if(name==="."||name===".."){throw new FS.ErrnoError(20)}var errCode=FS.mayCreate(parent,name);if(errCode){throw new FS.ErrnoError(errCode)}if(!parent.node_ops.mknod){throw new FS.ErrnoError(63)}return parent.node_ops.mknod(parent,name,mode,dev)},statfs(path){return FS.statfsNode(FS.lookupPath(path,{follow:true}).node)},statfsStream(stream){return FS.statfsNode(stream.node)},statfsNode(node){var 
rtn={bsize:4096,frsize:4096,blocks:1e6,bfree:5e5,bavail:5e5,files:FS.nextInode,ffree:FS.nextInode-1,fsid:42,flags:2,namelen:255};if(node.node_ops.statfs){Object.assign(rtn,node.node_ops.statfs(node.mount.opts.root))}return rtn},create(path,mode=438){mode&=4095;mode|=32768;return FS.mknod(path,mode,0)},mkdir(path,mode=511){mode&=511|512;mode|=16384;return FS.mknod(path,mode,0)},mkdirTree(path,mode){var dirs=path.split("/");var d="";for(var dir of dirs){if(!dir)continue;if(d||PATH.isAbs(path))d+="/";d+=dir;try{FS.mkdir(d,mode)}catch(e){if(e.errno!=20)throw e}}},mkdev(path,mode,dev){if(typeof dev=="undefined"){dev=mode;mode=438}mode|=8192;return FS.mknod(path,mode,dev)},symlink(oldpath,newpath){if(!PATH_FS.resolve(oldpath)){throw new FS.ErrnoError(44)}var lookup=FS.lookupPath(newpath,{parent:true});var parent=lookup.node;if(!parent){throw new FS.ErrnoError(44)}var newname=PATH.basename(newpath);var errCode=FS.mayCreate(parent,newname);if(errCode){throw new FS.ErrnoError(errCode)}if(!parent.node_ops.symlink){throw new FS.ErrnoError(63)}return parent.node_ops.symlink(parent,newname,oldpath)},rename(old_path,new_path){var old_dirname=PATH.dirname(old_path);var new_dirname=PATH.dirname(new_path);var old_name=PATH.basename(old_path);var new_name=PATH.basename(new_path);var lookup,old_dir,new_dir;lookup=FS.lookupPath(old_path,{parent:true});old_dir=lookup.node;lookup=FS.lookupPath(new_path,{parent:true});new_dir=lookup.node;if(!old_dir||!new_dir)throw new FS.ErrnoError(44);if(old_dir.mount!==new_dir.mount){throw new FS.ErrnoError(75)}var old_node=FS.lookupNode(old_dir,old_name);var relative=PATH_FS.relative(old_path,new_dirname);if(relative.charAt(0)!=="."){throw new FS.ErrnoError(28)}relative=PATH_FS.relative(new_path,old_dirname);if(relative.charAt(0)!=="."){throw new FS.ErrnoError(55)}var new_node;try{new_node=FS.lookupNode(new_dir,new_name)}catch(e){}if(old_node===new_node){return}var isdir=FS.isDir(old_node.mode);var 
errCode=FS.mayDelete(old_dir,old_name,isdir);if(errCode){throw new FS.ErrnoError(errCode)}errCode=new_node?FS.mayDelete(new_dir,new_name,isdir):FS.mayCreate(new_dir,new_name);if(errCode){throw new FS.ErrnoError(errCode)}if(!old_dir.node_ops.rename){throw new FS.ErrnoError(63)}if(FS.isMountpoint(old_node)||new_node&&FS.isMountpoint(new_node)){throw new FS.ErrnoError(10)}if(new_dir!==old_dir){errCode=FS.nodePermissions(old_dir,"w");if(errCode){throw new FS.ErrnoError(errCode)}}FS.hashRemoveNode(old_node);try{old_dir.node_ops.rename(old_node,new_dir,new_name);old_node.parent=new_dir}catch(e){throw e}finally{FS.hashAddNode(old_node)}},rmdir(path){var lookup=FS.lookupPath(path,{parent:true});var parent=lookup.node;var name=PATH.basename(path);var node=FS.lookupNode(parent,name);var errCode=FS.mayDelete(parent,name,true);if(errCode){throw new FS.ErrnoError(errCode)}if(!parent.node_ops.rmdir){throw new FS.ErrnoError(63)}if(FS.isMountpoint(node)){throw new FS.ErrnoError(10)}parent.node_ops.rmdir(parent,name);FS.destroyNode(node)},readdir(path){var lookup=FS.lookupPath(path,{follow:true});var node=lookup.node;var readdir=FS.checkOpExists(node.node_ops.readdir,54);return readdir(node)},unlink(path){var lookup=FS.lookupPath(path,{parent:true});var parent=lookup.node;if(!parent){throw new FS.ErrnoError(44)}var name=PATH.basename(path);var node=FS.lookupNode(parent,name);var errCode=FS.mayDelete(parent,name,false);if(errCode){throw new FS.ErrnoError(errCode)}if(!parent.node_ops.unlink){throw new FS.ErrnoError(63)}if(FS.isMountpoint(node)){throw new FS.ErrnoError(10)}parent.node_ops.unlink(parent,name);FS.destroyNode(node)},readlink(path){var lookup=FS.lookupPath(path);var link=lookup.node;if(!link){throw new FS.ErrnoError(44)}if(!link.node_ops.readlink){throw new FS.ErrnoError(28)}return link.node_ops.readlink(link)},stat(path,dontFollow){var lookup=FS.lookupPath(path,{follow:!dontFollow});var node=lookup.node;var getattr=FS.checkOpExists(node.node_ops.getattr,63);return 
getattr(node)},fstat(fd){var stream=FS.getStreamChecked(fd);var node=stream.node;var getattr=stream.stream_ops.getattr;var arg=getattr?stream:node;getattr??=node.node_ops.getattr;FS.checkOpExists(getattr,63);return getattr(arg)},lstat(path){return FS.stat(path,true)},doChmod(stream,node,mode,dontFollow){FS.doSetAttr(stream,node,{mode:mode&4095|node.mode&~4095,ctime:Date.now(),dontFollow})},chmod(path,mode,dontFollow){var node;if(typeof path=="string"){var lookup=FS.lookupPath(path,{follow:!dontFollow});node=lookup.node}else{node=path}FS.doChmod(null,node,mode,dontFollow)},lchmod(path,mode){FS.chmod(path,mode,true)},fchmod(fd,mode){var stream=FS.getStreamChecked(fd);FS.doChmod(stream,stream.node,mode,false)},doChown(stream,node,dontFollow){FS.doSetAttr(stream,node,{timestamp:Date.now(),dontFollow})},chown(path,uid,gid,dontFollow){var node;if(typeof path=="string"){var lookup=FS.lookupPath(path,{follow:!dontFollow});node=lookup.node}else{node=path}FS.doChown(null,node,dontFollow)},lchown(path,uid,gid){FS.chown(path,uid,gid,true)},fchown(fd,uid,gid){var stream=FS.getStreamChecked(fd);FS.doChown(stream,stream.node,false)},doTruncate(stream,node,len){if(FS.isDir(node.mode)){throw new FS.ErrnoError(31)}if(!FS.isFile(node.mode)){throw new FS.ErrnoError(28)}var errCode=FS.nodePermissions(node,"w");if(errCode){throw new FS.ErrnoError(errCode)}FS.doSetAttr(stream,node,{size:len,timestamp:Date.now()})},truncate(path,len){if(len<0){throw new FS.ErrnoError(28)}var node;if(typeof path=="string"){var lookup=FS.lookupPath(path,{follow:true});node=lookup.node}else{node=path}FS.doTruncate(null,node,len)},ftruncate(fd,len){var stream=FS.getStreamChecked(fd);if(len<0||(stream.flags&2097155)===0){throw new FS.ErrnoError(28)}FS.doTruncate(stream,stream.node,len)},utime(path,atime,mtime){var lookup=FS.lookupPath(path,{follow:true});var node=lookup.node;var setattr=FS.checkOpExists(node.node_ops.setattr,63);setattr(node,{atime,mtime})},open(path,flags,mode=438){if(path===""){throw new 
FS.ErrnoError(44)}flags=typeof flags=="string"?FS_modeStringToFlags(flags):flags;if(flags&64){mode=mode&4095|32768}else{mode=0}var node;var isDirPath;if(typeof path=="object"){node=path}else{isDirPath=path.endsWith("/");var lookup=FS.lookupPath(path,{follow:!(flags&131072),noent_okay:true});node=lookup.node;path=lookup.path}var created=false;if(flags&64){if(node){if(flags&128){throw new FS.ErrnoError(20)}}else if(isDirPath){throw new FS.ErrnoError(31)}else{node=FS.mknod(path,mode|511,0);created=true}}if(!node){throw new FS.ErrnoError(44)}if(FS.isChrdev(node.mode)){flags&=~512}if(flags&65536&&!FS.isDir(node.mode)){throw new FS.ErrnoError(54)}if(!created){var errCode=FS.mayOpen(node,flags);if(errCode){throw new FS.ErrnoError(errCode)}}if(flags&512&&!created){FS.truncate(node,0)}flags&=~(128|512|131072);var stream=FS.createStream({node,path:FS.getPath(node),flags,seekable:true,position:0,stream_ops:node.stream_ops,ungotten:[],error:false});if(stream.stream_ops.open){stream.stream_ops.open(stream)}if(created){FS.chmod(node,mode&511)}if(Module["logReadFiles"]&&!(flags&1)){if(!(path in FS.readFiles)){FS.readFiles[path]=1}}return stream},close(stream){if(FS.isClosed(stream)){throw new FS.ErrnoError(8)}if(stream.getdents)stream.getdents=null;try{if(stream.stream_ops.close){stream.stream_ops.close(stream)}}catch(e){throw e}finally{FS.closeStream(stream.fd)}stream.fd=null},isClosed(stream){return stream.fd===null},llseek(stream,offset,whence){if(FS.isClosed(stream)){throw new FS.ErrnoError(8)}if(!stream.seekable||!stream.stream_ops.llseek){throw new FS.ErrnoError(70)}if(whence!=0&&whence!=1&&whence!=2){throw new FS.ErrnoError(28)}stream.position=stream.stream_ops.llseek(stream,offset,whence);stream.ungotten=[];return stream.position},read(stream,buffer,offset,length,position){if(length<0||position<0){throw new FS.ErrnoError(28)}if(FS.isClosed(stream)){throw new FS.ErrnoError(8)}if((stream.flags&2097155)===1){throw new FS.ErrnoError(8)}if(FS.isDir(stream.node.mode)){throw new 
FS.ErrnoError(31)}if(!stream.stream_ops.read){throw new FS.ErrnoError(28)}var seeking=typeof position!="undefined";if(!seeking){position=stream.position}else if(!stream.seekable){throw new FS.ErrnoError(70)}var bytesRead=stream.stream_ops.read(stream,buffer,offset,length,position);if(!seeking)stream.position+=bytesRead;return bytesRead},write(stream,buffer,offset,length,position,canOwn){if(length<0||position<0){throw new FS.ErrnoError(28)}if(FS.isClosed(stream)){throw new FS.ErrnoError(8)}if((stream.flags&2097155)===0){throw new FS.ErrnoError(8)}if(FS.isDir(stream.node.mode)){throw new FS.ErrnoError(31)}if(!stream.stream_ops.write){throw new FS.ErrnoError(28)}if(stream.seekable&&stream.flags&1024){FS.llseek(stream,0,2)}var seeking=typeof position!="undefined";if(!seeking){position=stream.position}else if(!stream.seekable){throw new FS.ErrnoError(70)}var bytesWritten=stream.stream_ops.write(stream,buffer,offset,length,position,canOwn);if(!seeking)stream.position+=bytesWritten;return bytesWritten},mmap(stream,length,position,prot,flags){if((prot&2)!==0&&(flags&2)===0&&(stream.flags&2097155)!==2){throw new FS.ErrnoError(2)}if((stream.flags&2097155)===1){throw new FS.ErrnoError(2)}if(!stream.stream_ops.mmap){throw new FS.ErrnoError(43)}if(!length){throw new FS.ErrnoError(28)}return stream.stream_ops.mmap(stream,length,position,prot,flags)},msync(stream,buffer,offset,length,mmapFlags){if(!stream.stream_ops.msync){return 0}return stream.stream_ops.msync(stream,buffer,offset,length,mmapFlags)},ioctl(stream,cmd,arg){if(!stream.stream_ops.ioctl){throw new FS.ErrnoError(59)}return stream.stream_ops.ioctl(stream,cmd,arg)},readFile(path,opts={}){opts.flags=opts.flags||0;opts.encoding=opts.encoding||"binary";if(opts.encoding!=="utf8"&&opts.encoding!=="binary"){abort(`Invalid encoding type "${opts.encoding}"`)}var stream=FS.open(path,opts.flags);var stat=FS.stat(path);var length=stat.size;var buf=new 
Uint8Array(length);FS.read(stream,buf,0,length,0);if(opts.encoding==="utf8"){buf=UTF8ArrayToString(buf)}FS.close(stream);return buf},writeFile(path,data,opts={}){opts.flags=opts.flags||577;var stream=FS.open(path,opts.flags,opts.mode);if(typeof data=="string"){data=new Uint8Array(intArrayFromString(data,true))}if(ArrayBuffer.isView(data)){FS.write(stream,data,0,data.byteLength,undefined,opts.canOwn)}else{abort("Unsupported data type")}FS.close(stream)},cwd:()=>FS.currentPath,chdir(path){var lookup=FS.lookupPath(path,{follow:true});if(lookup.node===null){throw new FS.ErrnoError(44)}if(!FS.isDir(lookup.node.mode)){throw new FS.ErrnoError(54)}var errCode=FS.nodePermissions(lookup.node,"x");if(errCode){throw new FS.ErrnoError(errCode)}FS.currentPath=lookup.path},createDefaultDirectories(){FS.mkdir("/tmp");FS.mkdir("/home");FS.mkdir("/home/web_user")},createDefaultDevices(){FS.mkdir("/dev");FS.registerDevice(FS.makedev(1,3),{read:()=>0,write:(stream,buffer,offset,length,pos)=>length,llseek:()=>0});FS.mkdev("/dev/null",FS.makedev(1,3));TTY.register(FS.makedev(5,0),TTY.default_tty_ops);TTY.register(FS.makedev(6,0),TTY.default_tty1_ops);FS.mkdev("/dev/tty",FS.makedev(5,0));FS.mkdev("/dev/tty1",FS.makedev(6,0));var randomBuffer=new Uint8Array(1024),randomLeft=0;var randomByte=()=>{if(randomLeft===0){randomFill(randomBuffer);randomLeft=randomBuffer.byteLength}return randomBuffer[--randomLeft]};FS.createDevice("/dev","random",randomByte);FS.createDevice("/dev","urandom",randomByte);FS.mkdir("/dev/shm");FS.mkdir("/dev/shm/tmp")},createSpecialDirectories(){FS.mkdir("/proc");var proc_self=FS.mkdir("/proc/self");FS.mkdir("/proc/self/fd");FS.mount({mount(){var node=FS.createNode(proc_self,"fd",16895,73);node.stream_ops={llseek:MEMFS.stream_ops.llseek};node.node_ops={lookup(parent,name){var fd=+name;var stream=FS.getStreamChecked(fd);var ret={parent:null,mount:{mountpoint:"fake"},node_ops:{readlink:()=>stream.path},id:fd+1};ret.parent=ret;return ret},readdir(){return 
Array.from(FS.streams.entries()).filter(([k,v])=>v).map(([k,v])=>k.toString())}};return node}},{},"/proc/self/fd")},createStandardStreams(input,output,error){if(input){FS.createDevice("/dev","stdin",input)}else{FS.symlink("/dev/tty","/dev/stdin")}if(output){FS.createDevice("/dev","stdout",null,output)}else{FS.symlink("/dev/tty","/dev/stdout")}if(error){FS.createDevice("/dev","stderr",null,error)}else{FS.symlink("/dev/tty1","/dev/stderr")}var stdin=FS.open("/dev/stdin",0);var stdout=FS.open("/dev/stdout",1);var stderr=FS.open("/dev/stderr",1)},staticInit(){FS.nameTable=new Array(4096);FS.mount(MEMFS,{},"/");FS.createDefaultDirectories();FS.createDefaultDevices();FS.createSpecialDirectories();FS.filesystems={MEMFS}},init(input,output,error){FS.initialized=true;input??=Module["stdin"];output??=Module["stdout"];error??=Module["stderr"];FS.createStandardStreams(input,output,error)},quit(){FS.initialized=false;for(var stream of FS.streams){if(stream){FS.close(stream)}}},findObject(path,dontResolveLastLink){var ret=FS.analyzePath(path,dontResolveLastLink);if(!ret.exists){return null}return ret.object},analyzePath(path,dontResolveLastLink){try{var lookup=FS.lookupPath(path,{follow:!dontResolveLastLink});path=lookup.path}catch(e){}var ret={isRoot:false,exists:false,error:0,name:null,path:null,object:null,parentExists:false,parentPath:null,parentObject:null};try{var lookup=FS.lookupPath(path,{parent:true});ret.parentExists=true;ret.parentPath=lookup.path;ret.parentObject=lookup.node;ret.name=PATH.basename(path);lookup=FS.lookupPath(path,{follow:!dontResolveLastLink});ret.exists=true;ret.path=lookup.path;ret.object=lookup.node;ret.name=lookup.node.name;ret.isRoot=lookup.path==="/"}catch(e){ret.error=e.errno}return ret},createPath(parent,path,canRead,canWrite){parent=typeof parent=="string"?parent:FS.getPath(parent);var parts=path.split("/").reverse();while(parts.length){var part=parts.pop();if(!part)continue;var 
current=PATH.join2(parent,part);try{FS.mkdir(current)}catch(e){if(e.errno!=20)throw e}parent=current}return current},createFile(parent,name,properties,canRead,canWrite){var path=PATH.join2(typeof parent=="string"?parent:FS.getPath(parent),name);var mode=FS_getMode(canRead,canWrite);return FS.create(path,mode)},createDataFile(parent,name,data,canRead,canWrite,canOwn){var path=name;if(parent){parent=typeof parent=="string"?parent:FS.getPath(parent);path=name?PATH.join2(parent,name):parent}var mode=FS_getMode(canRead,canWrite);var node=FS.create(path,mode);if(data){if(typeof data=="string"){var arr=new Array(data.length);for(var i=0,len=data.length;ithis.length-1||idx<0){return undefined}var chunkOffset=idx%this.chunkSize;var chunkNum=idx/this.chunkSize|0;return this.getter(chunkNum)[chunkOffset]}setDataGetter(getter){this.getter=getter}cacheLength(){var xhr=new XMLHttpRequest;xhr.open("HEAD",url,false);xhr.send(null);if(!(xhr.status>=200&&xhr.status<300||xhr.status===304))abort("Couldn't load "+url+". Status: "+xhr.status);var datalength=Number(xhr.getResponseHeader("Content-length"));var header;var hasByteServing=(header=xhr.getResponseHeader("Accept-Ranges"))&&header==="bytes";var usesGzip=(header=xhr.getResponseHeader("Content-Encoding"))&&header==="gzip";var chunkSize=1024*1024;if(!hasByteServing)chunkSize=datalength;var doXHR=(from,to)=>{if(from>to)abort("invalid range ("+from+", "+to+") or no bytes requested!");if(to>datalength-1)abort("only "+datalength+" bytes available! programmer error!");var xhr=new XMLHttpRequest;xhr.open("GET",url,false);if(datalength!==chunkSize)xhr.setRequestHeader("Range","bytes="+from+"-"+to);xhr.responseType="arraybuffer";if(xhr.overrideMimeType){xhr.overrideMimeType("text/plain; charset=x-user-defined")}xhr.send(null);if(!(xhr.status>=200&&xhr.status<300||xhr.status===304))abort("Couldn't load "+url+". 
Status: "+xhr.status);if(xhr.response!==undefined){return new Uint8Array(xhr.response||[])}return intArrayFromString(xhr.responseText||"",true)};var lazyArray=this;lazyArray.setDataGetter(chunkNum=>{var start=chunkNum*chunkSize;var end=(chunkNum+1)*chunkSize-1;end=Math.min(end,datalength-1);if(typeof lazyArray.chunks[chunkNum]=="undefined"){lazyArray.chunks[chunkNum]=doXHR(start,end)}if(typeof lazyArray.chunks[chunkNum]=="undefined")abort("doXHR failed!");return lazyArray.chunks[chunkNum]});if(usesGzip||!datalength){chunkSize=datalength=1;datalength=this.getter(0).length;chunkSize=datalength;out("LazyFiles on gzip forces download of the whole file when length is accessed")}this._length=datalength;this._chunkSize=chunkSize;this.lengthKnown=true}get length(){if(!this.lengthKnown){this.cacheLength()}return this._length}get chunkSize(){if(!this.lengthKnown){this.cacheLength()}return this._chunkSize}}if(globalThis.XMLHttpRequest){if(!ENVIRONMENT_IS_WORKER)abort("Cannot do synchronous binary XHRs outside webworkers in modern browsers. 
Use --embed-file or --preload-file in emcc");var lazyArray=new LazyUint8Array;var properties={isDevice:false,contents:lazyArray}}else{var properties={isDevice:false,url}}var node=FS.createFile(parent,name,properties,canRead,canWrite);if(properties.contents){node.contents=properties.contents}else if(properties.url){node.contents=null;node.url=properties.url}Object.defineProperties(node,{usedBytes:{get:function(){return this.contents.length}}});var stream_ops={};for(const[key,fn]of Object.entries(node.stream_ops)){stream_ops[key]=(...args)=>{FS.forceLoadFile(node);return fn(...args)}}function writeChunks(stream,buffer,offset,length,position){var contents=stream.node.contents;if(position>=contents.length)return 0;var size=Math.min(contents.length-position,length);if(contents.slice){for(var i=0;i{FS.forceLoadFile(node);return writeChunks(stream,buffer,offset,length,position)};stream_ops.mmap=(stream,length,position,prot,flags)=>{FS.forceLoadFile(node);var ptr=mmapAlloc(length);if(!ptr){throw new FS.ErrnoError(48)}writeChunks(stream,(growMemViews(),HEAP8),ptr,length,position);return{ptr,allocated:true}};node.stream_ops=stream_ops;return node}};var UTF8ToString=(ptr,maxBytesToRead,ignoreNul)=>ptr?UTF8ArrayToString((growMemViews(),HEAPU8),ptr,maxBytesToRead,ignoreNul):"";var SYSCALLS={DEFAULT_POLLMASK:5,calculateAt(dirfd,path,allowEmpty){if(PATH.isAbs(path)){return path}var dir;if(dirfd===-100){dir=FS.cwd()}else{var dirstream=SYSCALLS.getStreamFromFD(dirfd);dir=dirstream.path}if(path.length==0){if(!allowEmpty){throw new FS.ErrnoError(44)}return dir}return 
dir+"/"+path},writeStat(buf,stat){(growMemViews(),HEAPU32)[buf>>2]=stat.dev;(growMemViews(),HEAPU32)[buf+4>>2]=stat.mode;(growMemViews(),HEAPU32)[buf+8>>2]=stat.nlink;(growMemViews(),HEAPU32)[buf+12>>2]=stat.uid;(growMemViews(),HEAPU32)[buf+16>>2]=stat.gid;(growMemViews(),HEAPU32)[buf+20>>2]=stat.rdev;(growMemViews(),HEAP64)[buf+24>>3]=BigInt(stat.size);(growMemViews(),HEAP32)[buf+32>>2]=4096;(growMemViews(),HEAP32)[buf+36>>2]=stat.blocks;var atime=stat.atime.getTime();var mtime=stat.mtime.getTime();var ctime=stat.ctime.getTime();(growMemViews(),HEAP64)[buf+40>>3]=BigInt(Math.floor(atime/1e3));(growMemViews(),HEAPU32)[buf+48>>2]=atime%1e3*1e3*1e3;(growMemViews(),HEAP64)[buf+56>>3]=BigInt(Math.floor(mtime/1e3));(growMemViews(),HEAPU32)[buf+64>>2]=mtime%1e3*1e3*1e3;(growMemViews(),HEAP64)[buf+72>>3]=BigInt(Math.floor(ctime/1e3));(growMemViews(),HEAPU32)[buf+80>>2]=ctime%1e3*1e3*1e3;(growMemViews(),HEAP64)[buf+88>>3]=BigInt(stat.ino);return 0},writeStatFs(buf,stats){(growMemViews(),HEAPU32)[buf+4>>2]=stats.bsize;(growMemViews(),HEAPU32)[buf+60>>2]=stats.bsize;(growMemViews(),HEAP64)[buf+8>>3]=BigInt(stats.blocks);(growMemViews(),HEAP64)[buf+16>>3]=BigInt(stats.bfree);(growMemViews(),HEAP64)[buf+24>>3]=BigInt(stats.bavail);(growMemViews(),HEAP64)[buf+32>>3]=BigInt(stats.files);(growMemViews(),HEAP64)[buf+40>>3]=BigInt(stats.ffree);(growMemViews(),HEAPU32)[buf+48>>2]=stats.fsid;(growMemViews(),HEAPU32)[buf+64>>2]=stats.flags;(growMemViews(),HEAPU32)[buf+56>>2]=stats.namelen},doMsync(addr,stream,len,flags,offset){if(!FS.isFile(stream.node.mode)){throw new FS.ErrnoError(43)}if(flags&2){return 0}var buffer=(growMemViews(),HEAPU8).slice(addr,addr+len);FS.msync(stream,buffer,offset,len,flags)},getStreamFromFD(fd){var stream=FS.getStreamChecked(fd);return stream},varargs:undefined,getStr(ptr){var ret=UTF8ToString(ptr);return ret}};function ___syscall_fcntl64(fd,cmd,varargs){if(ENVIRONMENT_IS_PTHREAD)return 
proxyToMainThread(3,0,1,fd,cmd,varargs);SYSCALLS.varargs=varargs;try{var stream=SYSCALLS.getStreamFromFD(fd);switch(cmd){case 0:{var arg=syscallGetVarargI();if(arg<0){return-28}while(FS.streams[arg]){arg++}var newStream;newStream=FS.dupStream(stream,arg);return newStream.fd}case 1:case 2:return 0;case 3:return stream.flags;case 4:{var arg=syscallGetVarargI();stream.flags|=arg;return 0}case 12:{var arg=syscallGetVarargP();var offset=0;(growMemViews(),HEAP16)[arg+offset>>1]=2;return 0}case 13:case 14:return 0}return-28}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}function ___syscall_fstat64(fd,buf){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(4,0,1,fd,buf);try{return SYSCALLS.writeStat(buf,FS.fstat(fd))}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}function ___syscall_ioctl(fd,op,varargs){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(5,0,1,fd,op,varargs);SYSCALLS.varargs=varargs;try{var stream=SYSCALLS.getStreamFromFD(fd);switch(op){case 21509:{if(!stream.tty)return-59;return 0}case 21505:{if(!stream.tty)return-59;if(stream.tty.ops.ioctl_tcgets){var termios=stream.tty.ops.ioctl_tcgets(stream);var argp=syscallGetVarargP();(growMemViews(),HEAP32)[argp>>2]=termios.c_iflag||0;(growMemViews(),HEAP32)[argp+4>>2]=termios.c_oflag||0;(growMemViews(),HEAP32)[argp+8>>2]=termios.c_cflag||0;(growMemViews(),HEAP32)[argp+12>>2]=termios.c_lflag||0;for(var i=0;i<32;i++){(growMemViews(),HEAP8)[argp+i+17]=termios.c_cc[i]||0}return 0}return 0}case 21510:case 21511:case 21512:{if(!stream.tty)return-59;return 0}case 21506:case 21507:case 21508:{if(!stream.tty)return-59;if(stream.tty.ops.ioctl_tcsets){var argp=syscallGetVarargP();var c_iflag=(growMemViews(),HEAP32)[argp>>2];var c_oflag=(growMemViews(),HEAP32)[argp+4>>2];var c_cflag=(growMemViews(),HEAP32)[argp+8>>2];var c_lflag=(growMemViews(),HEAP32)[argp+12>>2];var c_cc=[];for(var i=0;i<32;i++){c_cc.push((growMemViews(),HEAP8)[argp+i+17])}return 
stream.tty.ops.ioctl_tcsets(stream.tty,op,{c_iflag,c_oflag,c_cflag,c_lflag,c_cc})}return 0}case 21519:{if(!stream.tty)return-59;var argp=syscallGetVarargP();(growMemViews(),HEAP32)[argp>>2]=0;return 0}case 21520:{if(!stream.tty)return-59;return-28}case 21537:case 21531:{var argp=syscallGetVarargP();return FS.ioctl(stream,op,argp)}case 21523:{if(!stream.tty)return-59;if(stream.tty.ops.ioctl_tiocgwinsz){var winsize=stream.tty.ops.ioctl_tiocgwinsz(stream.tty);var argp=syscallGetVarargP();(growMemViews(),HEAP16)[argp>>1]=winsize[0];(growMemViews(),HEAP16)[argp+2>>1]=winsize[1]}return 0}case 21524:{if(!stream.tty)return-59;return 0}case 21515:{if(!stream.tty)return-59;return 0}default:return-28}}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}function ___syscall_lstat64(path,buf){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(6,0,1,path,buf);try{path=SYSCALLS.getStr(path);return SYSCALLS.writeStat(buf,FS.lstat(path))}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}function ___syscall_newfstatat(dirfd,path,buf,flags){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(7,0,1,dirfd,path,buf,flags);try{path=SYSCALLS.getStr(path);var nofollow=flags&256;var allowEmpty=flags&4096;flags=flags&~6400;path=SYSCALLS.calculateAt(dirfd,path,allowEmpty);return SYSCALLS.writeStat(buf,nofollow?FS.lstat(path):FS.stat(path))}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}function ___syscall_openat(dirfd,path,flags,varargs){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(8,0,1,dirfd,path,flags,varargs);SYSCALLS.varargs=varargs;try{path=SYSCALLS.getStr(path);path=SYSCALLS.calculateAt(dirfd,path);var mode=varargs?syscallGetVarargI():0;return FS.open(path,flags,mode).fd}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}function ___syscall_stat64(path,buf){if(ENVIRONMENT_IS_PTHREAD)return 
proxyToMainThread(9,0,1,path,buf);try{path=SYSCALLS.getStr(path);return SYSCALLS.writeStat(buf,FS.stat(path))}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}var __abort_js=()=>abort("");var structRegistrations={};var runDestructors=destructors=>{while(destructors.length){var ptr=destructors.pop();var del=destructors.pop();del(ptr)}};function readPointer(pointer){return this.fromWireType((growMemViews(),HEAPU32)[pointer>>2])}var awaitingDependencies={};var registeredTypes={};var typeDependencies={};var InternalError=class InternalError extends Error{constructor(message){super(message);this.name="InternalError"}};var throwInternalError=message=>{throw new InternalError(message)};var whenDependentTypesAreResolved=(myTypes,dependentTypes,getTypeConverters)=>{myTypes.forEach(type=>typeDependencies[type]=dependentTypes);function onComplete(typeConverters){var myTypeConverters=getTypeConverters(typeConverters);if(myTypeConverters.length!==myTypes.length){throwInternalError("Mismatched type converter count")}for(var i=0;i{typeConverters[i]=registeredTypes[dt];++registered;if(registered===unregisteredTypes.length){onComplete(typeConverters)}})}}if(0===unregisteredTypes.length){onComplete(typeConverters)}};var __embind_finalize_value_object=structType=>{var reg=structRegistrations[structType];delete structRegistrations[structType];var rawConstructor=reg.rawConstructor;var rawDestructor=reg.rawDestructor;var fieldRecords=reg.fields;var fieldTypes=fieldRecords.map(field=>field.getterReturnType).concat(fieldRecords.map(field=>field.setterArgumentType));whenDependentTypesAreResolved([structType],fieldTypes,fieldTypes=>{var fields={};for(var[i,field]of fieldRecords.entries()){const getterReturnType=fieldTypes[i];const getter=field.getter;const getterContext=field.getterContext;const setterArgumentType=fieldTypes[i+fieldRecords.length];const setter=field.setter;const 
setterContext=field.setterContext;fields[field.fieldName]={read:ptr=>getterReturnType.fromWireType(getter(getterContext,ptr)),write:(ptr,o)=>{var destructors=[];setter(setterContext,ptr,setterArgumentType.toWireType(destructors,o));runDestructors(destructors)},optional:getterReturnType.optional}}return[{name:reg.name,fromWireType:ptr=>{var rv={};for(var i in fields){rv[i]=fields[i].read(ptr)}rawDestructor(ptr);return rv},toWireType:(destructors,o)=>{for(var fieldName in fields){if(!(fieldName in o)&&!fields[fieldName].optional){throw new TypeError(`Missing field: "${fieldName}"`)}}var ptr=rawConstructor();for(fieldName in fields){fields[fieldName].write(ptr,o[fieldName])}if(destructors!==null){destructors.push(rawDestructor,ptr)}return ptr},readValueFromPointer:readPointer,destructorFunction:rawDestructor}]})};var AsciiToString=ptr=>{var str="";while(1){var ch=(growMemViews(),HEAPU8)[ptr++];if(!ch)return str;str+=String.fromCharCode(ch)}};var BindingError=class BindingError extends Error{constructor(message){super(message);this.name="BindingError"}};var throwBindingError=message=>{throw new BindingError(message)};function sharedRegisterType(rawType,registeredInstance,options={}){var name=registeredInstance.name;if(!rawType){throwBindingError(`type "${name}" must have a positive integer typeid pointer`)}if(registeredTypes.hasOwnProperty(rawType)){if(options.ignoreDuplicateRegistrations){return}else{throwBindingError(`Cannot register type '${name}' twice`)}}registeredTypes[rawType]=registeredInstance;delete typeDependencies[rawType];if(awaitingDependencies.hasOwnProperty(rawType)){var callbacks=awaitingDependencies[rawType];delete awaitingDependencies[rawType];callbacks.forEach(cb=>cb())}}function registerType(rawType,registeredInstance,options={}){return sharedRegisterType(rawType,registeredInstance,options)}var integerReadValueFromPointer=(name,width,signed)=>{switch(width){case 1:return 
signed?pointer=>(growMemViews(),HEAP8)[pointer]:pointer=>(growMemViews(),HEAPU8)[pointer];case 2:return signed?pointer=>(growMemViews(),HEAP16)[pointer>>1]:pointer=>(growMemViews(),HEAPU16)[pointer>>1];case 4:return signed?pointer=>(growMemViews(),HEAP32)[pointer>>2]:pointer=>(growMemViews(),HEAPU32)[pointer>>2];case 8:return signed?pointer=>(growMemViews(),HEAP64)[pointer>>3]:pointer=>(growMemViews(),HEAPU64)[pointer>>3];default:throw new TypeError(`invalid integer width (${width}): ${name}`)}};var __embind_register_bigint=(primitiveType,name,size,minRange,maxRange)=>{name=AsciiToString(name);const isUnsignedType=minRange===0n;let fromWireType=value=>value;if(isUnsignedType){const bitSize=size*8;fromWireType=value=>BigInt.asUintN(bitSize,value);maxRange=fromWireType(maxRange)}registerType(primitiveType,{name,fromWireType,toWireType:(destructors,value)=>{if(typeof value=="number"){value=BigInt(value)}return value},readValueFromPointer:integerReadValueFromPointer(name,size,!isUnsignedType),destructorFunction:null})};var __embind_register_bool=(rawType,name,trueValue,falseValue)=>{name=AsciiToString(name);registerType(rawType,{name,fromWireType:function(wt){return!!wt},toWireType:function(destructors,o){return o?trueValue:falseValue},readValueFromPointer:function(pointer){return this.fromWireType((growMemViews(),HEAPU8)[pointer])},destructorFunction:null})};var shallowCopyInternalPointer=o=>({count:o.count,deleteScheduled:o.deleteScheduled,preservePointerOnDelete:o.preservePointerOnDelete,ptr:o.ptr,ptrType:o.ptrType,smartPtr:o.smartPtr,smartPtrType:o.smartPtrType});var throwInstanceAlreadyDeleted=obj=>{function getInstanceTypeName(handle){return handle.$$.ptrType.registeredClass.name}throwBindingError(getInstanceTypeName(obj)+" instance already deleted")};var finalizationRegistry=false;var detachFinalizer=handle=>{};var runDestructor=$$=>{if($$.smartPtr){$$.smartPtrType.rawDestructor($$.smartPtr)}else{$$.ptrType.registeredClass.rawDestructor($$.ptr)}};var 
releaseClassHandle=$$=>{$$.count.value-=1;var toDelete=0===$$.count.value;if(toDelete){runDestructor($$)}};var attachFinalizer=handle=>{if(!globalThis.FinalizationRegistry){attachFinalizer=handle=>handle;return handle}finalizationRegistry=new FinalizationRegistry(info=>{releaseClassHandle(info.$$)});attachFinalizer=handle=>{var $$=handle.$$;var hasSmartPtr=!!$$.smartPtr;if(hasSmartPtr){var info={$$};finalizationRegistry.register(handle,info,handle)}return handle};detachFinalizer=handle=>finalizationRegistry.unregister(handle);return attachFinalizer(handle)};var deletionQueue=[];var flushPendingDeletes=()=>{while(deletionQueue.length){var obj=deletionQueue.pop();obj.$$.deleteScheduled=false;obj["delete"]()}};var delayFunction;var init_ClassHandle=()=>{let proto=ClassHandle.prototype;Object.assign(proto,{isAliasOf(other){if(!(this instanceof ClassHandle)){return false}if(!(other instanceof ClassHandle)){return false}var leftClass=this.$$.ptrType.registeredClass;var left=this.$$.ptr;other.$$=other.$$;var rightClass=other.$$.ptrType.registeredClass;var right=other.$$.ptr;while(leftClass.baseClass){left=leftClass.upcast(left);leftClass=leftClass.baseClass}while(rightClass.baseClass){right=rightClass.upcast(right);rightClass=rightClass.baseClass}return leftClass===rightClass&&left===right},clone(){if(!this.$$.ptr){throwInstanceAlreadyDeleted(this)}if(this.$$.preservePointerOnDelete){this.$$.count.value+=1;return this}else{var clone=attachFinalizer(Object.create(Object.getPrototypeOf(this),{$$:{value:shallowCopyInternalPointer(this.$$)}}));clone.$$.count.value+=1;clone.$$.deleteScheduled=false;return clone}},delete(){if(!this.$$.ptr){throwInstanceAlreadyDeleted(this)}if(this.$$.deleteScheduled&&!this.$$.preservePointerOnDelete){throwBindingError("Object already scheduled for 
deletion")}detachFinalizer(this);releaseClassHandle(this.$$);if(!this.$$.preservePointerOnDelete){this.$$.smartPtr=undefined;this.$$.ptr=undefined}},isDeleted(){return!this.$$.ptr},deleteLater(){if(!this.$$.ptr){throwInstanceAlreadyDeleted(this)}if(this.$$.deleteScheduled&&!this.$$.preservePointerOnDelete){throwBindingError("Object already scheduled for deletion")}deletionQueue.push(this);if(deletionQueue.length===1&&delayFunction){delayFunction(flushPendingDeletes)}this.$$.deleteScheduled=true;return this}});const symbolDispose=Symbol.dispose;if(symbolDispose){proto[symbolDispose]=proto["delete"]}};function ClassHandle(){}var createNamedFunction=(name,func)=>Object.defineProperty(func,"name",{value:name});var registeredPointers={};var ensureOverloadTable=(proto,methodName,humanName)=>{if(undefined===proto[methodName].overloadTable){var prevFunc=proto[methodName];proto[methodName]=function(...args){if(!proto[methodName].overloadTable.hasOwnProperty(args.length)){throwBindingError(`Function '${humanName}' called with an invalid number of arguments (${args.length}) - expects one of (${proto[methodName].overloadTable})!`)}return proto[methodName].overloadTable[args.length].apply(this,args)};proto[methodName].overloadTable=[];proto[methodName].overloadTable[prevFunc.argCount]=prevFunc}};var exposePublicSymbol=(name,value,numArguments)=>{if(Module.hasOwnProperty(name)){if(undefined===numArguments||undefined!==Module[name].overloadTable&&undefined!==Module[name].overloadTable[numArguments]){throwBindingError(`Cannot register public name '${name}' twice`)}ensureOverloadTable(Module,name,name);if(Module[name].overloadTable.hasOwnProperty(numArguments)){throwBindingError(`Cannot register multiple overloads of a function with the same number of arguments (${numArguments})!`)}Module[name].overloadTable[numArguments]=value}else{Module[name]=value;Module[name].argCount=numArguments}};var char_0=48;var char_9=57;var 
makeLegalFunctionName=name=>{name=name.replace(/[^a-zA-Z0-9_]/g,"$");var f=name.charCodeAt(0);if(f>=char_0&&f<=char_9){return`_${name}`}return name};function RegisteredClass(name,constructor,instancePrototype,rawDestructor,baseClass,getActualType,upcast,downcast){this.name=name;this.constructor=constructor;this.instancePrototype=instancePrototype;this.rawDestructor=rawDestructor;this.baseClass=baseClass;this.getActualType=getActualType;this.upcast=upcast;this.downcast=downcast;this.pureVirtualFunctions=[]}var upcastPointer=(ptr,ptrClass,desiredClass)=>{while(ptrClass!==desiredClass){if(!ptrClass.upcast){throwBindingError(`Expected null or instance of ${desiredClass.name}, got an instance of ${ptrClass.name}`)}ptr=ptrClass.upcast(ptr);ptrClass=ptrClass.baseClass}return ptr};var embindRepr=v=>{if(v===null){return"null"}var t=typeof v;if(t==="object"||t==="array"||t==="function"){return v.toString()}else{return""+v}};function constNoSmartPtrRawPointerToWireType(destructors,handle){if(handle===null){if(this.isReference){throwBindingError(`null is not a valid ${this.name}`)}return 0}if(!handle.$$){throwBindingError(`Cannot pass "${embindRepr(handle)}" as a ${this.name}`)}if(!handle.$$.ptr){throwBindingError(`Cannot pass deleted object as a pointer of type ${this.name}`)}var handleClass=handle.$$.ptrType.registeredClass;var ptr=upcastPointer(handle.$$.ptr,handleClass,this.registeredClass);return ptr}function genericPointerToWireType(destructors,handle){var ptr;if(handle===null){if(this.isReference){throwBindingError(`null is not a valid ${this.name}`)}if(this.isSmartPointer){ptr=this.rawConstructor();if(destructors!==null){destructors.push(this.rawDestructor,ptr)}return ptr}else{return 0}}if(!handle||!handle.$$){throwBindingError(`Cannot pass "${embindRepr(handle)}" as a ${this.name}`)}if(!handle.$$.ptr){throwBindingError(`Cannot pass deleted object as a pointer of type ${this.name}`)}if(!this.isConst&&handle.$$.ptrType.isConst){throwBindingError(`Cannot convert argument 
of type ${handle.$$.smartPtrType?handle.$$.smartPtrType.name:handle.$$.ptrType.name} to parameter type ${this.name}`)}var handleClass=handle.$$.ptrType.registeredClass;ptr=upcastPointer(handle.$$.ptr,handleClass,this.registeredClass);if(this.isSmartPointer){if(undefined===handle.$$.smartPtr){throwBindingError("Passing raw pointer to smart pointer is illegal")}switch(this.sharingPolicy){case 0:if(handle.$$.smartPtrType===this){ptr=handle.$$.smartPtr}else{throwBindingError(`Cannot convert argument of type ${handle.$$.smartPtrType?handle.$$.smartPtrType.name:handle.$$.ptrType.name} to parameter type ${this.name}`)}break;case 1:ptr=handle.$$.smartPtr;break;case 2:if(handle.$$.smartPtrType===this){ptr=handle.$$.smartPtr}else{var clonedHandle=handle["clone"]();ptr=this.rawShare(ptr,Emval.toHandle(()=>clonedHandle["delete"]()));if(destructors!==null){destructors.push(this.rawDestructor,ptr)}}break;default:throwBindingError("Unsupporting sharing policy")}}return ptr}function nonConstNoSmartPtrRawPointerToWireType(destructors,handle){if(handle===null){if(this.isReference){throwBindingError(`null is not a valid ${this.name}`)}return 0}if(!handle.$$){throwBindingError(`Cannot pass "${embindRepr(handle)}" as a ${this.name}`)}if(!handle.$$.ptr){throwBindingError(`Cannot pass deleted object as a pointer of type ${this.name}`)}if(handle.$$.ptrType.isConst){throwBindingError(`Cannot convert argument of type ${handle.$$.ptrType.name} to parameter type ${this.name}`)}var handleClass=handle.$$.ptrType.registeredClass;var ptr=upcastPointer(handle.$$.ptr,handleClass,this.registeredClass);return ptr}var downcastPointer=(ptr,ptrClass,desiredClass)=>{if(ptrClass===desiredClass){return ptr}if(undefined===desiredClass.baseClass){return null}var rv=downcastPointer(ptr,ptrClass,desiredClass.baseClass);if(rv===null){return null}return desiredClass.downcast(rv)};var registeredInstances={};var getBasestPointer=(class_,ptr)=>{if(ptr===undefined){throwBindingError("ptr should not be 
undefined")}while(class_.baseClass){ptr=class_.upcast(ptr);class_=class_.baseClass}return ptr};var getInheritedInstance=(class_,ptr)=>{ptr=getBasestPointer(class_,ptr);return registeredInstances[ptr]};var makeClassHandle=(prototype,record)=>{if(!record.ptrType||!record.ptr){throwInternalError("makeClassHandle requires ptr and ptrType")}var hasSmartPtrType=!!record.smartPtrType;var hasSmartPtr=!!record.smartPtr;if(hasSmartPtrType!==hasSmartPtr){throwInternalError("Both smartPtrType and smartPtr must be specified")}record.count={value:1};return attachFinalizer(Object.create(prototype,{$$:{value:record,writable:true}}))};function RegisteredPointer_fromWireType(ptr){var rawPointer=this.getPointee(ptr);if(!rawPointer){this.destructor(ptr);return null}var registeredInstance=getInheritedInstance(this.registeredClass,rawPointer);if(undefined!==registeredInstance){if(0===registeredInstance.$$.count.value){registeredInstance.$$.ptr=rawPointer;registeredInstance.$$.smartPtr=ptr;return registeredInstance["clone"]()}else{var rv=registeredInstance["clone"]();this.destructor(ptr);return rv}}function makeDefaultHandle(){if(this.isSmartPointer){return makeClassHandle(this.registeredClass.instancePrototype,{ptrType:this.pointeeType,ptr:rawPointer,smartPtrType:this,smartPtr:ptr})}else{return makeClassHandle(this.registeredClass.instancePrototype,{ptrType:this,ptr})}}var actualType=this.registeredClass.getActualType(rawPointer);var registeredPointerRecord=registeredPointers[actualType];if(!registeredPointerRecord){return makeDefaultHandle.call(this)}var toType;if(this.isConst){toType=registeredPointerRecord.constPointerType}else{toType=registeredPointerRecord.pointerType}var dp=downcastPointer(rawPointer,this.registeredClass,toType.registeredClass);if(dp===null){return makeDefaultHandle.call(this)}if(this.isSmartPointer){return makeClassHandle(toType.registeredClass.instancePrototype,{ptrType:toType,ptr:dp,smartPtrType:this,smartPtr:ptr})}else{return 
makeClassHandle(toType.registeredClass.instancePrototype,{ptrType:toType,ptr:dp})}}var init_RegisteredPointer=()=>{Object.assign(RegisteredPointer.prototype,{getPointee(ptr){if(this.rawGetPointee){ptr=this.rawGetPointee(ptr)}return ptr},destructor(ptr){this.rawDestructor?.(ptr)},readValueFromPointer:readPointer,fromWireType:RegisteredPointer_fromWireType})};function RegisteredPointer(name,registeredClass,isReference,isConst,isSmartPointer,pointeeType,sharingPolicy,rawGetPointee,rawConstructor,rawShare,rawDestructor){this.name=name;this.registeredClass=registeredClass;this.isReference=isReference;this.isConst=isConst;this.isSmartPointer=isSmartPointer;this.pointeeType=pointeeType;this.sharingPolicy=sharingPolicy;this.rawGetPointee=rawGetPointee;this.rawConstructor=rawConstructor;this.rawShare=rawShare;this.rawDestructor=rawDestructor;if(!isSmartPointer&®isteredClass.baseClass===undefined){if(isConst){this.toWireType=constNoSmartPtrRawPointerToWireType;this.destructorFunction=null}else{this.toWireType=nonConstNoSmartPtrRawPointerToWireType;this.destructorFunction=null}}else{this.toWireType=genericPointerToWireType}}var replacePublicSymbol=(name,value,numArguments)=>{if(!Module.hasOwnProperty(name)){throwInternalError("Replacing nonexistent public symbol")}if(undefined!==Module[name].overloadTable&&undefined!==numArguments){Module[name].overloadTable[numArguments]=value}else{Module[name]=value;Module[name].argCount=numArguments}};var embind__requireFunction=(signature,rawFunction,isAsync=false)=>{signature=AsciiToString(signature);function makeDynCaller(){var rtn=getWasmTableEntry(rawFunction);return rtn}var fp=makeDynCaller();if(typeof fp!="function"){throwBindingError(`unknown function pointer with signature ${signature}: ${rawFunction}`)}return fp};class UnboundTypeError extends Error{}var getTypeName=type=>{var ptr=___getTypeName(type);var rv=AsciiToString(ptr);_free(ptr);return rv};var throwUnboundTypeError=(message,types)=>{var unboundTypes=[];var 
seen={};function visit(type){if(seen[type]){return}if(registeredTypes[type]){return}if(typeDependencies[type]){typeDependencies[type].forEach(visit);return}unboundTypes.push(type);seen[type]=true}types.forEach(visit);throw new UnboundTypeError(`${message}: `+unboundTypes.map(getTypeName).join([", "]))};var __embind_register_class=(rawType,rawPointerType,rawConstPointerType,baseClassRawType,getActualTypeSignature,getActualType,upcastSignature,upcast,downcastSignature,downcast,name,destructorSignature,rawDestructor)=>{name=AsciiToString(name);getActualType=embind__requireFunction(getActualTypeSignature,getActualType);upcast&&=embind__requireFunction(upcastSignature,upcast);downcast&&=embind__requireFunction(downcastSignature,downcast);rawDestructor=embind__requireFunction(destructorSignature,rawDestructor);var legalFunctionName=makeLegalFunctionName(name);exposePublicSymbol(legalFunctionName,function(){throwUnboundTypeError(`Cannot construct ${name} due to unbound types`,[baseClassRawType])});whenDependentTypesAreResolved([rawType,rawPointerType,rawConstPointerType],baseClassRawType?[baseClassRawType]:[],base=>{base=base[0];var baseClass;var basePrototype;if(baseClassRawType){baseClass=base.registeredClass;basePrototype=baseClass.instancePrototype}else{basePrototype=ClassHandle.prototype}var constructor=createNamedFunction(name,function(...args){if(Object.getPrototypeOf(this)!==instancePrototype){throw new BindingError(`Use 'new' to construct ${name}`)}if(undefined===registeredClass.constructor_body){throw new BindingError(`${name} has no accessible constructor`)}var body=registeredClass.constructor_body[args.length];if(undefined===body){throw new BindingError(`Tried to invoke ctor of ${name} with invalid number of parameters (${args.length}) - expected (${Object.keys(registeredClass.constructor_body).toString()}) parameters instead!`)}return body.apply(this,args)});var 
instancePrototype=Object.create(basePrototype,{constructor:{value:constructor}});constructor.prototype=instancePrototype;var registeredClass=new RegisteredClass(name,constructor,instancePrototype,rawDestructor,baseClass,getActualType,upcast,downcast);if(registeredClass.baseClass){registeredClass.baseClass.__derivedClasses??=[];registeredClass.baseClass.__derivedClasses.push(registeredClass)}var referenceConverter=new RegisteredPointer(name,registeredClass,true,false,false);var pointerConverter=new RegisteredPointer(name+"*",registeredClass,false,false,false);var constPointerConverter=new RegisteredPointer(name+" const*",registeredClass,false,true,false);registeredPointers[rawType]={pointerType:pointerConverter,constPointerType:constPointerConverter};replacePublicSymbol(legalFunctionName,constructor);return[referenceConverter,pointerConverter,constPointerConverter]})};var heap32VectorToArray=(count,firstElement)=>{var array=[];for(var i=0;i>2])}return array};function usesDestructorStack(argTypes){for(var i=1;i{var rawArgTypes=heap32VectorToArray(argCount,rawArgTypesAddr);invoker=embind__requireFunction(invokerSignature,invoker);whenDependentTypesAreResolved([],[rawClassType],classType=>{classType=classType[0];var humanName=`constructor ${classType.name}`;if(undefined===classType.registeredClass.constructor_body){classType.registeredClass.constructor_body=[]}if(undefined!==classType.registeredClass.constructor_body[argCount-1]){throw new BindingError(`Cannot register multiple constructors with identical number of parameters (${argCount-1}) for class '${classType.name}'! 
Overload resolution is currently only performed using the parameter count, not actual type info!`)}classType.registeredClass.constructor_body[argCount-1]=()=>{throwUnboundTypeError(`Cannot construct ${classType.name} due to unbound types`,rawArgTypes)};whenDependentTypesAreResolved([],rawArgTypes,argTypes=>{argTypes.splice(1,0,null);classType.registeredClass.constructor_body[argCount-1]=craftInvokerFunction(humanName,argTypes,null,invoker,rawConstructor);return[]});return[]})};var getFunctionName=signature=>{signature=signature.trim();const argsIndex=signature.indexOf("(");if(argsIndex===-1)return signature;return signature.slice(0,argsIndex)};var __embind_register_class_function=(rawClassType,methodName,argCount,rawArgTypesAddr,invokerSignature,rawInvoker,context,isPureVirtual,isAsync,isNonnullReturn)=>{var rawArgTypes=heap32VectorToArray(argCount,rawArgTypesAddr);methodName=AsciiToString(methodName);methodName=getFunctionName(methodName);rawInvoker=embind__requireFunction(invokerSignature,rawInvoker,isAsync);whenDependentTypesAreResolved([],[rawClassType],classType=>{classType=classType[0];var humanName=`${classType.name}.${methodName}`;if(methodName.startsWith("@@")){methodName=Symbol[methodName.substring(2)]}if(isPureVirtual){classType.registeredClass.pureVirtualFunctions.push(methodName)}function unboundTypesHandler(){throwUnboundTypeError(`Cannot call ${humanName} due to unbound types`,rawArgTypes)}var proto=classType.registeredClass.instancePrototype;var method=proto[methodName];if(undefined===method||undefined===method.overloadTable&&method.className!==classType.name&&method.argCount===argCount-2){unboundTypesHandler.argCount=argCount-2;unboundTypesHandler.className=classType.name;proto[methodName]=unboundTypesHandler}else{ensureOverloadTable(proto,methodName,humanName);proto[methodName].overloadTable[argCount-2]=unboundTypesHandler}whenDependentTypesAreResolved([],rawArgTypes,argTypes=>{var 
memberFunction=craftInvokerFunction(humanName,argTypes,classType,rawInvoker,context,isAsync);if(undefined===proto[methodName].overloadTable){memberFunction.argCount=argCount-2;proto[methodName]=memberFunction}else{proto[methodName].overloadTable[argCount-2]=memberFunction}return[]});return[]})};var __embind_register_constant=(name,type,value)=>{name=AsciiToString(name);whenDependentTypesAreResolved([],[type],type=>{type=type[0];Module[name]=type.fromWireType(value);return[]})};var emval_freelist=[];var emval_handles=[0,1,,1,null,1,true,1,false,1];var __emval_decref=handle=>{if(handle>9&&0===--emval_handles[handle+1]){emval_handles[handle]=undefined;emval_freelist.push(handle)}};var Emval={toValue:handle=>{if(!handle){throwBindingError(`Cannot use deleted val. handle = ${handle}`)}return emval_handles[handle]},toHandle:value=>{switch(value){case undefined:return 2;case null:return 4;case true:return 6;case false:return 8;default:{const handle=emval_freelist.pop()||emval_handles.length;emval_handles[handle]=value;emval_handles[handle+1]=1;return handle}}}};var EmValType={name:"emscripten::val",fromWireType:handle=>{var rv=Emval.toValue(handle);__emval_decref(handle);return rv},toWireType:(destructors,value)=>Emval.toHandle(value),readValueFromPointer:readPointer,destructorFunction:null};var __embind_register_emval=rawType=>registerType(rawType,EmValType);var enumReadValueFromPointer=(name,width,signed)=>{switch(width){case 1:return signed?function(pointer){return this.fromWireType((growMemViews(),HEAP8)[pointer])}:function(pointer){return this.fromWireType((growMemViews(),HEAPU8)[pointer])};case 2:return signed?function(pointer){return this.fromWireType((growMemViews(),HEAP16)[pointer>>1])}:function(pointer){return this.fromWireType((growMemViews(),HEAPU16)[pointer>>1])};case 4:return signed?function(pointer){return this.fromWireType((growMemViews(),HEAP32)[pointer>>2])}:function(pointer){return this.fromWireType((growMemViews(),HEAPU32)[pointer>>2])};default:throw 
new TypeError(`invalid integer width (${width}): ${name}`)}};var __embind_register_enum=(rawType,name,size,isSigned)=>{name=AsciiToString(name);function ctor(){}ctor.values={};registerType(rawType,{name,constructor:ctor,fromWireType:function(c){return this.constructor.values[c]},toWireType:(destructors,c)=>c.value,readValueFromPointer:enumReadValueFromPointer(name,size,isSigned),destructorFunction:null});exposePublicSymbol(name,ctor)};var requireRegisteredType=(rawType,humanName)=>{var impl=registeredTypes[rawType];if(undefined===impl){throwBindingError(`${humanName} has unknown type ${getTypeName(rawType)}`)}return impl};var __embind_register_enum_value=(rawEnumType,name,enumValue)=>{var enumType=requireRegisteredType(rawEnumType,"enum");name=AsciiToString(name);var Enum=enumType.constructor;var Value=Object.create(enumType.constructor.prototype,{value:{value:enumValue},constructor:{value:createNamedFunction(`${enumType.name}_${name}`,function(){})}});Enum.values[enumValue]=Value;Enum[name]=Value};var floatReadValueFromPointer=(name,width)=>{switch(width){case 4:return function(pointer){return this.fromWireType((growMemViews(),HEAPF32)[pointer>>2])};case 8:return function(pointer){return this.fromWireType((growMemViews(),HEAPF64)[pointer>>3])};default:throw new TypeError(`invalid float width (${width}): ${name}`)}};var __embind_register_float=(rawType,name,size)=>{name=AsciiToString(name);registerType(rawType,{name,fromWireType:value=>value,toWireType:(destructors,value)=>value,readValueFromPointer:floatReadValueFromPointer(name,size),destructorFunction:null})};var __embind_register_function=(name,argCount,rawArgTypesAddr,signature,rawInvoker,fn,isAsync,isNonnullReturn)=>{var argTypes=heap32VectorToArray(argCount,rawArgTypesAddr);name=AsciiToString(name);name=getFunctionName(name);rawInvoker=embind__requireFunction(signature,rawInvoker,isAsync);exposePublicSymbol(name,function(){throwUnboundTypeError(`Cannot call ${name} due to unbound 
types`,argTypes)},argCount-1);whenDependentTypesAreResolved([],argTypes,argTypes=>{var invokerArgsArray=[argTypes[0],null].concat(argTypes.slice(1));replacePublicSymbol(name,craftInvokerFunction(name,invokerArgsArray,null,rawInvoker,fn,isAsync),argCount-1);return[]})};var __embind_register_integer=(primitiveType,name,size,minRange,maxRange)=>{name=AsciiToString(name);const isUnsignedType=minRange===0;let fromWireType=value=>value;if(isUnsignedType){var bitshift=32-8*size;fromWireType=value=>value<>>bitshift;maxRange=fromWireType(maxRange)}registerType(primitiveType,{name,fromWireType,toWireType:(destructors,value)=>value,readValueFromPointer:integerReadValueFromPointer(name,size,minRange!==0),destructorFunction:null})};var __embind_register_memory_view=(rawType,dataTypeIndex,name)=>{var typeMapping=[Int8Array,Uint8Array,Int16Array,Uint16Array,Int32Array,Uint32Array,Float32Array,Float64Array,BigInt64Array,BigUint64Array];var TA=typeMapping[dataTypeIndex];function decodeMemoryView(handle){var size=(growMemViews(),HEAPU32)[handle>>2];var data=(growMemViews(),HEAPU32)[handle+4>>2];return new TA((growMemViews(),HEAP8).buffer,data,size)}name=AsciiToString(name);registerType(rawType,{name,fromWireType:decodeMemoryView,readValueFromPointer:decodeMemoryView},{ignoreDuplicateRegistrations:true})};var stringToUTF8=(str,outPtr,maxBytesToWrite)=>stringToUTF8Array(str,(growMemViews(),HEAPU8),outPtr,maxBytesToWrite);var __embind_register_std_string=(rawType,name)=>{name=AsciiToString(name);var stdStringIsUTF8=true;registerType(rawType,{name,fromWireType(value){var length=(growMemViews(),HEAPU32)[value>>2];var payload=value+4;var str;if(stdStringIsUTF8){str=UTF8ToString(payload,length,true)}else{str="";for(var i=0;i>2]=length;if(valueIsOfTypeString){if(stdStringIsUTF8){stringToUTF8(value,ptr,length+1)}else{for(var i=0;i255){_free(base);throwBindingError("String has UTF-16 code units that do not fit in 8 
bits")}(growMemViews(),HEAPU8)[ptr+i]=charCode}}}else{(growMemViews(),HEAPU8).set(value,ptr)}if(destructors!==null){destructors.push(_free,base)}return base},readValueFromPointer:readPointer,destructorFunction(ptr){_free(ptr)}})};var UTF16Decoder=globalThis.TextDecoder?new TextDecoder("utf-16le"):undefined;var UTF16ToString=(ptr,maxBytesToRead,ignoreNul)=>{var idx=ptr>>1;var endIdx=findStringEnd((growMemViews(),HEAPU16),idx,maxBytesToRead/2,ignoreNul);if(endIdx-idx>16&&UTF16Decoder)return UTF16Decoder.decode((growMemViews(),HEAPU16).slice(idx,endIdx));var str="";for(var i=idx;i{maxBytesToWrite??=2147483647;if(maxBytesToWrite<2)return 0;maxBytesToWrite-=2;var startPtr=outPtr;var numCharsToWrite=maxBytesToWrite>1]=codeUnit;outPtr+=2}(growMemViews(),HEAP16)[outPtr>>1]=0;return outPtr-startPtr};var lengthBytesUTF16=str=>str.length*2;var UTF32ToString=(ptr,maxBytesToRead,ignoreNul)=>{var str="";var startIdx=ptr>>2;for(var i=0;!(i>=maxBytesToRead/4);i++){var utf32=(growMemViews(),HEAPU32)[startIdx+i];if(!utf32&&!ignoreNul)break;str+=String.fromCodePoint(utf32)}return str};var stringToUTF32=(str,outPtr,maxBytesToWrite)=>{maxBytesToWrite??=2147483647;if(maxBytesToWrite<4)return 0;var startPtr=outPtr;var endPtr=startPtr+maxBytesToWrite-4;for(var i=0;i65535){i++}(growMemViews(),HEAP32)[outPtr>>2]=codePoint;outPtr+=4;if(outPtr+4>endPtr)break}(growMemViews(),HEAP32)[outPtr>>2]=0;return outPtr-startPtr};var lengthBytesUTF32=str=>{var len=0;for(var i=0;i65535){i++}len+=4}return len};var __embind_register_std_wstring=(rawType,charSize,name)=>{name=AsciiToString(name);var decodeString,encodeString,lengthBytesUTF;if(charSize===2){decodeString=UTF16ToString;encodeString=stringToUTF16;lengthBytesUTF=lengthBytesUTF16}else{decodeString=UTF32ToString;encodeString=stringToUTF32;lengthBytesUTF=lengthBytesUTF32}registerType(rawType,{name,fromWireType:value=>{var length=(growMemViews(),HEAPU32)[value>>2];var str=decodeString(value+4,length*charSize,true);_free(value);return 
str},toWireType:(destructors,value)=>{if(!(typeof value=="string")){throwBindingError(`Cannot pass non-string to C++ string type ${name}`)}var length=lengthBytesUTF(value);var ptr=_malloc(4+length+charSize);(growMemViews(),HEAPU32)[ptr>>2]=length/charSize;encodeString(value,ptr+4,length+charSize);if(destructors!==null){destructors.push(_free,ptr)}return ptr},readValueFromPointer:readPointer,destructorFunction(ptr){_free(ptr)}})};var __embind_register_value_object=(rawType,name,constructorSignature,rawConstructor,destructorSignature,rawDestructor)=>{structRegistrations[rawType]={name:AsciiToString(name),rawConstructor:embind__requireFunction(constructorSignature,rawConstructor),rawDestructor:embind__requireFunction(destructorSignature,rawDestructor),fields:[]}};var __embind_register_value_object_field=(structType,fieldName,getterReturnType,getterSignature,getter,getterContext,setterArgumentType,setterSignature,setter,setterContext)=>{structRegistrations[structType].fields.push({fieldName:AsciiToString(fieldName),getterReturnType,getter:embind__requireFunction(getterSignature,getter),getterContext,setterArgumentType,setter:embind__requireFunction(setterSignature,setter),setterContext})};var __embind_register_void=(rawType,name)=>{name=AsciiToString(name);registerType(rawType,{isVoid:true,name,fromWireType:()=>undefined,toWireType:(destructors,o)=>undefined})};var __emscripten_init_main_thread_js=tb=>{__emscripten_thread_init(tb,!ENVIRONMENT_IS_WORKER,1,!ENVIRONMENT_IS_WEB,2097152,false);PThread.threadInitTLS()};var handleException=e=>{if(e instanceof ExitStatus||e=="unwind"){return EXITSTATUS}quit_(1,e)};var maybeExit=()=>{if(!keepRuntimeAlive()){try{if(ENVIRONMENT_IS_PTHREAD){if(_pthread_self())__emscripten_thread_exit(EXITSTATUS);return}_exit(EXITSTATUS)}catch(e){handleException(e)}}};var callUserCallback=func=>{if(ABORT){return}try{func();maybeExit()}catch(e){handleException(e)}};var __emscripten_thread_mailbox_await=pthread_ptr=>{if(Atomics.waitAsync){var 
wait=Atomics.waitAsync((growMemViews(),HEAP32),pthread_ptr>>2,pthread_ptr);wait.value.then(checkMailbox);var waitingAsync=pthread_ptr+128;Atomics.store((growMemViews(),HEAP32),waitingAsync>>2,1)}};var checkMailbox=()=>callUserCallback(()=>{var pthread_ptr=_pthread_self();if(pthread_ptr){__emscripten_thread_mailbox_await(pthread_ptr);__emscripten_check_mailbox()}});var __emscripten_notify_mailbox_postmessage=(targetThread,currThreadId)=>{if(targetThread==currThreadId){setTimeout(checkMailbox)}else if(ENVIRONMENT_IS_PTHREAD){postMessage({targetThread,cmd:"checkMailbox"})}else{var worker=PThread.pthreads[targetThread];if(!worker){return}worker.postMessage({cmd:"checkMailbox"})}};var proxiedJSCallArgs=[];var __emscripten_receive_on_main_thread_js=(funcIndex,emAsmAddr,callingThread,numCallArgs,args)=>{numCallArgs/=2;proxiedJSCallArgs.length=numCallArgs;var b=args>>3;for(var i=0;i{if(!ENVIRONMENT_IS_PTHREAD)cleanupThread(thread);else postMessage({cmd:"cleanupThread",thread})};var __emscripten_thread_set_strongref=thread=>{};var __emscripten_throw_longjmp=()=>{throw Infinity};var emval_methodCallers=[];var emval_addMethodCaller=caller=>{var id=emval_methodCallers.length;emval_methodCallers.push(caller);return id};var emval_lookupTypes=(argCount,argTypes)=>{var a=new Array(argCount);for(var i=0;i>2],`parameter ${i}`)}return a};var emval_returnValue=(toReturnWire,destructorsRef,handle)=>{var destructors=[];var result=toReturnWire(destructors,handle);if(destructors.length){(growMemViews(),HEAPU32)[destructorsRef>>2]=Emval.toHandle(destructors)}return result};var emval_symbols={};var getStringOrSymbol=address=>{var symbol=emval_symbols[address];if(symbol===undefined){return AsciiToString(address)}return symbol};var __emval_create_invoker=(argCount,argTypesPtr,kind)=>{var GenericWireTypeSize=8;var[retType,...argTypes]=emval_lookupTypes(argCount,argTypesPtr);var toReturnWire=retType.toWireType.bind(retType);var 
argFromPtr=argTypes.map(type=>type.readValueFromPointer.bind(type));argCount--;var captures={toValue:Emval.toValue};var args=argFromPtr.map((argFromPtr,i)=>{var captureName=`argFromPtr${i}`;captures[captureName]=argFromPtr;return`${captureName}(args${i?"+"+i*GenericWireTypeSize:""})`});var functionBody;switch(kind){case 0:functionBody="toValue(handle)";break;case 2:functionBody="new (toValue(handle))";break;case 3:functionBody="";break;case 1:captures["getStringOrSymbol"]=getStringOrSymbol;functionBody="toValue(handle)[getStringOrSymbol(methodName)]";break}functionBody+=`(${args})`;if(!retType.isVoid){captures["toReturnWire"]=toReturnWire;captures["emval_returnValue"]=emval_returnValue;functionBody=`return emval_returnValue(toReturnWire, destructorsRef, ${functionBody})`}functionBody=`return function (handle, methodName, destructorsRef, args) {\n ${functionBody}\n }`;var invokerFunction=new Function(Object.keys(captures),functionBody)(...Object.values(captures));var functionName=`methodCaller<(${argTypes.map(t=>t.name)}) => ${retType.name}>`;return emval_addMethodCaller(createNamedFunction(functionName,invokerFunction))};var __emval_get_property=(handle,key)=>{handle=Emval.toValue(handle);key=Emval.toValue(key);return Emval.toHandle(handle[key])};var __emval_incref=handle=>{if(handle>9){emval_handles[handle+1]+=1}};var __emval_invoke=(caller,handle,methodName,destructorsRef,args)=>emval_methodCallers[caller](handle,methodName,destructorsRef,args);var __emval_new_cstring=v=>Emval.toHandle(getStringOrSymbol(v));var __emval_run_destructors=handle=>{var destructors=Emval.toValue(handle);runDestructors(destructors);__emval_decref(handle)};var INT53_MAX=9007199254740992;var INT53_MIN=-9007199254740992;var bigintToI53Checked=num=>numINT53_MAX?NaN:Number(num);function __mmap_js(len,prot,flags,fd,offset,allocated,addr){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(10,0,1,len,prot,flags,fd,offset,allocated,addr);offset=bigintToI53Checked(offset);try{var 
stream=SYSCALLS.getStreamFromFD(fd);var res=FS.mmap(stream,len,offset,prot,flags);var ptr=res.ptr;(growMemViews(),HEAP32)[allocated>>2]=res.allocated;(growMemViews(),HEAPU32)[addr>>2]=ptr;return 0}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}function __munmap_js(addr,len,prot,flags,fd,offset){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(11,0,1,addr,len,prot,flags,fd,offset);offset=bigintToI53Checked(offset);try{var stream=SYSCALLS.getStreamFromFD(fd);if(prot&2){SYSCALLS.doMsync(addr,stream,len,flags,offset)}}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}var __tzset_js=(timezone,daylight,std_name,dst_name)=>{var currentYear=(new Date).getFullYear();var winter=new Date(currentYear,0,1);var summer=new Date(currentYear,6,1);var winterOffset=winter.getTimezoneOffset();var summerOffset=summer.getTimezoneOffset();var stdTimezoneOffset=Math.max(winterOffset,summerOffset);(growMemViews(),HEAPU32)[timezone>>2]=stdTimezoneOffset*60;(growMemViews(),HEAP32)[daylight>>2]=Number(winterOffset!=summerOffset);var extractZone=timezoneOffset=>{var sign=timezoneOffset>=0?"-":"+";var absOffset=Math.abs(timezoneOffset);var hours=String(Math.floor(absOffset/60)).padStart(2,"0");var minutes=String(absOffset%60).padStart(2,"0");return`UTC${sign}${hours}${minutes}`};var winterName=extractZone(winterOffset);var summerName=extractZone(summerOffset);if(summerOffsetperformance.timeOrigin+performance.now();var _emscripten_date_now=()=>Date.now();var nowIsMonotonic=1;var checkWasiClock=clock_id=>clock_id>=0&&clock_id<=3;function _clock_time_get(clk_id,ignored_precision,ptime){ignored_precision=bigintToI53Checked(ignored_precision);if(!checkWasiClock(clk_id)){return 28}var now;if(clk_id===0){now=_emscripten_date_now()}else if(nowIsMonotonic){now=_emscripten_get_now()}else{return 52}var nsec=Math.round(now*1e3*1e3);(growMemViews(),HEAP64)[ptime>>3]=BigInt(nsec);return 0}var 
_emscripten_check_blocking_allowed=()=>{};var runtimeKeepalivePush=()=>{runtimeKeepaliveCounter+=1};var _emscripten_exit_with_live_runtime=()=>{runtimeKeepalivePush();throw"unwind"};var getHeapMax=()=>2147483648;var _emscripten_get_heap_max=()=>getHeapMax();var _emscripten_num_logical_cores=()=>navigator["hardwareConcurrency"];var growMemory=size=>{var oldHeapSize=wasmMemory.buffer.byteLength;var pages=(size-oldHeapSize+65535)/65536|0;try{wasmMemory.grow(pages);updateMemoryViews();return 1}catch(e){}};var _emscripten_resize_heap=requestedSize=>{var oldSize=(growMemViews(),HEAPU8).length;requestedSize>>>=0;if(requestedSize<=oldSize){return false}var maxHeapSize=getHeapMax();if(requestedSize>maxHeapSize){return false}for(var cutDown=1;cutDown<=4;cutDown*=2){var overGrownHeapSize=oldSize*(1+.2/cutDown);overGrownHeapSize=Math.min(overGrownHeapSize,requestedSize+100663296);var newSize=Math.min(maxHeapSize,alignMemory(Math.max(requestedSize,overGrownHeapSize),65536));var replacement=growMemory(newSize);if(replacement){return true}}return false};var ENV={};var getExecutableName=()=>thisProgram||"./this.program";var getEnvStrings=()=>{if(!getEnvStrings.strings){var lang=(typeof navigator=="object"&&navigator.language||"C").replace("-","_")+".UTF-8";var env={USER:"web_user",LOGNAME:"web_user",PATH:"/",PWD:"/",HOME:"/home/web_user",LANG:lang,_:getExecutableName()};for(var x in ENV){if(ENV[x]===undefined)delete env[x];else env[x]=ENV[x]}var strings=[];for(var x in env){strings.push(`${x}=${env[x]}`)}getEnvStrings.strings=strings}return getEnvStrings.strings};function _environ_get(__environ,environ_buf){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(12,0,1,__environ,environ_buf);var bufSize=0;var envp=0;for(var string of getEnvStrings()){var ptr=environ_buf+bufSize;(growMemViews(),HEAPU32)[__environ+envp>>2]=ptr;bufSize+=stringToUTF8(string,ptr,Infinity)+1;envp+=4}return 0}function _environ_sizes_get(penviron_count,penviron_buf_size){if(ENVIRONMENT_IS_PTHREAD)return 
proxyToMainThread(13,0,1,penviron_count,penviron_buf_size);var strings=getEnvStrings();(growMemViews(),HEAPU32)[penviron_count>>2]=strings.length;var bufSize=0;for(var string of strings){bufSize+=lengthBytesUTF8(string)+1}(growMemViews(),HEAPU32)[penviron_buf_size>>2]=bufSize;return 0}function _fd_close(fd){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(14,0,1,fd);try{var stream=SYSCALLS.getStreamFromFD(fd);FS.close(stream);return 0}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return e.errno}}var doReadv=(stream,iov,iovcnt,offset)=>{var ret=0;for(var i=0;i>2];var len=(growMemViews(),HEAPU32)[iov+4>>2];iov+=8;var curr=FS.read(stream,(growMemViews(),HEAP8),ptr,len,offset);if(curr<0)return-1;ret+=curr;if(curr>2]=num;return 0}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return e.errno}}function _fd_seek(fd,offset,whence,newOffset){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(16,0,1,fd,offset,whence,newOffset);offset=bigintToI53Checked(offset);try{if(isNaN(offset))return 61;var stream=SYSCALLS.getStreamFromFD(fd);FS.llseek(stream,offset,whence);(growMemViews(),HEAP64)[newOffset>>3]=BigInt(stream.position);if(stream.getdents&&offset===0&&whence===0)stream.getdents=null;return 0}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return e.errno}}var doWritev=(stream,iov,iovcnt,offset)=>{var ret=0;for(var i=0;i>2];var len=(growMemViews(),HEAPU32)[iov+4>>2];iov+=8;var curr=FS.write(stream,(growMemViews(),HEAP8),ptr,len,offset);if(curr<0)return-1;ret+=curr;if(curr>2]=num;return 0}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return 
e.errno}}PThread.init();FS.createPreloadedFile=FS_createPreloadedFile;FS.preloadFile=FS_preloadFile;FS.staticInit();init_ClassHandle();init_RegisteredPointer();{initMemory();if(Module["noExitRuntime"])noExitRuntime=Module["noExitRuntime"];if(Module["preloadPlugins"])preloadPlugins=Module["preloadPlugins"];if(Module["print"])out=Module["print"];if(Module["printErr"])err=Module["printErr"];if(Module["wasmBinary"])wasmBinary=Module["wasmBinary"];if(Module["arguments"])arguments_=Module["arguments"];if(Module["thisProgram"])thisProgram=Module["thisProgram"];if(Module["preInit"]){if(typeof Module["preInit"]=="function")Module["preInit"]=[Module["preInit"]];while(Module["preInit"].length>0){Module["preInit"].shift()()}}}var proxiedFunctionTable=[_proc_exit,exitOnMainThread,pthreadCreateProxied,___syscall_fcntl64,___syscall_fstat64,___syscall_ioctl,___syscall_lstat64,___syscall_newfstatat,___syscall_openat,___syscall_stat64,__mmap_js,__munmap_js,_environ_get,_environ_sizes_get,_fd_close,_fd_read,_fd_seek,_fd_write];var ___getTypeName,__embind_initialize_bindings,_free,_pthread_self,_malloc,__emscripten_tls_init,_emscripten_builtin_memalign,__emscripten_thread_init,__emscripten_thread_crashed,__emscripten_run_js_on_main_thread,__emscripten_thread_free_data,__emscripten_thread_exit,__emscripten_check_mailbox,_setThrew,_emscripten_stack_set_limits,__emscripten_stack_restore,__emscripten_stack_alloc,_emscripten_stack_get_current,__indirect_function_table,wasmTable;function 
assignWasmExports(wasmExports){___getTypeName=wasmExports["ma"];__embind_initialize_bindings=wasmExports["na"];_free=wasmExports["oa"];_pthread_self=wasmExports["qa"];_malloc=wasmExports["ra"];__emscripten_tls_init=wasmExports["sa"];_emscripten_builtin_memalign=wasmExports["ta"];__emscripten_thread_init=wasmExports["ua"];__emscripten_thread_crashed=wasmExports["va"];__emscripten_run_js_on_main_thread=wasmExports["wa"];__emscripten_thread_free_data=wasmExports["xa"];__emscripten_thread_exit=wasmExports["ya"];__emscripten_check_mailbox=wasmExports["za"];_setThrew=wasmExports["Aa"];_emscripten_stack_set_limits=wasmExports["Ba"];__emscripten_stack_restore=wasmExports["Ca"];__emscripten_stack_alloc=wasmExports["Da"];_emscripten_stack_get_current=wasmExports["Ea"];__indirect_function_table=wasmTable=wasmExports["pa"]}var wasmImports;function assignWasmImports(){wasmImports={u:___cxa_throw,P:___pthread_create_js,C:___syscall_fcntl64,X:___syscall_fstat64,Z:___syscall_ioctl,W:___syscall_newfstatat,D:___syscall_openat,ca:__abort_js,t:__embind_finalize_value_object,F:__embind_register_bigint,ia:__embind_register_bool,w:__embind_register_class,v:__embind_register_class_constructor,b:__embind_register_class_function,e:__embind_register_constant,ga:__embind_register_emval,m:__embind_register_enum,c:__embind_register_enum_value,E:__embind_register_float,g:__embind_register_function,n:__embind_register_integer,k:__embind_register_memory_view,ha:__embind_register_std_string,y:__embind_register_std_wstring,r:__embind_register_value_object,d:__embind_register_value_object_field,ja:__embind_register_void,V:__emscripten_init_main_thread_js,N:__emscripten_notify_mailbox_postmessage,Q:__emscripten_receive_on_main_thread_js,z:__emscripten_thread_cleanup,U:__emscripten_thread_mailbox_await,$:__emscripten_thread_set_strongref,L:__emscripten_throw_longjmp,j:__emval_create_invoker,f:__emval_decref,p:__emval_get_property,l:__emval_incref,i:__emval_invoke,q:__emval_new_cstring,h:__emval_run_dest
ructors,R:__mmap_js,S:__munmap_js,I:__tzset_js,ba:_clock_time_get,A:_emscripten_check_blocking_allowed,aa:_emscripten_date_now,_:_emscripten_exit_with_live_runtime,G:_emscripten_get_heap_max,o:_emscripten_get_now,ka:_emscripten_has_threading_support,H:_emscripten_num_logical_cores,M:_emscripten_resize_heap,J:_environ_get,K:_environ_sizes_get,O:_exit,x:_fd_close,Y:_fd_read,T:_fd_seek,B:_fd_write,ea:invoke_ii,s:invoke_vi,da:invoke_vii,fa:invoke_viii,a:wasmMemory}}function invoke_vi(index,a1){var sp=stackSave();try{getWasmTableEntry(index)(a1)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_viii(index,a1,a2,a3){var sp=stackSave();try{getWasmTableEntry(index)(a1,a2,a3)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_ii(index,a1){var sp=stackSave();try{return getWasmTableEntry(index)(a1)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_vii(index,a1,a2){var sp=stackSave();try{getWasmTableEntry(index)(a1,a2)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function run(){if(runDependencies>0){dependenciesFulfilled=run;return}if(ENVIRONMENT_IS_PTHREAD){readyPromiseResolve?.(Module);initRuntime();return}preRun();if(runDependencies>0){dependenciesFulfilled=run;return}function doRun(){Module["calledRun"]=true;if(ABORT)return;initRuntime();readyPromiseResolve?.(Module);Module["onRuntimeInitialized"]?.();postRun()}if(Module["setStatus"]){Module["setStatus"]("Running...");setTimeout(()=>{setTimeout(()=>Module["setStatus"](""),1);doRun()},1)}else{doRun()}}var wasmExports;if(!ENVIRONMENT_IS_PTHREAD){wasmExports=await (createWasm());run()}if(runtimeInitialized){moduleRtn=Module}else{moduleRtn=new Promise((resolve,reject)=>{readyPromiseResolve=resolve;readyPromiseReject=reject})} +;return moduleRtn}})();if(typeof exports==="object"&&typeof module==="object"){module.exports=BASIS;module.exports.default=BASIS}else if(typeof define==="function"&&define["amd"])define([],()=>BASIS);var 
isPthread=globalThis.self?.name?.startsWith("em-pthread");isPthread&&BASIS(); diff --git a/external/basis_universal/webgl/encoder/build/basis_encoder_threads.wasm b/external/basis_universal/webgl/encoder/build/basis_encoder_threads.wasm new file mode 100644 index 0000000000..ca9c02d0e9 Binary files /dev/null and b/external/basis_universal/webgl/encoder/build/basis_encoder_threads.wasm differ diff --git a/external/basis_universal/webgl/encoder/build/basis_encoder_threads_wasm64.js b/external/basis_universal/webgl/encoder/build/basis_encoder_threads_wasm64.js new file mode 100644 index 0000000000..775f5644b5 --- /dev/null +++ b/external/basis_universal/webgl/encoder/build/basis_encoder_threads_wasm64.js @@ -0,0 +1,2 @@ +var BASIS=(()=>{var _scriptName=globalThis.document?.currentScript?.src;return async function(moduleArg={}){var moduleRtn;var Module=moduleArg;var ENVIRONMENT_IS_WEB=!!globalThis.window;var ENVIRONMENT_IS_WORKER=!!globalThis.WorkerGlobalScope;var ENVIRONMENT_IS_NODE=globalThis.process?.versions?.node&&globalThis.process?.type!="renderer";var ENVIRONMENT_IS_PTHREAD=ENVIRONMENT_IS_WORKER&&self.name?.startsWith("em-pthread");var arguments_=[];var thisProgram="./this.program";var quit_=(status,toThrow)=>{throw toThrow};if(ENVIRONMENT_IS_WORKER){_scriptName=self.location.href}var scriptDirectory="";function locateFile(path){if(Module["locateFile"]){return Module["locateFile"](path,scriptDirectory)}return scriptDirectory+path}var readAsync,readBinary;if(ENVIRONMENT_IS_WEB||ENVIRONMENT_IS_WORKER){try{scriptDirectory=new URL(".",_scriptName).href}catch{}{if(ENVIRONMENT_IS_WORKER){readBinary=url=>{var xhr=new XMLHttpRequest;xhr.open("GET",url,false);xhr.responseType="arraybuffer";xhr.send(null);return new Uint8Array(xhr.response)}}readAsync=async url=>{var response=await fetch(url,{credentials:"same-origin"});if(response.ok){return response.arrayBuffer()}throw new Error(response.status+" : "+response.url)}}}else{}var out=console.log.bind(console);var 
err=console.error.bind(console);var wasmBinary;var wasmModule;var ABORT=false;var EXITSTATUS;function growMemViews(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}}var readyPromiseResolve,readyPromiseReject;var startWorker;if(ENVIRONMENT_IS_PTHREAD){var initializedJS=false;self.onunhandledrejection=e=>{throw e.reason||e};function handleMessage(e){try{var msgData=e["data"];var cmd=msgData.cmd;if(cmd==="load"){let messageQueue=[];self.onmessage=e=>messageQueue.push(e);startWorker=()=>{postMessage({cmd:"loaded"});for(let msg of messageQueue){handleMessage(msg)}self.onmessage=handleMessage};for(const handler of msgData.handlers){if(!Module[handler]||Module[handler].proxy){Module[handler]=(...args)=>{postMessage({cmd:"callHandler",handler,args})};if(handler=="print")out=Module[handler];if(handler=="printErr")err=Module[handler]}}wasmMemory=msgData.wasmMemory;updateMemoryViews();wasmModule=msgData.wasmModule;createWasm();run()}else if(cmd==="run"){establishStackSpace(msgData.pthread_ptr);__emscripten_thread_init(msgData.pthread_ptr,0,0,1,0,0);PThread.threadInitTLS();__emscripten_thread_mailbox_await(msgData.pthread_ptr);if(!initializedJS){__embind_initialize_bindings();initializedJS=true}try{invokeEntryPoint(msgData.start_routine,msgData.arg)}catch(ex){if(ex!="unwind"){throw ex}}}else if(msgData.target==="setimmediate"){}else if(cmd==="checkMailbox"){if(initializedJS){checkMailbox()}}else if(cmd){err(`worker: received unknown command ${cmd}`);err(msgData)}}catch(ex){__emscripten_thread_crashed();throw ex}}self.onmessage=handleMessage}var HEAP8,HEAPU8,HEAP16,HEAPU16,HEAP32,HEAPU32,HEAPF32,HEAPF64;var HEAP64,HEAPU64;var runtimeInitialized=false;function updateMemoryViews(){var b=wasmMemory.buffer;Module["HEAP8"]=HEAP8=new Int8Array(b);HEAP16=new Int16Array(b);HEAPU8=new Uint8Array(b);HEAPU16=new Uint16Array(b);HEAP32=new Int32Array(b);HEAPU32=new Uint32Array(b);HEAPF32=new Float32Array(b);HEAPF64=new Float64Array(b);HEAP64=new BigInt64Array(b);HEAPU64=new 
BigUint64Array(b)}function initMemory(){if(ENVIRONMENT_IS_PTHREAD){return}if(Module["wasmMemory"]){wasmMemory=Module["wasmMemory"]}else{var INITIAL_MEMORY=Module["INITIAL_MEMORY"]||1073741824;wasmMemory=new WebAssembly.Memory({initial:BigInt(INITIAL_MEMORY/65536),maximum:196608n,shared:true,address:"i64"})}updateMemoryViews()}function preRun(){if(Module["preRun"]){if(typeof Module["preRun"]=="function")Module["preRun"]=[Module["preRun"]];while(Module["preRun"].length){addOnPreRun(Module["preRun"].shift())}}callRuntimeCallbacks(onPreRuns)}function initRuntime(){runtimeInitialized=true;if(ENVIRONMENT_IS_PTHREAD)return startWorker();if(!Module["noFSInit"]&&!FS.initialized)FS.init();TTY.init();wasmExports["ma"]();FS.ignorePermissions=false}function postRun(){if(ENVIRONMENT_IS_PTHREAD){return}if(Module["postRun"]){if(typeof Module["postRun"]=="function")Module["postRun"]=[Module["postRun"]];while(Module["postRun"].length){addOnPostRun(Module["postRun"].shift())}}callRuntimeCallbacks(onPostRuns)}function abort(what){Module["onAbort"]?.(what);what="Aborted("+what+")";err(what);ABORT=true;what+=". 
Build with -sASSERTIONS for more info.";var e=new WebAssembly.RuntimeError(what);readyPromiseReject?.(e);throw e}var wasmBinaryFile;function findWasmBinary(){return locateFile("basis_encoder_threads_wasm64.wasm")}function getBinarySync(file){if(file==wasmBinaryFile&&wasmBinary){return new Uint8Array(wasmBinary)}if(readBinary){return readBinary(file)}throw"both async and sync fetching of the wasm failed"}async function getWasmBinary(binaryFile){if(!wasmBinary){try{var response=await readAsync(binaryFile);return new Uint8Array(response)}catch{}}return getBinarySync(binaryFile)}async function instantiateArrayBuffer(binaryFile,imports){try{var binary=await getWasmBinary(binaryFile);var instance=await WebAssembly.instantiate(binary,imports);return instance}catch(reason){err(`failed to asynchronously prepare wasm: ${reason}`);abort(reason)}}async function instantiateAsync(binary,binaryFile,imports){if(!binary){try{var response=fetch(binaryFile,{credentials:"same-origin"});var instantiationResult=await WebAssembly.instantiateStreaming(response,imports);return instantiationResult}catch(reason){err(`wasm streaming compile failed: ${reason}`);err("falling back to ArrayBuffer instantiation")}}return instantiateArrayBuffer(binaryFile,imports)}function getWasmImports(){assignWasmImports();var imports={a:wasmImports};return imports}async function createWasm(){function receiveInstance(instance,module){wasmExports=instance.exports;wasmExports=applySignatureConversions(wasmExports);registerTLSInit(wasmExports["ta"]);assignWasmExports(wasmExports);wasmModule=module;return wasmExports}function receiveInstantiationResult(result){return receiveInstance(result["instance"],result["module"])}var info=getWasmImports();if(Module["instantiateWasm"]){return new Promise((resolve,reject)=>{Module["instantiateWasm"](info,(inst,mod)=>{resolve(receiveInstance(inst,mod))})})}if(ENVIRONMENT_IS_PTHREAD){var instance=new WebAssembly.Instance(wasmModule,getWasmImports());return 
receiveInstance(instance,wasmModule)}wasmBinaryFile??=findWasmBinary();var result=await instantiateAsync(wasmBinary,wasmBinaryFile,info);var exports=receiveInstantiationResult(result);return exports}class ExitStatus{name="ExitStatus";constructor(status){this.message=`Program terminated with exit(${status})`;this.status=status}}var terminateWorker=worker=>{worker.terminate();worker.onmessage=e=>{}};var cleanupThread=pthread_ptr=>{var worker=PThread.pthreads[pthread_ptr];PThread.returnWorkerToPool(worker)};var callRuntimeCallbacks=callbacks=>{while(callbacks.length>0){callbacks.shift()(Module)}};var onPreRuns=[];var addOnPreRun=cb=>onPreRuns.push(cb);var runDependencies=0;var dependenciesFulfilled=null;var removeRunDependency=id=>{runDependencies--;Module["monitorRunDependencies"]?.(runDependencies);if(runDependencies==0){if(dependenciesFulfilled){var callback=dependenciesFulfilled;dependenciesFulfilled=null;callback()}}};var addRunDependency=id=>{runDependencies++;Module["monitorRunDependencies"]?.(runDependencies)};var spawnThread=threadParams=>{var worker=PThread.getNewWorker();if(!worker){return 6}PThread.runningWorkers.push(worker);PThread.pthreads[threadParams.pthread_ptr]=worker;worker.pthread_ptr=threadParams.pthread_ptr;var msg={cmd:"run",start_routine:threadParams.startRoutine,arg:threadParams.arg,pthread_ptr:threadParams.pthread_ptr};worker.postMessage(msg,threadParams.transferList);return 0};var runtimeKeepaliveCounter=0;var keepRuntimeAlive=()=>noExitRuntime||runtimeKeepaliveCounter>0;var stackSave=()=>_emscripten_stack_get_current();var stackRestore=val=>__emscripten_stack_restore(val);var stackAlloc=sz=>__emscripten_stack_alloc(sz);var proxyToMainThread=(funcIndex,emAsmAddr,sync,...callArgs)=>{var serializedNumCallArgs=callArgs.length*2;var sp=stackSave();var args=stackAlloc(serializedNumCallArgs*8);var b=args/8;for(var i=0;i{EXITSTATUS=status;if(ENVIRONMENT_IS_PTHREAD){exitOnMainThread(status);throw"unwind"}_proc_exit(status)};var _exit=exitJS;var 
PThread={unusedWorkers:[],runningWorkers:[],tlsInitFunctions:[],pthreads:{},init(){if(!ENVIRONMENT_IS_PTHREAD){PThread.initMainThread()}},initMainThread(){var pthreadPoolSize=18;while(pthreadPoolSize--){PThread.allocateUnusedWorker()}addOnPreRun(async()=>{var pthreadPoolReady=PThread.loadWasmModuleToAllWorkers();addRunDependency("loading-workers");await pthreadPoolReady;removeRunDependency("loading-workers")})},terminateAllThreads:()=>{for(var worker of PThread.runningWorkers){terminateWorker(worker)}for(var worker of PThread.unusedWorkers){terminateWorker(worker)}PThread.unusedWorkers=[];PThread.runningWorkers=[];PThread.pthreads={}},returnWorkerToPool:worker=>{var pthread_ptr=worker.pthread_ptr;delete PThread.pthreads[pthread_ptr];PThread.unusedWorkers.push(worker);PThread.runningWorkers.splice(PThread.runningWorkers.indexOf(worker),1);worker.pthread_ptr=0;__emscripten_thread_free_data(pthread_ptr)},threadInitTLS(){PThread.tlsInitFunctions.forEach(f=>f())},loadWasmModuleToWorker:worker=>new Promise(onFinishedLoading=>{worker.onmessage=e=>{var d=e["data"];var cmd=d.cmd;if(d.targetThread&&d.targetThread!=_pthread_self()){var targetWorker=PThread.pthreads[d.targetThread];if(targetWorker){targetWorker.postMessage(d,d.transferList)}else{err(`Internal error! 
Worker sent a message "${cmd}" to target pthread ${d.targetThread}, but that thread no longer exists!`)}return}if(cmd==="checkMailbox"){checkMailbox()}else if(cmd==="spawnThread"){spawnThread(d)}else if(cmd==="cleanupThread"){callUserCallback(()=>cleanupThread(d.thread))}else if(cmd==="loaded"){worker.loaded=true;onFinishedLoading(worker)}else if(d.target==="setimmediate"){worker.postMessage(d)}else if(cmd==="callHandler"){Module[d.handler](...d.args)}else if(cmd){err(`worker sent an unknown command ${cmd}`)}};worker.onerror=e=>{var message="worker sent an error!";err(`${message} ${e.filename}:${e.lineno}: ${e.message}`);throw e};var handlers=[];var knownHandlers=["onExit","onAbort","print","printErr"];for(var handler of knownHandlers){if(Module.propertyIsEnumerable(handler)){handlers.push(handler)}}worker.postMessage({cmd:"load",handlers,wasmMemory,wasmModule})}),async loadWasmModuleToAllWorkers(){if(ENVIRONMENT_IS_PTHREAD){return}let pthreadPoolReady=Promise.all(PThread.unusedWorkers.map(PThread.loadWasmModuleToWorker));return pthreadPoolReady},allocateUnusedWorker(){var worker;var pthreadMainJs=_scriptName;if(Module["mainScriptUrlOrBlob"]){pthreadMainJs=Module["mainScriptUrlOrBlob"];if(typeof pthreadMainJs!="string"){pthreadMainJs=URL.createObjectURL(pthreadMainJs)}}worker=new Worker(pthreadMainJs,{name:"em-pthread"});PThread.unusedWorkers.push(worker)},getNewWorker(){if(PThread.unusedWorkers.length==0){PThread.allocateUnusedWorker();PThread.loadWasmModuleToWorker(PThread.unusedWorkers[0])}return PThread.unusedWorkers.pop()}};var onPostRuns=[];var addOnPostRun=cb=>onPostRuns.push(cb);function establishStackSpace(pthread_ptr){var stackHigh=Number((growMemViews(),HEAPU64)[(pthread_ptr+88)/8]);var stackSize=Number((growMemViews(),HEAPU64)[(pthread_ptr+96)/8]);var stackLow=stackHigh-stackSize;_emscripten_stack_set_limits(stackHigh,stackLow);stackRestore(stackHigh)}var wasmTableMirror=[];var getWasmTableEntry=funcPtr=>{funcPtr=Number(funcPtr);var 
func=wasmTableMirror[funcPtr];if(!func){wasmTableMirror[funcPtr]=func=wasmTable.get(BigInt(funcPtr))}return func};var invokeEntryPoint=(ptr,arg)=>{runtimeKeepaliveCounter=0;noExitRuntime=0;var result=(a1=>getWasmTableEntry(ptr).call(null,BigInt(a1)))(arg);function finish(result){if(keepRuntimeAlive()){EXITSTATUS=result;return}__emscripten_thread_exit(result)}finish(result)};var noExitRuntime=true;var registerTLSInit=tlsInitFunc=>PThread.tlsInitFunctions.push(tlsInitFunc);var wasmMemory;class ExceptionInfo{constructor(excPtr){this.excPtr=excPtr;this.ptr=excPtr-48}set_type(type){(growMemViews(),HEAPU64)[(this.ptr+8)/8]=BigInt(type)}get_type(){return Number((growMemViews(),HEAPU64)[(this.ptr+8)/8])}set_destructor(destructor){(growMemViews(),HEAPU64)[(this.ptr+16)/8]=BigInt(destructor)}get_destructor(){return Number((growMemViews(),HEAPU64)[(this.ptr+16)/8])}set_caught(caught){caught=caught?1:0;(growMemViews(),HEAP8)[this.ptr+24]=caught}get_caught(){return(growMemViews(),HEAP8)[this.ptr+24]!=0}set_rethrown(rethrown){rethrown=rethrown?1:0;(growMemViews(),HEAP8)[this.ptr+25]=rethrown}get_rethrown(){return(growMemViews(),HEAP8)[this.ptr+25]!=0}init(type,destructor){this.set_adjusted_ptr(0);this.set_type(type);this.set_destructor(destructor)}set_adjusted_ptr(adjustedPtr){(growMemViews(),HEAPU64)[(this.ptr+32)/8]=BigInt(adjustedPtr)}get_adjusted_ptr(){return Number((growMemViews(),HEAPU64)[(this.ptr+32)/8])}}var exceptionLast=0;var uncaughtExceptionCount=0;var INT53_MAX=9007199254740992;var INT53_MIN=-9007199254740992;var bigintToI53Checked=num=>numINT53_MAX?NaN:Number(num);function ___cxa_throw(ptr,type,destructor){ptr=bigintToI53Checked(ptr);type=bigintToI53Checked(type);destructor=bigintToI53Checked(destructor);var info=new ExceptionInfo(ptr);info.init(type,destructor);exceptionLast=ptr;uncaughtExceptionCount++;throw exceptionLast}function pthreadCreateProxied(pthread_ptr,attr,startRoutine,arg){if(ENVIRONMENT_IS_PTHREAD)return 
proxyToMainThread(2,0,1,pthread_ptr,attr,startRoutine,arg);return ___pthread_create_js(pthread_ptr,attr,startRoutine,arg)}var _emscripten_has_threading_support=()=>!!globalThis.SharedArrayBuffer;function ___pthread_create_js(pthread_ptr,attr,startRoutine,arg){pthread_ptr=bigintToI53Checked(pthread_ptr);attr=bigintToI53Checked(attr);startRoutine=bigintToI53Checked(startRoutine);arg=bigintToI53Checked(arg);if(!_emscripten_has_threading_support()){return 6}var transferList=[];var error=0;if(ENVIRONMENT_IS_PTHREAD&&(transferList.length===0||error)){return pthreadCreateProxied(pthread_ptr,attr,startRoutine,arg)}if(error)return error;var threadParams={startRoutine,pthread_ptr,arg,transferList};if(ENVIRONMENT_IS_PTHREAD){threadParams.cmd="spawnThread";postMessage(threadParams,transferList);return 0}return spawnThread(threadParams)}var syscallGetVarargP=()=>{var ret=Number((growMemViews(),HEAPU64)[SYSCALLS.varargs/8]);SYSCALLS.varargs+=8;return ret};var syscallGetVarargI=()=>{var ret=(growMemViews(),HEAP32)[+SYSCALLS.varargs/4];SYSCALLS.varargs+=4;return ret};var PATH={isAbs:path=>path.charAt(0)==="/",splitPath:filename=>{var splitPathRe=/^(\/?|)([\s\S]*?)((?:\.{1,2}|[^\/]+?|)(\.[^.\/]*|))(?:[\/]*)$/;return splitPathRe.exec(filename).slice(1)},normalizeArray:(parts,allowAboveRoot)=>{var up=0;for(var i=parts.length-1;i>=0;i--){var last=parts[i];if(last==="."){parts.splice(i,1)}else if(last===".."){parts.splice(i,1);up++}else if(up){parts.splice(i,1);up--}}if(allowAboveRoot){for(;up;up--){parts.unshift("..")}}return parts},normalize:path=>{var isAbsolute=PATH.isAbs(path),trailingSlash=path.slice(-1)==="/";path=PATH.normalizeArray(path.split("/").filter(p=>!!p),!isAbsolute).join("/");if(!path&&!isAbsolute){path="."}if(path&&trailingSlash){path+="/"}return(isAbsolute?"/":"")+path},dirname:path=>{var result=PATH.splitPath(path),root=result[0],dir=result[1];if(!root&&!dir){return"."}if(dir){dir=dir.slice(0,-1)}return 
root+dir},basename:path=>path&&path.match(/([^\/]+|\/)\/*$/)[1],join:(...paths)=>PATH.normalize(paths.join("/")),join2:(l,r)=>PATH.normalize(l+"/"+r)};var initRandomFill=()=>view=>view.set(crypto.getRandomValues(new Uint8Array(view.byteLength)));var randomFill=view=>{(randomFill=initRandomFill())(view)};var PATH_FS={resolve:(...args)=>{var resolvedPath="",resolvedAbsolute=false;for(var i=args.length-1;i>=-1&&!resolvedAbsolute;i--){var path=i>=0?args[i]:FS.cwd();if(typeof path!="string"){throw new TypeError("Arguments to path.resolve must be strings")}else if(!path){return""}resolvedPath=path+"/"+resolvedPath;resolvedAbsolute=PATH.isAbs(path)}resolvedPath=PATH.normalizeArray(resolvedPath.split("/").filter(p=>!!p),!resolvedAbsolute).join("/");return(resolvedAbsolute?"/":"")+resolvedPath||"."},relative:(from,to)=>{from=PATH_FS.resolve(from).slice(1);to=PATH_FS.resolve(to).slice(1);function trim(arr){var start=0;for(;start=0;end--){if(arr[end]!=="")break}if(start>end)return[];return arr.slice(start,end-start+1)}var fromParts=trim(from.split("/"));var toParts=trim(to.split("/"));var length=Math.min(fromParts.length,toParts.length);var samePartsLength=length;for(var i=0;i{var maxIdx=idx+maxBytesToRead;if(ignoreNul)return maxIdx;while(heapOrArray[idx]&&!(idx>=maxIdx))++idx;return idx};var UTF8ArrayToString=(heapOrArray,idx=0,maxBytesToRead,ignoreNul)=>{var endPtr=findStringEnd(heapOrArray,idx,maxBytesToRead,ignoreNul);if(endPtr-idx>16&&heapOrArray.buffer&&UTF8Decoder){return UTF8Decoder.decode(heapOrArray.buffer instanceof ArrayBuffer?heapOrArray.subarray(idx,endPtr):heapOrArray.slice(idx,endPtr))}var str="";while(idx>10,56320|ch&1023)}}return str};var FS_stdin_getChar_buffer=[];var lengthBytesUTF8=str=>{var len=0;for(var i=0;i=55296&&c<=57343){len+=4;++i}else{len+=3}}return len};var stringToUTF8Array=(str,heap,outIdx,maxBytesToWrite)=>{if(!(maxBytesToWrite>0))return 0;var startIdx=outIdx;var endIdx=outIdx+maxBytesToWrite-1;for(var i=0;i=endIdx)break;heap[outIdx++]=u}else 
if(u<=2047){if(outIdx+1>=endIdx)break;heap[outIdx++]=192|u>>6;heap[outIdx++]=128|u&63}else if(u<=65535){if(outIdx+2>=endIdx)break;heap[outIdx++]=224|u>>12;heap[outIdx++]=128|u>>6&63;heap[outIdx++]=128|u&63}else{if(outIdx+3>=endIdx)break;heap[outIdx++]=240|u>>18;heap[outIdx++]=128|u>>12&63;heap[outIdx++]=128|u>>6&63;heap[outIdx++]=128|u&63;i++}}heap[outIdx]=0;return outIdx-startIdx};var intArrayFromString=(stringy,dontAddNull,length)=>{var len=length>0?length:lengthBytesUTF8(stringy)+1;var u8array=new Array(len);var numBytesWritten=stringToUTF8Array(stringy,u8array,0,u8array.length);if(dontAddNull)u8array.length=numBytesWritten;return u8array};var FS_stdin_getChar=()=>{if(!FS_stdin_getChar_buffer.length){var result=null;if(globalThis.window?.prompt){result=window.prompt("Input: ");if(result!==null){result+="\n"}}else{}if(!result){return null}FS_stdin_getChar_buffer=intArrayFromString(result,true)}return FS_stdin_getChar_buffer.shift()};var TTY={ttys:[],init(){},shutdown(){},register(dev,ops){TTY.ttys[dev]={input:[],output:[],ops};FS.registerDevice(dev,TTY.stream_ops)},stream_ops:{open(stream){var tty=TTY.ttys[stream.node.rdev];if(!tty){throw new FS.ErrnoError(43)}stream.tty=tty;stream.seekable=false},close(stream){stream.tty.ops.fsync(stream.tty)},fsync(stream){stream.tty.ops.fsync(stream.tty)},read(stream,buffer,offset,length,pos){if(!stream.tty||!stream.tty.ops.get_char){throw new FS.ErrnoError(60)}var bytesRead=0;for(var i=0;i0){out(UTF8ArrayToString(tty.output));tty.output=[]}},ioctl_tcgets(tty){return{c_iflag:25856,c_oflag:5,c_cflag:191,c_lflag:35387,c_cc:[3,28,127,21,4,0,1,0,17,19,26,0,18,15,23,22,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}},ioctl_tcsets(tty,optional_actions,data){return 0},ioctl_tiocgwinsz(tty){return[24,80]}},default_tty1_ops:{put_char(tty,val){if(val===null||val===10){err(UTF8ArrayToString(tty.output));tty.output=[]}else{if(val!=0)tty.output.push(val)}},fsync(tty){if(tty.output?.length>0){err(UTF8ArrayToString(tty.output));tty.output=[]}}}};var 
zeroMemory=(ptr,size)=>(growMemViews(),HEAPU8).fill(0,ptr,ptr+size);var alignMemory=(size,alignment)=>Math.ceil(size/alignment)*alignment;var mmapAlloc=size=>{size=alignMemory(size,65536);var ptr=_emscripten_builtin_memalign(65536,size);if(ptr)zeroMemory(ptr,size);return ptr};var MEMFS={ops_table:null,mount(mount){return MEMFS.createNode(null,"/",16895,0)},createNode(parent,name,mode,dev){if(FS.isBlkdev(mode)||FS.isFIFO(mode)){throw new FS.ErrnoError(63)}MEMFS.ops_table||={dir:{node:{getattr:MEMFS.node_ops.getattr,setattr:MEMFS.node_ops.setattr,lookup:MEMFS.node_ops.lookup,mknod:MEMFS.node_ops.mknod,rename:MEMFS.node_ops.rename,unlink:MEMFS.node_ops.unlink,rmdir:MEMFS.node_ops.rmdir,readdir:MEMFS.node_ops.readdir,symlink:MEMFS.node_ops.symlink},stream:{llseek:MEMFS.stream_ops.llseek}},file:{node:{getattr:MEMFS.node_ops.getattr,setattr:MEMFS.node_ops.setattr},stream:{llseek:MEMFS.stream_ops.llseek,read:MEMFS.stream_ops.read,write:MEMFS.stream_ops.write,mmap:MEMFS.stream_ops.mmap,msync:MEMFS.stream_ops.msync}},link:{node:{getattr:MEMFS.node_ops.getattr,setattr:MEMFS.node_ops.setattr,readlink:MEMFS.node_ops.readlink},stream:{}},chrdev:{node:{getattr:MEMFS.node_ops.getattr,setattr:MEMFS.node_ops.setattr},stream:FS.chrdev_stream_ops}};var node=FS.createNode(parent,name,mode,dev);if(FS.isDir(node.mode)){node.node_ops=MEMFS.ops_table.dir.node;node.stream_ops=MEMFS.ops_table.dir.stream;node.contents={}}else if(FS.isFile(node.mode)){node.node_ops=MEMFS.ops_table.file.node;node.stream_ops=MEMFS.ops_table.file.stream;node.usedBytes=0;node.contents=null}else if(FS.isLink(node.mode)){node.node_ops=MEMFS.ops_table.link.node;node.stream_ops=MEMFS.ops_table.link.stream}else if(FS.isChrdev(node.mode)){node.node_ops=MEMFS.ops_table.chrdev.node;node.stream_ops=MEMFS.ops_table.chrdev.stream}node.atime=node.mtime=node.ctime=Date.now();if(parent){parent.contents[name]=node;parent.atime=parent.mtime=parent.ctime=node.atime}return 
node},getFileDataAsTypedArray(node){if(!node.contents)return new Uint8Array(0);if(node.contents.subarray)return node.contents.subarray(0,node.usedBytes);return new Uint8Array(node.contents)},expandFileStorage(node,newCapacity){var prevCapacity=node.contents?node.contents.length:0;if(prevCapacity>=newCapacity)return;var CAPACITY_DOUBLING_MAX=1024*1024;newCapacity=Math.max(newCapacity,prevCapacity*(prevCapacity>>0);if(prevCapacity!=0)newCapacity=Math.max(newCapacity,256);var oldContents=node.contents;node.contents=new Uint8Array(newCapacity);if(node.usedBytes>0)node.contents.set(oldContents.subarray(0,node.usedBytes),0)},resizeFileStorage(node,newSize){if(node.usedBytes==newSize)return;if(newSize==0){node.contents=null;node.usedBytes=0}else{var oldContents=node.contents;node.contents=new Uint8Array(newSize);if(oldContents){node.contents.set(oldContents.subarray(0,Math.min(newSize,node.usedBytes)))}node.usedBytes=newSize}},node_ops:{getattr(node){var attr={};attr.dev=FS.isChrdev(node.mode)?node.id:1;attr.ino=node.id;attr.mode=node.mode;attr.nlink=1;attr.uid=0;attr.gid=0;attr.rdev=node.rdev;if(FS.isDir(node.mode)){attr.size=4096}else if(FS.isFile(node.mode)){attr.size=node.usedBytes}else if(FS.isLink(node.mode)){attr.size=node.link.length}else{attr.size=0}attr.atime=new Date(node.atime);attr.mtime=new Date(node.mtime);attr.ctime=new Date(node.ctime);attr.blksize=4096;attr.blocks=Math.ceil(attr.size/attr.blksize);return attr},setattr(node,attr){for(const key of["mode","atime","mtime","ctime"]){if(attr[key]!=null){node[key]=attr[key]}}if(attr.size!==undefined){MEMFS.resizeFileStorage(node,attr.size)}},lookup(parent,name){if(!MEMFS.doesNotExistError){MEMFS.doesNotExistError=new FS.ErrnoError(44);MEMFS.doesNotExistError.stack=""}throw MEMFS.doesNotExistError},mknod(parent,name,mode,dev){return MEMFS.createNode(parent,name,mode,dev)},rename(old_node,new_dir,new_name){var 
new_node;try{new_node=FS.lookupNode(new_dir,new_name)}catch(e){}if(new_node){if(FS.isDir(old_node.mode)){for(var i in new_node.contents){throw new FS.ErrnoError(55)}}FS.hashRemoveNode(new_node)}delete old_node.parent.contents[old_node.name];new_dir.contents[new_name]=old_node;old_node.name=new_name;new_dir.ctime=new_dir.mtime=old_node.parent.ctime=old_node.parent.mtime=Date.now()},unlink(parent,name){delete parent.contents[name];parent.ctime=parent.mtime=Date.now()},rmdir(parent,name){var node=FS.lookupNode(parent,name);for(var i in node.contents){throw new FS.ErrnoError(55)}delete parent.contents[name];parent.ctime=parent.mtime=Date.now()},readdir(node){return[".","..",...Object.keys(node.contents)]},symlink(parent,newname,oldpath){var node=MEMFS.createNode(parent,newname,511|40960,0);node.link=oldpath;return node},readlink(node){if(!FS.isLink(node.mode)){throw new FS.ErrnoError(28)}return node.link}},stream_ops:{read(stream,buffer,offset,length,position){var contents=stream.node.contents;if(position>=stream.node.usedBytes)return 0;var size=Math.min(stream.node.usedBytes-position,length);if(size>8&&contents.subarray){buffer.set(contents.subarray(position,position+size),offset)}else{for(var i=0;i0||position+length{var flagModes={r:0,"r+":2,w:512|64|1,"w+":512|64|2,a:1024|64|1,"a+":1024|64|2};var flags=flagModes[str];if(typeof flags=="undefined"){throw new Error(`Unknown file open mode: ${str}`)}return flags};var FS_getMode=(canRead,canWrite)=>{var mode=0;if(canRead)mode|=292|73;if(canWrite)mode|=146;return mode};var asyncLoad=async url=>{var arrayBuffer=await readAsync(url);return new Uint8Array(arrayBuffer)};var FS_createDataFile=(...args)=>FS.createDataFile(...args);var getUniqueRunDependency=id=>id;var preloadPlugins=[];var FS_handledByPreloadPlugin=async(byteArray,fullname)=>{if(typeof Browser!="undefined")Browser.init();for(var plugin of preloadPlugins){if(plugin["canHandle"](fullname)){return plugin["handle"](byteArray,fullname)}}return byteArray};var 
FS_preloadFile=async(parent,name,url,canRead,canWrite,dontCreateFile,canOwn,preFinish)=>{var fullname=name?PATH_FS.resolve(PATH.join2(parent,name)):parent;var dep=getUniqueRunDependency(`cp ${fullname}`);addRunDependency(dep);try{var byteArray=url;if(typeof url=="string"){byteArray=await asyncLoad(url)}byteArray=await FS_handledByPreloadPlugin(byteArray,fullname);preFinish?.();if(!dontCreateFile){FS_createDataFile(parent,name,byteArray,canRead,canWrite,canOwn)}}finally{removeRunDependency(dep)}};var FS_createPreloadedFile=(parent,name,url,canRead,canWrite,onload,onerror,dontCreateFile,canOwn,preFinish)=>{FS_preloadFile(parent,name,url,canRead,canWrite,dontCreateFile,canOwn,preFinish).then(onload).catch(onerror)};var FS={root:null,mounts:[],devices:{},streams:[],nextInode:1,nameTable:null,currentPath:"/",initialized:false,ignorePermissions:true,filesystems:null,syncFSRequests:0,readFiles:{},ErrnoError:class{name="ErrnoError";constructor(errno){this.errno=errno}},FSStream:class{shared={};get object(){return this.node}set object(val){this.node=val}get isRead(){return(this.flags&2097155)!==1}get isWrite(){return(this.flags&2097155)!==0}get isAppend(){return this.flags&1024}get flags(){return this.shared.flags}set flags(val){this.shared.flags=val}get position(){return this.shared.position}set position(val){this.shared.position=val}},FSNode:class{node_ops={};stream_ops={};readMode=292|73;writeMode=146;mounted=null;constructor(parent,name,mode,rdev){if(!parent){parent=this}this.parent=parent;this.mount=parent.mount;this.id=FS.nextInode++;this.name=name;this.mode=mode;this.rdev=rdev;this.atime=this.mtime=this.ctime=Date.now()}get read(){return(this.mode&this.readMode)===this.readMode}set read(val){val?this.mode|=this.readMode:this.mode&=~this.readMode}get write(){return(this.mode&this.writeMode)===this.writeMode}set write(val){val?this.mode|=this.writeMode:this.mode&=~this.writeMode}get isFolder(){return FS.isDir(this.mode)}get isDevice(){return 
FS.isChrdev(this.mode)}},lookupPath(path,opts={}){if(!path){throw new FS.ErrnoError(44)}opts.follow_mount??=true;if(!PATH.isAbs(path)){path=FS.cwd()+"/"+path}linkloop:for(var nlinks=0;nlinks<40;nlinks++){var parts=path.split("/").filter(p=>!!p);var current=FS.root;var current_path="/";for(var i=0;i>>0)%FS.nameTable.length},hashAddNode(node){var hash=FS.hashName(node.parent.id,node.name);node.name_next=FS.nameTable[hash];FS.nameTable[hash]=node},hashRemoveNode(node){var hash=FS.hashName(node.parent.id,node.name);if(FS.nameTable[hash]===node){FS.nameTable[hash]=node.name_next}else{var current=FS.nameTable[hash];while(current){if(current.name_next===node){current.name_next=node.name_next;break}current=current.name_next}}},lookupNode(parent,name){var errCode=FS.mayLookup(parent);if(errCode){throw new FS.ErrnoError(errCode)}var hash=FS.hashName(parent.id,name);for(var node=FS.nameTable[hash];node;node=node.name_next){var nodeName=node.name;if(node.parent.id===parent.id&&nodeName===name){return node}}return FS.lookup(parent,name)},createNode(parent,name,mode,rdev){var node=new FS.FSNode(parent,name,mode,rdev);FS.hashAddNode(node);return node},destroyNode(node){FS.hashRemoveNode(node)},isRoot(node){return node===node.parent},isMountpoint(node){return!!node.mounted},isFile(mode){return(mode&61440)===32768},isDir(mode){return(mode&61440)===16384},isLink(mode){return(mode&61440)===40960},isChrdev(mode){return(mode&61440)===8192},isBlkdev(mode){return(mode&61440)===24576},isFIFO(mode){return(mode&61440)===4096},isSocket(mode){return(mode&49152)===49152},flagsToPermissionString(flag){var perms=["r","w","rw"][flag&3];if(flag&512){perms+="w"}return perms},nodePermissions(node,perms){if(FS.ignorePermissions){return 0}if(perms.includes("r")&&!(node.mode&292)){return 2}else if(perms.includes("w")&&!(node.mode&146)){return 2}else if(perms.includes("x")&&!(node.mode&73)){return 2}return 0},mayLookup(dir){if(!FS.isDir(dir.mode))return 54;var 
errCode=FS.nodePermissions(dir,"x");if(errCode)return errCode;if(!dir.node_ops.lookup)return 2;return 0},mayCreate(dir,name){if(!FS.isDir(dir.mode)){return 54}try{var node=FS.lookupNode(dir,name);return 20}catch(e){}return FS.nodePermissions(dir,"wx")},mayDelete(dir,name,isdir){var node;try{node=FS.lookupNode(dir,name)}catch(e){return e.errno}var errCode=FS.nodePermissions(dir,"wx");if(errCode){return errCode}if(isdir){if(!FS.isDir(node.mode)){return 54}if(FS.isRoot(node)||FS.getPath(node)===FS.cwd()){return 10}}else{if(FS.isDir(node.mode)){return 31}}return 0},mayOpen(node,flags){if(!node){return 44}if(FS.isLink(node.mode)){return 32}else if(FS.isDir(node.mode)){if(FS.flagsToPermissionString(flags)!=="r"||flags&(512|64)){return 31}}return FS.nodePermissions(node,FS.flagsToPermissionString(flags))},checkOpExists(op,err){if(!op){throw new FS.ErrnoError(err)}return op},MAX_OPEN_FDS:4096,nextfd(){for(var fd=0;fd<=FS.MAX_OPEN_FDS;fd++){if(!FS.streams[fd]){return fd}}throw new FS.ErrnoError(33)},getStreamChecked(fd){var stream=FS.getStream(fd);if(!stream){throw new FS.ErrnoError(8)}return stream},getStream:fd=>FS.streams[fd],createStream(stream,fd=-1){stream=Object.assign(new FS.FSStream,stream);if(fd==-1){fd=FS.nextfd()}stream.fd=fd;FS.streams[fd]=stream;return stream},closeStream(fd){FS.streams[fd]=null},dupStream(origStream,fd=-1){var stream=FS.createStream(origStream,fd);stream.stream_ops?.dup?.(stream);return stream},doSetAttr(stream,node,attr){var setattr=stream?.stream_ops.setattr;var arg=setattr?stream:node;setattr??=node.node_ops.setattr;FS.checkOpExists(setattr,63);setattr(arg,attr)},chrdev_stream_ops:{open(stream){var device=FS.getDevice(stream.node.rdev);stream.stream_ops=device.stream_ops;stream.stream_ops.open?.(stream)},llseek(){throw new FS.ErrnoError(70)}},major:dev=>dev>>8,minor:dev=>dev&255,makedev:(ma,mi)=>ma<<8|mi,registerDevice(dev,ops){FS.devices[dev]={stream_ops:ops}},getDevice:dev=>FS.devices[dev],getMounts(mount){var mounts=[];var 
check=[mount];while(check.length){var m=check.pop();mounts.push(m);check.push(...m.mounts)}return mounts},syncfs(populate,callback){if(typeof populate=="function"){callback=populate;populate=false}FS.syncFSRequests++;if(FS.syncFSRequests>1){err(`warning: ${FS.syncFSRequests} FS.syncfs operations in flight at once, probably just doing extra work`)}var mounts=FS.getMounts(FS.root.mount);var completed=0;function doCallback(errCode){FS.syncFSRequests--;return callback(errCode)}function done(errCode){if(errCode){if(!done.errored){done.errored=true;return doCallback(errCode)}return}if(++completed>=mounts.length){doCallback(null)}}for(var mount of mounts){if(mount.type.syncfs){mount.type.syncfs(mount,populate,done)}else{done(null)}}},mount(type,opts,mountpoint){var root=mountpoint==="/";var pseudo=!mountpoint;var node;if(root&&FS.root){throw new FS.ErrnoError(10)}else if(!root&&!pseudo){var lookup=FS.lookupPath(mountpoint,{follow_mount:false});mountpoint=lookup.path;node=lookup.node;if(FS.isMountpoint(node)){throw new FS.ErrnoError(10)}if(!FS.isDir(node.mode)){throw new FS.ErrnoError(54)}}var mount={type,opts,mountpoint,mounts:[]};var mountRoot=type.mount(mount);mountRoot.mount=mount;mount.root=mountRoot;if(root){FS.root=mountRoot}else if(node){node.mounted=mount;if(node.mount){node.mount.mounts.push(mount)}}return mountRoot},unmount(mountpoint){var lookup=FS.lookupPath(mountpoint,{follow_mount:false});if(!FS.isMountpoint(lookup.node)){throw new FS.ErrnoError(28)}var node=lookup.node;var mount=node.mounted;var mounts=FS.getMounts(mount);for(var[hash,current]of Object.entries(FS.nameTable)){while(current){var next=current.name_next;if(mounts.includes(current.mount)){FS.destroyNode(current)}current=next}}node.mounted=null;var idx=node.mount.mounts.indexOf(mount);node.mount.mounts.splice(idx,1)},lookup(parent,name){return parent.node_ops.lookup(parent,name)},mknod(path,mode,dev){var lookup=FS.lookupPath(path,{parent:true});var parent=lookup.node;var 
name=PATH.basename(path);if(!name){throw new FS.ErrnoError(28)}if(name==="."||name===".."){throw new FS.ErrnoError(20)}var errCode=FS.mayCreate(parent,name);if(errCode){throw new FS.ErrnoError(errCode)}if(!parent.node_ops.mknod){throw new FS.ErrnoError(63)}return parent.node_ops.mknod(parent,name,mode,dev)},statfs(path){return FS.statfsNode(FS.lookupPath(path,{follow:true}).node)},statfsStream(stream){return FS.statfsNode(stream.node)},statfsNode(node){var rtn={bsize:4096,frsize:4096,blocks:1e6,bfree:5e5,bavail:5e5,files:FS.nextInode,ffree:FS.nextInode-1,fsid:42,flags:2,namelen:255};if(node.node_ops.statfs){Object.assign(rtn,node.node_ops.statfs(node.mount.opts.root))}return rtn},create(path,mode=438){mode&=4095;mode|=32768;return FS.mknod(path,mode,0)},mkdir(path,mode=511){mode&=511|512;mode|=16384;return FS.mknod(path,mode,0)},mkdirTree(path,mode){var dirs=path.split("/");var d="";for(var dir of dirs){if(!dir)continue;if(d||PATH.isAbs(path))d+="/";d+=dir;try{FS.mkdir(d,mode)}catch(e){if(e.errno!=20)throw e}}},mkdev(path,mode,dev){if(typeof dev=="undefined"){dev=mode;mode=438}mode|=8192;return FS.mknod(path,mode,dev)},symlink(oldpath,newpath){if(!PATH_FS.resolve(oldpath)){throw new FS.ErrnoError(44)}var lookup=FS.lookupPath(newpath,{parent:true});var parent=lookup.node;if(!parent){throw new FS.ErrnoError(44)}var newname=PATH.basename(newpath);var errCode=FS.mayCreate(parent,newname);if(errCode){throw new FS.ErrnoError(errCode)}if(!parent.node_ops.symlink){throw new FS.ErrnoError(63)}return parent.node_ops.symlink(parent,newname,oldpath)},rename(old_path,new_path){var old_dirname=PATH.dirname(old_path);var new_dirname=PATH.dirname(new_path);var old_name=PATH.basename(old_path);var new_name=PATH.basename(new_path);var lookup,old_dir,new_dir;lookup=FS.lookupPath(old_path,{parent:true});old_dir=lookup.node;lookup=FS.lookupPath(new_path,{parent:true});new_dir=lookup.node;if(!old_dir||!new_dir)throw new FS.ErrnoError(44);if(old_dir.mount!==new_dir.mount){throw new 
FS.ErrnoError(75)}var old_node=FS.lookupNode(old_dir,old_name);var relative=PATH_FS.relative(old_path,new_dirname);if(relative.charAt(0)!=="."){throw new FS.ErrnoError(28)}relative=PATH_FS.relative(new_path,old_dirname);if(relative.charAt(0)!=="."){throw new FS.ErrnoError(55)}var new_node;try{new_node=FS.lookupNode(new_dir,new_name)}catch(e){}if(old_node===new_node){return}var isdir=FS.isDir(old_node.mode);var errCode=FS.mayDelete(old_dir,old_name,isdir);if(errCode){throw new FS.ErrnoError(errCode)}errCode=new_node?FS.mayDelete(new_dir,new_name,isdir):FS.mayCreate(new_dir,new_name);if(errCode){throw new FS.ErrnoError(errCode)}if(!old_dir.node_ops.rename){throw new FS.ErrnoError(63)}if(FS.isMountpoint(old_node)||new_node&&FS.isMountpoint(new_node)){throw new FS.ErrnoError(10)}if(new_dir!==old_dir){errCode=FS.nodePermissions(old_dir,"w");if(errCode){throw new FS.ErrnoError(errCode)}}FS.hashRemoveNode(old_node);try{old_dir.node_ops.rename(old_node,new_dir,new_name);old_node.parent=new_dir}catch(e){throw e}finally{FS.hashAddNode(old_node)}},rmdir(path){var lookup=FS.lookupPath(path,{parent:true});var parent=lookup.node;var name=PATH.basename(path);var node=FS.lookupNode(parent,name);var errCode=FS.mayDelete(parent,name,true);if(errCode){throw new FS.ErrnoError(errCode)}if(!parent.node_ops.rmdir){throw new FS.ErrnoError(63)}if(FS.isMountpoint(node)){throw new FS.ErrnoError(10)}parent.node_ops.rmdir(parent,name);FS.destroyNode(node)},readdir(path){var lookup=FS.lookupPath(path,{follow:true});var node=lookup.node;var readdir=FS.checkOpExists(node.node_ops.readdir,54);return readdir(node)},unlink(path){var lookup=FS.lookupPath(path,{parent:true});var parent=lookup.node;if(!parent){throw new FS.ErrnoError(44)}var name=PATH.basename(path);var node=FS.lookupNode(parent,name);var errCode=FS.mayDelete(parent,name,false);if(errCode){throw new FS.ErrnoError(errCode)}if(!parent.node_ops.unlink){throw new FS.ErrnoError(63)}if(FS.isMountpoint(node)){throw new 
FS.ErrnoError(10)}parent.node_ops.unlink(parent,name);FS.destroyNode(node)},readlink(path){var lookup=FS.lookupPath(path);var link=lookup.node;if(!link){throw new FS.ErrnoError(44)}if(!link.node_ops.readlink){throw new FS.ErrnoError(28)}return link.node_ops.readlink(link)},stat(path,dontFollow){var lookup=FS.lookupPath(path,{follow:!dontFollow});var node=lookup.node;var getattr=FS.checkOpExists(node.node_ops.getattr,63);return getattr(node)},fstat(fd){var stream=FS.getStreamChecked(fd);var node=stream.node;var getattr=stream.stream_ops.getattr;var arg=getattr?stream:node;getattr??=node.node_ops.getattr;FS.checkOpExists(getattr,63);return getattr(arg)},lstat(path){return FS.stat(path,true)},doChmod(stream,node,mode,dontFollow){FS.doSetAttr(stream,node,{mode:mode&4095|node.mode&~4095,ctime:Date.now(),dontFollow})},chmod(path,mode,dontFollow){var node;if(typeof path=="string"){var lookup=FS.lookupPath(path,{follow:!dontFollow});node=lookup.node}else{node=path}FS.doChmod(null,node,mode,dontFollow)},lchmod(path,mode){FS.chmod(path,mode,true)},fchmod(fd,mode){var stream=FS.getStreamChecked(fd);FS.doChmod(stream,stream.node,mode,false)},doChown(stream,node,dontFollow){FS.doSetAttr(stream,node,{timestamp:Date.now(),dontFollow})},chown(path,uid,gid,dontFollow){var node;if(typeof path=="string"){var lookup=FS.lookupPath(path,{follow:!dontFollow});node=lookup.node}else{node=path}FS.doChown(null,node,dontFollow)},lchown(path,uid,gid){FS.chown(path,uid,gid,true)},fchown(fd,uid,gid){var stream=FS.getStreamChecked(fd);FS.doChown(stream,stream.node,false)},doTruncate(stream,node,len){if(FS.isDir(node.mode)){throw new FS.ErrnoError(31)}if(!FS.isFile(node.mode)){throw new FS.ErrnoError(28)}var errCode=FS.nodePermissions(node,"w");if(errCode){throw new FS.ErrnoError(errCode)}FS.doSetAttr(stream,node,{size:len,timestamp:Date.now()})},truncate(path,len){if(len<0){throw new FS.ErrnoError(28)}var node;if(typeof path=="string"){var 
lookup=FS.lookupPath(path,{follow:true});node=lookup.node}else{node=path}FS.doTruncate(null,node,len)},ftruncate(fd,len){var stream=FS.getStreamChecked(fd);if(len<0||(stream.flags&2097155)===0){throw new FS.ErrnoError(28)}FS.doTruncate(stream,stream.node,len)},utime(path,atime,mtime){var lookup=FS.lookupPath(path,{follow:true});var node=lookup.node;var setattr=FS.checkOpExists(node.node_ops.setattr,63);setattr(node,{atime,mtime})},open(path,flags,mode=438){if(path===""){throw new FS.ErrnoError(44)}flags=typeof flags=="string"?FS_modeStringToFlags(flags):flags;if(flags&64){mode=mode&4095|32768}else{mode=0}var node;var isDirPath;if(typeof path=="object"){node=path}else{isDirPath=path.endsWith("/");var lookup=FS.lookupPath(path,{follow:!(flags&131072),noent_okay:true});node=lookup.node;path=lookup.path}var created=false;if(flags&64){if(node){if(flags&128){throw new FS.ErrnoError(20)}}else if(isDirPath){throw new FS.ErrnoError(31)}else{node=FS.mknod(path,mode|511,0);created=true}}if(!node){throw new FS.ErrnoError(44)}if(FS.isChrdev(node.mode)){flags&=~512}if(flags&65536&&!FS.isDir(node.mode)){throw new FS.ErrnoError(54)}if(!created){var errCode=FS.mayOpen(node,flags);if(errCode){throw new FS.ErrnoError(errCode)}}if(flags&512&&!created){FS.truncate(node,0)}flags&=~(128|512|131072);var stream=FS.createStream({node,path:FS.getPath(node),flags,seekable:true,position:0,stream_ops:node.stream_ops,ungotten:[],error:false});if(stream.stream_ops.open){stream.stream_ops.open(stream)}if(created){FS.chmod(node,mode&511)}if(Module["logReadFiles"]&&!(flags&1)){if(!(path in FS.readFiles)){FS.readFiles[path]=1}}return stream},close(stream){if(FS.isClosed(stream)){throw new FS.ErrnoError(8)}if(stream.getdents)stream.getdents=null;try{if(stream.stream_ops.close){stream.stream_ops.close(stream)}}catch(e){throw e}finally{FS.closeStream(stream.fd)}stream.fd=null},isClosed(stream){return stream.fd===null},llseek(stream,offset,whence){if(FS.isClosed(stream)){throw new 
FS.ErrnoError(8)}if(!stream.seekable||!stream.stream_ops.llseek){throw new FS.ErrnoError(70)}if(whence!=0&&whence!=1&&whence!=2){throw new FS.ErrnoError(28)}stream.position=stream.stream_ops.llseek(stream,offset,whence);stream.ungotten=[];return stream.position},read(stream,buffer,offset,length,position){if(length<0||position<0){throw new FS.ErrnoError(28)}if(FS.isClosed(stream)){throw new FS.ErrnoError(8)}if((stream.flags&2097155)===1){throw new FS.ErrnoError(8)}if(FS.isDir(stream.node.mode)){throw new FS.ErrnoError(31)}if(!stream.stream_ops.read){throw new FS.ErrnoError(28)}var seeking=typeof position!="undefined";if(!seeking){position=stream.position}else if(!stream.seekable){throw new FS.ErrnoError(70)}var bytesRead=stream.stream_ops.read(stream,buffer,offset,length,position);if(!seeking)stream.position+=bytesRead;return bytesRead},write(stream,buffer,offset,length,position,canOwn){if(length<0||position<0){throw new FS.ErrnoError(28)}if(FS.isClosed(stream)){throw new FS.ErrnoError(8)}if((stream.flags&2097155)===0){throw new FS.ErrnoError(8)}if(FS.isDir(stream.node.mode)){throw new FS.ErrnoError(31)}if(!stream.stream_ops.write){throw new FS.ErrnoError(28)}if(stream.seekable&&stream.flags&1024){FS.llseek(stream,0,2)}var seeking=typeof position!="undefined";if(!seeking){position=stream.position}else if(!stream.seekable){throw new FS.ErrnoError(70)}var bytesWritten=stream.stream_ops.write(stream,buffer,offset,length,position,canOwn);if(!seeking)stream.position+=bytesWritten;return bytesWritten},mmap(stream,length,position,prot,flags){if((prot&2)!==0&&(flags&2)===0&&(stream.flags&2097155)!==2){throw new FS.ErrnoError(2)}if((stream.flags&2097155)===1){throw new FS.ErrnoError(2)}if(!stream.stream_ops.mmap){throw new FS.ErrnoError(43)}if(!length){throw new FS.ErrnoError(28)}return stream.stream_ops.mmap(stream,length,position,prot,flags)},msync(stream,buffer,offset,length,mmapFlags){if(!stream.stream_ops.msync){return 0}return 
stream.stream_ops.msync(stream,buffer,offset,length,mmapFlags)},ioctl(stream,cmd,arg){if(!stream.stream_ops.ioctl){throw new FS.ErrnoError(59)}return stream.stream_ops.ioctl(stream,cmd,arg)},readFile(path,opts={}){opts.flags=opts.flags||0;opts.encoding=opts.encoding||"binary";if(opts.encoding!=="utf8"&&opts.encoding!=="binary"){abort(`Invalid encoding type "${opts.encoding}"`)}var stream=FS.open(path,opts.flags);var stat=FS.stat(path);var length=stat.size;var buf=new Uint8Array(length);FS.read(stream,buf,0,length,0);if(opts.encoding==="utf8"){buf=UTF8ArrayToString(buf)}FS.close(stream);return buf},writeFile(path,data,opts={}){opts.flags=opts.flags||577;var stream=FS.open(path,opts.flags,opts.mode);if(typeof data=="string"){data=new Uint8Array(intArrayFromString(data,true))}if(ArrayBuffer.isView(data)){FS.write(stream,data,0,data.byteLength,undefined,opts.canOwn)}else{abort("Unsupported data type")}FS.close(stream)},cwd:()=>FS.currentPath,chdir(path){var lookup=FS.lookupPath(path,{follow:true});if(lookup.node===null){throw new FS.ErrnoError(44)}if(!FS.isDir(lookup.node.mode)){throw new FS.ErrnoError(54)}var errCode=FS.nodePermissions(lookup.node,"x");if(errCode){throw new FS.ErrnoError(errCode)}FS.currentPath=lookup.path},createDefaultDirectories(){FS.mkdir("/tmp");FS.mkdir("/home");FS.mkdir("/home/web_user")},createDefaultDevices(){FS.mkdir("/dev");FS.registerDevice(FS.makedev(1,3),{read:()=>0,write:(stream,buffer,offset,length,pos)=>length,llseek:()=>0});FS.mkdev("/dev/null",FS.makedev(1,3));TTY.register(FS.makedev(5,0),TTY.default_tty_ops);TTY.register(FS.makedev(6,0),TTY.default_tty1_ops);FS.mkdev("/dev/tty",FS.makedev(5,0));FS.mkdev("/dev/tty1",FS.makedev(6,0));var randomBuffer=new Uint8Array(1024),randomLeft=0;var randomByte=()=>{if(randomLeft===0){randomFill(randomBuffer);randomLeft=randomBuffer.byteLength}return 
randomBuffer[--randomLeft]};FS.createDevice("/dev","random",randomByte);FS.createDevice("/dev","urandom",randomByte);FS.mkdir("/dev/shm");FS.mkdir("/dev/shm/tmp")},createSpecialDirectories(){FS.mkdir("/proc");var proc_self=FS.mkdir("/proc/self");FS.mkdir("/proc/self/fd");FS.mount({mount(){var node=FS.createNode(proc_self,"fd",16895,73);node.stream_ops={llseek:MEMFS.stream_ops.llseek};node.node_ops={lookup(parent,name){var fd=+name;var stream=FS.getStreamChecked(fd);var ret={parent:null,mount:{mountpoint:"fake"},node_ops:{readlink:()=>stream.path},id:fd+1};ret.parent=ret;return ret},readdir(){return Array.from(FS.streams.entries()).filter(([k,v])=>v).map(([k,v])=>k.toString())}};return node}},{},"/proc/self/fd")},createStandardStreams(input,output,error){if(input){FS.createDevice("/dev","stdin",input)}else{FS.symlink("/dev/tty","/dev/stdin")}if(output){FS.createDevice("/dev","stdout",null,output)}else{FS.symlink("/dev/tty","/dev/stdout")}if(error){FS.createDevice("/dev","stderr",null,error)}else{FS.symlink("/dev/tty1","/dev/stderr")}var stdin=FS.open("/dev/stdin",0);var stdout=FS.open("/dev/stdout",1);var stderr=FS.open("/dev/stderr",1)},staticInit(){FS.nameTable=new Array(4096);FS.mount(MEMFS,{},"/");FS.createDefaultDirectories();FS.createDefaultDevices();FS.createSpecialDirectories();FS.filesystems={MEMFS}},init(input,output,error){FS.initialized=true;input??=Module["stdin"];output??=Module["stdout"];error??=Module["stderr"];FS.createStandardStreams(input,output,error)},quit(){FS.initialized=false;for(var stream of FS.streams){if(stream){FS.close(stream)}}},findObject(path,dontResolveLastLink){var ret=FS.analyzePath(path,dontResolveLastLink);if(!ret.exists){return null}return ret.object},analyzePath(path,dontResolveLastLink){try{var lookup=FS.lookupPath(path,{follow:!dontResolveLastLink});path=lookup.path}catch(e){}var ret={isRoot:false,exists:false,error:0,name:null,path:null,object:null,parentExists:false,parentPath:null,parentObject:null};try{var 
lookup=FS.lookupPath(path,{parent:true});ret.parentExists=true;ret.parentPath=lookup.path;ret.parentObject=lookup.node;ret.name=PATH.basename(path);lookup=FS.lookupPath(path,{follow:!dontResolveLastLink});ret.exists=true;ret.path=lookup.path;ret.object=lookup.node;ret.name=lookup.node.name;ret.isRoot=lookup.path==="/"}catch(e){ret.error=e.errno}return ret},createPath(parent,path,canRead,canWrite){parent=typeof parent=="string"?parent:FS.getPath(parent);var parts=path.split("/").reverse();while(parts.length){var part=parts.pop();if(!part)continue;var current=PATH.join2(parent,part);try{FS.mkdir(current)}catch(e){if(e.errno!=20)throw e}parent=current}return current},createFile(parent,name,properties,canRead,canWrite){var path=PATH.join2(typeof parent=="string"?parent:FS.getPath(parent),name);var mode=FS_getMode(canRead,canWrite);return FS.create(path,mode)},createDataFile(parent,name,data,canRead,canWrite,canOwn){var path=name;if(parent){parent=typeof parent=="string"?parent:FS.getPath(parent);path=name?PATH.join2(parent,name):parent}var mode=FS_getMode(canRead,canWrite);var node=FS.create(path,mode);if(data){if(typeof data=="string"){var arr=new Array(data.length);for(var i=0,len=data.length;ithis.length-1||idx<0){return undefined}var chunkOffset=idx%this.chunkSize;var chunkNum=idx/this.chunkSize|0;return this.getter(chunkNum)[chunkOffset]}setDataGetter(getter){this.getter=getter}cacheLength(){var xhr=new XMLHttpRequest;xhr.open("HEAD",url,false);xhr.send(null);if(!(xhr.status>=200&&xhr.status<300||xhr.status===304))abort("Couldn't load "+url+". 
Status: "+xhr.status);var datalength=Number(xhr.getResponseHeader("Content-length"));var header;var hasByteServing=(header=xhr.getResponseHeader("Accept-Ranges"))&&header==="bytes";var usesGzip=(header=xhr.getResponseHeader("Content-Encoding"))&&header==="gzip";var chunkSize=1024*1024;if(!hasByteServing)chunkSize=datalength;var doXHR=(from,to)=>{if(from>to)abort("invalid range ("+from+", "+to+") or no bytes requested!");if(to>datalength-1)abort("only "+datalength+" bytes available! programmer error!");var xhr=new XMLHttpRequest;xhr.open("GET",url,false);if(datalength!==chunkSize)xhr.setRequestHeader("Range","bytes="+from+"-"+to);xhr.responseType="arraybuffer";if(xhr.overrideMimeType){xhr.overrideMimeType("text/plain; charset=x-user-defined")}xhr.send(null);if(!(xhr.status>=200&&xhr.status<300||xhr.status===304))abort("Couldn't load "+url+". Status: "+xhr.status);if(xhr.response!==undefined){return new Uint8Array(xhr.response||[])}return intArrayFromString(xhr.responseText||"",true)};var lazyArray=this;lazyArray.setDataGetter(chunkNum=>{var start=chunkNum*chunkSize;var end=(chunkNum+1)*chunkSize-1;end=Math.min(end,datalength-1);if(typeof lazyArray.chunks[chunkNum]=="undefined"){lazyArray.chunks[chunkNum]=doXHR(start,end)}if(typeof lazyArray.chunks[chunkNum]=="undefined")abort("doXHR failed!");return lazyArray.chunks[chunkNum]});if(usesGzip||!datalength){chunkSize=datalength=1;datalength=this.getter(0).length;chunkSize=datalength;out("LazyFiles on gzip forces download of the whole file when length is accessed")}this._length=datalength;this._chunkSize=chunkSize;this.lengthKnown=true}get length(){if(!this.lengthKnown){this.cacheLength()}return this._length}get chunkSize(){if(!this.lengthKnown){this.cacheLength()}return this._chunkSize}}if(globalThis.XMLHttpRequest){if(!ENVIRONMENT_IS_WORKER)abort("Cannot do synchronous binary XHRs outside webworkers in modern browsers. 
Use --embed-file or --preload-file in emcc");var lazyArray=new LazyUint8Array;var properties={isDevice:false,contents:lazyArray}}else{var properties={isDevice:false,url}}var node=FS.createFile(parent,name,properties,canRead,canWrite);if(properties.contents){node.contents=properties.contents}else if(properties.url){node.contents=null;node.url=properties.url}Object.defineProperties(node,{usedBytes:{get:function(){return this.contents.length}}});var stream_ops={};for(const[key,fn]of Object.entries(node.stream_ops)){stream_ops[key]=(...args)=>{FS.forceLoadFile(node);return fn(...args)}}function writeChunks(stream,buffer,offset,length,position){var contents=stream.node.contents;if(position>=contents.length)return 0;var size=Math.min(contents.length-position,length);if(contents.slice){for(var i=0;i{FS.forceLoadFile(node);return writeChunks(stream,buffer,offset,length,position)};stream_ops.mmap=(stream,length,position,prot,flags)=>{FS.forceLoadFile(node);var ptr=mmapAlloc(length);if(!ptr){throw new FS.ErrnoError(48)}writeChunks(stream,(growMemViews(),HEAP8),ptr,length,position);return{ptr,allocated:true}};node.stream_ops=stream_ops;return node}};var UTF8ToString=(ptr,maxBytesToRead,ignoreNul)=>ptr?UTF8ArrayToString((growMemViews(),HEAPU8),ptr,maxBytesToRead,ignoreNul):"";var SYSCALLS={DEFAULT_POLLMASK:5,calculateAt(dirfd,path,allowEmpty){if(PATH.isAbs(path)){return path}var dir;if(dirfd===-100){dir=FS.cwd()}else{var dirstream=SYSCALLS.getStreamFromFD(dirfd);dir=dirstream.path}if(path.length==0){if(!allowEmpty){throw new FS.ErrnoError(44)}return dir}return 
dir+"/"+path},writeStat(buf,stat){(growMemViews(),HEAPU32)[buf/4]=stat.dev;(growMemViews(),HEAPU32)[(buf+4)/4]=stat.mode;(growMemViews(),HEAPU64)[(buf+8)/8]=BigInt(stat.nlink);(growMemViews(),HEAPU32)[(buf+16)/4]=stat.uid;(growMemViews(),HEAPU32)[(buf+20)/4]=stat.gid;(growMemViews(),HEAPU32)[(buf+24)/4]=stat.rdev;(growMemViews(),HEAP64)[(buf+32)/8]=BigInt(stat.size);(growMemViews(),HEAP32)[(buf+40)/4]=4096;(growMemViews(),HEAP32)[(buf+44)/4]=stat.blocks;var atime=stat.atime.getTime();var mtime=stat.mtime.getTime();var ctime=stat.ctime.getTime();(growMemViews(),HEAP64)[(buf+48)/8]=BigInt(Math.floor(atime/1e3));(growMemViews(),HEAPU64)[(buf+56)/8]=BigInt(atime%1e3*1e3*1e3);(growMemViews(),HEAP64)[(buf+64)/8]=BigInt(Math.floor(mtime/1e3));(growMemViews(),HEAPU64)[(buf+72)/8]=BigInt(mtime%1e3*1e3*1e3);(growMemViews(),HEAP64)[(buf+80)/8]=BigInt(Math.floor(ctime/1e3));(growMemViews(),HEAPU64)[(buf+88)/8]=BigInt(ctime%1e3*1e3*1e3);(growMemViews(),HEAP64)[(buf+96)/8]=BigInt(stat.ino);return 0},writeStatFs(buf,stats){(growMemViews(),HEAPU32)[(buf+8)/4]=stats.bsize;(growMemViews(),HEAPU32)[(buf+72)/4]=stats.bsize;(growMemViews(),HEAP64)[(buf+16)/8]=BigInt(stats.blocks);(growMemViews(),HEAP64)[(buf+24)/8]=BigInt(stats.bfree);(growMemViews(),HEAP64)[(buf+32)/8]=BigInt(stats.bavail);(growMemViews(),HEAP64)[(buf+40)/8]=BigInt(stats.files);(growMemViews(),HEAP64)[(buf+48)/8]=BigInt(stats.ffree);(growMemViews(),HEAPU32)[(buf+56)/4]=stats.fsid;(growMemViews(),HEAPU32)[(buf+80)/4]=stats.flags;(growMemViews(),HEAPU32)[(buf+64)/4]=stats.namelen},doMsync(addr,stream,len,flags,offset){if(!FS.isFile(stream.node.mode)){throw new FS.ErrnoError(43)}if(flags&2){return 0}var buffer=(growMemViews(),HEAPU8).slice(addr,addr+len);FS.msync(stream,buffer,offset,len,flags)},getStreamFromFD(fd){var stream=FS.getStreamChecked(fd);return stream},varargs:undefined,getStr(ptr){var ret=UTF8ToString(ptr);return ret}};function ___syscall_fcntl64(fd,cmd,varargs){if(ENVIRONMENT_IS_PTHREAD)return 
proxyToMainThread(3,0,1,fd,cmd,varargs);varargs=bigintToI53Checked(varargs);SYSCALLS.varargs=varargs;try{var stream=SYSCALLS.getStreamFromFD(fd);switch(cmd){case 0:{var arg=syscallGetVarargI();if(arg<0){return-28}while(FS.streams[arg]){arg++}var newStream;newStream=FS.dupStream(stream,arg);return newStream.fd}case 1:case 2:return 0;case 3:return stream.flags;case 4:{var arg=syscallGetVarargI();stream.flags|=arg;return 0}case 5:{var arg=syscallGetVarargP();var offset=0;(growMemViews(),HEAP16)[(arg+offset)/2]=2;return 0}case 6:case 7:return 0}return-28}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}function ___syscall_fstat64(fd,buf){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(4,0,1,fd,buf);buf=bigintToI53Checked(buf);try{return SYSCALLS.writeStat(buf,FS.fstat(fd))}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}function ___syscall_ioctl(fd,op,varargs){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(5,0,1,fd,op,varargs);varargs=bigintToI53Checked(varargs);SYSCALLS.varargs=varargs;try{var stream=SYSCALLS.getStreamFromFD(fd);switch(op){case 21509:{if(!stream.tty)return-59;return 0}case 21505:{if(!stream.tty)return-59;if(stream.tty.ops.ioctl_tcgets){var termios=stream.tty.ops.ioctl_tcgets(stream);var argp=syscallGetVarargP();(growMemViews(),HEAP32)[argp/4]=termios.c_iflag||0;(growMemViews(),HEAP32)[(argp+4)/4]=termios.c_oflag||0;(growMemViews(),HEAP32)[(argp+8)/4]=termios.c_cflag||0;(growMemViews(),HEAP32)[(argp+12)/4]=termios.c_lflag||0;for(var i=0;i<32;i++){(growMemViews(),HEAP8)[argp+i+17]=termios.c_cc[i]||0}return 0}return 0}case 21510:case 21511:case 21512:{if(!stream.tty)return-59;return 0}case 21506:case 21507:case 21508:{if(!stream.tty)return-59;if(stream.tty.ops.ioctl_tcsets){var argp=syscallGetVarargP();var c_iflag=(growMemViews(),HEAP32)[argp/4];var c_oflag=(growMemViews(),HEAP32)[(argp+4)/4];var c_cflag=(growMemViews(),HEAP32)[(argp+8)/4];var 
c_lflag=(growMemViews(),HEAP32)[(argp+12)/4];var c_cc=[];for(var i=0;i<32;i++){c_cc.push((growMemViews(),HEAP8)[argp+i+17])}return stream.tty.ops.ioctl_tcsets(stream.tty,op,{c_iflag,c_oflag,c_cflag,c_lflag,c_cc})}return 0}case 21519:{if(!stream.tty)return-59;var argp=syscallGetVarargP();(growMemViews(),HEAP32)[argp/4]=0;return 0}case 21520:{if(!stream.tty)return-59;return-28}case 21537:case 21531:{var argp=syscallGetVarargP();return FS.ioctl(stream,op,argp)}case 21523:{if(!stream.tty)return-59;if(stream.tty.ops.ioctl_tiocgwinsz){var winsize=stream.tty.ops.ioctl_tiocgwinsz(stream.tty);var argp=syscallGetVarargP();(growMemViews(),HEAP16)[argp/2]=winsize[0];(growMemViews(),HEAP16)[(argp+2)/2]=winsize[1]}return 0}case 21524:{if(!stream.tty)return-59;return 0}case 21515:{if(!stream.tty)return-59;return 0}default:return-28}}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}function ___syscall_lstat64(path,buf){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(6,0,1,path,buf);path=bigintToI53Checked(path);buf=bigintToI53Checked(buf);try{path=SYSCALLS.getStr(path);return SYSCALLS.writeStat(buf,FS.lstat(path))}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}function ___syscall_newfstatat(dirfd,path,buf,flags){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(7,0,1,dirfd,path,buf,flags);path=bigintToI53Checked(path);buf=bigintToI53Checked(buf);try{path=SYSCALLS.getStr(path);var nofollow=flags&256;var allowEmpty=flags&4096;flags=flags&~6400;path=SYSCALLS.calculateAt(dirfd,path,allowEmpty);return SYSCALLS.writeStat(buf,nofollow?FS.lstat(path):FS.stat(path))}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}function ___syscall_openat(dirfd,path,flags,varargs){if(ENVIRONMENT_IS_PTHREAD)return 
proxyToMainThread(8,0,1,dirfd,path,flags,varargs);path=bigintToI53Checked(path);varargs=bigintToI53Checked(varargs);SYSCALLS.varargs=varargs;try{path=SYSCALLS.getStr(path);path=SYSCALLS.calculateAt(dirfd,path);var mode=varargs?syscallGetVarargI():0;return FS.open(path,flags,mode).fd}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}function ___syscall_stat64(path,buf){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(9,0,1,path,buf);path=bigintToI53Checked(path);buf=bigintToI53Checked(buf);try{path=SYSCALLS.getStr(path);return SYSCALLS.writeStat(buf,FS.stat(path))}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}var __abort_js=()=>abort("");var structRegistrations={};var runDestructors=destructors=>{while(destructors.length){var ptr=destructors.pop();var del=destructors.pop();del(ptr)}};function readPointer(pointer){return this.fromWireType(Number((growMemViews(),HEAPU64)[pointer/8]))}var awaitingDependencies={};var registeredTypes={};var typeDependencies={};var InternalError=class InternalError extends Error{constructor(message){super(message);this.name="InternalError"}};var throwInternalError=message=>{throw new InternalError(message)};var whenDependentTypesAreResolved=(myTypes,dependentTypes,getTypeConverters)=>{myTypes.forEach(type=>typeDependencies[type]=dependentTypes);function onComplete(typeConverters){var myTypeConverters=getTypeConverters(typeConverters);if(myTypeConverters.length!==myTypes.length){throwInternalError("Mismatched type converter count")}for(var i=0;i{typeConverters[i]=registeredTypes[dt];++registered;if(registered===unregisteredTypes.length){onComplete(typeConverters)}})}}if(0===unregisteredTypes.length){onComplete(typeConverters)}};var __embind_finalize_value_object=function(structType){structType=bigintToI53Checked(structType);var reg=structRegistrations[structType];delete structRegistrations[structType];var rawConstructor=reg.rawConstructor;var 
rawDestructor=reg.rawDestructor;var fieldRecords=reg.fields;var fieldTypes=fieldRecords.map(field=>field.getterReturnType).concat(fieldRecords.map(field=>field.setterArgumentType));whenDependentTypesAreResolved([structType],fieldTypes,fieldTypes=>{var fields={};for(var[i,field]of fieldRecords.entries()){const getterReturnType=fieldTypes[i];const getter=field.getter;const getterContext=field.getterContext;const setterArgumentType=fieldTypes[i+fieldRecords.length];const setter=field.setter;const setterContext=field.setterContext;fields[field.fieldName]={read:ptr=>getterReturnType.fromWireType(getter(getterContext,ptr)),write:(ptr,o)=>{var destructors=[];setter(setterContext,ptr,setterArgumentType.toWireType(destructors,o));runDestructors(destructors)},optional:getterReturnType.optional}}return[{name:reg.name,fromWireType:ptr=>{var rv={};for(var i in fields){rv[i]=fields[i].read(ptr)}rawDestructor(ptr);return rv},toWireType:(destructors,o)=>{for(var fieldName in fields){if(!(fieldName in o)&&!fields[fieldName].optional){throw new TypeError(`Missing field: "${fieldName}"`)}}var ptr=rawConstructor();for(fieldName in fields){fields[fieldName].write(ptr,o[fieldName])}if(destructors!==null){destructors.push(rawDestructor,ptr)}return ptr},readValueFromPointer:readPointer,destructorFunction:rawDestructor}]})};var AsciiToString=ptr=>{var str="";while(1){var ch=(growMemViews(),HEAPU8)[ptr++];if(!ch)return str;str+=String.fromCharCode(ch)}};var BindingError=class BindingError extends Error{constructor(message){super(message);this.name="BindingError"}};var throwBindingError=message=>{throw new BindingError(message)};function sharedRegisterType(rawType,registeredInstance,options={}){var name=registeredInstance.name;if(!rawType){throwBindingError(`type "${name}" must have a positive integer typeid pointer`)}if(registeredTypes.hasOwnProperty(rawType)){if(options.ignoreDuplicateRegistrations){return}else{throwBindingError(`Cannot register type '${name}' 
twice`)}}registeredTypes[rawType]=registeredInstance;delete typeDependencies[rawType];if(awaitingDependencies.hasOwnProperty(rawType)){var callbacks=awaitingDependencies[rawType];delete awaitingDependencies[rawType];callbacks.forEach(cb=>cb())}}function registerType(rawType,registeredInstance,options={}){return sharedRegisterType(rawType,registeredInstance,options)}var integerReadValueFromPointer=(name,width,signed)=>{switch(width){case 1:return signed?pointer=>(growMemViews(),HEAP8)[pointer]:pointer=>(growMemViews(),HEAPU8)[pointer];case 2:return signed?pointer=>(growMemViews(),HEAP16)[pointer/2]:pointer=>(growMemViews(),HEAPU16)[pointer/2];case 4:return signed?pointer=>(growMemViews(),HEAP32)[pointer/4]:pointer=>(growMemViews(),HEAPU32)[pointer/4];case 8:return signed?pointer=>(growMemViews(),HEAP64)[pointer/8]:pointer=>(growMemViews(),HEAPU64)[pointer/8];default:throw new TypeError(`invalid integer width (${width}): ${name}`)}};var __embind_register_bigint=function(primitiveType,name,size,minRange,maxRange){primitiveType=bigintToI53Checked(primitiveType);name=bigintToI53Checked(name);size=bigintToI53Checked(size);name=AsciiToString(name);const isUnsignedType=minRange===0n;let fromWireType=value=>value;if(isUnsignedType){const bitSize=size*8;fromWireType=value=>{if(typeof value=="number"){return value>>>0}return BigInt.asUintN(bitSize,value)};maxRange=fromWireType(maxRange)}registerType(primitiveType,{name,fromWireType,toWireType:(destructors,value)=>{if(typeof value=="number"){value=BigInt(value)}return value},readValueFromPointer:integerReadValueFromPointer(name,size,!isUnsignedType),destructorFunction:null})};function __embind_register_bool(rawType,name,trueValue,falseValue){rawType=bigintToI53Checked(rawType);name=bigintToI53Checked(name);name=AsciiToString(name);registerType(rawType,{name,fromWireType:function(wt){return!!wt},toWireType:function(destructors,o){return o?trueValue:falseValue},readValueFromPointer:function(pointer){return 
this.fromWireType((growMemViews(),HEAPU8)[pointer])},destructorFunction:null})}var shallowCopyInternalPointer=o=>({count:o.count,deleteScheduled:o.deleteScheduled,preservePointerOnDelete:o.preservePointerOnDelete,ptr:o.ptr,ptrType:o.ptrType,smartPtr:o.smartPtr,smartPtrType:o.smartPtrType});var throwInstanceAlreadyDeleted=obj=>{function getInstanceTypeName(handle){return handle.$$.ptrType.registeredClass.name}throwBindingError(getInstanceTypeName(obj)+" instance already deleted")};var finalizationRegistry=false;var detachFinalizer=handle=>{};var runDestructor=$$=>{if($$.smartPtr){$$.smartPtrType.rawDestructor($$.smartPtr)}else{$$.ptrType.registeredClass.rawDestructor($$.ptr)}};var releaseClassHandle=$$=>{$$.count.value-=1;var toDelete=0===$$.count.value;if(toDelete){runDestructor($$)}};var attachFinalizer=handle=>{if(!globalThis.FinalizationRegistry){attachFinalizer=handle=>handle;return handle}finalizationRegistry=new FinalizationRegistry(info=>{releaseClassHandle(info.$$)});attachFinalizer=handle=>{var $$=handle.$$;var hasSmartPtr=!!$$.smartPtr;if(hasSmartPtr){var info={$$};finalizationRegistry.register(handle,info,handle)}return handle};detachFinalizer=handle=>finalizationRegistry.unregister(handle);return attachFinalizer(handle)};var deletionQueue=[];var flushPendingDeletes=()=>{while(deletionQueue.length){var obj=deletionQueue.pop();obj.$$.deleteScheduled=false;obj["delete"]()}};var delayFunction;var init_ClassHandle=()=>{let proto=ClassHandle.prototype;Object.assign(proto,{isAliasOf(other){if(!(this instanceof ClassHandle)){return false}if(!(other instanceof ClassHandle)){return false}var leftClass=this.$$.ptrType.registeredClass;var left=this.$$.ptr;other.$$=other.$$;var rightClass=other.$$.ptrType.registeredClass;var right=other.$$.ptr;while(leftClass.baseClass){left=leftClass.upcast(left);leftClass=leftClass.baseClass}while(rightClass.baseClass){right=rightClass.upcast(right);rightClass=rightClass.baseClass}return 
leftClass===rightClass&&left===right},clone(){if(!this.$$.ptr){throwInstanceAlreadyDeleted(this)}if(this.$$.preservePointerOnDelete){this.$$.count.value+=1;return this}else{var clone=attachFinalizer(Object.create(Object.getPrototypeOf(this),{$$:{value:shallowCopyInternalPointer(this.$$)}}));clone.$$.count.value+=1;clone.$$.deleteScheduled=false;return clone}},delete(){if(!this.$$.ptr){throwInstanceAlreadyDeleted(this)}if(this.$$.deleteScheduled&&!this.$$.preservePointerOnDelete){throwBindingError("Object already scheduled for deletion")}detachFinalizer(this);releaseClassHandle(this.$$);if(!this.$$.preservePointerOnDelete){this.$$.smartPtr=undefined;this.$$.ptr=undefined}},isDeleted(){return!this.$$.ptr},deleteLater(){if(!this.$$.ptr){throwInstanceAlreadyDeleted(this)}if(this.$$.deleteScheduled&&!this.$$.preservePointerOnDelete){throwBindingError("Object already scheduled for deletion")}deletionQueue.push(this);if(deletionQueue.length===1&&delayFunction){delayFunction(flushPendingDeletes)}this.$$.deleteScheduled=true;return this}});const symbolDispose=Symbol.dispose;if(symbolDispose){proto[symbolDispose]=proto["delete"]}};function ClassHandle(){}var createNamedFunction=(name,func)=>Object.defineProperty(func,"name",{value:name});var registeredPointers={};var ensureOverloadTable=(proto,methodName,humanName)=>{if(undefined===proto[methodName].overloadTable){var prevFunc=proto[methodName];proto[methodName]=function(...args){if(!proto[methodName].overloadTable.hasOwnProperty(args.length)){throwBindingError(`Function '${humanName}' called with an invalid number of arguments (${args.length}) - expects one of (${proto[methodName].overloadTable})!`)}return proto[methodName].overloadTable[args.length].apply(this,args)};proto[methodName].overloadTable=[];proto[methodName].overloadTable[prevFunc.argCount]=prevFunc}};var 
exposePublicSymbol=(name,value,numArguments)=>{if(Module.hasOwnProperty(name)){if(undefined===numArguments||undefined!==Module[name].overloadTable&&undefined!==Module[name].overloadTable[numArguments]){throwBindingError(`Cannot register public name '${name}' twice`)}ensureOverloadTable(Module,name,name);if(Module[name].overloadTable.hasOwnProperty(numArguments)){throwBindingError(`Cannot register multiple overloads of a function with the same number of arguments (${numArguments})!`)}Module[name].overloadTable[numArguments]=value}else{Module[name]=value;Module[name].argCount=numArguments}};var char_0=48;var char_9=57;var makeLegalFunctionName=name=>{name=name.replace(/[^a-zA-Z0-9_]/g,"$");var f=name.charCodeAt(0);if(f>=char_0&&f<=char_9){return`_${name}`}return name};function RegisteredClass(name,constructor,instancePrototype,rawDestructor,baseClass,getActualType,upcast,downcast){this.name=name;this.constructor=constructor;this.instancePrototype=instancePrototype;this.rawDestructor=rawDestructor;this.baseClass=baseClass;this.getActualType=getActualType;this.upcast=upcast;this.downcast=downcast;this.pureVirtualFunctions=[]}var upcastPointer=(ptr,ptrClass,desiredClass)=>{while(ptrClass!==desiredClass){if(!ptrClass.upcast){throwBindingError(`Expected null or instance of ${desiredClass.name}, got an instance of ${ptrClass.name}`)}ptr=ptrClass.upcast(ptr);ptrClass=ptrClass.baseClass}return ptr};var embindRepr=v=>{if(v===null){return"null"}var t=typeof v;if(t==="object"||t==="array"||t==="function"){return v.toString()}else{return""+v}};function constNoSmartPtrRawPointerToWireType(destructors,handle){if(handle===null){if(this.isReference){throwBindingError(`null is not a valid ${this.name}`)}return 0}if(!handle.$$){throwBindingError(`Cannot pass "${embindRepr(handle)}" as a ${this.name}`)}if(!handle.$$.ptr){throwBindingError(`Cannot pass deleted object as a pointer of type ${this.name}`)}var handleClass=handle.$$.ptrType.registeredClass;var 
ptr=upcastPointer(handle.$$.ptr,handleClass,this.registeredClass);return ptr}function genericPointerToWireType(destructors,handle){var ptr;if(handle===null){if(this.isReference){throwBindingError(`null is not a valid ${this.name}`)}if(this.isSmartPointer){ptr=this.rawConstructor();if(destructors!==null){destructors.push(this.rawDestructor,ptr)}return ptr}else{return 0}}if(!handle||!handle.$$){throwBindingError(`Cannot pass "${embindRepr(handle)}" as a ${this.name}`)}if(!handle.$$.ptr){throwBindingError(`Cannot pass deleted object as a pointer of type ${this.name}`)}if(!this.isConst&&handle.$$.ptrType.isConst){throwBindingError(`Cannot convert argument of type ${handle.$$.smartPtrType?handle.$$.smartPtrType.name:handle.$$.ptrType.name} to parameter type ${this.name}`)}var handleClass=handle.$$.ptrType.registeredClass;ptr=upcastPointer(handle.$$.ptr,handleClass,this.registeredClass);if(this.isSmartPointer){if(undefined===handle.$$.smartPtr){throwBindingError("Passing raw pointer to smart pointer is illegal")}switch(this.sharingPolicy){case 0:if(handle.$$.smartPtrType===this){ptr=handle.$$.smartPtr}else{throwBindingError(`Cannot convert argument of type ${handle.$$.smartPtrType?handle.$$.smartPtrType.name:handle.$$.ptrType.name} to parameter type ${this.name}`)}break;case 1:ptr=handle.$$.smartPtr;break;case 2:if(handle.$$.smartPtrType===this){ptr=handle.$$.smartPtr}else{var clonedHandle=handle["clone"]();ptr=this.rawShare(ptr,Emval.toHandle(()=>clonedHandle["delete"]()));if(destructors!==null){destructors.push(this.rawDestructor,ptr)}}break;default:throwBindingError("Unsupporting sharing policy")}}return ptr}function nonConstNoSmartPtrRawPointerToWireType(destructors,handle){if(handle===null){if(this.isReference){throwBindingError(`null is not a valid ${this.name}`)}return 0}if(!handle.$$){throwBindingError(`Cannot pass "${embindRepr(handle)}" as a ${this.name}`)}if(!handle.$$.ptr){throwBindingError(`Cannot pass deleted object as a pointer of type 
${this.name}`)}if(handle.$$.ptrType.isConst){throwBindingError(`Cannot convert argument of type ${handle.$$.ptrType.name} to parameter type ${this.name}`)}var handleClass=handle.$$.ptrType.registeredClass;var ptr=upcastPointer(handle.$$.ptr,handleClass,this.registeredClass);return ptr}var downcastPointer=(ptr,ptrClass,desiredClass)=>{if(ptrClass===desiredClass){return ptr}if(undefined===desiredClass.baseClass){return null}var rv=downcastPointer(ptr,ptrClass,desiredClass.baseClass);if(rv===null){return null}return desiredClass.downcast(rv)};var registeredInstances={};var getBasestPointer=(class_,ptr)=>{if(ptr===undefined){throwBindingError("ptr should not be undefined")}while(class_.baseClass){ptr=class_.upcast(ptr);class_=class_.baseClass}return ptr};var getInheritedInstance=(class_,ptr)=>{ptr=getBasestPointer(class_,ptr);return registeredInstances[ptr]};var makeClassHandle=(prototype,record)=>{if(!record.ptrType||!record.ptr){throwInternalError("makeClassHandle requires ptr and ptrType")}var hasSmartPtrType=!!record.smartPtrType;var hasSmartPtr=!!record.smartPtr;if(hasSmartPtrType!==hasSmartPtr){throwInternalError("Both smartPtrType and smartPtr must be specified")}record.count={value:1};return attachFinalizer(Object.create(prototype,{$$:{value:record,writable:true}}))};function RegisteredPointer_fromWireType(ptr){ptr=bigintToI53Checked(ptr);var rawPointer=this.getPointee(ptr);if(!rawPointer){this.destructor(ptr);return null}var registeredInstance=getInheritedInstance(this.registeredClass,rawPointer);if(undefined!==registeredInstance){if(0===registeredInstance.$$.count.value){registeredInstance.$$.ptr=rawPointer;registeredInstance.$$.smartPtr=ptr;return registeredInstance["clone"]()}else{var rv=registeredInstance["clone"]();this.destructor(ptr);return rv}}function makeDefaultHandle(){if(this.isSmartPointer){return makeClassHandle(this.registeredClass.instancePrototype,{ptrType:this.pointeeType,ptr:rawPointer,smartPtrType:this,smartPtr:ptr})}else{return 
makeClassHandle(this.registeredClass.instancePrototype,{ptrType:this,ptr})}}var actualType=this.registeredClass.getActualType(rawPointer);var registeredPointerRecord=registeredPointers[actualType];if(!registeredPointerRecord){return makeDefaultHandle.call(this)}var toType;if(this.isConst){toType=registeredPointerRecord.constPointerType}else{toType=registeredPointerRecord.pointerType}var dp=downcastPointer(rawPointer,this.registeredClass,toType.registeredClass);if(dp===null){return makeDefaultHandle.call(this)}if(this.isSmartPointer){return makeClassHandle(toType.registeredClass.instancePrototype,{ptrType:toType,ptr:dp,smartPtrType:this,smartPtr:ptr})}else{return makeClassHandle(toType.registeredClass.instancePrototype,{ptrType:toType,ptr:dp})}}var init_RegisteredPointer=()=>{Object.assign(RegisteredPointer.prototype,{getPointee(ptr){if(this.rawGetPointee){ptr=this.rawGetPointee(ptr)}return ptr},destructor(ptr){this.rawDestructor?.(ptr)},readValueFromPointer:readPointer,fromWireType:RegisteredPointer_fromWireType})};function RegisteredPointer(name,registeredClass,isReference,isConst,isSmartPointer,pointeeType,sharingPolicy,rawGetPointee,rawConstructor,rawShare,rawDestructor){this.name=name;this.registeredClass=registeredClass;this.isReference=isReference;this.isConst=isConst;this.isSmartPointer=isSmartPointer;this.pointeeType=pointeeType;this.sharingPolicy=sharingPolicy;this.rawGetPointee=rawGetPointee;this.rawConstructor=rawConstructor;this.rawShare=rawShare;this.rawDestructor=rawDestructor;if(!isSmartPointer&®isteredClass.baseClass===undefined){if(isConst){this.toWireType=constNoSmartPtrRawPointerToWireType;this.destructorFunction=null}else{this.toWireType=nonConstNoSmartPtrRawPointerToWireType;this.destructorFunction=null}}else{this.toWireType=genericPointerToWireType}}var replacePublicSymbol=(name,value,numArguments)=>{if(!Module.hasOwnProperty(name)){throwInternalError("Replacing nonexistent public 
symbol")}if(undefined!==Module[name].overloadTable&&undefined!==numArguments){Module[name].overloadTable[numArguments]=value}else{Module[name]=value;Module[name].argCount=numArguments}};var dynCall=(sig,ptr,args=[],promising=false)=>{for(var i=1;i(...args)=>dynCall(sig,ptr,args,promising);var embind__requireFunction=(signature,rawFunction,isAsync=false)=>{signature=AsciiToString(signature);function makeDynCaller(){if(signature.includes("p")){return getDynCaller(signature,rawFunction,isAsync)}var rtn=getWasmTableEntry(rawFunction);return rtn}var fp=makeDynCaller();if(typeof fp!="function"){throwBindingError(`unknown function pointer with signature ${signature}: ${rawFunction}`)}return fp};class UnboundTypeError extends Error{}var getTypeName=type=>{var ptr=___getTypeName(type);var rv=AsciiToString(ptr);_free(ptr);return rv};var throwUnboundTypeError=(message,types)=>{var unboundTypes=[];var seen={};function visit(type){if(seen[type]){return}if(registeredTypes[type]){return}if(typeDependencies[type]){typeDependencies[type].forEach(visit);return}unboundTypes.push(type);seen[type]=true}types.forEach(visit);throw new UnboundTypeError(`${message}: `+unboundTypes.map(getTypeName).join([", "]))};function 
__embind_register_class(rawType,rawPointerType,rawConstPointerType,baseClassRawType,getActualTypeSignature,getActualType,upcastSignature,upcast,downcastSignature,downcast,name,destructorSignature,rawDestructor){rawType=bigintToI53Checked(rawType);rawPointerType=bigintToI53Checked(rawPointerType);rawConstPointerType=bigintToI53Checked(rawConstPointerType);baseClassRawType=bigintToI53Checked(baseClassRawType);getActualTypeSignature=bigintToI53Checked(getActualTypeSignature);getActualType=bigintToI53Checked(getActualType);upcastSignature=bigintToI53Checked(upcastSignature);upcast=bigintToI53Checked(upcast);downcastSignature=bigintToI53Checked(downcastSignature);downcast=bigintToI53Checked(downcast);name=bigintToI53Checked(name);destructorSignature=bigintToI53Checked(destructorSignature);rawDestructor=bigintToI53Checked(rawDestructor);name=AsciiToString(name);getActualType=embind__requireFunction(getActualTypeSignature,getActualType);upcast&&=embind__requireFunction(upcastSignature,upcast);downcast&&=embind__requireFunction(downcastSignature,downcast);rawDestructor=embind__requireFunction(destructorSignature,rawDestructor);var legalFunctionName=makeLegalFunctionName(name);exposePublicSymbol(legalFunctionName,function(){throwUnboundTypeError(`Cannot construct ${name} due to unbound types`,[baseClassRawType])});whenDependentTypesAreResolved([rawType,rawPointerType,rawConstPointerType],baseClassRawType?[baseClassRawType]:[],base=>{base=base[0];var baseClass;var basePrototype;if(baseClassRawType){baseClass=base.registeredClass;basePrototype=baseClass.instancePrototype}else{basePrototype=ClassHandle.prototype}var constructor=createNamedFunction(name,function(...args){if(Object.getPrototypeOf(this)!==instancePrototype){throw new BindingError(`Use 'new' to construct ${name}`)}if(undefined===registeredClass.constructor_body){throw new BindingError(`${name} has no accessible constructor`)}var body=registeredClass.constructor_body[args.length];if(undefined===body){throw new 
BindingError(`Tried to invoke ctor of ${name} with invalid number of parameters (${args.length}) - expected (${Object.keys(registeredClass.constructor_body).toString()}) parameters instead!`)}return body.apply(this,args)});var instancePrototype=Object.create(basePrototype,{constructor:{value:constructor}});constructor.prototype=instancePrototype;var registeredClass=new RegisteredClass(name,constructor,instancePrototype,rawDestructor,baseClass,getActualType,upcast,downcast);if(registeredClass.baseClass){registeredClass.baseClass.__derivedClasses??=[];registeredClass.baseClass.__derivedClasses.push(registeredClass)}var referenceConverter=new RegisteredPointer(name,registeredClass,true,false,false);var pointerConverter=new RegisteredPointer(name+"*",registeredClass,false,false,false);var constPointerConverter=new RegisteredPointer(name+" const*",registeredClass,false,true,false);registeredPointers[rawType]={pointerType:pointerConverter,constPointerType:constPointerConverter};replacePublicSymbol(legalFunctionName,constructor);return[referenceConverter,pointerConverter,constPointerConverter]})}var heap32VectorToArray=(count,firstElement)=>{var array=[];for(var i=0;i{classType=classType[0];var humanName=`constructor ${classType.name}`;if(undefined===classType.registeredClass.constructor_body){classType.registeredClass.constructor_body=[]}if(undefined!==classType.registeredClass.constructor_body[argCount-1]){throw new BindingError(`Cannot register multiple constructors with identical number of parameters (${argCount-1}) for class '${classType.name}'! 
Overload resolution is currently only performed using the parameter count, not actual type info!`)}classType.registeredClass.constructor_body[argCount-1]=()=>{throwUnboundTypeError(`Cannot construct ${classType.name} due to unbound types`,rawArgTypes)};whenDependentTypesAreResolved([],rawArgTypes,argTypes=>{argTypes.splice(1,0,null);classType.registeredClass.constructor_body[argCount-1]=craftInvokerFunction(humanName,argTypes,null,invoker,rawConstructor);return[]});return[]})};var getFunctionName=signature=>{signature=signature.trim();const argsIndex=signature.indexOf("(");if(argsIndex===-1)return signature;return signature.slice(0,argsIndex)};var __embind_register_class_function=function(rawClassType,methodName,argCount,rawArgTypesAddr,invokerSignature,rawInvoker,context,isPureVirtual,isAsync,isNonnullReturn){rawClassType=bigintToI53Checked(rawClassType);methodName=bigintToI53Checked(methodName);rawArgTypesAddr=bigintToI53Checked(rawArgTypesAddr);invokerSignature=bigintToI53Checked(invokerSignature);rawInvoker=bigintToI53Checked(rawInvoker);context=bigintToI53Checked(context);var rawArgTypes=heap32VectorToArray(argCount,rawArgTypesAddr);methodName=AsciiToString(methodName);methodName=getFunctionName(methodName);rawInvoker=embind__requireFunction(invokerSignature,rawInvoker,isAsync);whenDependentTypesAreResolved([],[rawClassType],classType=>{classType=classType[0];var humanName=`${classType.name}.${methodName}`;if(methodName.startsWith("@@")){methodName=Symbol[methodName.substring(2)]}if(isPureVirtual){classType.registeredClass.pureVirtualFunctions.push(methodName)}function unboundTypesHandler(){throwUnboundTypeError(`Cannot call ${humanName} due to unbound types`,rawArgTypes)}var proto=classType.registeredClass.instancePrototype;var 
method=proto[methodName];if(undefined===method||undefined===method.overloadTable&&method.className!==classType.name&&method.argCount===argCount-2){unboundTypesHandler.argCount=argCount-2;unboundTypesHandler.className=classType.name;proto[methodName]=unboundTypesHandler}else{ensureOverloadTable(proto,methodName,humanName);proto[methodName].overloadTable[argCount-2]=unboundTypesHandler}whenDependentTypesAreResolved([],rawArgTypes,argTypes=>{var memberFunction=craftInvokerFunction(humanName,argTypes,classType,rawInvoker,context,isAsync);if(undefined===proto[methodName].overloadTable){memberFunction.argCount=argCount-2;proto[methodName]=memberFunction}else{proto[methodName].overloadTable[argCount-2]=memberFunction}return[]});return[]})};var __embind_register_constant=function(name,type,value){name=bigintToI53Checked(name);type=bigintToI53Checked(type);name=AsciiToString(name);whenDependentTypesAreResolved([],[type],type=>{type=type[0];Module[name]=type.fromWireType(value);return[]})};var emval_freelist=[];var emval_handles=[0,1,,1,null,1,true,1,false,1];function __emval_decref(handle){handle=bigintToI53Checked(handle);if(handle>9&&0===--emval_handles[handle+1]){emval_handles[handle]=undefined;emval_freelist.push(handle)}}var Emval={toValue:handle=>{if(!handle){throwBindingError(`Cannot use deleted val. 
handle = ${handle}`)}return emval_handles[handle]},toHandle:value=>{switch(value){case undefined:return 2;case null:return 4;case true:return 6;case false:return 8;default:{const handle=emval_freelist.pop()||emval_handles.length;emval_handles[handle]=value;emval_handles[handle+1]=1;return handle}}}};var EmValType={name:"emscripten::val",fromWireType:handle=>{var rv=Emval.toValue(handle);__emval_decref(handle);return rv},toWireType:(destructors,value)=>Emval.toHandle(value),readValueFromPointer:readPointer,destructorFunction:null};function __embind_register_emval(rawType){rawType=bigintToI53Checked(rawType);return registerType(rawType,EmValType)}var enumReadValueFromPointer=(name,width,signed)=>{switch(width){case 1:return signed?function(pointer){return this.fromWireType((growMemViews(),HEAP8)[pointer])}:function(pointer){return this.fromWireType((growMemViews(),HEAPU8)[pointer])};case 2:return signed?function(pointer){return this.fromWireType((growMemViews(),HEAP16)[pointer/2])}:function(pointer){return this.fromWireType((growMemViews(),HEAPU16)[pointer/2])};case 4:return signed?function(pointer){return this.fromWireType((growMemViews(),HEAP32)[pointer/4])}:function(pointer){return this.fromWireType((growMemViews(),HEAPU32)[pointer/4])};default:throw new TypeError(`invalid integer width (${width}): ${name}`)}};function __embind_register_enum(rawType,name,size,isSigned){rawType=bigintToI53Checked(rawType);name=bigintToI53Checked(name);size=bigintToI53Checked(size);name=AsciiToString(name);function ctor(){}ctor.values={};registerType(rawType,{name,constructor:ctor,fromWireType:function(c){return this.constructor.values[c]},toWireType:(destructors,c)=>c.value,readValueFromPointer:enumReadValueFromPointer(name,size,isSigned),destructorFunction:null});exposePublicSymbol(name,ctor)}var requireRegisteredType=(rawType,humanName)=>{var impl=registeredTypes[rawType];if(undefined===impl){throwBindingError(`${humanName} has unknown type ${getTypeName(rawType)}`)}return 
impl};function __embind_register_enum_value(rawEnumType,name,enumValue){rawEnumType=bigintToI53Checked(rawEnumType);name=bigintToI53Checked(name);var enumType=requireRegisteredType(rawEnumType,"enum");name=AsciiToString(name);var Enum=enumType.constructor;var Value=Object.create(enumType.constructor.prototype,{value:{value:enumValue},constructor:{value:createNamedFunction(`${enumType.name}_${name}`,function(){})}});Enum.values[enumValue]=Value;Enum[name]=Value}var floatReadValueFromPointer=(name,width)=>{switch(width){case 4:return function(pointer){return this.fromWireType((growMemViews(),HEAPF32)[pointer/4])};case 8:return function(pointer){return this.fromWireType((growMemViews(),HEAPF64)[pointer/8])};default:throw new TypeError(`invalid float width (${width}): ${name}`)}};var __embind_register_float=function(rawType,name,size){rawType=bigintToI53Checked(rawType);name=bigintToI53Checked(name);size=bigintToI53Checked(size);name=AsciiToString(name);registerType(rawType,{name,fromWireType:value=>value,toWireType:(destructors,value)=>value,readValueFromPointer:floatReadValueFromPointer(name,size),destructorFunction:null})};function __embind_register_function(name,argCount,rawArgTypesAddr,signature,rawInvoker,fn,isAsync,isNonnullReturn){name=bigintToI53Checked(name);rawArgTypesAddr=bigintToI53Checked(rawArgTypesAddr);signature=bigintToI53Checked(signature);rawInvoker=bigintToI53Checked(rawInvoker);fn=bigintToI53Checked(fn);var argTypes=heap32VectorToArray(argCount,rawArgTypesAddr);name=AsciiToString(name);name=getFunctionName(name);rawInvoker=embind__requireFunction(signature,rawInvoker,isAsync);exposePublicSymbol(name,function(){throwUnboundTypeError(`Cannot call ${name} due to unbound types`,argTypes)},argCount-1);whenDependentTypesAreResolved([],argTypes,argTypes=>{var invokerArgsArray=[argTypes[0],null].concat(argTypes.slice(1));replacePublicSymbol(name,craftInvokerFunction(name,invokerArgsArray,null,rawInvoker,fn,isAsync),argCount-1);return[]})}var 
__embind_register_integer=function(primitiveType,name,size,minRange,maxRange){primitiveType=bigintToI53Checked(primitiveType);name=bigintToI53Checked(name);size=bigintToI53Checked(size);name=AsciiToString(name);const isUnsignedType=minRange===0;let fromWireType=value=>value;if(isUnsignedType){var bitshift=32-8*size;fromWireType=value=>value<>>bitshift;maxRange=fromWireType(maxRange)}registerType(primitiveType,{name,fromWireType,toWireType:(destructors,value)=>value,readValueFromPointer:integerReadValueFromPointer(name,size,minRange!==0),destructorFunction:null})};function __embind_register_memory_view(rawType,dataTypeIndex,name){rawType=bigintToI53Checked(rawType);name=bigintToI53Checked(name);var typeMapping=[Int8Array,Uint8Array,Int16Array,Uint16Array,Int32Array,Uint32Array,Float32Array,Float64Array,BigInt64Array,BigUint64Array];var TA=typeMapping[dataTypeIndex];function decodeMemoryView(handle){var size=Number((growMemViews(),HEAPU64)[handle/8]);var data=Number((growMemViews(),HEAPU64)[(handle+8)/8]);return new TA((growMemViews(),HEAP8).buffer,data,size)}name=AsciiToString(name);registerType(rawType,{name,fromWireType:decodeMemoryView,readValueFromPointer:decodeMemoryView},{ignoreDuplicateRegistrations:true})}var stringToUTF8=(str,outPtr,maxBytesToWrite)=>stringToUTF8Array(str,(growMemViews(),HEAPU8),outPtr,maxBytesToWrite);function __embind_register_std_string(rawType,name){rawType=bigintToI53Checked(rawType);name=bigintToI53Checked(name);name=AsciiToString(name);var stdStringIsUTF8=true;registerType(rawType,{name,fromWireType(value){var length=Number((growMemViews(),HEAPU64)[value/8]);var payload=value+8;var str;if(stdStringIsUTF8){str=UTF8ToString(payload,length,true)}else{str="";for(var i=0;i255){_free(base);throwBindingError("String has UTF-16 code units that do not fit in 8 bits")}(growMemViews(),HEAPU8)[ptr+i]=charCode}}}else{(growMemViews(),HEAPU8).set(value,ptr)}if(destructors!==null){destructors.push(_free,base)}return 
base},readValueFromPointer:readPointer,destructorFunction(ptr){_free(ptr)}})}var UTF16Decoder=globalThis.TextDecoder?new TextDecoder("utf-16le"):undefined;var UTF16ToString=(ptr,maxBytesToRead,ignoreNul)=>{var idx=ptr/2;var endIdx=findStringEnd((growMemViews(),HEAPU16),idx,maxBytesToRead/2,ignoreNul);if(endIdx-idx>16&&UTF16Decoder)return UTF16Decoder.decode((growMemViews(),HEAPU16).slice(idx,endIdx));var str="";for(var i=idx;i{maxBytesToWrite??=2147483647;if(maxBytesToWrite<2)return 0;maxBytesToWrite-=2;var startPtr=outPtr;var numCharsToWrite=maxBytesToWritestr.length*2;var UTF32ToString=(ptr,maxBytesToRead,ignoreNul)=>{var str="";var startIdx=ptr/4;for(var i=0;!(i>=maxBytesToRead/4);i++){var utf32=(growMemViews(),HEAPU32)[startIdx+i];if(!utf32&&!ignoreNul)break;str+=String.fromCodePoint(utf32)}return str};var stringToUTF32=(str,outPtr,maxBytesToWrite)=>{maxBytesToWrite??=2147483647;if(maxBytesToWrite<4)return 0;var startPtr=outPtr;var endPtr=startPtr+maxBytesToWrite-4;for(var i=0;i65535){i++}(growMemViews(),HEAP32)[outPtr/4]=codePoint;outPtr+=4;if(outPtr+4>endPtr)break}(growMemViews(),HEAP32)[outPtr/4]=0;return outPtr-startPtr};var lengthBytesUTF32=str=>{var len=0;for(var i=0;i65535){i++}len+=4}return len};function __embind_register_std_wstring(rawType,charSize,name){rawType=bigintToI53Checked(rawType);charSize=bigintToI53Checked(charSize);name=bigintToI53Checked(name);name=AsciiToString(name);var decodeString,encodeString,lengthBytesUTF;if(charSize===2){decodeString=UTF16ToString;encodeString=stringToUTF16;lengthBytesUTF=lengthBytesUTF16}else{decodeString=UTF32ToString;encodeString=stringToUTF32;lengthBytesUTF=lengthBytesUTF32}registerType(rawType,{name,fromWireType:value=>{var length=Number((growMemViews(),HEAPU64)[value/8]);var str=decodeString(value+8,length*charSize,true);_free(value);return str},toWireType:(destructors,value)=>{if(!(typeof value=="string")){throwBindingError(`Cannot pass non-string to C++ string type ${name}`)}var 
length=lengthBytesUTF(value);var ptr=_malloc(8+length+charSize);(growMemViews(),HEAPU64)[ptr/8]=BigInt(length/charSize);encodeString(value,ptr+8,length+charSize);if(destructors!==null){destructors.push(_free,ptr)}return ptr},readValueFromPointer:readPointer,destructorFunction(ptr){_free(ptr)}})}function __embind_register_value_object(rawType,name,constructorSignature,rawConstructor,destructorSignature,rawDestructor){rawType=bigintToI53Checked(rawType);name=bigintToI53Checked(name);constructorSignature=bigintToI53Checked(constructorSignature);rawConstructor=bigintToI53Checked(rawConstructor);destructorSignature=bigintToI53Checked(destructorSignature);rawDestructor=bigintToI53Checked(rawDestructor);structRegistrations[rawType]={name:AsciiToString(name),rawConstructor:embind__requireFunction(constructorSignature,rawConstructor),rawDestructor:embind__requireFunction(destructorSignature,rawDestructor),fields:[]}}function __embind_register_value_object_field(structType,fieldName,getterReturnType,getterSignature,getter,getterContext,setterArgumentType,setterSignature,setter,setterContext){structType=bigintToI53Checked(structType);fieldName=bigintToI53Checked(fieldName);getterReturnType=bigintToI53Checked(getterReturnType);getterSignature=bigintToI53Checked(getterSignature);getter=bigintToI53Checked(getter);getterContext=bigintToI53Checked(getterContext);setterArgumentType=bigintToI53Checked(setterArgumentType);setterSignature=bigintToI53Checked(setterSignature);setter=bigintToI53Checked(setter);setterContext=bigintToI53Checked(setterContext);structRegistrations[structType].fields.push({fieldName:AsciiToString(fieldName),getterReturnType,getter:embind__requireFunction(getterSignature,getter),getterContext,setterArgumentType,setter:embind__requireFunction(setterSignature,setter),setterContext})}var 
__embind_register_void=function(rawType,name){rawType=bigintToI53Checked(rawType);name=bigintToI53Checked(name);name=AsciiToString(name);registerType(rawType,{isVoid:true,name,fromWireType:()=>undefined,toWireType:(destructors,o)=>undefined})};function __emscripten_init_main_thread_js(tb){tb=bigintToI53Checked(tb);__emscripten_thread_init(tb,!ENVIRONMENT_IS_WORKER,1,!ENVIRONMENT_IS_WEB,2097152,false);PThread.threadInitTLS()}var handleException=e=>{if(e instanceof ExitStatus||e=="unwind"){return EXITSTATUS}quit_(1,e)};var maybeExit=()=>{if(!keepRuntimeAlive()){try{if(ENVIRONMENT_IS_PTHREAD){if(_pthread_self())__emscripten_thread_exit(EXITSTATUS);return}_exit(EXITSTATUS)}catch(e){handleException(e)}}};var callUserCallback=func=>{if(ABORT){return}try{func();maybeExit()}catch(e){handleException(e)}};function __emscripten_thread_mailbox_await(pthread_ptr){pthread_ptr=bigintToI53Checked(pthread_ptr);if(Atomics.waitAsync){var wait=Atomics.waitAsync((growMemViews(),HEAP32),pthread_ptr/4,pthread_ptr);wait.value.then(checkMailbox);var waitingAsync=pthread_ptr+228;Atomics.store((growMemViews(),HEAP32),waitingAsync/4,1)}}var checkMailbox=()=>callUserCallback(()=>{var pthread_ptr=_pthread_self();if(pthread_ptr){__emscripten_thread_mailbox_await(pthread_ptr);__emscripten_check_mailbox()}});function __emscripten_notify_mailbox_postmessage(targetThread,currThreadId){targetThread=bigintToI53Checked(targetThread);currThreadId=bigintToI53Checked(currThreadId);if(targetThread==currThreadId){setTimeout(checkMailbox)}else if(ENVIRONMENT_IS_PTHREAD){postMessage({targetThread,cmd:"checkMailbox"})}else{var worker=PThread.pthreads[targetThread];if(!worker){return}worker.postMessage({cmd:"checkMailbox"})}}var proxiedJSCallArgs=[];function 
__emscripten_receive_on_main_thread_js(funcIndex,emAsmAddr,callingThread,numCallArgs,args){emAsmAddr=bigintToI53Checked(emAsmAddr);callingThread=bigintToI53Checked(callingThread);args=bigintToI53Checked(args);numCallArgs/=2;proxiedJSCallArgs.length=numCallArgs;var b=args/8;for(var i=0;i{throw Infinity};var emval_methodCallers=[];var emval_addMethodCaller=caller=>{var id=emval_methodCallers.length;emval_methodCallers.push(caller);return id};var emval_lookupTypes=(argCount,argTypes)=>{var a=new Array(argCount);for(var i=0;i{var destructors=[];var result=toReturnWire(destructors,handle);if(destructors.length){(growMemViews(),HEAPU64)[destructorsRef/8]=BigInt(Emval.toHandle(destructors))}return result};var emval_symbols={};var getStringOrSymbol=address=>{var symbol=emval_symbols[address];if(symbol===undefined){return AsciiToString(address)}return symbol};var __emval_create_invoker=function(argCount,argTypesPtr,kind){argTypesPtr=bigintToI53Checked(argTypesPtr);var ret=(()=>{var GenericWireTypeSize=16;var[retType,...argTypes]=emval_lookupTypes(argCount,argTypesPtr);var toReturnWire=retType.toWireType.bind(retType);var argFromPtr=argTypes.map(type=>type.readValueFromPointer.bind(type));argCount--;var captures={toValue:Emval.toValue};var args=argFromPtr.map((argFromPtr,i)=>{var captureName=`argFromPtr${i}`;captures[captureName]=argFromPtr;return`${captureName}(args${i?"+"+i*GenericWireTypeSize:""})`});var functionBody;switch(kind){case 0:functionBody="toValue(handle)";break;case 2:functionBody="new (toValue(handle))";break;case 3:functionBody="";break;case 1:captures["getStringOrSymbol"]=getStringOrSymbol;functionBody="toValue(handle)[getStringOrSymbol(methodName)]";break}functionBody+=`(${args})`;if(!retType.isVoid){captures["toReturnWire"]=toReturnWire;captures["emval_returnValue"]=emval_returnValue;functionBody=`return emval_returnValue(toReturnWire, destructorsRef, ${functionBody})`}functionBody=`return function (handle, methodName, destructorsRef, args) {\n 
${functionBody}\n }`;var invokerFunction=new Function(Object.keys(captures),functionBody)(...Object.values(captures));var functionName=`methodCaller<(${argTypes.map(t=>t.name)}) => ${retType.name}>`;return emval_addMethodCaller(createNamedFunction(functionName,invokerFunction))})();return BigInt(ret)};var __emval_get_property=function(handle,key){handle=bigintToI53Checked(handle);key=bigintToI53Checked(key);var ret=(()=>{handle=Emval.toValue(handle);key=Emval.toValue(key);return Emval.toHandle(handle[key])})();return BigInt(ret)};function __emval_incref(handle){handle=bigintToI53Checked(handle);if(handle>9){emval_handles[handle+1]+=1}}function __emval_invoke(caller,handle,methodName,destructorsRef,args){caller=bigintToI53Checked(caller);handle=bigintToI53Checked(handle);methodName=bigintToI53Checked(methodName);destructorsRef=bigintToI53Checked(destructorsRef);args=bigintToI53Checked(args);return emval_methodCallers[caller](handle,methodName,destructorsRef,args)}var __emval_invoke_i64=__emval_invoke;var __emval_new_cstring=v=>{v=bigintToI53Checked(v);return BigInt(Emval.toHandle(getStringOrSymbol(v)))};function __emval_run_destructors(handle){handle=bigintToI53Checked(handle);var destructors=Emval.toValue(handle);runDestructors(destructors);__emval_decref(handle)}function __mmap_js(len,prot,flags,fd,offset,allocated,addr){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(10,0,1,len,prot,flags,fd,offset,allocated,addr);len=bigintToI53Checked(len);offset=bigintToI53Checked(offset);allocated=bigintToI53Checked(allocated);addr=bigintToI53Checked(addr);try{var stream=SYSCALLS.getStreamFromFD(fd);var res=FS.mmap(stream,len,offset,prot,flags);var ptr=res.ptr;(growMemViews(),HEAP32)[allocated/4]=res.allocated;(growMemViews(),HEAPU64)[addr/8]=BigInt(ptr);return 0}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}function __munmap_js(addr,len,prot,flags,fd,offset){if(ENVIRONMENT_IS_PTHREAD)return 
proxyToMainThread(11,0,1,addr,len,prot,flags,fd,offset);addr=bigintToI53Checked(addr);len=bigintToI53Checked(len);offset=bigintToI53Checked(offset);try{var stream=SYSCALLS.getStreamFromFD(fd);if(prot&2){SYSCALLS.doMsync(addr,stream,len,flags,offset)}}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}var __tzset_js=function(timezone,daylight,std_name,dst_name){timezone=bigintToI53Checked(timezone);daylight=bigintToI53Checked(daylight);std_name=bigintToI53Checked(std_name);dst_name=bigintToI53Checked(dst_name);var currentYear=(new Date).getFullYear();var winter=new Date(currentYear,0,1);var summer=new Date(currentYear,6,1);var winterOffset=winter.getTimezoneOffset();var summerOffset=summer.getTimezoneOffset();var stdTimezoneOffset=Math.max(winterOffset,summerOffset);(growMemViews(),HEAPU64)[timezone/8]=BigInt(stdTimezoneOffset*60);(growMemViews(),HEAP32)[daylight/4]=Number(winterOffset!=summerOffset);var extractZone=timezoneOffset=>{var sign=timezoneOffset>=0?"-":"+";var absOffset=Math.abs(timezoneOffset);var hours=String(Math.floor(absOffset/60)).padStart(2,"0");var minutes=String(absOffset%60).padStart(2,"0");return`UTC${sign}${hours}${minutes}`};var winterName=extractZone(winterOffset);var summerName=extractZone(summerOffset);if(summerOffsetperformance.timeOrigin+performance.now();var _emscripten_date_now=()=>Date.now();var nowIsMonotonic=1;var checkWasiClock=clock_id=>clock_id>=0&&clock_id<=3;function _clock_time_get(clk_id,ignored_precision,ptime){ignored_precision=bigintToI53Checked(ignored_precision);ptime=bigintToI53Checked(ptime);if(!checkWasiClock(clk_id)){return 28}var now;if(clk_id===0){now=_emscripten_date_now()}else if(nowIsMonotonic){now=_emscripten_get_now()}else{return 52}var nsec=Math.round(now*1e3*1e3);(growMemViews(),HEAP64)[ptime/8]=BigInt(nsec);return 0}var _emscripten_check_blocking_allowed=()=>{};var runtimeKeepalivePush=()=>{runtimeKeepaliveCounter+=1};var 
_emscripten_exit_with_live_runtime=()=>{runtimeKeepalivePush();throw"unwind"};var getHeapMax=()=>12884901888;var _emscripten_get_heap_max=()=>BigInt(getHeapMax());var _emscripten_num_logical_cores=()=>navigator["hardwareConcurrency"];var growMemory=size=>{var oldHeapSize=wasmMemory.buffer.byteLength;var pages=(size-oldHeapSize+65535)/65536|0;try{wasmMemory.grow(BigInt(pages));updateMemoryViews();return 1}catch(e){}};function _emscripten_resize_heap(requestedSize){requestedSize=bigintToI53Checked(requestedSize);var oldSize=(growMemViews(),HEAPU8).length;if(requestedSize<=oldSize){return false}var maxHeapSize=getHeapMax();if(requestedSize>maxHeapSize){return false}for(var cutDown=1;cutDown<=4;cutDown*=2){var overGrownHeapSize=oldSize*(1+.2/cutDown);overGrownHeapSize=Math.min(overGrownHeapSize,requestedSize+100663296);var newSize=Math.min(maxHeapSize,alignMemory(Math.max(requestedSize,overGrownHeapSize),65536));var replacement=growMemory(newSize);if(replacement){return true}}return false}var ENV={};var getExecutableName=()=>thisProgram||"./this.program";var getEnvStrings=()=>{if(!getEnvStrings.strings){var lang=(typeof navigator=="object"&&navigator.language||"C").replace("-","_")+".UTF-8";var env={USER:"web_user",LOGNAME:"web_user",PATH:"/",PWD:"/",HOME:"/home/web_user",LANG:lang,_:getExecutableName()};for(var x in ENV){if(ENV[x]===undefined)delete env[x];else env[x]=ENV[x]}var strings=[];for(var x in env){strings.push(`${x}=${env[x]}`)}getEnvStrings.strings=strings}return getEnvStrings.strings};function _environ_get(__environ,environ_buf){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(12,0,1,__environ,environ_buf);__environ=bigintToI53Checked(__environ);environ_buf=bigintToI53Checked(environ_buf);var bufSize=0;var envp=0;for(var string of getEnvStrings()){var ptr=environ_buf+bufSize;(growMemViews(),HEAPU64)[(__environ+envp)/8]=BigInt(ptr);bufSize+=stringToUTF8(string,ptr,Infinity)+1;envp+=8}return 0}function 
_environ_sizes_get(penviron_count,penviron_buf_size){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(13,0,1,penviron_count,penviron_buf_size);penviron_count=bigintToI53Checked(penviron_count);penviron_buf_size=bigintToI53Checked(penviron_buf_size);var strings=getEnvStrings();(growMemViews(),HEAPU64)[penviron_count/8]=BigInt(strings.length);var bufSize=0;for(var string of strings){bufSize+=lengthBytesUTF8(string)+1}(growMemViews(),HEAPU64)[penviron_buf_size/8]=BigInt(bufSize);return 0}function _fd_close(fd){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(14,0,1,fd);try{var stream=SYSCALLS.getStreamFromFD(fd);FS.close(stream);return 0}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return e.errno}}var doReadv=(stream,iov,iovcnt,offset)=>{var ret=0;for(var i=0;i{var ret=0;for(var i=0;i0){Module["preInit"].shift()()}}}var proxiedFunctionTable=[_proc_exit,exitOnMainThread,pthreadCreateProxied,___syscall_fcntl64,___syscall_fstat64,___syscall_ioctl,___syscall_lstat64,___syscall_newfstatat,___syscall_openat,___syscall_stat64,__mmap_js,__munmap_js,_environ_get,_environ_sizes_get,_fd_close,_fd_read,_fd_seek,_fd_write];var ___getTypeName,__embind_initialize_bindings,_free,_pthread_self,_malloc,__emscripten_tls_init,_emscripten_builtin_memalign,__emscripten_thread_init,__emscripten_thread_crashed,__emscripten_run_js_on_main_thread,__emscripten_thread_free_data,__emscripten_thread_exit,__emscripten_check_mailbox,_setThrew,_emscripten_stack_set_limits,__emscripten_stack_restore,__emscripten_stack_alloc,_emscripten_stack_get_current,__indirect_function_table,wasmTable;function 
assignWasmExports(wasmExports){___getTypeName=wasmExports["na"];__embind_initialize_bindings=wasmExports["oa"];_free=wasmExports["pa"];_pthread_self=wasmExports["ra"];_malloc=wasmExports["sa"];__emscripten_tls_init=wasmExports["ta"];_emscripten_builtin_memalign=wasmExports["ua"];__emscripten_thread_init=wasmExports["va"];__emscripten_thread_crashed=wasmExports["wa"];__emscripten_run_js_on_main_thread=wasmExports["xa"];__emscripten_thread_free_data=wasmExports["ya"];__emscripten_thread_exit=wasmExports["za"];__emscripten_check_mailbox=wasmExports["Aa"];_setThrew=wasmExports["Ba"];_emscripten_stack_set_limits=wasmExports["Ca"];__emscripten_stack_restore=wasmExports["Da"];__emscripten_stack_alloc=wasmExports["Ea"];_emscripten_stack_get_current=wasmExports["Fa"];__indirect_function_table=wasmTable=wasmExports["qa"]}var wasmImports;function assignWasmImports(){wasmImports={y:___cxa_throw,la:___pthread_create_js,G:___syscall_fcntl64,ka:___syscall_fstat64,ja:___syscall_ioctl,ia:___syscall_newfstatat,F:___syscall_openat,ca:__abort_js,u:__embind_finalize_value_object,x:__embind_register_bigint,ba:__embind_register_bool,w:__embind_register_class,v:__embind_register_class_constructor,b:__embind_register_class_function,e:__embind_register_constant,aa:__embind_register_emval,m:__embind_register_enum,c:__embind_register_enum_value,D:__embind_register_float,g:__embind_register_function,q:__embind_register_integer,j:__embind_register_memory_view,$:__embind_register_std_string,z:__embind_register_std_wstring,t:__embind_register_value_object,d:__embind_register_value_object_field,_:__embind_register_void,Z:__emscripten_init_main_thread_js,Y:__emscripten_notify_mailbox_postmessage,X:__emscripten_receive_on_main_thread_js,C:__emscripten_thread_cleanup,W:__emscripten_thread_mailbox_await,V:__emscripten_thread_set_strongref,U:__emscripten_throw_longjmp,i:__emval_create_invoker,f:__emval_decref,p:__emval_get_property,l:__emval_incref,k:__emval_invoke,r:__emval_invoke_i64,o:__emval_new_cst
ring,h:__emval_run_destructors,T:__mmap_js,S:__munmap_js,R:__tzset_js,ha:_clock_time_get,B:_emscripten_check_blocking_allowed,Q:_emscripten_date_now,P:_emscripten_exit_with_live_runtime,O:_emscripten_get_heap_max,n:_emscripten_get_now,N:_emscripten_has_threading_support,M:_emscripten_num_logical_cores,L:_emscripten_resize_heap,ga:_environ_get,fa:_environ_sizes_get,K:_exit,A:_fd_close,ea:_fd_read,da:_fd_seek,E:_fd_write,J:invoke_ij,s:invoke_vj,I:invoke_vji,H:invoke_vjji,a:wasmMemory}}function invoke_vj(index,a1){var sp=stackSave();try{getWasmTableEntry(Number(index))(a1)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_vjji(index,a1,a2,a3){var sp=stackSave();try{getWasmTableEntry(Number(index))(a1,a2,a3)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_ij(index,a1){var sp=stackSave();try{return getWasmTableEntry(Number(index))(a1)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_vji(index,a1,a2){var sp=stackSave();try{getWasmTableEntry(Number(index))(a1,a2)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function applySignatureConversions(wasmExports){wasmExports=Object.assign({},wasmExports);var makeWrapper_pp=f=>a0=>Number(f(BigInt(a0)));var makeWrapper__p=f=>a0=>f(BigInt(a0));var makeWrapper_p=f=>()=>Number(f());var makeWrapper_ppp=f=>(a0,a1)=>Number(f(BigInt(a0),BigInt(a1)));var makeWrapper__p_____=f=>(a0,a1,a2,a3,a4,a5)=>f(BigInt(a0),a1,a2,a3,a4,a5);var makeWrapper___p_p_=f=>(a0,a1,a2,a3,a4)=>f(a0,BigInt(a1),a2,BigInt(a3),a4);var 
makeWrapper__pp=f=>(a0,a1)=>f(BigInt(a0),BigInt(a1));wasmExports["na"]=makeWrapper_pp(wasmExports["na"]);wasmExports["pa"]=makeWrapper__p(wasmExports["pa"]);wasmExports["ra"]=makeWrapper_p(wasmExports["ra"]);wasmExports["sa"]=makeWrapper_pp(wasmExports["sa"]);wasmExports["ua"]=makeWrapper_ppp(wasmExports["ua"]);wasmExports["va"]=makeWrapper__p_____(wasmExports["va"]);wasmExports["xa"]=makeWrapper___p_p_(wasmExports["xa"]);wasmExports["ya"]=makeWrapper__p(wasmExports["ya"]);wasmExports["za"]=makeWrapper__p(wasmExports["za"]);wasmExports["Ba"]=makeWrapper__p(wasmExports["Ba"]);wasmExports["Ca"]=makeWrapper__pp(wasmExports["Ca"]);wasmExports["Da"]=makeWrapper__p(wasmExports["Da"]);wasmExports["Ea"]=makeWrapper_pp(wasmExports["Ea"]);wasmExports["Fa"]=makeWrapper_p(wasmExports["Fa"]);return wasmExports}function run(){if(runDependencies>0){dependenciesFulfilled=run;return}if(ENVIRONMENT_IS_PTHREAD){readyPromiseResolve?.(Module);initRuntime();return}preRun();if(runDependencies>0){dependenciesFulfilled=run;return}function doRun(){Module["calledRun"]=true;if(ABORT)return;initRuntime();readyPromiseResolve?.(Module);Module["onRuntimeInitialized"]?.();postRun()}if(Module["setStatus"]){Module["setStatus"]("Running...");setTimeout(()=>{setTimeout(()=>Module["setStatus"](""),1);doRun()},1)}else{doRun()}}var wasmExports;if(!ENVIRONMENT_IS_PTHREAD){wasmExports=await (createWasm());run()}if(runtimeInitialized){moduleRtn=Module}else{moduleRtn=new Promise((resolve,reject)=>{readyPromiseResolve=resolve;readyPromiseReject=reject})} +;return moduleRtn}})();if(typeof exports==="object"&&typeof module==="object"){module.exports=BASIS;module.exports.default=BASIS}else if(typeof define==="function"&&define["amd"])define([],()=>BASIS);var isPthread=globalThis.self?.name?.startsWith("em-pthread");isPthread&&BASIS(); diff --git a/external/basis_universal/webgl/encoder/build/basis_encoder_threads_wasm64.wasm b/external/basis_universal/webgl/encoder/build/basis_encoder_threads_wasm64.wasm new 
file mode 100644 index 0000000000..ebe8bbd04e Binary files /dev/null and b/external/basis_universal/webgl/encoder/build/basis_encoder_threads_wasm64.wasm differ diff --git a/external/basis_universal/webgl/encoder/build_notes.txt b/external/basis_universal/webgl/encoder/build_notes.txt new file mode 100644 index 0000000000..2db46f941d --- /dev/null +++ b/external/basis_universal/webgl/encoder/build_notes.txt @@ -0,0 +1,26 @@ +# Prereq: activate emsdk first (so emcmake/em++ are on PATH) +# Linux/macOS: +source /path/to/emsdk/emsdk_env.sh +# Windows PowerShell: +# & "C:\path\to\emsdk\emsdk_env.ps1" + +# ===== Release (fast; same behavior as your original file) ===== +emcmake cmake -S . -B build-release -DCMAKE_BUILD_TYPE=Release +cmake --build build-release -j + +# ===== Debug (symbols + assertions) ===== +emcmake cmake -S . -B build-debug -DCMAKE_BUILD_TYPE=Debug +cmake --build build-debug -j + +# ===== SAN (ASan + UBSan; great for catching bugs) ===== +emcmake cmake -S . -B build-san -DCMAKE_BUILD_TYPE=SAN +cmake --build build-san -j + +# Build a single target (optional) instead of all three: +cmake --build build-release -j --target basis_encoder.js +cmake --build build-release -j --target basis_encoder_threads.js +cmake --build build-release -j --target basis_encoder_threads_wasm64.js + +# Toggle Zstd (OFF = smaller binary, no KTX2 Zstd compression) +emcmake cmake -S . -B build-release -DCMAKE_BUILD_TYPE=Release -DKTX2_ZSTANDARD=OFF +cmake --build build-release -j diff --git a/external/basis_universal/webgl/index.html b/external/basis_universal/webgl/index.html index d469dcd707..f998dfbb20 100644 --- a/external/basis_universal/webgl/index.html +++ b/external/basis_universal/webgl/index.html @@ -6,12 +6,12 @@

Basis Universal LDR/HDR WebGL demos

- Example web pages using the transcoder (compiled to WASM) to render - .KTX2/.basis textures in WebGL, and the encoder to encode .KTX2 files. + Example web pages using the transcoder library, compiled to WASM, to render + .KTX2/.basis textures in WebGL, and the compressor library to encode .KTX2 files.

diff --git a/external/basis_universal/webgl/ktx2_encode_test/assets/license.txt b/external/basis_universal/webgl/ktx2_encode_test/assets/license.txt new file mode 100644 index 0000000000..e06b8ffc57 --- /dev/null +++ b/external/basis_universal/webgl/ktx2_encode_test/assets/license.txt @@ -0,0 +1 @@ +The test images in this directory are not required to build or use the Basis Universal codec. They are not copyrighted or owned by Binomial LLC. diff --git a/external/basis_universal/webgl/ktx2_encode_test/index.html b/external/basis_universal/webgl/ktx2_encode_test/index.html index 8f98e924fc..d185db6114 100644 --- a/external/basis_universal/webgl/ktx2_encode_test/index.html +++ b/external/basis_universal/webgl/ktx2_encode_test/index.html @@ -1,1871 +1,3058 @@ - + + + - - -
-
- Basis Universal Multithreaded .KTX2 Supercompressed GPU Texture Encoding/Transcoding Testbed v0.67 -
- -
This demo uses the Basis Universal C++ transcoder (compiled to WebAssembly using Emscripten) to transcode a .ktx2 file to FORMAT -
It also supports encoding .PNG, .EXR or .HDR files to LDR or HDR .KTX2 files. -
Thanks to Evan Parker for providing webgl-texture-utils and this test bed. Go back. -
Enable your browser debug console (F12 on Chrome/Firefox) to see debug output. -
Note: The largest image resolution that can be compressed in the browser with -
this library is limited to around 6 megapixels due to 32-bit WASM memory -
constraints. Larger images risk running out of memory. - -

-
- Supported WebGL formats: -
- -
-
- Test -
- -
- -
- -
-
- Use Multithreading (if available) - -
Additional Worker Threads (Max 18): - -

- -
- .ktx2 file: - - -
- -
- .png/.jpg/.exr/.hdr file: - - -
-
- - -
- -
-

Drag and drop a PNG/JPG/EXR/HDR file here, or click to select a file.

- -
-
+ { + let astcHDR6x6Slider = document.getElementById('astc-hdr6x6-comp-level-slider'); + let compLevelValueDisplay = document.getElementById('astc-hdr6x6-comp-level-value'); + astcHDR6x6Slider.oninput = function() + { + compLevelValueDisplay.textContent = this.value; + } + } - + { + let uastcLDRSlider = document.getElementById('uastc-ldr-quality-slider'); + let qualityLDRValueDisplay = document.getElementById('uastc-ldr-quality-value'); + uastcLDRSlider.oninput = function() + { + qualityLDRValueDisplay.textContent = this.value; + } + } -
+ { + let rdoSlider = document.getElementById('rdo-quality-slider'); + let rdoValueDisplay = document.getElementById('rdo-quality-value'); + rdoSlider.oninput = function() + { + rdoValueDisplay.textContent = parseFloat(this.value).toFixed(1); + } + } - Visualization/Display Options: + { + let etc1SQualitySlider = document.getElementById('EncodeQuality'); + let etc1SQualitySliderValue = document.getElementById('encode-quality-value'); + etc1SQualitySlider.oninput = function() + { + etc1SQualitySliderValue.textContent = parseFloat(this.value).toFixed(0); + } + } -
- Disable ETC1S->BC7 Transcoder's Chroma Artifact Filtering: - + { + let xuastcLDREffortSlider = document.getElementById('xuastc_ldr_effort_level_slider'); + let xuastcLDREffortValue = document.getElementById('xuastc_ldr_effort_level_value'); + xuastcLDREffortSlider.oninput = function() + { + xuastcLDREffortValue.textContent = this.value; + } + } -
-
+ { + let xuastcLDRDCTSlider = document.getElementById('xuastc_ldr_dct_quality_slider'); + let xuastcLDRDCTValue = document.getElementById('xuastc_ldr_dct_quality_value'); + xuastcLDRDCTSlider.oninput = function() + { + xuastcLDRDCTValue.textContent = this.value; + } + } + + { + document.getElementById("unified-effort-slider").oninput = function() + { + document.getElementById("unified-effort-value").textContent = this.value; + } + + document.getElementById("unified-quality-slider").oninput = function() + { + document.getElementById("unified-quality-value").textContent = this.value; + } + } + + runLoadFile(); + } - Higher quality ASTC 6x6 HDR->BC6H transcoding: - + function updateErrorLine(message) + { + const errorLine = document.getElementById('error-line'); + errorLine.textContent = message; + errorLine.style.color = message.trim() ? 'red' : ''; + } -

- - - + -
-
- Disabled + -
+ - - - 1 + -
- -
+
+
+ +
+
+ Basis Universal .KTX2 Supercompressed GPU Texture Encoding/Transcoding Testbed v2.15 +
+ +
This simple demo uses the Basis Universal C++ transcoder (compiled to WebAssembly using Emscripten) to transcode a .ktx2 file to: +
FORMAT +
+
The viewer is implemented in WebGL and renders a single textured quad. It also supports encoding .PNG, .JPG, .EXR or .HDR files to LDR or HDR .KTX2 files. + Thanks to Evan Parker for providing webgl-texture-utils and this test bed. Go back. + Notes: Enable your browser debug console (F12 on Chrome/Firefox) to see debug output. + The largest image resolution that can be compressed in the browser with + this library is limited to either 12 megapixels or 4 megapixels (depending on format and WASM64/WASM32) to avoid running out of WASM memory. + +

+
+ Supported WebGL formats: +
+ +
+
+ Test +
+ +
+ +
+ +
+
+
+ Use Multithreading (if available) + +
Additional Worker Threads (Max 18): + +

+ +
+ .ktx2 file: + + +
+ +
+ .png/.jpg/.exr/.hdr file: + + +
+
+ + +
+ +
+

Drag and drop a .KTX2 or image file here, or click to select a file.

+ +
+
+ + + +
+ + KTX2 Texture Format to Encode: + + +
+
+ + Primary compression quality/effort options: + +
+ Use unified quality/effort options (overrides below low-level options): + + +
+ + + 2 + +

+ + + + 80 + +
+
+ + Display/Visualization Options: +
+ + + + + +
+ +
+ Disabled + +
+ + + + 1.0000 + +
+ + + + 0 + +
+ + + + 0 + +
+ + + + 0 + + +
+ + + + 1x + +
+ + Bilinear Filtering: + + +
+ +
+ + Transcoder Options (Decode Flags): + +
+ ETC1S: No BC7 Chroma Artifact Filtering (faster transcoding): + + +
+ XUASTC/ASTC LDR: Disable deblocking filtering (faster): + -
-
+
+ XUASTC/ASTC LDR: Stronger deblocking filtering: + - KTX2 Texture Format to Encode: - -
+
+ XUASTC/ASTC LDR: Use deblocking on all block sizes (slower): + -
+
+ XUASTC LDR 4x4/6x6/8x6: No direct BC7 transcoding (slower/higher quality): + - ETC1S LDR Options: -
- ETC1S Quality: - - 255 +
-
- - - 2 + Prefer higher quality transcoding when supported (slower): + -
+
+ +
- UASTC LDR 4x4 Options: -
- - - 1 + Low-level ETC1S LDR Options: +
+ ETC1S Quality: + + 255 -
+
+ + + 1 - UASTC LDR RDO: - +
- - - 1.0 + Low-level UASTC LDR 4x4 Options: -
+
+ UASTC LDR RDO: + - UASTC HDR 4x4 Options: + + + 1.0 -
- - - 0 -
+
+ + + 1 -
+
- ASTC HDR 6x6 Options: -
+
- - - 0 -
+ Low-level UASTC HDR 4x4 Options: - RDO Quality (Lambda, 0-50k, try 0-5k, higher=smaller): - -
+
+ + + 0 +
- REC 2020 Colorspace: - +
-
+ Low-level ASTC/UASTC HDR 6x6 Options: +
- LDR->HDR Upconversion Options: -
- Convert LDR images to linear light: - + + + 0 +
-
- LDR to HDR Upconversion Nit Multiplier: - - -
+ RDO Quality (Lambda, 0-50k, try 0-5k, higher=smaller): + +
-
+ REC 2020 Colorspace: + - Other Options: +
-
- Use sRGB/perceptual metrics: - -
- Generate mipmaps: - + LDR->HDR Upconversion Options: +
+ Convert LDR images to linear light: + -
-
- Debug Output (See Dev Console): - - Compute Stats: - +
+ LDR to HDR Upconversion Nit Multiplier: + + +
+ +
+ Low-level XUASTC/ASTC LDR 4x4-12x12 Options: +
+ + XUASTC LDR Syntax: + + +
+ + + + 2 +
+ + + + 80 + +
+ Bounded/windowed RDO lossy supercompression: + + +
+ No RGB dual plane (lower quality, faster encoding/BC7 transcoding): + + +
+ No 2-3 subset usage (lower quality, faster encoding/BC7 transcoding): + -
+

+ ASTC/XUASTC LDR Bounded/Windowed RDO Params: + +
+ Opaque: +
+ +
+ +
+ - Log Output: +
+ +
Alpha:
+ +
+ +
+ +
-
+
+ Other Options: + +
+ Image is sRGB/use sRGB perceptual metrics: + + +
+ Y flip source image: + + +
+
+ Debug Output (See Dev Console): + + Compute Stats (slower encoding): + + +
+ + Mipmap Generation Options: +
+ + Generate mipmap levels: + + +
+ + + + +
+ + + + +
- + + + +
+ + Mip Renormalize: + + + Mip Wrapping: + - +
+ + Log Output: +
+
+ + + +
+ +
+ +
+ +
+ diff --git a/external/basis_universal/webgl/ktx2_encode_test/renderer.js b/external/basis_universal/webgl/ktx2_encode_test/renderer.js index 8de7e58cf3..cfd1b005ff 100644 --- a/external/basis_universal/webgl/ktx2_encode_test/renderer.js +++ b/external/basis_universal/webgl/ktx2_encode_test/renderer.js @@ -10,7 +10,7 @@ var Renderer = function (gl) { * @private */ this.gl_ = gl; - + /** * The WebGLProgram. * @type {WebGLProgram} @@ -53,11 +53,19 @@ var Renderer = function (gl) { */ this.quadVertexBuffer_ = gl.createBuffer(); gl.bindBuffer(gl.ARRAY_BUFFER, this.quadVertexBuffer_); + var vertices = new Float32Array( [-1.0, -1.0, 0.0, 1.0, +1.0, -1.0, 1.0, 1.0, -1.0, +1.0, 0.0, 0.0, 1.0, +1.0, 1.0, 0.0]); + +// var vertices = new Float32Array( +// [-1.0, -1.0, 0.0, .5, +// +1.0, -1.0, .5, .5, +// -1.0, +1.0, 0.0, 0.0, +// 1.0, +1.0, .5, 0.0]); + gl.bufferData(gl.ARRAY_BUFFER, vertices, gl.STATIC_DRAW); @@ -90,7 +98,7 @@ var Renderer = function (gl) { Renderer.prototype.finishInit = function () { - this.draw(); + //this.draw(); }; @@ -156,12 +164,12 @@ Renderer.prototype.createHalfRGBATexture = function (data, width, height, format return tex; }; -// WebGL requires each row of rgb565Data to be aligned on a 4-byte boundary. +// WebGL requires each row of rgb565Data to be aligned on a 4-byte boundary. 
Renderer.prototype.createRgb565Texture = function (rgb565Data, width, height) { var gl = this.gl_; var tex = gl.createTexture(); gl.bindTexture(gl.TEXTURE_2D, tex); - + gl.texImage2D( gl.TEXTURE_2D, 0, @@ -203,7 +211,7 @@ Renderer.prototype.createRgbaTexture = function (rgbaData, width, height) { }; -Renderer.prototype.drawTexture = function (texture, width, height, mode, scale, linearToSRGBFlag) { +Renderer.prototype.drawTexture = function (texture, width, height, mode, scale, linearToSRGBFlag, useLinearFiltering) { var gl = this.gl_; // draw scene gl.clearColor(0, 0, 0, 1); @@ -213,6 +221,11 @@ Renderer.prototype.drawTexture = function (texture, width, height, mode, scale, gl.activeTexture(gl.TEXTURE0); gl.bindTexture(gl.TEXTURE_2D, texture); + + // Point vs. bilinear sampling (no mipmaps involved here) + gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MAG_FILTER, useLinearFiltering ? gl.LINEAR : gl.NEAREST); + gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, useLinearFiltering ? gl.LINEAR : gl.NEAREST); + gl.uniform1i(this.uniformLocations_.texSampler, 0); var x = 0.0; @@ -250,7 +263,7 @@ Renderer.prototype.compileShader_ = function (shaderSource, type) { var shader = gl.createShader(type); gl.shaderSource(shader, shaderSource); gl.compileShader(shader); - + // Check for errors const compiled = gl.getShaderParameter(shader, gl.COMPILE_STATUS); if (!compiled) { @@ -259,7 +272,7 @@ Renderer.prototype.compileShader_ = function (shaderSource, type) { gl.deleteShader(shader); // Cleanup shader object throw new Error('Shader compilation failed'); } - + return shader; }; @@ -310,3 +323,4 @@ Renderer.fragmentShaderSource_ = [ ' gl_FragColor = c;', '}' ].join('\n'); + diff --git a/external/basis_universal/webgl/texture_test/assets/base.basis b/external/basis_universal/webgl/texture_test/assets/base.basis new file mode 100644 index 0000000000..e7e102e74f Binary files /dev/null and b/external/basis_universal/webgl/texture_test/assets/base.basis differ diff --git 
a/external/basis_universal/webgl/texture_test/assets/license.txt b/external/basis_universal/webgl/texture_test/assets/license.txt new file mode 100644 index 0000000000..e06b8ffc57 --- /dev/null +++ b/external/basis_universal/webgl/texture_test/assets/license.txt @@ -0,0 +1 @@ +The test images in this directory are not required to build or use the Basis Universal codec. They are not copyrighted or owned by Binomial LLC. diff --git a/external/basis_universal/webgl/texture_test/index.html b/external/basis_universal/webgl/texture_test/index.html index 4657951c82..ef88f8c5da 100644 --- a/external/basis_universal/webgl/texture_test/index.html +++ b/external/basis_universal/webgl/texture_test/index.html @@ -1,6 +1,7 @@ diff --git a/external/basis_universal/webgl/transcoder/README.md b/external/basis_universal/webgl/transcoder/README.md index edb5c3098c..1553a3a8df 100644 --- a/external/basis_universal/webgl/transcoder/README.md +++ b/external/basis_universal/webgl/transcoder/README.md @@ -1,3 +1,5 @@ +`basis_wrappers.cpp` contains our JavaScript API (implemented via [emscripten](https://emscripten.org/) bindings), which is a thin layer above our encoder's and transcoder's C++ APIs. As of Basis Universal v2.0 it supports optional WASM multithreading and WASM64. + Prebuilt versions of `basis_transcoder.js` and `basis_transcoder.wasm` are included in the `build/` folder, and are sufficient for local demos. To build the transcoder yourself, first install emscripten ([tutorial](https://webassembly.org/getting-started/developers-guide/)) and cmake ([download](https://cmake.org/download/)). 
Then run: ```shell diff --git a/external/basis_universal/webgl/transcoder/basis_wrappers.cpp b/external/basis_universal/webgl/transcoder/basis_wrappers.cpp index 091dd799ad..99bd14e58b 100644 --- a/external/basis_universal/webgl/transcoder/basis_wrappers.cpp +++ b/external/basis_universal/webgl/transcoder/basis_wrappers.cpp @@ -11,11 +11,11 @@ // getFileDesc(), getImageDesc(), getImageLevelDesc(): These functions return low-level information about where compressed data is located for each image in a .basis file. // This is useful for when you want to extract the compressed data and embed it into your own file formats, for container independent transcoding. // -// 2. Encoding (optional): See class basis_encoder. Encodes LDR .PNG or 32bpp images, or HDR half-float/float or .EXR/.HDR images to .basis/.ktx2 files in memory. +// 2. Encoding (optional): See class basis_encoder. Encodes LDR .PNG or 32bpp images, or HDR half-float/float or .EXR/.HDR images to .basis/.ktx2 files in memory. // Must compile with BASISU_SUPPORT_ENCODING=1. // Requires basisu_transcoder.cpp as well as all the .cpp files in the "encoder" directory. Results in a larger WebAssembly executable. // -// 3. Low level transcoding/container independent transcoding: See class lowlevel_etc1s_image_transcoder or function transcodeUASTCImage(). +// 3. Low level transcoding/container independent transcoding: See class lowlevel_etc1s_image_transcoder or function transcodeUASTCImage(). // For transcoding raw compressed ETC1S/UASTC LDR/UASTC HDR texture data from non-.basis files (say from KTX2) to GPU texture data. // // 4. Helpers, transcoder texture format information: See functions getBytesPerBlockOrPixel(), formatHasAlpha(), etc. @@ -27,10 +27,20 @@ // Enable debug printf()'s in this module. 
#ifndef BASISU_DEBUG_PRINTF +// DO NOT CHECK IN #define BASISU_DEBUG_PRINTF 0 #endif -#define BASISU_ENCODER_MAX_SOURCE_IMAGE_PIXELS (6291456) +// This check can be removed, but you risk crashing on larger images in 32-bit WASM. Also, ETC1S/UASTC LDR 4x4 encoding uses way less memory than UASTC HDR 6x6 encoding, so you could boost this in those cases. +// 32-bit WASM limitation (TODO: remove for 64-bit), to prevent OOM crashes during HDR encoding in particular. +// TODO: Even WASM64 in Chrome has limits which seem too low for us. For now, just impose this limit. +#ifdef __wasm64__ + #define BASISU_ENCODER_MAX_SOURCE_IMAGE_PIXELS (1024*1024*12) + #define BASISU_ENCODER_MAX_SOURCE_IMAGE_PIXELS_HIGHER_LIMIT (1024*1024*12) +#else + #define BASISU_ENCODER_MAX_SOURCE_IMAGE_PIXELS (1024*1024*4) + #define BASISU_ENCODER_MAX_SOURCE_IMAGE_PIXELS_HIGHER_LIMIT (1024*1024*12) +#endif #include "basisu_transcoder.h" #include @@ -60,7 +70,14 @@ void basis_init() std::lock_guard lock(s_init_mutex); #if BASISU_DEBUG_PRINTF - printf("basis_init()\n"); + printf("basis_init() " BASISD_VERSION_STRING " "); +#ifdef __wasm64__ + printf("WASM64 "); +#endif +#ifdef WASM_THREADS_ENABLED + printf("PTHREADS"); +#endif + printf("\n"); #endif if (g_basis_initialized_flag) @@ -75,17 +92,24 @@ void basis_init() g_basis_initialized_flag = true; } -static void copy_from_jsbuffer(const emscripten::val& srcBuffer, basisu::vector& dstVec) +#if 0 +// Old copy methods, used in previous builds for plain WASM (not WASM64). + +// false if resize() fails +static bool copy_from_jsbuffer(const emscripten::val& srcBuffer, basisu::vector& dstVec) { unsigned int length = srcBuffer["length"].as(); - dstVec.resize(length); + if (!dstVec.try_resize(length)) + return false; emscripten::val memory = emscripten::val::module_property("HEAP8")["buffer"]; emscripten::val memoryView = srcBuffer["constructor"].new_(memory, reinterpret_cast(dstVec.data()), length); // Copy the bytes from the Javascript buffer. 
memoryView.call("set", srcBuffer); + + return true; } static bool copy_to_jsbuffer(const emscripten::val& dstBuffer, const basisu::vector& srcVec) @@ -118,6 +142,36 @@ static bool copy_to_jsbuffer(const emscripten::val& dstBuffer, const basisu::vec return true; } +#else +// New methods, compatible with WASM64. +static bool copy_from_jsbuffer(const emscripten::val& srcBuffer, basisu::vector& dstVec) +{ + const size_t length = srcBuffer["length"].as(); + if (!dstVec.try_resize(length)) + return false; + + // View over dstVec in WASM memory; copy from JS buffer into it. + emscripten::val dstView = emscripten::val(emscripten::typed_memory_view(length, dstVec.data())); + dstView.call("set", srcBuffer); + return true; +} + +// WASM -> JS +static bool copy_to_jsbuffer(const emscripten::val& dstBuffer, const basisu::vector& srcVec) +{ + if (srcVec.empty()) + return false; + + const size_t dstLen = dstBuffer["byteLength"].as(); + if (srcVec.size() > dstLen) + return false; + + // View over srcVec; copy into provided JS TypedArray. + emscripten::val srcView = emscripten::val(emscripten::typed_memory_view(srcVec.size(), const_cast(srcVec.data()))); + dstBuffer.call("set", srcView); + return true; +} +#endif const uint32_t BASIS_MAGIC = 0xD4ADBEA1; const uint32_t KTX2_MAGIC = 0xD4ADBEF2; @@ -133,7 +187,7 @@ struct basis_file_desc uint32_t m_userdata0; uint32_t m_userdata1; - // Type of texture (cETC1S, cUASTC4x4, cUASTC_HDR_4x4, etc.) + // Type of texture (cETC1S, cUASTC_LDR_4x4, cUASTC_HDR_4x4, etc.) 
uint32_t m_tex_format; // basis_tex_format bool m_y_flipped; @@ -190,10 +244,7 @@ struct basis_file basisu::vector m_file; basis_file(const emscripten::val& jsBuffer) - : m_file([&]() { - size_t byteLength = jsBuffer["byteLength"].as(); - return basisu::vector(byteLength); - }()) + : m_file(jsBuffer["byteLength"].as()) { if (!g_basis_initialized_flag) { @@ -204,11 +255,27 @@ struct basis_file return; } +#if 0 unsigned int length = jsBuffer["length"].as(); emscripten::val memory = emscripten::val::module_property("HEAP8")["buffer"]; emscripten::val memoryView = jsBuffer["constructor"].new_(memory, reinterpret_cast(m_file.data()), length); memoryView.call("set", jsBuffer); +#else + const size_t n = jsBuffer["byteLength"].as(); + if (!n) + { +#if BASISU_DEBUG_PRINTF + printf("basis_file::basis_file: zero size file\n"); +#endif + m_file.clear(); + return; + } + + emscripten::val dstView = emscripten::val(emscripten::typed_memory_view(n, m_file.data())); + dstView.call("set", jsBuffer); +#endif + if (!m_transcoder.validate_header(m_file.data(), m_file.size())) { #if BASISU_DEBUG_PRINTF @@ -291,7 +358,7 @@ struct basis_file return orig_height; } - // Returns a basis_tex_format (cETC1S, cUASTC, cUASTC_HDR_4x4, etc.) + // Returns a basis_tex_format (cETC1S, cUASTC_LDR_4x4, cUASTC_HDR_4x4, etc. 
- see basiu_file_headers.h) uint32_t getBasisTexFormat() { assert(m_magic == BASIS_MAGIC); @@ -302,7 +369,7 @@ struct basis_file return (uint32_t)fmt; } - // Currently 4 or 6 + // Returns 4-12 uint32_t getBlockWidth() const { assert(m_magic == BASIS_MAGIC); @@ -313,7 +380,7 @@ struct basis_file return basis_tex_format_get_block_width(fmt); } - // Currently 4 or 6 + // Returns 4-12 uint32_t getBlockHeight() { assert(m_magic == BASIS_MAGIC); @@ -423,12 +490,13 @@ struct basis_file return result; } + // format is transcoder_texture_format uint32_t getImageTranscodedSizeInBytes(uint32_t image_index, uint32_t level_index, uint32_t format) { assert(m_magic == BASIS_MAGIC); if (m_magic != BASIS_MAGIC) return 0; - + if (format >= (int)transcoder_texture_format::cTFTotalTextureFormats) { assert(0); @@ -436,7 +504,7 @@ struct basis_file } const transcoder_texture_format tex_format = static_cast(format); - + uint32_t orig_width, orig_height, total_src_blocks; if (!m_transcoder.get_image_level_desc(m_file.data(), m_file.size(), image_index, level_index, orig_width, orig_height, total_src_blocks)) { @@ -454,7 +522,7 @@ struct basis_file if (m_magic != BASIS_MAGIC) return false; - return m_transcoder.get_basis_tex_format(m_file.data(), m_file.size()) == basis_tex_format::cUASTC4x4; + return m_transcoder.get_basis_tex_format(m_file.data(), m_file.size()) == basis_tex_format::cUASTC_LDR_4x4; } bool isETC1S() @@ -474,9 +542,9 @@ struct basis_file return false; basis_tex_format fmt = m_transcoder.get_basis_tex_format(m_file.data(), m_file.size()); - return (fmt == basis_tex_format::cETC1S) || (fmt == basis_tex_format::cUASTC4x4); + return (fmt == basis_tex_format::cETC1S) || (fmt == basis_tex_format::cUASTC_LDR_4x4); } - + // True if the texture is UASTC HDR 4x4 or ASTC HDR 6x6. // In this case, it can only be transcoded to BC6H, ASTC HDR (of the same block dimensions, currently 4x4 or 6x6), RGB9E5 or half-float RGB/RGBA images. 
bool isHDR() @@ -488,7 +556,7 @@ struct basis_file basis_tex_format fmt = m_transcoder.get_basis_tex_format(m_file.data(), m_file.size()); return basis_tex_format_is_hdr(fmt); } - + bool isHDR4x4() { assert(m_magic == BASIS_MAGIC); @@ -506,9 +574,31 @@ struct basis_file return false; basis_tex_format fmt = m_transcoder.get_basis_tex_format(m_file.data(), m_file.size()); - return (fmt == basis_tex_format::cASTC_HDR_6x6) || (fmt == basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE); + return (fmt == basis_tex_format::cASTC_HDR_6x6) || (fmt == basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE); } + // True for plain ASTC LDR 4x4-12x12 + bool isASTC_LDR() + { + assert(m_magic == BASIS_MAGIC); + if (m_magic != BASIS_MAGIC) + return false; + + basis_tex_format fmt = m_transcoder.get_basis_tex_format(m_file.data(), m_file.size()); + return basis_tex_format_is_astc_ldr(fmt); + } + + // True for XUASTC LDR 4x4-12x12 + bool isXUASTC_LDR() + { + assert(m_magic == BASIS_MAGIC); + if (m_magic != BASIS_MAGIC) + return false; + + basis_tex_format fmt = m_transcoder.get_basis_tex_format(m_file.data(), m_file.size()); + return basis_tex_format_is_xuastc_ldr(fmt); + } + uint32_t startTranscoding() { assert(m_magic == BASIS_MAGIC); @@ -519,6 +609,7 @@ struct basis_file } // Here for backwards compat, prefer transcodeImageWithFlags(). + // format is transcoder_texture_format uint32_t transcodeImage(const emscripten::val& dst, uint32_t image_index, uint32_t level_index, uint32_t format, uint32_t unused, uint32_t get_alpha_for_opaque_formats) { (void)unused; @@ -531,7 +622,7 @@ struct basis_file return 0; const transcoder_texture_format transcoder_format = static_cast(format); - + uint32_t orig_width, orig_height, total_src_blocks; if (!m_transcoder.get_image_level_desc(m_file.data(), m_file.size(), image_index, level_index, orig_width, orig_height, total_src_blocks)) return 0; @@ -539,10 +630,11 @@ struct basis_file basisu::vector dst_data; uint32_t flags = get_alpha_for_opaque_formats ? 
cDecodeFlagsTranscodeAlphaDataToOpaqueFormats : 0; - + const uint32_t transcoded_size_in_bytes = getImageTranscodedSizeInBytes(image_index, level_index, format); - dst_data.resize(transcoded_size_in_bytes); + if (!dst_data.try_resize(transcoded_size_in_bytes)) + return 0; uint32_t status; @@ -568,15 +660,25 @@ struct basis_file flags); } +#if 0 emscripten::val memory = emscripten::val::module_property("HEAP8")["buffer"]; emscripten::val memoryView = emscripten::val::global("Uint8Array").new_(memory, reinterpret_cast(dst_data.data()), dst_data.size()); - dst.call("set", memoryView); +#else + if (!dst_data.empty()) + { + const size_t n = dst_data.size(); + emscripten::val srcView = emscripten::val(emscripten::typed_memory_view(n, dst_data.data())); + dst.call("set", srcView); // 'dst' is a JS Uint8Array + } +#endif + return status; } - - // Like transcodeImage(), but with fixed parameters. + + // Like transcodeImage(), but with updated parameters. // For flags, see cDecodeFlagsPVRTCDecodeToNextPow2 etc. 
+ // format is transcoder_texture_format uint32_t transcodeImageWithFlags(const emscripten::val& dst, uint32_t image_index, uint32_t level_index, uint32_t format, uint32_t flags) { assert(m_magic == BASIS_MAGIC); @@ -587,16 +689,17 @@ struct basis_file return 0; const transcoder_texture_format transcoder_format = static_cast(format); - + uint32_t orig_width, orig_height, total_src_blocks; if (!m_transcoder.get_image_level_desc(m_file.data(), m_file.size(), image_index, level_index, orig_width, orig_height, total_src_blocks)) return 0; basisu::vector dst_data; - + const uint32_t transcoded_size_in_bytes = getImageTranscodedSizeInBytes(image_index, level_index, format); - dst_data.resize(transcoded_size_in_bytes); + if (!dst_data.try_resize(transcoded_size_in_bytes)) + return 0; uint32_t status; @@ -622,10 +725,16 @@ struct basis_file flags); } +#if 0 emscripten::val memory = emscripten::val::module_property("HEAP8")["buffer"]; emscripten::val memoryView = emscripten::val::global("Uint8Array").new_(memory, reinterpret_cast(dst_data.data()), dst_data.size()); - dst.call("set", memoryView); +#else + const size_t n = dst_data.size(); + emscripten::val srcView = emscripten::val(emscripten::typed_memory_view(n, dst_data.data())); + dst.call("set", srcView); // 'dst' is a JS Uint8Array +#endif + return status; } }; @@ -658,10 +767,7 @@ struct ktx2_file bool m_is_valid = false; ktx2_file(const emscripten::val& jsBuffer) - : m_file([&]() { - size_t byteLength = jsBuffer["byteLength"].as(); - return basisu::vector(byteLength); - }()) + : m_file(jsBuffer["byteLength"].as()) { if (!g_basis_initialized_flag) { @@ -672,10 +778,16 @@ struct ktx2_file return; } +#if 0 unsigned int length = jsBuffer["length"].as(); emscripten::val memory = emscripten::val::module_property("HEAP8")["buffer"]; emscripten::val memoryView = jsBuffer["constructor"].new_(memory, reinterpret_cast(m_file.data()), length); memoryView.call("set", jsBuffer); +#else + const size_t n = 
jsBuffer["byteLength"].as(); + emscripten::val dstView = emscripten::val(emscripten::typed_memory_view(n, m_file.data())); + dstView.call("set", jsBuffer); +#endif if (!m_transcoder.init(m_file.data(), m_file.size())) { @@ -764,8 +876,8 @@ struct ktx2_file hdr.m_kvd_byte_offset = h.m_kvd_byte_offset; hdr.m_kvd_byte_length = h.m_kvd_byte_length; - hdr.m_sgd_byte_offset = (uint32_t)h.m_sgd_byte_offset.get_uint64(); - hdr.m_sgd_byte_length = (uint32_t)h.m_sgd_byte_length.get_uint64(); + hdr.m_sgd_byte_offset = static_cast(h.m_sgd_byte_offset.get_uint64()); + hdr.m_sgd_byte_length = static_cast(h.m_sgd_byte_length.get_uint64()); return hdr; } @@ -823,6 +935,7 @@ struct ktx2_file return 1; } + // The image's original width, i.e. before being potentially expanded up to blocks. uint32_t getWidth() { assert(m_magic == KTX2_MAGIC); @@ -831,6 +944,7 @@ struct ktx2_file return m_transcoder.get_width(); } + // The image's original height, i.e. before being potentially expanded up to blocks. uint32_t getHeight() { assert(m_magic == KTX2_MAGIC); @@ -839,6 +953,7 @@ struct ktx2_file return m_transcoder.get_height(); } + // 4-12 uint32_t getBlockWidth() { assert(m_magic == KTX2_MAGIC); @@ -847,6 +962,7 @@ struct ktx2_file return m_transcoder.get_block_width(); } + // 4-12 uint32_t getBlockHeight() { assert(m_magic == KTX2_MAGIC); @@ -855,6 +971,7 @@ struct ktx2_file return m_transcoder.get_block_height(); } + // 2D or cubemaps uint32_t getFaces() { assert(m_magic == KTX2_MAGIC); @@ -863,6 +980,7 @@ struct ktx2_file return m_transcoder.get_faces(); } + // Layers for tex arrays uint32_t getLayers() { assert(m_magic == KTX2_MAGIC); @@ -871,6 +989,7 @@ struct ktx2_file return m_transcoder.get_layers(); } + // Mip-map levels uint32_t getLevels() { assert(m_magic == KTX2_MAGIC); @@ -879,7 +998,7 @@ struct ktx2_file return m_transcoder.get_levels(); } - // Returns a basis_tex_format: cETC1S, cUASTC4x4, or cUASTC_HDR_4x4, etc. 
+ // Returns a basis_tex_format: cETC1S, cUASTC_LDR_4x4, or cUASTC_HDR_4x4, etc. - see basisu_file_headers.h uint32_t getBasisTexFormat() { assert(m_magic == KTX2_MAGIC); @@ -922,7 +1041,7 @@ struct ktx2_file return m_transcoder.is_etc1s(); } - // Returns true if the texture is UASTC HDR or ASTC HDR. In this case, it can only be transcoded to BC6H, ASTC HDR (of the same block dimensions), RGB9E5 or half-float RGB/RGBA images. + // Returns true if the texture is UASTC HDR or ASTC HDR. In this case, it can only be transcoded to BC6H, ASTC HDR (of the same block dimensions), RGB9E5 or half-float RGB/RGBA images. bool isHDR() { assert(m_magic == KTX2_MAGIC); @@ -947,6 +1066,22 @@ struct ktx2_file return m_transcoder.is_hdr_6x6(); } + bool isASTC_LDR() + { + assert(m_magic == KTX2_MAGIC); + if (m_magic != KTX2_MAGIC) + return false; + return m_transcoder.is_astc_ldr(); + } + + bool isXUASTC_LDR() + { + assert(m_magic == KTX2_MAGIC); + if (m_magic != KTX2_MAGIC) + return false; + return m_transcoder.is_xuastc_ldr(); + } + bool getHasAlpha() { assert(m_magic == KTX2_MAGIC); @@ -979,6 +1114,14 @@ struct ktx2_file return m_transcoder.get_dfd_transfer_func(); } + bool isSRGB() + { + assert(m_magic == KTX2_MAGIC); + if (m_magic != KTX2_MAGIC) + return 0; + return m_transcoder.is_srgb(); + } + uint32_t getDFDFlags() { assert(m_magic == KTX2_MAGIC); @@ -1020,6 +1163,7 @@ struct ktx2_file return m_transcoder.is_video(); } + // The linear light LDR->HDR upconversion multiplier used (def=100.0 nits) float getLDRHDRUpconversionNitMultiplier() { assert(m_magic == KTX2_MAGIC); @@ -1056,6 +1200,7 @@ struct ktx2_file return info; } + // format is transcoder_texture_format uint32_t getImageTranscodedSizeInBytes(uint32_t level_index, uint32_t layer_index, uint32_t face_index, uint32_t format) { assert(m_magic == KTX2_MAGIC); @@ -1069,7 +1214,7 @@ struct ktx2_file } const transcoder_texture_format tex_format = static_cast(format); - + ktx2_image_level_info info; if 
(!m_transcoder.get_image_level_info(info, level_index, layer_index, face_index)) { @@ -1094,6 +1239,7 @@ struct ktx2_file // Here for backwards compat, prefer transcodeImageWithFlags(). // get_alpha_for_opaque_formats defaults to false // channel0/channel1 default to -1 + // format is transcoder_texture_format uint32_t transcodeImage(const emscripten::val& dst, uint32_t level_index, uint32_t layer_index, uint32_t face_index, uint32_t format, uint32_t get_alpha_for_opaque_formats, int channel0, int channel1) { assert(m_magic == KTX2_MAGIC); @@ -1107,7 +1253,7 @@ struct ktx2_file const uint32_t dst_block_width = basis_get_block_width(transcoder_format); const uint32_t dst_block_height = basis_get_block_height(transcoder_format); - + ktx2_image_level_info info; if (!m_transcoder.get_image_level_info(info, level_index, layer_index, face_index)) return 0; @@ -1120,7 +1266,8 @@ struct ktx2_file const uint32_t transcoded_size_in_bytes = getImageTranscodedSizeInBytes(level_index, layer_index, face_index, format); - dst_data.resize(transcoded_size_in_bytes); + if (!dst_data.try_resize(transcoded_size_in_bytes)) + return 0; uint32_t status; @@ -1151,15 +1298,22 @@ struct ktx2_file nullptr); } +#if 0 emscripten::val memory = emscripten::val::module_property("HEAP8")["buffer"]; emscripten::val memoryView = emscripten::val::global("Uint8Array").new_(memory, reinterpret_cast(dst_data.data()), dst_data.size()); - dst.call("set", memoryView); +#else + const size_t n = dst_data.size(); + emscripten::val srcView = emscripten::val(emscripten::typed_memory_view(n, dst_data.data())); + dst.call("set", srcView); // 'dst' must be a Uint8Array (or compatible TypedArray) +#endif + return status; } - + // like transcodeImage(), but with fixed parameters (includes flags) // For flags, see cDecodeFlagsPVRTCDecodeToNextPow2 etc. 
+ // format is transcoder_texture_format uint32_t transcodeImageWithFlags(const emscripten::val& dst, uint32_t level_index, uint32_t layer_index, uint32_t face_index, uint32_t format, uint32_t flags, int channel0, int channel1) { assert(m_magic == KTX2_MAGIC); @@ -1168,12 +1322,12 @@ struct ktx2_file if (format >= (int)transcoder_texture_format::cTFTotalTextureFormats) return 0; - + const transcoder_texture_format transcoder_format = static_cast(format); const uint32_t dst_block_width = basis_get_block_width(transcoder_format); const uint32_t dst_block_height = basis_get_block_height(transcoder_format); - + ktx2_image_level_info info; if (!m_transcoder.get_image_level_info(info, level_index, layer_index, face_index)) return 0; @@ -1215,10 +1369,16 @@ struct ktx2_file nullptr); } +#if 0 emscripten::val memory = emscripten::val::module_property("HEAP8")["buffer"]; emscripten::val memoryView = emscripten::val::global("Uint8Array").new_(memory, reinterpret_cast(dst_data.data()), dst_data.size()); - dst.call("set", memoryView); +#else + const size_t n = dst_data.size(); + emscripten::val srcView = emscripten::val(emscripten::typed_memory_view(n, dst_data.data())); + dst.call("set", srcView); // dst = JS Uint8Array +#endif + return status; } @@ -1234,10 +1394,19 @@ enum class ldr_image_type cJPGImage = 2 }; +enum xuastc_ldr_syntax +{ + cFullArith = (int)basist::astc_ldr_t::xuastc_ldr_syntax::cFullArith, + cHybridArithZStd = (int)basist::astc_ldr_t::xuastc_ldr_syntax::cHybridArithZStd, + cFullZStd = (int)basist::astc_ldr_t::xuastc_ldr_syntax::cFullZStd, + cTotal = 3 +}; + class basis_encoder { bool m_threading_enabled = false; uint32_t m_num_extra_worker_threads = 0; + float m_last_encode_mip0_rgba_psnr = 0.0f; public: basis_compressor_params m_params; @@ -1252,7 +1421,7 @@ class basis_encoder m_num_extra_worker_threads = num_extra_worker_threads; } - // Only works for LDR inputs. + // Only valid for LDR inputs. 
bool set_slice_source_image(uint32_t slice_index, const emscripten::val& src_image_js_val, uint32_t src_image_width, uint32_t src_image_height, ldr_image_type img_type) { // Resize the source_images array if necessary @@ -1261,7 +1430,8 @@ class basis_encoder // First copy the src image buffer to the heap. basisu::vector src_image_buf; - copy_from_jsbuffer(src_image_js_val, src_image_buf); + if (!copy_from_jsbuffer(src_image_js_val, src_image_buf)) + return false; // Now load the source image. image& src_img = m_params.m_source_images[slice_index]; @@ -1328,14 +1498,15 @@ class basis_encoder hdr_image_type img_type, bool ldr_srgb_to_linear_conversion, float ldr_to_hdr_nit_multiplier) { assert(ldr_to_hdr_nit_multiplier > 0.0f); - + // Resize the source_images_hdr array if necessary if (slice_index >= m_params.m_source_images_hdr.size()) m_params.m_source_images_hdr.resize(slice_index + 1); // First copy the src image buffer to the heap. basisu::vector src_image_buf; - copy_from_jsbuffer(src_image_js_val, src_image_buf); + if (!copy_from_jsbuffer(src_image_js_val, src_image_buf)) + return false; // Now load the source image. imagef& src_img = m_params.m_source_images_hdr[slice_index]; @@ -1345,14 +1516,14 @@ class basis_encoder if ((img_type == hdr_image_type::cHITPNGImage) || (img_type == hdr_image_type::cHITJPGImage)) { - // Because we're loading the image ourselves we need to add these tags so the UI knows how to tone map LDR upconverted outputs. + // Because we're loading the image ourselves we need to add these tags so the UI knows how to tone map LDR upconverted outputs. // Normally basis_compressor adds them when it loads the images itself from source files. 
basist::ktx2_add_key_value(m_params.m_ktx2_key_values, "LDRUpconversionMultiplier", fmt_string("{}", ldr_to_hdr_nit_multiplier)); if (ldr_srgb_to_linear_conversion) basist::ktx2_add_key_value(m_params.m_ktx2_key_values, "LDRUpconversionSRGBToLinear", "1"); } - + return true; } @@ -1370,7 +1541,7 @@ class basis_encoder // We don't use threading for now, but the compressor needs a job pool. uint32_t num_new_threads = 0; bool enable_threading = false; - + #if WASM_THREADS_ENABLED if ((emscripten_has_threading_support()) && (m_threading_enabled) && (m_num_extra_worker_threads)) { @@ -1384,20 +1555,29 @@ class basis_encoder // Initialize the compression parameters structure. This is the same structure that the command line tool fills in. basis_compressor_params ¶ms = m_params; - + // Check to see if we would risk running out of memory in 32-bit WASM. There's not much we can do about this limit until memory64 is available. uint64_t total_src_texels = 0; - + for (uint32_t i = 0; i < m_params.m_source_images.size(); i++) total_src_texels += m_params.m_source_images[i].get_total_pixels(); for (uint32_t i = 0; i < m_params.m_source_images_hdr.size(); i++) total_src_texels += m_params.m_source_images_hdr[i].get_total_pixels(); + + // Try to prevent running out of memory inside WASM. + uint32_t max_pixels_thresh = BASISU_ENCODER_MAX_SOURCE_IMAGE_PIXELS; - if (total_src_texels > BASISU_ENCODER_MAX_SOURCE_IMAGE_PIXELS) + // The simpler compressors need less temporary memory, so their threshold can be higher. + if (m_params.is_etc1s() || m_params.is_uastc_ldr_4x4() || m_params.is_uastc_hdr_4x4()) { - printf("ERROR: basis_encoder::encode(): The total number of source texels to compress is too large for 32-bit WASM (above BASISU_ENCODER_MAX_SOURCE_IMAGE_PIXELS in basis_wrappers.cpp)." - "This is not a fundamental limitation of the library, but of WASM. 
Processing images this large risks running out of memory until WASM memory64 is available.\n"); + max_pixels_thresh = BASISU_ENCODER_MAX_SOURCE_IMAGE_PIXELS_HIGHER_LIMIT; + } + + if (total_src_texels > max_pixels_thresh) + { + printf("ERROR: basis_encoder::encode(): The total number of source texels to compress %llu is greater than %u, which is likely too large for WASM (above BASISU_ENCODER_MAX_SOURCE_IMAGE_PIXELS in basis_wrappers.cpp).", + total_src_texels, max_pixels_thresh); return 0; } @@ -1438,6 +1618,17 @@ class basis_encoder #endif return 0; } + + m_last_encode_mip0_rgba_psnr = 0.0f; + if (comp.get_stats().size()) + { + float psnr = comp.get_stats()[0].m_basis_rgba_avg_psnr; + + if (psnr == 0.0f) + psnr = comp.get_stats()[0].m_basis_rgb_avg_psnr; // HDR, not RGBA though + + m_last_encode_mip0_rgba_psnr = psnr; + } if (params.m_create_ktx2_file) { @@ -1458,6 +1649,11 @@ class basis_encoder return (uint32_t)comp.get_output_basis_file().size(); } } + + float get_last_encode_mip0_rgba_psnr() const + { + return m_last_encode_mip0_rgba_psnr; + } }; #endif @@ -1474,8 +1670,10 @@ class lowlevel_etc1s_image_transcoder : public basisu_lowlevel_etc1s_transcoder bool decode_palettes(uint32_t num_endpoints, const emscripten::val& endpoint_data, uint32_t num_selectors, const emscripten::val& selector_data) { basisu::vector temp_endpoint_data, temp_selector_data; - copy_from_jsbuffer(endpoint_data, temp_endpoint_data); - copy_from_jsbuffer(selector_data, temp_selector_data); + if (!copy_from_jsbuffer(endpoint_data, temp_endpoint_data)) + return false; + if (!copy_from_jsbuffer(selector_data, temp_selector_data)) + return false; #if 0 printf("decode_palettes: %u %u %u %u, %u %u\n", @@ -1500,7 +1698,8 @@ class lowlevel_etc1s_image_transcoder : public basisu_lowlevel_etc1s_transcoder bool decode_tables(const emscripten::val& table_data) { basisu::vector temp_table_data; - copy_from_jsbuffer(table_data, temp_table_data); + if (!copy_from_jsbuffer(table_data, 
temp_table_data)) + return false; if (!temp_table_data.size()) { @@ -1529,7 +1728,7 @@ class lowlevel_etc1s_image_transcoder : public basisu_lowlevel_etc1s_transcoder if (!g_basis_initialized_flag) { #if BASISU_DEBUG_PRINTF - printf("transcode_etc1s_image: basis_init() must be called first\n"); + printf("lowlevel_etc1s_image_transcoder::transcode_image: basis_init() must be called first\n"); #endif assert(0); return false; @@ -1537,12 +1736,13 @@ class lowlevel_etc1s_image_transcoder : public basisu_lowlevel_etc1s_transcoder // FIXME: Access the JavaScript buffer directly vs. copying it. basisu::vector temp_comp_data; - copy_from_jsbuffer(compressed_data, temp_comp_data); + if (!copy_from_jsbuffer(compressed_data, temp_comp_data)) + return false; if (!temp_comp_data.size()) { #if BASISU_DEBUG_PRINTF - printf("transcode_etc1s_image: compressed_data is empty\n"); + printf("lowlevel_etc1s_image_transcoder::transcode_image: compressed_data is empty\n"); #endif assert(0); return false; @@ -1552,7 +1752,7 @@ class lowlevel_etc1s_image_transcoder : public basisu_lowlevel_etc1s_transcoder if (!output_blocks_len) { #if BASISU_DEBUG_PRINTF - printf("transcode_etc1s_image: output_blocks is empty\n"); + printf("lowlevel_etc1s_image_transcoder::transcode_image: output_blocks is empty\n"); #endif assert(0); return false; @@ -1576,7 +1776,7 @@ class lowlevel_etc1s_image_transcoder : public basisu_lowlevel_etc1s_transcoder if (!status) { #if BASISU_DEBUG_PRINTF - printf("transcode_etc1s_image: basisu_lowlevel_etc1s_transcoder::transcode_image failed\n"); + printf("lowlevel_etc1s_image_transcoder::transcode_image: basisu_lowlevel_etc1s_transcoder::transcode_image failed\n"); #endif assert(0); return false; @@ -1590,12 +1790,12 @@ class lowlevel_etc1s_image_transcoder : public basisu_lowlevel_etc1s_transcoder }; // Supports UASTC LDR 4x4, UASTC HDR 4x4, and ASTC HDR 6x6/intermediate (but not ETC1S). 
-bool transcode_uastc_image( - uint32_t basis_tex_format_int, +bool transcode_uastc_image2( + uint32_t basis_tex_format_int, bool use_astc_srgb_decode_profile, uint32_t target_format_int, // see transcoder_texture_format const emscripten::val& output_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, const emscripten::val& compressed_data, - uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, + uint32_t src_num_blocks_x, uint32_t src_num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, uint32_t slice_offset, uint32_t slice_length, uint32_t decode_flags, // see cDecodeFlagsPVRTCDecodeToNextPow2 etc. bool has_alpha, @@ -1607,13 +1807,13 @@ bool transcode_uastc_image( assert(basis_tex_format_int < (uint32_t)basis_tex_format::cTotalFormats); assert(target_format_int < (uint32_t)transcoder_texture_format::cTFTotalTextureFormats); + basis_tex_format src_tex_format = static_cast(basis_tex_format_int); transcoder_texture_format target_format = static_cast(target_format_int); - basis_tex_format tex_format = static_cast(basis_tex_format_int); - + if (!g_basis_initialized_flag) { #if BASISU_DEBUG_PRINTF - printf("transcode_uastc_image: basis_init() must be called first\n"); + printf("transcode_uastc_image2: basis_init() must be called first\n"); #endif assert(0); return false; @@ -1621,12 +1821,13 @@ bool transcode_uastc_image( // FIXME: Access the JavaScript buffer directly vs. copying it. 
basisu::vector temp_comp_data; - copy_from_jsbuffer(compressed_data, temp_comp_data); + if (!copy_from_jsbuffer(compressed_data, temp_comp_data)) + return false; if (!temp_comp_data.size()) { #if BASISU_DEBUG_PRINTF - printf("transcode_uastc_image: compressed_data is empty\n"); + printf("transcode_uastc_image2: compressed_data is empty\n"); #endif assert(0); return false; @@ -1636,7 +1837,7 @@ bool transcode_uastc_image( if (!output_blocks_len) { #if BASISU_DEBUG_PRINTF - printf("transcode_uastc_image: output_blocks is empty\n"); + printf("transcode_uastc_image2: output_blocks is empty\n"); #endif assert(0); return false; @@ -1646,7 +1847,7 @@ bool transcode_uastc_image( printf("format: %u\n", (uint32_t)target_format); printf("output_blocks size: %u buf size: %u\n", output_blocks_len, output_blocks_buf_size_in_blocks_or_pixels); printf("compressed_data size: %u\n", compressed_data["byteLength"].as()); - printf("%u %u %u %u %u\n", num_blocks_x, num_blocks_y, orig_width, orig_height, level_index); + printf("%u %u %u %u %u\n", src_num_blocks_x, src_num_blocks_y, orig_width, orig_height, level_index); printf("%u %u\n", slice_offset, slice_length); printf("%u\n", decode_flags); printf("has_alpha: %u is_video: %u\n", has_alpha, is_video); @@ -1654,8 +1855,27 @@ bool transcode_uastc_image( basisu::vector temp_output_blocks(output_blocks_len); - bool status = false; - if (tex_format == basis_tex_format::cUASTC_HDR_4x4) + bool status = false; + if (basis_tex_format_is_astc_ldr(src_tex_format) || basis_tex_format_is_xuastc_ldr(src_tex_format)) + { + basisu_lowlevel_xuastc_ldr_transcoder transcoder; + + status = transcoder.transcode_image( + src_tex_format, use_astc_srgb_decode_profile, + (transcoder_texture_format)target_format, + &temp_output_blocks[0], output_blocks_buf_size_in_blocks_or_pixels, + &temp_comp_data[0], temp_comp_data.size(), + src_num_blocks_x, src_num_blocks_y, orig_width, orig_height, level_index, + slice_offset, slice_length, + decode_flags, + has_alpha, 
+ is_video, + output_row_pitch_in_blocks_or_pixels, + nullptr, + output_rows_in_pixels, + channel0, channel1); + } + else if (src_tex_format == basis_tex_format::cUASTC_HDR_4x4) { basisu_lowlevel_uastc_hdr_4x4_transcoder transcoder; @@ -1663,7 +1883,7 @@ bool transcode_uastc_image( (transcoder_texture_format)target_format, &temp_output_blocks[0], output_blocks_buf_size_in_blocks_or_pixels, &temp_comp_data[0], temp_comp_data.size(), - num_blocks_x, num_blocks_y, orig_width, orig_height, level_index, + src_num_blocks_x, src_num_blocks_y, orig_width, orig_height, level_index, slice_offset, slice_length, decode_flags, has_alpha, @@ -1673,7 +1893,7 @@ bool transcode_uastc_image( output_rows_in_pixels, channel0, channel1); } - else if (tex_format == basis_tex_format::cASTC_HDR_6x6) + else if (src_tex_format == basis_tex_format::cASTC_HDR_6x6) { basisu_lowlevel_astc_hdr_6x6_transcoder transcoder; @@ -1681,7 +1901,7 @@ bool transcode_uastc_image( (transcoder_texture_format)target_format, &temp_output_blocks[0], output_blocks_buf_size_in_blocks_or_pixels, &temp_comp_data[0], temp_comp_data.size(), - num_blocks_x, num_blocks_y, orig_width, orig_height, level_index, + src_num_blocks_x, src_num_blocks_y, orig_width, orig_height, level_index, slice_offset, slice_length, decode_flags, has_alpha, @@ -1691,15 +1911,15 @@ bool transcode_uastc_image( output_rows_in_pixels, channel0, channel1); } - else if (tex_format == basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE) + else if (src_tex_format == basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE) { - basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder transcoder; + basisu_lowlevel_uastc_hdr_6x6_intermediate_transcoder transcoder; status = transcoder.transcode_image( (transcoder_texture_format)target_format, &temp_output_blocks[0], output_blocks_buf_size_in_blocks_or_pixels, &temp_comp_data[0], temp_comp_data.size(), - num_blocks_x, num_blocks_y, orig_width, orig_height, level_index, + src_num_blocks_x, src_num_blocks_y, orig_width, 
orig_height, level_index, slice_offset, slice_length, decode_flags, has_alpha, @@ -1709,7 +1929,7 @@ bool transcode_uastc_image( output_rows_in_pixels, channel0, channel1); } - else if (tex_format == basis_tex_format::cUASTC4x4) + else if (src_tex_format == basis_tex_format::cUASTC_LDR_4x4) { basisu_lowlevel_uastc_ldr_4x4_transcoder transcoder; @@ -1717,7 +1937,7 @@ bool transcode_uastc_image( (transcoder_texture_format)target_format, &temp_output_blocks[0], output_blocks_buf_size_in_blocks_or_pixels, &temp_comp_data[0], temp_comp_data.size(), - num_blocks_x, num_blocks_y, orig_width, orig_height, level_index, + src_num_blocks_x, src_num_blocks_y, orig_width, orig_height, level_index, slice_offset, slice_length, decode_flags, has_alpha, @@ -1735,7 +1955,7 @@ bool transcode_uastc_image( if (!status) { #if BASISU_DEBUG_PRINTF - printf("transcode_uastc_image: basisu_lowlevel_uastc_transcoder::transcode_image failed\n"); + printf("transcode_uastc_image2: basisu_lowlevel_uastc_transcoder::transcode_image failed\n"); #endif assert(0); return false; @@ -1747,82 +1967,154 @@ bool transcode_uastc_image( return true; } +// Previous API - prefer transcode_uastc_image2(), which allows the caller to control the ASTC decode profile (srgb/linear) for XUASTC/ASTC. +bool transcode_uastc_image( + uint32_t basis_tex_format_int, + uint32_t target_format_int, // see transcoder_texture_format + const emscripten::val& output_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + const emscripten::val& compressed_data, + uint32_t src_num_blocks_x, uint32_t src_num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, + uint32_t slice_offset, uint32_t slice_length, + uint32_t decode_flags, // see cDecodeFlagsPVRTCDecodeToNextPow2 etc. + bool has_alpha, + bool is_video, + uint32_t output_row_pitch_in_blocks_or_pixels, + uint32_t output_rows_in_pixels, + int channel0, int channel1) +{ + // Just assume sRGB decode profile - which is the compressor's default. 
+ const bool use_astc_srgb_decode_profile = true; + + return transcode_uastc_image2( + basis_tex_format_int, use_astc_srgb_decode_profile, + target_format_int, // see transcoder_texture_format + output_blocks, output_blocks_buf_size_in_blocks_or_pixels, + compressed_data, + src_num_blocks_x, src_num_blocks_y, orig_width, orig_height, level_index, + slice_offset, slice_length, + decode_flags, // see cDecodeFlagsPVRTCDecodeToNextPow2 etc. + has_alpha, + is_video, + output_row_pitch_in_blocks_or_pixels, + output_rows_in_pixels, + channel0, channel1); +} + +// transcoder_tex_fmt is transcoder_texture_format uint32_t get_bytes_per_block_or_pixel(uint32_t transcoder_tex_fmt) { assert(transcoder_tex_fmt < (uint32_t)transcoder_texture_format::cTFTotalTextureFormats); return basis_get_bytes_per_block_or_pixel(static_cast(transcoder_tex_fmt)); } +// transcoder_tex_fmt is transcoder_texture_format bool format_has_alpha(uint32_t transcoder_tex_fmt) { assert(transcoder_tex_fmt < (uint32_t)transcoder_texture_format::cTFTotalTextureFormats); return basis_transcoder_format_has_alpha(static_cast(transcoder_tex_fmt)); } +// transcoder_tex_fmt is transcoder_texture_format bool format_is_hdr(uint32_t transcode_tex_fmt) { assert(transcode_tex_fmt < (uint32_t)transcoder_texture_format::cTFTotalTextureFormats); return basis_transcoder_format_is_hdr(static_cast(transcode_tex_fmt)); } +// transcoder_tex_fmt is transcoder_texture_format bool format_is_ldr(uint32_t transcode_tex_fmt) { assert(transcode_tex_fmt < (uint32_t)transcoder_texture_format::cTFTotalTextureFormats); return !basis_transcoder_format_is_hdr(static_cast(transcode_tex_fmt)); } +// transcoder_tex_fmt is transcoder_texture_format bool format_is_uncompressed(uint32_t transcoder_tex_fmt) { assert(transcoder_tex_fmt < (uint32_t)transcoder_texture_format::cTFTotalTextureFormats); return basis_transcoder_format_is_uncompressed(static_cast(transcoder_tex_fmt)); } +// transcoder_tex_fmt is transcoder_texture_format, file_fmt is 
basis_tex_fmt bool is_format_supported(uint32_t transcoder_tex_fmt, uint32_t file_fmt) { assert(transcoder_tex_fmt < (uint32_t)transcoder_texture_format::cTFTotalTextureFormats); + assert(file_fmt < (uint32_t)basis_tex_format::cTotalFormats); return basis_is_format_supported(static_cast(transcoder_tex_fmt), static_cast(file_fmt)); } -// transcoder_texture_format +// transcoder_tex_fmt is transcoder_texture_format uint32_t get_format_block_width(uint32_t transcoder_tex_fmt) { assert(transcoder_tex_fmt < (uint32_t)transcoder_texture_format::cTFTotalTextureFormats); return basis_get_block_width(static_cast(transcoder_tex_fmt)); } -// transcoder_texture_format +// fmt is transcoder_texture_format uint32_t get_format_block_height(uint32_t transcoder_tex_fmt) { assert(transcoder_tex_fmt < (uint32_t)transcoder_texture_format::cTFTotalTextureFormats); return basis_get_block_height(static_cast(transcoder_tex_fmt)); } -// basis_tex_format -uint32_t get_basis_tex_format_block_width(uint32_t fmt) +// file_fmt is basis_tex_format +uint32_t get_basis_tex_format_block_width(uint32_t file_fmt) +{ + assert(file_fmt < (uint32_t)basis_tex_format::cTotalFormats); + return basis_tex_format_get_block_width(static_cast(file_fmt)); +} + +// file_fmt is basis_tex_format +uint32_t get_basis_tex_format_block_height(uint32_t file_fmt) { - assert(fmt < (uint32_t)basis_tex_format::cTotalFormats); - return basis_tex_format_get_block_width(static_cast(fmt)); + assert(file_fmt < (uint32_t)basis_tex_format::cTotalFormats); + return basis_tex_format_get_block_height(static_cast(file_fmt)); } -// basis_tex_format -uint32_t get_basis_tex_format_block_height(uint32_t fmt) +// file_fmt is basis_tex_format +bool is_basis_tex_format_hdr(uint32_t file_fmt) { - assert(fmt < (uint32_t)basis_tex_format::cTotalFormats); - return basis_tex_format_get_block_height(static_cast(fmt)); + assert(file_fmt < (uint32_t)basis_tex_format::cTotalFormats); + return basis_tex_format_is_hdr((basis_tex_format)file_fmt); } -// 
basis_tex_format -bool is_basis_tex_format_hdr(uint32_t fmt) +// file_fmt is basis_tex_format +bool is_basis_tex_format_ldr(uint32_t file_fmt) { - assert(fmt < (uint32_t)basis_tex_format::cTotalFormats); - return ((basis_tex_format)fmt == basis_tex_format::cUASTC_HDR_4x4) || ((basis_tex_format)fmt == basis_tex_format::cASTC_HDR_6x6) || ((basis_tex_format)fmt == basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE); + assert(file_fmt < (uint32_t)basis_tex_format::cTotalFormats); + return basis_tex_format_is_ldr((basis_tex_format)file_fmt); } -// basis_tex_format -bool is_basis_tex_format_ldr(uint32_t fmt) +// file_fmt is basis_tex_format +bool is_basis_tex_format_xuastc_ldr(uint32_t file_fmt) { - assert(fmt < (uint32_t)basis_tex_format::cTotalFormats); - return ((basis_tex_format)fmt == basis_tex_format::cETC1S) || ((basis_tex_format)fmt == basis_tex_format::cUASTC4x4); + assert(file_fmt < (uint32_t)basis_tex_format::cTotalFormats); + return basis_tex_format_is_xuastc_ldr((basis_tex_format)file_fmt); +} + +// file_fmt is basis_tex_format +bool is_basis_tex_format_astc_ldr(uint32_t file_fmt) +{ + assert(file_fmt < (uint32_t)basis_tex_format::cTotalFormats); + return basis_tex_format_is_astc_ldr((basis_tex_format)file_fmt); +} + +// Returns transcoder_texture_format, file_fmt is basis_tex_format. +// // Returns the best ASTC texture format to use given any basis_tex_format (the one with the proper block size). +// Use get_transcoder_texture_format_from_basis_tex_format() instead (same thing). Here for backwards compat. +uint32_t get_transcoder_texture_format_from_xuastc_or_astc_ldr_basis_tex_format(uint32_t file_fmt) +{ + assert(file_fmt < (uint32_t)basis_tex_format::cTotalFormats); + return (uint32_t)basis_get_transcoder_texture_format_from_xuastc_or_astc_ldr_basis_tex_format(static_cast(file_fmt)); +} + +// Same as get_transcoder_texture_format_from_xuastc_or_astc_ldr_basis_tex_format(), just a smaller name, works with any basis_tex_format. 
+// Returns the best ASTC texture format to use given any basis_tex_format (the one with the proper block size). +// Returns transcoder_texture_format, file_fmt is basis_tex_format. +uint32_t get_transcoder_texture_format_from_basis_tex_format(uint32_t file_fmt) +{ + assert(file_fmt < (uint32_t)basis_tex_format::cTotalFormats); + return (uint32_t)basis_get_transcoder_texture_format_from_basis_tex_format(static_cast(file_fmt)); } uint32_t convert_float_to_half(float f) @@ -1848,10 +2140,10 @@ uint32_t get_debug_flags_wrapper() EMSCRIPTEN_BINDINGS(basis_codec) { function("initializeBasis", &basis_init); - + function("setDebugFlags", &set_debug_flags_wrapper); function("getDebugFlags", &get_debug_flags_wrapper); - + // Expose BasisFileDesc structure value_object("BasisFileDesc") .field("version", &basis_file_desc::m_version) @@ -1926,6 +2218,19 @@ EMSCRIPTEN_BINDINGS(basis_codec) { .value("cTFRGBA_HALF", transcoder_texture_format::cTFRGBA_HALF) .value("cTFRGB_9E5", transcoder_texture_format::cTFRGB_9E5) .value("cTFASTC_HDR_6x6_RGBA", transcoder_texture_format::cTFASTC_HDR_6x6_RGBA) + .value("cTFASTC_LDR_5x4_RGBA", transcoder_texture_format::cTFASTC_LDR_5x4_RGBA) + .value("cTFASTC_LDR_5x5_RGBA", transcoder_texture_format::cTFASTC_LDR_5x5_RGBA) + .value("cTFASTC_LDR_6x5_RGBA", transcoder_texture_format::cTFASTC_LDR_6x5_RGBA) + .value("cTFASTC_LDR_6x6_RGBA", transcoder_texture_format::cTFASTC_LDR_6x6_RGBA) + .value("cTFASTC_LDR_8x5_RGBA", transcoder_texture_format::cTFASTC_LDR_8x5_RGBA) + .value("cTFASTC_LDR_8x6_RGBA", transcoder_texture_format::cTFASTC_LDR_8x6_RGBA) + .value("cTFASTC_LDR_10x5_RGBA", transcoder_texture_format::cTFASTC_LDR_10x5_RGBA) + .value("cTFASTC_LDR_10x6_RGBA", transcoder_texture_format::cTFASTC_LDR_10x6_RGBA) + .value("cTFASTC_LDR_8x8_RGBA", transcoder_texture_format::cTFASTC_LDR_8x8_RGBA) + .value("cTFASTC_LDR_10x8_RGBA", transcoder_texture_format::cTFASTC_LDR_10x8_RGBA) + .value("cTFASTC_LDR_10x10_RGBA", 
transcoder_texture_format::cTFASTC_LDR_10x10_RGBA) + .value("cTFASTC_LDR_12x10_RGBA", transcoder_texture_format::cTFASTC_LDR_12x10_RGBA) + .value("cTFASTC_LDR_12x12_RGBA", transcoder_texture_format::cTFASTC_LDR_12x12_RGBA) .value("cTFTotalTextureFormats", transcoder_texture_format::cTFTotalTextureFormats) ; @@ -1944,7 +2249,11 @@ EMSCRIPTEN_BINDINGS(basis_codec) { function("isBasisTexFormatHDR", &is_basis_tex_format_hdr); function("isBasisTexFormatLDR", &is_basis_tex_format_ldr); - + function("isBasisTexFormatXUASTCLDR", &is_basis_tex_format_xuastc_ldr); + function("isBasisTexFormatASTCLDR", &is_basis_tex_format_astc_ldr); + function("getTranscoderTextureFormatFromXUASTCOrASTCLDRBasisTexFormat", &get_transcoder_texture_format_from_xuastc_or_astc_ldr_basis_tex_format); + function("getTranscoderTextureFormatFromBasisTexFormat", &get_transcoder_texture_format_from_basis_tex_format); + function("convertFloatToHalf", &convert_float_to_half); function("convertHalfToFloat", &convert_half_to_float); @@ -1957,13 +2266,46 @@ EMSCRIPTEN_BINDINGS(basis_codec) { .value("cBASISTexTypeVolume", cBASISTexTypeVolume) ; - // Expose enum basis_tex_format + // Expose enum basis_tex_format - supported KTX2/.basis texture types. 
enum_("basis_tex_format") .value("cETC1S", basis_tex_format::cETC1S) - .value("cUASTC4x4", basis_tex_format::cUASTC4x4) + .value("cUASTC4x4", basis_tex_format::cUASTC_LDR_4x4) // name has changed, keeping for backwards compat + .value("cUASTC_LDR_4x4", basis_tex_format::cUASTC_LDR_4x4) .value("cUASTC_HDR_4x4", basis_tex_format::cUASTC_HDR_4x4) .value("cASTC_HDR_6x6", basis_tex_format::cASTC_HDR_6x6) - .value("cASTC_HDR_6x6_INTERMEDIATE", basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE) + .value("cUASTC_HDR_6x6_INTERMEDIATE", basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE) + .value("cUASTC_HDR_6x6", basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE) // the correct name + .value("cASTC_HDR_6x6_INTERMEDIATE", basis_tex_format::cUASTC_HDR_6x6_INTERMEDIATE) // was misnamed in previous release, keeping for backwards compat + // XUASTC LDR 4x4-12x12 + .value("cXUASTC_LDR_4x4", basis_tex_format::cXUASTC_LDR_4x4) + .value("cXUASTC_LDR_5x4", basis_tex_format::cXUASTC_LDR_5x4) + .value("cXUASTC_LDR_5x5", basis_tex_format::cXUASTC_LDR_5x5) + .value("cXUASTC_LDR_6x5", basis_tex_format::cXUASTC_LDR_6x5) + .value("cXUASTC_LDR_6x6", basis_tex_format::cXUASTC_LDR_6x6) + .value("cXUASTC_LDR_8x5", basis_tex_format::cXUASTC_LDR_8x5) + .value("cXUASTC_LDR_8x6", basis_tex_format::cXUASTC_LDR_8x6) + .value("cXUASTC_LDR_10x5", basis_tex_format::cXUASTC_LDR_10x5) + .value("cXUASTC_LDR_10x6", basis_tex_format::cXUASTC_LDR_10x6) + .value("cXUASTC_LDR_8x8", basis_tex_format::cXUASTC_LDR_8x8) + .value("cXUASTC_LDR_10x8", basis_tex_format::cXUASTC_LDR_10x8) + .value("cXUASTC_LDR_10x10", basis_tex_format::cXUASTC_LDR_10x10) + .value("cXUASTC_LDR_12x10", basis_tex_format::cXUASTC_LDR_12x10) + .value("cXUASTC_LDR_12x12", basis_tex_format::cXUASTC_LDR_12x12) + // ASTC LDR 4x4-12x12 + .value("cASTC_LDR_4x4", basis_tex_format::cASTC_LDR_4x4) + .value("cASTC_LDR_5x4", basis_tex_format::cASTC_LDR_5x4) + .value("cASTC_LDR_5x5", basis_tex_format::cASTC_LDR_5x5) + .value("cASTC_LDR_6x5", 
basis_tex_format::cASTC_LDR_6x5) + .value("cASTC_LDR_6x6", basis_tex_format::cASTC_LDR_6x6) + .value("cASTC_LDR_8x5", basis_tex_format::cASTC_LDR_8x5) + .value("cASTC_LDR_8x6", basis_tex_format::cASTC_LDR_8x6) + .value("cASTC_LDR_10x5", basis_tex_format::cASTC_LDR_10x5) + .value("cASTC_LDR_10x6", basis_tex_format::cASTC_LDR_10x6) + .value("cASTC_LDR_8x8", basis_tex_format::cASTC_LDR_8x8) + .value("cASTC_LDR_10x8", basis_tex_format::cASTC_LDR_10x8) + .value("cASTC_LDR_10x10", basis_tex_format::cASTC_LDR_10x10) + .value("cASTC_LDR_12x10", basis_tex_format::cASTC_LDR_12x10) + .value("cASTC_LDR_12x12", basis_tex_format::cASTC_LDR_12x12) ; // .basis file transcoder object. If all you want to do is transcode already encoded .basis files, this is all you really need. @@ -1999,6 +2341,12 @@ EMSCRIPTEN_BINDINGS(basis_codec) { .function("isLDR", optional_override([](basis_file& self) { return self.isLDR(); })) + .function("isASTC_LDR", optional_override([](basis_file& self) { + return self.isASTC_LDR(); + })) + .function("isXUASTC_LDR", optional_override([](basis_file& self) { + return self.isXUASTC_LDR(); + })) .function("getNumImages", optional_override([](basis_file& self) { return self.getNumImages(); })) @@ -2055,6 +2403,10 @@ EMSCRIPTEN_BINDINGS(basis_codec) { .value("cDecodeFlagsOutputHasAlphaIndices", cDecodeFlagsOutputHasAlphaIndices) .value("cDecodeFlagsHighQuality", cDecodeFlagsHighQuality) .value("cDecodeFlagsNoETC1SChromaFiltering", cDecodeFlagsNoETC1SChromaFiltering) + .value("cDecodeFlagsNoDeblockFiltering", cDecodeFlagsNoDeblockFiltering) + .value("cDecodeFlagsStrongerDeblockFiltering", cDecodeFlagsStrongerDeblockFiltering) + .value("cDecodeFlagsForceDeblockFiltering", cDecodeFlagsForceDeblockFiltering) + .value("cDecodeFlagXUASTCLDRDisableFastBC7Transcoding", cDecodeFlagXUASTCLDRDisableFastBC7Transcoding) ; // The low-level ETC1S transcoder is a class because it has persistent state (such as the endpoint/selector codebooks and Huffman tables, and transcoder 
state for video) @@ -2067,10 +2419,11 @@ EMSCRIPTEN_BINDINGS(basis_codec) { // The low-level UASTC transcoder (for UASTC LDR 4x4, HDR 4x4, or ASTC HDR 6x6) is a single function. function("transcodeUASTCImage", &transcode_uastc_image); + function("transcodeUASTCImage2", &transcode_uastc_image2); function("transcoderSupportsKTX2", &basisu_transcoder_supports_ktx2); function("transcoderSupportsKTX2Zstd", &basisu_transcoder_supports_ktx2_zstd); - + #if BASISD_SUPPORT_KTX2 // KTX2 enums/constants enum_("ktx2_supercompression") @@ -2080,10 +2433,11 @@ EMSCRIPTEN_BINDINGS(basis_codec) { ; constant("KTX2_VK_FORMAT_UNDEFINED", KTX2_VK_FORMAT_UNDEFINED); - constant("KTX2_KDF_DF_MODEL_UASTC", KTX2_KDF_DF_MODEL_UASTC_LDR_4X4); constant("KTX2_KDF_DF_MODEL_ETC1S", KTX2_KDF_DF_MODEL_ETC1S); - constant("KTX2_KDF_DF_MODEL_ASTC_HDR_6X6_INTERMEDIATE", KTX2_KDF_DF_MODEL_ASTC_HDR_6X6_INTERMEDIATE); - + constant("KTX2_KDF_DF_MODEL_UASTC", KTX2_KDF_DF_MODEL_UASTC_LDR_4X4); + constant("KTX2_KDF_DF_MODEL_UASTC_HDR_6X6_INTERMEDIATE", KTX2_KDF_DF_MODEL_UASTC_HDR_6X6_INTERMEDIATE); + constant("KTX2_KDF_DF_MODEL_XUASTC_LDR_INTERMEDIATE", KTX2_KDF_DF_MODEL_XUASTC_LDR_INTERMEDIATE); + constant("KTX2_IMAGE_IS_P_FRAME", KTX2_IMAGE_IS_P_FRAME); constant("KTX2_UASTC_BLOCK_SIZE", KTX2_UASTC_BLOCK_SIZE); constant("KTX2_MAX_SUPPORTED_LEVEL_COUNT", KTX2_MAX_SUPPORTED_LEVEL_COUNT); @@ -2185,10 +2539,13 @@ EMSCRIPTEN_BINDINGS(basis_codec) { .function("isHDR6x6", &ktx2_file::isHDR6x6) .function("isLDR", &ktx2_file::isLDR) .function("isETC1S", &ktx2_file::isETC1S) + .function("isASTC_LDR", &ktx2_file::isASTC_LDR) + .function("isXUASTC_LDR", &ktx2_file::isXUASTC_LDR) .function("getHasAlpha", &ktx2_file::getHasAlpha) .function("getDFDColorModel", &ktx2_file::getDFDColorModel) .function("getDFDColorPrimaries", &ktx2_file::getDFDColorPrimaries) .function("getDFDTransferFunc", &ktx2_file::getDFDTransferFunc) + .function("isSRGB", &ktx2_file::isSRGB) .function("getDFDFlags", &ktx2_file::getDFDFlags) 
.function("getDFDTotalSamples", &ktx2_file::getDFDTotalSamples) .function("getDFDChannelID0", &ktx2_file::getDFDChannelID0) @@ -2219,6 +2576,8 @@ EMSCRIPTEN_BINDINGS(basis_codec) { constant("BASISU_MAX_IMAGE_DIMENSION", BASISU_MAX_IMAGE_DIMENSION); constant("BASISU_QUALITY_MIN", BASISU_QUALITY_MIN); constant("BASISU_QUALITY_MAX", BASISU_QUALITY_MAX); + constant("BASISU_XUASTC_QUALITY_MIN", BASISU_XUASTC_QUALITY_MIN); + constant("BASISU_XUASTC_QUALITY_MAX", BASISU_XUASTC_QUALITY_MAX); constant("BASISU_MAX_ENDPOINT_CLUSTERS", BASISU_MAX_ENDPOINT_CLUSTERS); constant("BASISU_MAX_SELECTOR_CLUSTERS", BASISU_MAX_SELECTOR_CLUSTERS); constant("BASISU_MAX_SLICES", BASISU_MAX_SLICES); @@ -2226,10 +2585,10 @@ EMSCRIPTEN_BINDINGS(basis_codec) { constant("BASISU_RDO_UASTC_DICT_SIZE_MIN", BASISU_RDO_UASTC_DICT_SIZE_MIN); constant("BASISU_RDO_UASTC_DICT_SIZE_MAX", BASISU_RDO_UASTC_DICT_SIZE_MAX); constant("BASISU_MAX_RESAMPLER_FILTERS", g_num_resample_filters); - constant("BASISU_DEFAULT_COMPRESSION_LEVEL", BASISU_DEFAULT_COMPRESSION_LEVEL); - constant("BASISU_MAX_COMPRESSION_LEVEL", BASISU_MAX_COMPRESSION_LEVEL); + constant("BASISU_DEFAULT_ETC1S_COMPRESSION_LEVEL", BASISU_DEFAULT_ETC1S_COMPRESSION_LEVEL); + constant("BASISU_MAX_ETC1S_COMPRESSION_LEVEL", BASISU_MAX_ETC1S_COMPRESSION_LEVEL); - // The maximum representable floating point value in a UASTC HDR or ASTC HDR texture (any larger values will get clamped and a warning issued). + // The maximum representable floating point value in a UASTC HDR or ASTC HDR texture (any larger values will get clamped and a warning issued). 
constant("ASTC_HDR_MAX_VAL", basist::ASTC_HDR_MAX_VAL); // UASTC LDR/HDR flags/options @@ -2244,12 +2603,12 @@ EMSCRIPTEN_BINDINGS(basis_codec) { constant("cPackUASTCETC1FasterHints", cPackUASTCETC1FasterHints); constant("cPackUASTCETC1FastestHints", cPackUASTCETC1FastestHints); constant("cPackUASTCETC1DisableFlipAndIndividual", cPackUASTCETC1DisableFlipAndIndividual); - + constant("UASTC_RDO_DEFAULT_MAX_ALLOWED_RMS_INCREASE_RATIO", UASTC_RDO_DEFAULT_MAX_ALLOWED_RMS_INCREASE_RATIO); constant("UASTC_RDO_DEFAULT_SKIP_BLOCK_RMS_THRESH", UASTC_RDO_DEFAULT_SKIP_BLOCK_RMS_THRESH); constant("cPackASTC6x6MaxUserCompLevel", ::astc_6x6_hdr::ASTC_HDR_6X6_MAX_USER_COMP_LEVEL); - + enum_("hdr_image_type") .value("cHITRGBAHalfFloat", hdr_image_type::cHITRGBAHalfFloat) .value("cHITRGBAFloat", hdr_image_type::cHITRGBAFloat) @@ -2265,6 +2624,13 @@ EMSCRIPTEN_BINDINGS(basis_codec) { .value("cJPGImage", ldr_image_type::cJPGImage) ; + enum_("xuastc_ldr_syntax") + .value("cFullArith", xuastc_ldr_syntax::cFullArith) + .value("cHybridArithZStd", xuastc_ldr_syntax::cHybridArithZStd) + .value("cFullZStd", xuastc_ldr_syntax::cFullZStd) + .value("cTotal", xuastc_ldr_syntax::cTotal) + ; + // Compression/encoding object. // You create this object, call the set() methods to fill in the parameters/source images/options, call encode(), and you get back a .basis or .KTX2 file. // You can call .encode() multiple times, changing the parameters/options in between calls. @@ -2278,6 +2644,10 @@ EMSCRIPTEN_BINDINGS(basis_codec) { return self.encode(dst_basis_file_js_val); })) + .function("getLastEncodeMip0RGBAPSNR", optional_override([](basis_encoder& self) { + return self.get_last_encode_mip0_rgba_psnr(); + })) + // Sets the slice's source image, either from a PNG/JPG file or from a raw 32-bit RGBA raster image. // If the input is a raster image, the buffer must be width*height*4 bytes in size. The raster image is stored in top down scanline order. // The first texel is the top-left texel. 
The texel byte order in memory is R,G,B,A (R first at offset 0, A last at offset 3). @@ -2287,18 +2657,19 @@ EMSCRIPTEN_BINDINGS(basis_codec) { return self.set_slice_source_image(slice_index, src_image_js_val, width, height, (ldr_image_type)img_type); })) + // If true threaded compression will be used with X *extra* helper threads. .function("controlThreading", optional_override([](basis_encoder& self, bool enable_threading, uint32_t num_extra_worker_threads) { return self.control_threading(enable_threading, num_extra_worker_threads); })) // HDR targets only - .function("setSliceSourceImageHDR", optional_override([](basis_encoder& self, uint32_t slice_index, const emscripten::val& src_image_js_val, uint32_t width, uint32_t height, uint32_t img_type, + .function("setSliceSourceImageHDR", optional_override([](basis_encoder& self, uint32_t slice_index, const emscripten::val& src_image_js_val, uint32_t width, uint32_t height, uint32_t img_type, bool ldr_srgb_to_linear_conversion, float ldr_to_hdr_nit_multiplier) { return self.set_slice_source_image_hdr(slice_index, src_image_js_val, width, height, (hdr_image_type)img_type, ldr_srgb_to_linear_conversion, ldr_to_hdr_nit_multiplier); })) - // Sets the desired encoding format. This is the preferred way to control which format the encoder creates. - // tex_format is a basis_tex_format (cETC1s, cUASTC4x4, cUASTC_HDR_4x4 etc.) + // Sets the desired encoding format. This is the preferred way to control which format/ASTC block size the encoder creates. + // tex_format is a basis_tex_format (cETC1s, cUASTC_LDR_4x4, cUASTC_HDR_4x4 etc.) - see basisu_file_headers.h. // This can be used instead of the older setUASTC(), setHDR() etc. methods. 
// All formats .function("setFormatMode", optional_override([](basis_encoder& self, int tex_format) { @@ -2306,12 +2677,23 @@ EMSCRIPTEN_BINDINGS(basis_codec) { self.m_params.set_format_mode((basis_tex_format)tex_format); })) + // setFormatModeAndEffortQuality() is like setFormatMode(), except it also sets the effort [0,10] and quality [0,100] parameters to (hopefully) reasonable values for the selected format. + // If effort==-1, no effort related parameters will be modified. + // If quality==-1, no quality related parameters will be modified. + // These values directly correspond to the command line tool's "-effort X" and "-quality X" unified codec compression options. + .function("setFormatModeAndQualityEffort", optional_override([](basis_encoder& self, int tex_format, int quality, int effort, bool set_defaults) { + assert((tex_format >= 0) && (tex_format < (uint32_t)basis_tex_format::cTotalFormats)); + assert((effort >= -1) && (effort <= 10)); + assert((quality >= -1) && (quality <= 100)); + self.m_params.set_format_mode_and_quality_effort((basis_tex_format)tex_format, quality, effort, set_defaults); + })) + // If true, the encoder will output a UASTC LDR 4x4 texture, otherwise a ETC1S texture. // (This is for backwards compatibility, prefer setFormatMode() instead.) // All formats .function("setUASTC", optional_override([](basis_encoder& self, bool uastc_flag) { if (uastc_flag) - self.m_params.set_format_mode(basis_tex_format::cUASTC4x4); + self.m_params.set_format_mode(basis_tex_format::cUASTC_LDR_4x4); else self.m_params.set_format_mode(basis_tex_format::cETC1S); })) @@ -2325,8 +2707,9 @@ EMSCRIPTEN_BINDINGS(basis_codec) { else self.m_params.set_format_mode(basis_tex_format::cETC1S); // don't really know what to set })) - - // Sets the UASTC HDR 4x4 quality vs. encoder performance tradeoff (0-4, default is 1). Higher=slower but better quality. + + // Sets the UASTC HDR 4x4 quality/effort vs. encoder performance tradeoff (0-4, default is 1). 
Higher=slower but better quality. + // TODO: Rename, this is really a compressor "effort" level. // UASTC HDR 4x4 .function("setUASTCHDRQualityLevel", optional_override([](basis_encoder& self, int level) { assert((level >= uastc_hdr_4x4_codec_options::cMinLevel) && (level <= uastc_hdr_4x4_codec_options::cMaxLevel)); @@ -2348,6 +2731,7 @@ EMSCRIPTEN_BINDINGS(basis_codec) { // If true, the input is assumed to be in sRGB space. Be sure to set this correctly! (Examples: True on photos, albedo/spec maps, and false on normal maps.) // In HDR mode, if perceptual is true R and G are weighted higher (2.0, 3.0) than B (1.0). Otherwise the encoder uses equal weightings for each channel. + // Importantly, also see setKTX2SRGBTransferFunc() and setMipSRGB(). // ETC1S, UASTC LDR 4x4, UASTC HDR 4x4 .function("setPerceptual", optional_override([](basis_encoder& self, bool perceptual_flag) { self.m_params.m_perceptual = perceptual_flag; @@ -2382,8 +2766,9 @@ EMSCRIPTEN_BINDINGS(basis_codec) { .function("setLambda", optional_override([](basis_encoder& self, float rdo_quality) { self.m_params.m_astc_hdr_6x6_options.m_lambda = rdo_quality; })) - - // ASTC HDR 6x6: Enables REC 2020 delta E ITP vs. REC 709 in the encoder. + + // ASTC HDR 6x6: Enables REC 2020 delta E ITP vs. REC 709 in the encoder (and sets the colorspace in the KTX2 header). + // Note this colorspace always goes into the KTX2 header (DFD), for all modes (ETC1S, UASTC LDR 4x4, etc.) 
.function("setRec2020", optional_override([](basis_encoder& self, bool rec2020) { self.m_params.m_astc_hdr_6x6_options.m_rec2020_bt2100_color_gamut = rec2020; })) @@ -2415,6 +2800,11 @@ EMSCRIPTEN_BINDINGS(basis_codec) { .function("setASTC_HDR_6x6_BruteForcePats", optional_override([](basis_encoder& self, bool flag) { self.m_params.m_astc_hdr_6x6_options.m_brute_force_partition_matching = flag; })) + + // ASTC HDR 6x6: Write v1.6 compatible UASTC HDR 6x6i files (the current default, but eventually this will be changed to default to false, requiring v2.0 or later transcoders) + .function("setASTC_HDR_6x6_WriteBasisU16CompatibleFiles", optional_override([](basis_encoder& self, bool flag) { + self.m_params.m_astc_hdr_6x6_options.m_write_basisu_1_6_compatible_files = flag; + })) // ASTC HDR 6x6: Control gaussian filtering on very hard blocks .function("setASTC_HDR_6x6_SetGaussian1", optional_override([](basis_encoder& self, float strength) { @@ -2471,22 +2861,23 @@ EMSCRIPTEN_BINDINGS(basis_codec) { self.m_params.m_etc1s_max_selector_clusters = max_selector_clusters; })) - // Sets the ETC1S encoder's quality level, which controls the file size vs. quality tradeoff. - // Default is -1 (meaning unused - the compressor will use m_max_endpoint_clusters/m_max_selector_clusters instead to control the codebook sizes). - // Range is [1,BASISU_QUALITY_MAX] - // ETC1S mode + // Sets the ETC1S or XUASTC LDR 4x4-12x12 encoder's quality level, which controls the file size vs. quality tradeoff. + // Default is -1 (meaning unused - the compressor will use m_max_endpoint_clusters/m_max_selector_clusters instead to control the codebook sizes in ETC1S mode, or no DCT in XUASTC LDR 4x4-12x12 mode). + // Range is [1,BASISU_QUALITY_MAX] (ETC1S) or [1,100] (XUASTC LDR 4x4-12x12) + // For XUASTC LDR, you also need to enable DCT usage, below. 
+ // ETC1S mode or XUASTC LDR 4x4-12x12 .function("setQualityLevel", optional_override([](basis_encoder& self, int quality_level) { assert(quality_level >= -1 && quality_level <= BASISU_QUALITY_MAX); - self.m_params.m_etc1s_quality_level = quality_level; + self.m_params.m_quality_level = quality_level; })) // The compression_level parameter controls the encoder perf vs. file size tradeoff for ETC1S files. // It does not directly control file size vs. quality - see quality_level(). // Default is BASISU_DEFAULT_COMPRESSION_LEVEL, range is [0,BASISU_MAX_COMPRESSION_LEVEL] // ETC1S mode - .function("setCompressionLevel", optional_override([](basis_encoder& self, int comp_level) { - assert(comp_level >= 0 && comp_level <= BASISU_MAX_COMPRESSION_LEVEL); - self.m_params.m_compression_level = comp_level; + .function("setETC1SCompressionLevel", optional_override([](basis_encoder& self, int comp_level) { + assert(comp_level >= 0 && comp_level <= BASISU_MAX_ETC1S_COMPRESSION_LEVEL); + self.m_params.m_etc1s_compression_level = comp_level; })) // setNormalMapMode is the same as the basisu.exe "-normal_map" option. It tunes several codec parameters so compression works better on normal maps. @@ -2526,16 +2917,16 @@ EMSCRIPTEN_BINDINGS(basis_codec) { .function("setKTX2UASTCSupercompression", optional_override([](basis_encoder& self, bool use_zstandard) { self.m_params.m_ktx2_uastc_supercompression = use_zstandard ? basist::KTX2_SS_ZSTANDARD : basist::KTX2_SS_NONE; })) + + // TODO: Expose KTX2 key value array, other options to JavaScript. See encoder/basisu_comp.h. +#endif - // KTX2: Use sRGB transfer func in the file's DFD. Default is FALSE. This should very probably match the "perceptual" setting. + // KTX2/.basis: Use sRGB transfer func in the file's header/DFD. Default is FALSE. This should very probably match the "perceptual" and mipRGB settings. 
// All formats - .function("setKTX2SRGBTransferFunc", optional_override([](basis_encoder& self, bool srgb_transfer_func) { - self.m_params.m_ktx2_srgb_transfer_func = srgb_transfer_func; + .function("setKTX2AndBasisSRGBTransferFunc", optional_override([](basis_encoder& self, bool srgb_transfer_func) { + self.m_params.m_ktx2_and_basis_srgb_transfer_function = srgb_transfer_func; })) - // TODO: Expose KTX2 key value array, other options to JavaScript. See encoder/basisu_comp.h. -#endif - // --- Mip-map options (format independent) // If true mipmaps will be generated from the source images @@ -2621,7 +3012,7 @@ EMSCRIPTEN_BINDINGS(basis_codec) { .function("setRDOUASTCQualityScalar", optional_override([](basis_encoder& self, float rdo_quality) { self.m_params.m_rdo_uastc_ldr_4x4_quality_scalar = rdo_quality; })) - + // Default is BASISU_RDO_UASTC_DICT_SIZE_DEFAULT, range is [BASISU_RDO_UASTC_DICT_SIZE_MIN, BASISU_RDO_UASTC_DICT_SIZE_MAX] // UASTC LDR 4x4 .function("setRDOUASTCDictSize", optional_override([](basis_encoder& self, int dict_size) { @@ -2641,6 +3032,65 @@ EMSCRIPTEN_BINDINGS(basis_codec) { self.m_params.m_rdo_uastc_ldr_4x4_skip_block_rms_thresh = rdo_uastc_skip_block_rms_thresh; })) + // XUASTC/ASTC LDR 4x4-12x12 specific options + + // Enable XUASTC LDR DCT usage. Recommended to also enabled lossy supercompression for more compression. + // DCT quality [1,100] is set via setQualityLevel() above. 
+ .function("setXUASTCLDRUseDCT", optional_override([](basis_encoder& self, bool xuastc_use_dct) { + self.m_params.m_xuastc_ldr_use_dct = xuastc_use_dct; + })) + + // Enables lossy XUASTC LDR supercompression (bounded distortion/windowed RDO) + .function("setXUASTCLDRUseLossySupercompression", optional_override([](basis_encoder& self, bool xuastc_use_lossy_supercompression) { + self.m_params.m_xuastc_ldr_use_lossy_supercompression = xuastc_use_lossy_supercompression; + })) + + // XUASTC LDR: Disable 2-3 subset usage, independent of effort level (for lower quality, for faster transcoding to BC7) + .function("setXUASTCLDRForceDisableSubsets", optional_override([](basis_encoder& self, bool flag) { + self.m_params.m_xuastc_ldr_force_disable_subsets = flag; + })) + + // XUASTC LDR: Disable RGB dual plane usage, indepdnent of effort level (for lower quality, for faster transcoding to BC7) + .function("setXUASTCLDRForceDisableRGBDualPlane", optional_override([](basis_encoder& self, bool flag) { + self.m_params.m_xuastc_ldr_force_disable_rgb_dual_plane = flag; + })) + + // Sets the XUASTC LDR syntax: see the xuastc_ldr_syntax enum. + .function("setXUASTCLDRSyntax", optional_override([](basis_encoder& self, int syntax) { + self.m_params.m_xuastc_ldr_syntax = syntax; + })) + + // Sets the ASTC/XUASTC LDR: compressor effort level [0,10] (encoding time vs. max achievable quality tradeoff, higher=slower) + // This is like setCompressionLevel() above, but for only ASTC/UASTC LDR 4x4-12x12, and has a different range. 
+ .function("setASTCOrXUASTCLDREffortLevel", optional_override([](basis_encoder& self, int effort_level) { + self.m_params.m_xuastc_ldr_effort_level = effort_level; + })) + + // Sets the ASTC/XUASTC LDR channel weights + .function("setASTCOrXUASTCLDRWeights", optional_override([](basis_encoder& self, uint32_t x, uint32_t y, uint32_t z, uint32_t w) { + self.m_params.m_xuastc_ldr_channel_weights[0] = x; + self.m_params.m_xuastc_ldr_channel_weights[1] = y; + self.m_params.m_xuastc_ldr_channel_weights[2] = z; + self.m_params.m_xuastc_ldr_channel_weights[3] = w; + })) + + // Sets XUASTC LDR lossy supercompression (bounded/windows RDO) parameters. + // Must be enabled via setXUASTCLDRUseLossySupercompression(). + .function("setXUASTCLDRBoundedRDOParam", optional_override([](basis_encoder& self, uint32_t idx, float value) { + switch (idx) + { + case 0: self.m_params.m_ls_min_psnr = value; break; + case 1: self.m_params.m_ls_min_alpha_psnr = value; break; + case 2: self.m_params.m_ls_thresh_psnr = value; break; + case 3: self.m_params.m_ls_thresh_alpha_psnr = value; break; + case 4: self.m_params.m_ls_thresh_edge_psnr = value; break; + case 5: self.m_params.m_ls_thresh_edge_alpha_psnr = value; break; + default: + assert(0); + break; + } + })) + // --- Low level options // Disables ETC1S selector RDO diff --git a/external/basis_universal/webgl/transcoder/build/basis_transcoder.js b/external/basis_universal/webgl/transcoder/build/basis_transcoder.js index cc0f78ab93..bb00f4d781 100644 --- a/external/basis_universal/webgl/transcoder/build/basis_transcoder.js +++ b/external/basis_universal/webgl/transcoder/build/basis_transcoder.js @@ -1,2 +1,2 @@ -var BASIS=(()=>{var _scriptName=globalThis.document?.currentScript?.src;return async function(moduleArg={}){var moduleRtn;var Module=moduleArg;var ENVIRONMENT_IS_WEB=!!globalThis.window;var ENVIRONMENT_IS_WORKER=!!globalThis.WorkerGlobalScope;var 
ENVIRONMENT_IS_NODE=globalThis.process?.versions?.node&&globalThis.process?.type!="renderer";var arguments_=[];var thisProgram="./this.program";var quit_=(status,toThrow)=>{throw toThrow};if(typeof __filename!="undefined"){_scriptName=__filename}else if(ENVIRONMENT_IS_WORKER){_scriptName=self.location.href}var scriptDirectory="";function locateFile(path){if(Module["locateFile"]){return Module["locateFile"](path,scriptDirectory)}return scriptDirectory+path}var readAsync,readBinary;if(ENVIRONMENT_IS_NODE){var fs=require("fs");scriptDirectory=__dirname+"/";readBinary=filename=>{filename=isFileURI(filename)?new URL(filename):filename;var ret=fs.readFileSync(filename);return ret};readAsync=async(filename,binary=true)=>{filename=isFileURI(filename)?new URL(filename):filename;var ret=fs.readFileSync(filename,binary?undefined:"utf8");return ret};if(process.argv.length>1){thisProgram=process.argv[1].replace(/\\/g,"/")}arguments_=process.argv.slice(2);quit_=(status,toThrow)=>{process.exitCode=status;throw toThrow}}else if(ENVIRONMENT_IS_WEB||ENVIRONMENT_IS_WORKER){try{scriptDirectory=new URL(".",_scriptName).href}catch{}{if(ENVIRONMENT_IS_WORKER){readBinary=url=>{var xhr=new XMLHttpRequest;xhr.open("GET",url,false);xhr.responseType="arraybuffer";xhr.send(null);return new Uint8Array(xhr.response)}}readAsync=async url=>{if(isFileURI(url)){return new Promise((resolve,reject)=>{var xhr=new XMLHttpRequest;xhr.open("GET",url,true);xhr.responseType="arraybuffer";xhr.onload=()=>{if(xhr.status==200||xhr.status==0&&xhr.response){resolve(xhr.response);return}reject(xhr.status)};xhr.onerror=reject;xhr.send(null)})}var response=await fetch(url,{credentials:"same-origin"});if(response.ok){return response.arrayBuffer()}throw new Error(response.status+" : "+response.url)}}}else{}var out=console.log.bind(console);var err=console.error.bind(console);var wasmBinary;var ABORT=false;var isFileURI=filename=>filename.startsWith("file://");var readyPromiseResolve,readyPromiseReject;var 
HEAP8,HEAPU8,HEAP16,HEAPU16,HEAP32,HEAPU32,HEAPF32,HEAPF64;var HEAP64,HEAPU64;var runtimeInitialized=false;function updateMemoryViews(){var b=wasmMemory.buffer;Module["HEAP8"]=HEAP8=new Int8Array(b);HEAP16=new Int16Array(b);HEAPU8=new Uint8Array(b);HEAPU16=new Uint16Array(b);HEAP32=new Int32Array(b);HEAPU32=new Uint32Array(b);HEAPF32=new Float32Array(b);HEAPF64=new Float64Array(b);HEAP64=new BigInt64Array(b);HEAPU64=new BigUint64Array(b)}function preRun(){if(Module["preRun"]){if(typeof Module["preRun"]=="function")Module["preRun"]=[Module["preRun"]];while(Module["preRun"].length){addOnPreRun(Module["preRun"].shift())}}callRuntimeCallbacks(onPreRuns)}function initRuntime(){runtimeInitialized=true;wasmExports["J"]()}function postRun(){if(Module["postRun"]){if(typeof Module["postRun"]=="function")Module["postRun"]=[Module["postRun"]];while(Module["postRun"].length){addOnPostRun(Module["postRun"].shift())}}callRuntimeCallbacks(onPostRuns)}function abort(what){Module["onAbort"]?.(what);what="Aborted("+what+")";err(what);ABORT=true;what+=". 
Build with -sASSERTIONS for more info.";var e=new WebAssembly.RuntimeError(what);readyPromiseReject?.(e);throw e}var wasmBinaryFile;function findWasmBinary(){return locateFile("basis_transcoder.wasm")}function getBinarySync(file){if(file==wasmBinaryFile&&wasmBinary){return new Uint8Array(wasmBinary)}if(readBinary){return readBinary(file)}throw"both async and sync fetching of the wasm failed"}async function getWasmBinary(binaryFile){if(!wasmBinary){try{var response=await readAsync(binaryFile);return new Uint8Array(response)}catch{}}return getBinarySync(binaryFile)}async function instantiateArrayBuffer(binaryFile,imports){try{var binary=await getWasmBinary(binaryFile);var instance=await WebAssembly.instantiate(binary,imports);return instance}catch(reason){err(`failed to asynchronously prepare wasm: ${reason}`);abort(reason)}}async function instantiateAsync(binary,binaryFile,imports){if(!binary&&!isFileURI(binaryFile)&&!ENVIRONMENT_IS_NODE){try{var response=fetch(binaryFile,{credentials:"same-origin"});var instantiationResult=await WebAssembly.instantiateStreaming(response,imports);return instantiationResult}catch(reason){err(`wasm streaming compile failed: ${reason}`);err("falling back to ArrayBuffer instantiation")}}return instantiateArrayBuffer(binaryFile,imports)}function getWasmImports(){var imports={a:wasmImports};return imports}async function createWasm(){function receiveInstance(instance,module){wasmExports=instance.exports;assignWasmExports(wasmExports);updateMemoryViews();return wasmExports}function receiveInstantiationResult(result){return receiveInstance(result["instance"])}var info=getWasmImports();if(Module["instantiateWasm"]){return new Promise((resolve,reject)=>{Module["instantiateWasm"](info,(inst,mod)=>{resolve(receiveInstance(inst,mod))})})}wasmBinaryFile??=findWasmBinary();var result=await instantiateAsync(wasmBinary,wasmBinaryFile,info);var exports=receiveInstantiationResult(result);return exports}class 
ExitStatus{name="ExitStatus";constructor(status){this.message=`Program terminated with exit(${status})`;this.status=status}}var callRuntimeCallbacks=callbacks=>{while(callbacks.length>0){callbacks.shift()(Module)}};var onPostRuns=[];var addOnPostRun=cb=>onPostRuns.push(cb);var onPreRuns=[];var addOnPreRun=cb=>onPreRuns.push(cb);var noExitRuntime=true;class ExceptionInfo{constructor(excPtr){this.excPtr=excPtr;this.ptr=excPtr-24}set_type(type){HEAPU32[this.ptr+4>>2]=type}get_type(){return HEAPU32[this.ptr+4>>2]}set_destructor(destructor){HEAPU32[this.ptr+8>>2]=destructor}get_destructor(){return HEAPU32[this.ptr+8>>2]}set_caught(caught){caught=caught?1:0;HEAP8[this.ptr+12]=caught}get_caught(){return HEAP8[this.ptr+12]!=0}set_rethrown(rethrown){rethrown=rethrown?1:0;HEAP8[this.ptr+13]=rethrown}get_rethrown(){return HEAP8[this.ptr+13]!=0}init(type,destructor){this.set_adjusted_ptr(0);this.set_type(type);this.set_destructor(destructor)}set_adjusted_ptr(adjustedPtr){HEAPU32[this.ptr+16>>2]=adjustedPtr}get_adjusted_ptr(){return HEAPU32[this.ptr+16>>2]}}var exceptionLast=0;var uncaughtExceptionCount=0;var ___cxa_throw=(ptr,type,destructor)=>{var info=new ExceptionInfo(ptr);info.init(type,destructor);exceptionLast=ptr;uncaughtExceptionCount++;throw exceptionLast};var __abort_js=()=>abort("");var structRegistrations={};var runDestructors=destructors=>{while(destructors.length){var ptr=destructors.pop();var del=destructors.pop();del(ptr)}};function readPointer(pointer){return this.fromWireType(HEAPU32[pointer>>2])}var awaitingDependencies={};var registeredTypes={};var typeDependencies={};var InternalError=class InternalError extends Error{constructor(message){super(message);this.name="InternalError"}};var throwInternalError=message=>{throw new InternalError(message)};var whenDependentTypesAreResolved=(myTypes,dependentTypes,getTypeConverters)=>{myTypes.forEach(type=>typeDependencies[type]=dependentTypes);function onComplete(typeConverters){var 
myTypeConverters=getTypeConverters(typeConverters);if(myTypeConverters.length!==myTypes.length){throwInternalError("Mismatched type converter count")}for(var i=0;i{typeConverters[i]=registeredTypes[dt];++registered;if(registered===unregisteredTypes.length){onComplete(typeConverters)}})}}if(0===unregisteredTypes.length){onComplete(typeConverters)}};var __embind_finalize_value_object=structType=>{var reg=structRegistrations[structType];delete structRegistrations[structType];var rawConstructor=reg.rawConstructor;var rawDestructor=reg.rawDestructor;var fieldRecords=reg.fields;var fieldTypes=fieldRecords.map(field=>field.getterReturnType).concat(fieldRecords.map(field=>field.setterArgumentType));whenDependentTypesAreResolved([structType],fieldTypes,fieldTypes=>{var fields={};for(var[i,field]of fieldRecords.entries()){const getterReturnType=fieldTypes[i];const getter=field.getter;const getterContext=field.getterContext;const setterArgumentType=fieldTypes[i+fieldRecords.length];const setter=field.setter;const setterContext=field.setterContext;fields[field.fieldName]={read:ptr=>getterReturnType.fromWireType(getter(getterContext,ptr)),write:(ptr,o)=>{var destructors=[];setter(setterContext,ptr,setterArgumentType.toWireType(destructors,o));runDestructors(destructors)},optional:getterReturnType.optional}}return[{name:reg.name,fromWireType:ptr=>{var rv={};for(var i in fields){rv[i]=fields[i].read(ptr)}rawDestructor(ptr);return rv},toWireType:(destructors,o)=>{for(var fieldName in fields){if(!(fieldName in o)&&!fields[fieldName].optional){throw new TypeError(`Missing field: "${fieldName}"`)}}var ptr=rawConstructor();for(fieldName in fields){fields[fieldName].write(ptr,o[fieldName])}if(destructors!==null){destructors.push(rawDestructor,ptr)}return ptr},readValueFromPointer:readPointer,destructorFunction:rawDestructor}]})};var AsciiToString=ptr=>{var str="";while(1){var ch=HEAPU8[ptr++];if(!ch)return str;str+=String.fromCharCode(ch)}};var BindingError=class BindingError extends 
Error{constructor(message){super(message);this.name="BindingError"}};var throwBindingError=message=>{throw new BindingError(message)};function sharedRegisterType(rawType,registeredInstance,options={}){var name=registeredInstance.name;if(!rawType){throwBindingError(`type "${name}" must have a positive integer typeid pointer`)}if(registeredTypes.hasOwnProperty(rawType)){if(options.ignoreDuplicateRegistrations){return}else{throwBindingError(`Cannot register type '${name}' twice`)}}registeredTypes[rawType]=registeredInstance;delete typeDependencies[rawType];if(awaitingDependencies.hasOwnProperty(rawType)){var callbacks=awaitingDependencies[rawType];delete awaitingDependencies[rawType];callbacks.forEach(cb=>cb())}}function registerType(rawType,registeredInstance,options={}){return sharedRegisterType(rawType,registeredInstance,options)}var integerReadValueFromPointer=(name,width,signed)=>{switch(width){case 1:return signed?pointer=>HEAP8[pointer]:pointer=>HEAPU8[pointer];case 2:return signed?pointer=>HEAP16[pointer>>1]:pointer=>HEAPU16[pointer>>1];case 4:return signed?pointer=>HEAP32[pointer>>2]:pointer=>HEAPU32[pointer>>2];case 8:return signed?pointer=>HEAP64[pointer>>3]:pointer=>HEAPU64[pointer>>3];default:throw new TypeError(`invalid integer width (${width}): ${name}`)}};var __embind_register_bigint=(primitiveType,name,size,minRange,maxRange)=>{name=AsciiToString(name);const isUnsignedType=minRange===0n;let fromWireType=value=>value;if(isUnsignedType){const bitSize=size*8;fromWireType=value=>BigInt.asUintN(bitSize,value);maxRange=fromWireType(maxRange)}registerType(primitiveType,{name,fromWireType,toWireType:(destructors,value)=>{if(typeof value=="number"){value=BigInt(value)}return value},readValueFromPointer:integerReadValueFromPointer(name,size,!isUnsignedType),destructorFunction:null})};var 
__embind_register_bool=(rawType,name,trueValue,falseValue)=>{name=AsciiToString(name);registerType(rawType,{name,fromWireType:function(wt){return!!wt},toWireType:function(destructors,o){return o?trueValue:falseValue},readValueFromPointer:function(pointer){return this.fromWireType(HEAPU8[pointer])},destructorFunction:null})};var shallowCopyInternalPointer=o=>({count:o.count,deleteScheduled:o.deleteScheduled,preservePointerOnDelete:o.preservePointerOnDelete,ptr:o.ptr,ptrType:o.ptrType,smartPtr:o.smartPtr,smartPtrType:o.smartPtrType});var throwInstanceAlreadyDeleted=obj=>{function getInstanceTypeName(handle){return handle.$$.ptrType.registeredClass.name}throwBindingError(getInstanceTypeName(obj)+" instance already deleted")};var finalizationRegistry=false;var detachFinalizer=handle=>{};var runDestructor=$$=>{if($$.smartPtr){$$.smartPtrType.rawDestructor($$.smartPtr)}else{$$.ptrType.registeredClass.rawDestructor($$.ptr)}};var releaseClassHandle=$$=>{$$.count.value-=1;var toDelete=0===$$.count.value;if(toDelete){runDestructor($$)}};var attachFinalizer=handle=>{if(!globalThis.FinalizationRegistry){attachFinalizer=handle=>handle;return handle}finalizationRegistry=new FinalizationRegistry(info=>{releaseClassHandle(info.$$)});attachFinalizer=handle=>{var $$=handle.$$;var hasSmartPtr=!!$$.smartPtr;if(hasSmartPtr){var info={$$};finalizationRegistry.register(handle,info,handle)}return handle};detachFinalizer=handle=>finalizationRegistry.unregister(handle);return attachFinalizer(handle)};var deletionQueue=[];var flushPendingDeletes=()=>{while(deletionQueue.length){var obj=deletionQueue.pop();obj.$$.deleteScheduled=false;obj["delete"]()}};var delayFunction;var init_ClassHandle=()=>{let proto=ClassHandle.prototype;Object.assign(proto,{isAliasOf(other){if(!(this instanceof ClassHandle)){return false}if(!(other instanceof ClassHandle)){return false}var leftClass=this.$$.ptrType.registeredClass;var left=this.$$.ptr;other.$$=other.$$;var 
rightClass=other.$$.ptrType.registeredClass;var right=other.$$.ptr;while(leftClass.baseClass){left=leftClass.upcast(left);leftClass=leftClass.baseClass}while(rightClass.baseClass){right=rightClass.upcast(right);rightClass=rightClass.baseClass}return leftClass===rightClass&&left===right},clone(){if(!this.$$.ptr){throwInstanceAlreadyDeleted(this)}if(this.$$.preservePointerOnDelete){this.$$.count.value+=1;return this}else{var clone=attachFinalizer(Object.create(Object.getPrototypeOf(this),{$$:{value:shallowCopyInternalPointer(this.$$)}}));clone.$$.count.value+=1;clone.$$.deleteScheduled=false;return clone}},delete(){if(!this.$$.ptr){throwInstanceAlreadyDeleted(this)}if(this.$$.deleteScheduled&&!this.$$.preservePointerOnDelete){throwBindingError("Object already scheduled for deletion")}detachFinalizer(this);releaseClassHandle(this.$$);if(!this.$$.preservePointerOnDelete){this.$$.smartPtr=undefined;this.$$.ptr=undefined}},isDeleted(){return!this.$$.ptr},deleteLater(){if(!this.$$.ptr){throwInstanceAlreadyDeleted(this)}if(this.$$.deleteScheduled&&!this.$$.preservePointerOnDelete){throwBindingError("Object already scheduled for deletion")}deletionQueue.push(this);if(deletionQueue.length===1&&delayFunction){delayFunction(flushPendingDeletes)}this.$$.deleteScheduled=true;return this}});const symbolDispose=Symbol.dispose;if(symbolDispose){proto[symbolDispose]=proto["delete"]}};function ClassHandle(){}var createNamedFunction=(name,func)=>Object.defineProperty(func,"name",{value:name});var registeredPointers={};var ensureOverloadTable=(proto,methodName,humanName)=>{if(undefined===proto[methodName].overloadTable){var prevFunc=proto[methodName];proto[methodName]=function(...args){if(!proto[methodName].overloadTable.hasOwnProperty(args.length)){throwBindingError(`Function '${humanName}' called with an invalid number of arguments (${args.length}) - expects one of (${proto[methodName].overloadTable})!`)}return 
proto[methodName].overloadTable[args.length].apply(this,args)};proto[methodName].overloadTable=[];proto[methodName].overloadTable[prevFunc.argCount]=prevFunc}};var exposePublicSymbol=(name,value,numArguments)=>{if(Module.hasOwnProperty(name)){if(undefined===numArguments||undefined!==Module[name].overloadTable&&undefined!==Module[name].overloadTable[numArguments]){throwBindingError(`Cannot register public name '${name}' twice`)}ensureOverloadTable(Module,name,name);if(Module[name].overloadTable.hasOwnProperty(numArguments)){throwBindingError(`Cannot register multiple overloads of a function with the same number of arguments (${numArguments})!`)}Module[name].overloadTable[numArguments]=value}else{Module[name]=value;Module[name].argCount=numArguments}};var char_0=48;var char_9=57;var makeLegalFunctionName=name=>{name=name.replace(/[^a-zA-Z0-9_]/g,"$");var f=name.charCodeAt(0);if(f>=char_0&&f<=char_9){return`_${name}`}return name};function RegisteredClass(name,constructor,instancePrototype,rawDestructor,baseClass,getActualType,upcast,downcast){this.name=name;this.constructor=constructor;this.instancePrototype=instancePrototype;this.rawDestructor=rawDestructor;this.baseClass=baseClass;this.getActualType=getActualType;this.upcast=upcast;this.downcast=downcast;this.pureVirtualFunctions=[]}var upcastPointer=(ptr,ptrClass,desiredClass)=>{while(ptrClass!==desiredClass){if(!ptrClass.upcast){throwBindingError(`Expected null or instance of ${desiredClass.name}, got an instance of ${ptrClass.name}`)}ptr=ptrClass.upcast(ptr);ptrClass=ptrClass.baseClass}return ptr};var embindRepr=v=>{if(v===null){return"null"}var t=typeof v;if(t==="object"||t==="array"||t==="function"){return v.toString()}else{return""+v}};function constNoSmartPtrRawPointerToWireType(destructors,handle){if(handle===null){if(this.isReference){throwBindingError(`null is not a valid ${this.name}`)}return 0}if(!handle.$$){throwBindingError(`Cannot pass "${embindRepr(handle)}" as a 
${this.name}`)}if(!handle.$$.ptr){throwBindingError(`Cannot pass deleted object as a pointer of type ${this.name}`)}var handleClass=handle.$$.ptrType.registeredClass;var ptr=upcastPointer(handle.$$.ptr,handleClass,this.registeredClass);return ptr}function genericPointerToWireType(destructors,handle){var ptr;if(handle===null){if(this.isReference){throwBindingError(`null is not a valid ${this.name}`)}if(this.isSmartPointer){ptr=this.rawConstructor();if(destructors!==null){destructors.push(this.rawDestructor,ptr)}return ptr}else{return 0}}if(!handle||!handle.$$){throwBindingError(`Cannot pass "${embindRepr(handle)}" as a ${this.name}`)}if(!handle.$$.ptr){throwBindingError(`Cannot pass deleted object as a pointer of type ${this.name}`)}if(!this.isConst&&handle.$$.ptrType.isConst){throwBindingError(`Cannot convert argument of type ${handle.$$.smartPtrType?handle.$$.smartPtrType.name:handle.$$.ptrType.name} to parameter type ${this.name}`)}var handleClass=handle.$$.ptrType.registeredClass;ptr=upcastPointer(handle.$$.ptr,handleClass,this.registeredClass);if(this.isSmartPointer){if(undefined===handle.$$.smartPtr){throwBindingError("Passing raw pointer to smart pointer is illegal")}switch(this.sharingPolicy){case 0:if(handle.$$.smartPtrType===this){ptr=handle.$$.smartPtr}else{throwBindingError(`Cannot convert argument of type ${handle.$$.smartPtrType?handle.$$.smartPtrType.name:handle.$$.ptrType.name} to parameter type ${this.name}`)}break;case 1:ptr=handle.$$.smartPtr;break;case 2:if(handle.$$.smartPtrType===this){ptr=handle.$$.smartPtr}else{var clonedHandle=handle["clone"]();ptr=this.rawShare(ptr,Emval.toHandle(()=>clonedHandle["delete"]()));if(destructors!==null){destructors.push(this.rawDestructor,ptr)}}break;default:throwBindingError("Unsupporting sharing policy")}}return ptr}function nonConstNoSmartPtrRawPointerToWireType(destructors,handle){if(handle===null){if(this.isReference){throwBindingError(`null is not a valid ${this.name}`)}return 
0}if(!handle.$$){throwBindingError(`Cannot pass "${embindRepr(handle)}" as a ${this.name}`)}if(!handle.$$.ptr){throwBindingError(`Cannot pass deleted object as a pointer of type ${this.name}`)}if(handle.$$.ptrType.isConst){throwBindingError(`Cannot convert argument of type ${handle.$$.ptrType.name} to parameter type ${this.name}`)}var handleClass=handle.$$.ptrType.registeredClass;var ptr=upcastPointer(handle.$$.ptr,handleClass,this.registeredClass);return ptr}var downcastPointer=(ptr,ptrClass,desiredClass)=>{if(ptrClass===desiredClass){return ptr}if(undefined===desiredClass.baseClass){return null}var rv=downcastPointer(ptr,ptrClass,desiredClass.baseClass);if(rv===null){return null}return desiredClass.downcast(rv)};var registeredInstances={};var getBasestPointer=(class_,ptr)=>{if(ptr===undefined){throwBindingError("ptr should not be undefined")}while(class_.baseClass){ptr=class_.upcast(ptr);class_=class_.baseClass}return ptr};var getInheritedInstance=(class_,ptr)=>{ptr=getBasestPointer(class_,ptr);return registeredInstances[ptr]};var makeClassHandle=(prototype,record)=>{if(!record.ptrType||!record.ptr){throwInternalError("makeClassHandle requires ptr and ptrType")}var hasSmartPtrType=!!record.smartPtrType;var hasSmartPtr=!!record.smartPtr;if(hasSmartPtrType!==hasSmartPtr){throwInternalError("Both smartPtrType and smartPtr must be specified")}record.count={value:1};return attachFinalizer(Object.create(prototype,{$$:{value:record,writable:true}}))};function RegisteredPointer_fromWireType(ptr){var rawPointer=this.getPointee(ptr);if(!rawPointer){this.destructor(ptr);return null}var registeredInstance=getInheritedInstance(this.registeredClass,rawPointer);if(undefined!==registeredInstance){if(0===registeredInstance.$$.count.value){registeredInstance.$$.ptr=rawPointer;registeredInstance.$$.smartPtr=ptr;return registeredInstance["clone"]()}else{var rv=registeredInstance["clone"]();this.destructor(ptr);return rv}}function makeDefaultHandle(){if(this.isSmartPointer){return 
makeClassHandle(this.registeredClass.instancePrototype,{ptrType:this.pointeeType,ptr:rawPointer,smartPtrType:this,smartPtr:ptr})}else{return makeClassHandle(this.registeredClass.instancePrototype,{ptrType:this,ptr})}}var actualType=this.registeredClass.getActualType(rawPointer);var registeredPointerRecord=registeredPointers[actualType];if(!registeredPointerRecord){return makeDefaultHandle.call(this)}var toType;if(this.isConst){toType=registeredPointerRecord.constPointerType}else{toType=registeredPointerRecord.pointerType}var dp=downcastPointer(rawPointer,this.registeredClass,toType.registeredClass);if(dp===null){return makeDefaultHandle.call(this)}if(this.isSmartPointer){return makeClassHandle(toType.registeredClass.instancePrototype,{ptrType:toType,ptr:dp,smartPtrType:this,smartPtr:ptr})}else{return makeClassHandle(toType.registeredClass.instancePrototype,{ptrType:toType,ptr:dp})}}var init_RegisteredPointer=()=>{Object.assign(RegisteredPointer.prototype,{getPointee(ptr){if(this.rawGetPointee){ptr=this.rawGetPointee(ptr)}return ptr},destructor(ptr){this.rawDestructor?.(ptr)},readValueFromPointer:readPointer,fromWireType:RegisteredPointer_fromWireType})};function RegisteredPointer(name,registeredClass,isReference,isConst,isSmartPointer,pointeeType,sharingPolicy,rawGetPointee,rawConstructor,rawShare,rawDestructor){this.name=name;this.registeredClass=registeredClass;this.isReference=isReference;this.isConst=isConst;this.isSmartPointer=isSmartPointer;this.pointeeType=pointeeType;this.sharingPolicy=sharingPolicy;this.rawGetPointee=rawGetPointee;this.rawConstructor=rawConstructor;this.rawShare=rawShare;this.rawDestructor=rawDestructor;if(!isSmartPointer&®isteredClass.baseClass===undefined){if(isConst){this.toWireType=constNoSmartPtrRawPointerToWireType;this.destructorFunction=null}else{this.toWireType=nonConstNoSmartPtrRawPointerToWireType;this.destructorFunction=null}}else{this.toWireType=genericPointerToWireType}}var 
replacePublicSymbol=(name,value,numArguments)=>{if(!Module.hasOwnProperty(name)){throwInternalError("Replacing nonexistent public symbol")}if(undefined!==Module[name].overloadTable&&undefined!==numArguments){Module[name].overloadTable[numArguments]=value}else{Module[name]=value;Module[name].argCount=numArguments}};var wasmTableMirror=[];var getWasmTableEntry=funcPtr=>{var func=wasmTableMirror[funcPtr];if(!func){wasmTableMirror[funcPtr]=func=wasmTable.get(funcPtr)}return func};var embind__requireFunction=(signature,rawFunction,isAsync=false)=>{signature=AsciiToString(signature);function makeDynCaller(){var rtn=getWasmTableEntry(rawFunction);return rtn}var fp=makeDynCaller();if(typeof fp!="function"){throwBindingError(`unknown function pointer with signature ${signature}: ${rawFunction}`)}return fp};class UnboundTypeError extends Error{}var getTypeName=type=>{var ptr=___getTypeName(type);var rv=AsciiToString(ptr);_free(ptr);return rv};var throwUnboundTypeError=(message,types)=>{var unboundTypes=[];var seen={};function visit(type){if(seen[type]){return}if(registeredTypes[type]){return}if(typeDependencies[type]){typeDependencies[type].forEach(visit);return}unboundTypes.push(type);seen[type]=true}types.forEach(visit);throw new UnboundTypeError(`${message}: `+unboundTypes.map(getTypeName).join([", "]))};var __embind_register_class=(rawType,rawPointerType,rawConstPointerType,baseClassRawType,getActualTypeSignature,getActualType,upcastSignature,upcast,downcastSignature,downcast,name,destructorSignature,rawDestructor)=>{name=AsciiToString(name);getActualType=embind__requireFunction(getActualTypeSignature,getActualType);upcast&&=embind__requireFunction(upcastSignature,upcast);downcast&&=embind__requireFunction(downcastSignature,downcast);rawDestructor=embind__requireFunction(destructorSignature,rawDestructor);var legalFunctionName=makeLegalFunctionName(name);exposePublicSymbol(legalFunctionName,function(){throwUnboundTypeError(`Cannot construct ${name} due to unbound 
types`,[baseClassRawType])});whenDependentTypesAreResolved([rawType,rawPointerType,rawConstPointerType],baseClassRawType?[baseClassRawType]:[],base=>{base=base[0];var baseClass;var basePrototype;if(baseClassRawType){baseClass=base.registeredClass;basePrototype=baseClass.instancePrototype}else{basePrototype=ClassHandle.prototype}var constructor=createNamedFunction(name,function(...args){if(Object.getPrototypeOf(this)!==instancePrototype){throw new BindingError(`Use 'new' to construct ${name}`)}if(undefined===registeredClass.constructor_body){throw new BindingError(`${name} has no accessible constructor`)}var body=registeredClass.constructor_body[args.length];if(undefined===body){throw new BindingError(`Tried to invoke ctor of ${name} with invalid number of parameters (${args.length}) - expected (${Object.keys(registeredClass.constructor_body).toString()}) parameters instead!`)}return body.apply(this,args)});var instancePrototype=Object.create(basePrototype,{constructor:{value:constructor}});constructor.prototype=instancePrototype;var registeredClass=new RegisteredClass(name,constructor,instancePrototype,rawDestructor,baseClass,getActualType,upcast,downcast);if(registeredClass.baseClass){registeredClass.baseClass.__derivedClasses??=[];registeredClass.baseClass.__derivedClasses.push(registeredClass)}var referenceConverter=new RegisteredPointer(name,registeredClass,true,false,false);var pointerConverter=new RegisteredPointer(name+"*",registeredClass,false,false,false);var constPointerConverter=new RegisteredPointer(name+" const*",registeredClass,false,true,false);registeredPointers[rawType]={pointerType:pointerConverter,constPointerType:constPointerConverter};replacePublicSymbol(legalFunctionName,constructor);return[referenceConverter,pointerConverter,constPointerConverter]})};var heap32VectorToArray=(count,firstElement)=>{var array=[];for(var i=0;i>2])}return array};function usesDestructorStack(argTypes){for(var i=1;i{var 
rawArgTypes=heap32VectorToArray(argCount,rawArgTypesAddr);invoker=embind__requireFunction(invokerSignature,invoker);whenDependentTypesAreResolved([],[rawClassType],classType=>{classType=classType[0];var humanName=`constructor ${classType.name}`;if(undefined===classType.registeredClass.constructor_body){classType.registeredClass.constructor_body=[]}if(undefined!==classType.registeredClass.constructor_body[argCount-1]){throw new BindingError(`Cannot register multiple constructors with identical number of parameters (${argCount-1}) for class '${classType.name}'! Overload resolution is currently only performed using the parameter count, not actual type info!`)}classType.registeredClass.constructor_body[argCount-1]=()=>{throwUnboundTypeError(`Cannot construct ${classType.name} due to unbound types`,rawArgTypes)};whenDependentTypesAreResolved([],rawArgTypes,argTypes=>{argTypes.splice(1,0,null);classType.registeredClass.constructor_body[argCount-1]=craftInvokerFunction(humanName,argTypes,null,invoker,rawConstructor);return[]});return[]})};var getFunctionName=signature=>{signature=signature.trim();const argsIndex=signature.indexOf("(");if(argsIndex===-1)return signature;return signature.slice(0,argsIndex)};var __embind_register_class_function=(rawClassType,methodName,argCount,rawArgTypesAddr,invokerSignature,rawInvoker,context,isPureVirtual,isAsync,isNonnullReturn)=>{var rawArgTypes=heap32VectorToArray(argCount,rawArgTypesAddr);methodName=AsciiToString(methodName);methodName=getFunctionName(methodName);rawInvoker=embind__requireFunction(invokerSignature,rawInvoker,isAsync);whenDependentTypesAreResolved([],[rawClassType],classType=>{classType=classType[0];var humanName=`${classType.name}.${methodName}`;if(methodName.startsWith("@@")){methodName=Symbol[methodName.substring(2)]}if(isPureVirtual){classType.registeredClass.pureVirtualFunctions.push(methodName)}function unboundTypesHandler(){throwUnboundTypeError(`Cannot call ${humanName} due to unbound types`,rawArgTypes)}var 
proto=classType.registeredClass.instancePrototype;var method=proto[methodName];if(undefined===method||undefined===method.overloadTable&&method.className!==classType.name&&method.argCount===argCount-2){unboundTypesHandler.argCount=argCount-2;unboundTypesHandler.className=classType.name;proto[methodName]=unboundTypesHandler}else{ensureOverloadTable(proto,methodName,humanName);proto[methodName].overloadTable[argCount-2]=unboundTypesHandler}whenDependentTypesAreResolved([],rawArgTypes,argTypes=>{var memberFunction=craftInvokerFunction(humanName,argTypes,classType,rawInvoker,context,isAsync);if(undefined===proto[methodName].overloadTable){memberFunction.argCount=argCount-2;proto[methodName]=memberFunction}else{proto[methodName].overloadTable[argCount-2]=memberFunction}return[]});return[]})};var __embind_register_constant=(name,type,value)=>{name=AsciiToString(name);whenDependentTypesAreResolved([],[type],type=>{type=type[0];Module[name]=type.fromWireType(value);return[]})};var emval_freelist=[];var emval_handles=[0,1,,1,null,1,true,1,false,1];var __emval_decref=handle=>{if(handle>9&&0===--emval_handles[handle+1]){emval_handles[handle]=undefined;emval_freelist.push(handle)}};var Emval={toValue:handle=>{if(!handle){throwBindingError(`Cannot use deleted val. 
handle = ${handle}`)}return emval_handles[handle]},toHandle:value=>{switch(value){case undefined:return 2;case null:return 4;case true:return 6;case false:return 8;default:{const handle=emval_freelist.pop()||emval_handles.length;emval_handles[handle]=value;emval_handles[handle+1]=1;return handle}}}};var EmValType={name:"emscripten::val",fromWireType:handle=>{var rv=Emval.toValue(handle);__emval_decref(handle);return rv},toWireType:(destructors,value)=>Emval.toHandle(value),readValueFromPointer:readPointer,destructorFunction:null};var __embind_register_emval=rawType=>registerType(rawType,EmValType);var enumReadValueFromPointer=(name,width,signed)=>{switch(width){case 1:return signed?function(pointer){return this.fromWireType(HEAP8[pointer])}:function(pointer){return this.fromWireType(HEAPU8[pointer])};case 2:return signed?function(pointer){return this.fromWireType(HEAP16[pointer>>1])}:function(pointer){return this.fromWireType(HEAPU16[pointer>>1])};case 4:return signed?function(pointer){return this.fromWireType(HEAP32[pointer>>2])}:function(pointer){return this.fromWireType(HEAPU32[pointer>>2])};default:throw new TypeError(`invalid integer width (${width}): ${name}`)}};var __embind_register_enum=(rawType,name,size,isSigned)=>{name=AsciiToString(name);function ctor(){}ctor.values={};registerType(rawType,{name,constructor:ctor,fromWireType:function(c){return this.constructor.values[c]},toWireType:(destructors,c)=>c.value,readValueFromPointer:enumReadValueFromPointer(name,size,isSigned),destructorFunction:null});exposePublicSymbol(name,ctor)};var requireRegisteredType=(rawType,humanName)=>{var impl=registeredTypes[rawType];if(undefined===impl){throwBindingError(`${humanName} has unknown type ${getTypeName(rawType)}`)}return impl};var __embind_register_enum_value=(rawEnumType,name,enumValue)=>{var enumType=requireRegisteredType(rawEnumType,"enum");name=AsciiToString(name);var Enum=enumType.constructor;var 
Value=Object.create(enumType.constructor.prototype,{value:{value:enumValue},constructor:{value:createNamedFunction(`${enumType.name}_${name}`,function(){})}});Enum.values[enumValue]=Value;Enum[name]=Value};var floatReadValueFromPointer=(name,width)=>{switch(width){case 4:return function(pointer){return this.fromWireType(HEAPF32[pointer>>2])};case 8:return function(pointer){return this.fromWireType(HEAPF64[pointer>>3])};default:throw new TypeError(`invalid float width (${width}): ${name}`)}};var __embind_register_float=(rawType,name,size)=>{name=AsciiToString(name);registerType(rawType,{name,fromWireType:value=>value,toWireType:(destructors,value)=>value,readValueFromPointer:floatReadValueFromPointer(name,size),destructorFunction:null})};var __embind_register_function=(name,argCount,rawArgTypesAddr,signature,rawInvoker,fn,isAsync,isNonnullReturn)=>{var argTypes=heap32VectorToArray(argCount,rawArgTypesAddr);name=AsciiToString(name);name=getFunctionName(name);rawInvoker=embind__requireFunction(signature,rawInvoker,isAsync);exposePublicSymbol(name,function(){throwUnboundTypeError(`Cannot call ${name} due to unbound types`,argTypes)},argCount-1);whenDependentTypesAreResolved([],argTypes,argTypes=>{var invokerArgsArray=[argTypes[0],null].concat(argTypes.slice(1));replacePublicSymbol(name,craftInvokerFunction(name,invokerArgsArray,null,rawInvoker,fn,isAsync),argCount-1);return[]})};var __embind_register_integer=(primitiveType,name,size,minRange,maxRange)=>{name=AsciiToString(name);const isUnsignedType=minRange===0;let fromWireType=value=>value;if(isUnsignedType){var bitshift=32-8*size;fromWireType=value=>value<>>bitshift;maxRange=fromWireType(maxRange)}registerType(primitiveType,{name,fromWireType,toWireType:(destructors,value)=>value,readValueFromPointer:integerReadValueFromPointer(name,size,minRange!==0),destructorFunction:null})};var __embind_register_memory_view=(rawType,dataTypeIndex,name)=>{var 
typeMapping=[Int8Array,Uint8Array,Int16Array,Uint16Array,Int32Array,Uint32Array,Float32Array,Float64Array,BigInt64Array,BigUint64Array];var TA=typeMapping[dataTypeIndex];function decodeMemoryView(handle){var size=HEAPU32[handle>>2];var data=HEAPU32[handle+4>>2];return new TA(HEAP8.buffer,data,size)}name=AsciiToString(name);registerType(rawType,{name,fromWireType:decodeMemoryView,readValueFromPointer:decodeMemoryView},{ignoreDuplicateRegistrations:true})};var stringToUTF8Array=(str,heap,outIdx,maxBytesToWrite)=>{if(!(maxBytesToWrite>0))return 0;var startIdx=outIdx;var endIdx=outIdx+maxBytesToWrite-1;for(var i=0;i=endIdx)break;heap[outIdx++]=u}else if(u<=2047){if(outIdx+1>=endIdx)break;heap[outIdx++]=192|u>>6;heap[outIdx++]=128|u&63}else if(u<=65535){if(outIdx+2>=endIdx)break;heap[outIdx++]=224|u>>12;heap[outIdx++]=128|u>>6&63;heap[outIdx++]=128|u&63}else{if(outIdx+3>=endIdx)break;heap[outIdx++]=240|u>>18;heap[outIdx++]=128|u>>12&63;heap[outIdx++]=128|u>>6&63;heap[outIdx++]=128|u&63;i++}}heap[outIdx]=0;return outIdx-startIdx};var stringToUTF8=(str,outPtr,maxBytesToWrite)=>stringToUTF8Array(str,HEAPU8,outPtr,maxBytesToWrite);var lengthBytesUTF8=str=>{var len=0;for(var i=0;i=55296&&c<=57343){len+=4;++i}else{len+=3}}return len};var UTF8Decoder=globalThis.TextDecoder&&new TextDecoder;var findStringEnd=(heapOrArray,idx,maxBytesToRead,ignoreNul)=>{var maxIdx=idx+maxBytesToRead;if(ignoreNul)return maxIdx;while(heapOrArray[idx]&&!(idx>=maxIdx))++idx;return idx};var UTF8ArrayToString=(heapOrArray,idx=0,maxBytesToRead,ignoreNul)=>{var endPtr=findStringEnd(heapOrArray,idx,maxBytesToRead,ignoreNul);if(endPtr-idx>16&&heapOrArray.buffer&&UTF8Decoder){return UTF8Decoder.decode(heapOrArray.subarray(idx,endPtr))}var str="";while(idx>10,56320|ch&1023)}}return str};var UTF8ToString=(ptr,maxBytesToRead,ignoreNul)=>ptr?UTF8ArrayToString(HEAPU8,ptr,maxBytesToRead,ignoreNul):"";var __embind_register_std_string=(rawType,name)=>{name=AsciiToString(name);var 
stdStringIsUTF8=true;registerType(rawType,{name,fromWireType(value){var length=HEAPU32[value>>2];var payload=value+4;var str;if(stdStringIsUTF8){str=UTF8ToString(payload,length,true)}else{str="";for(var i=0;i>2]=length;if(valueIsOfTypeString){if(stdStringIsUTF8){stringToUTF8(value,ptr,length+1)}else{for(var i=0;i255){_free(base);throwBindingError("String has UTF-16 code units that do not fit in 8 bits")}HEAPU8[ptr+i]=charCode}}}else{HEAPU8.set(value,ptr)}if(destructors!==null){destructors.push(_free,base)}return base},readValueFromPointer:readPointer,destructorFunction(ptr){_free(ptr)}})};var UTF16Decoder=globalThis.TextDecoder?new TextDecoder("utf-16le"):undefined;var UTF16ToString=(ptr,maxBytesToRead,ignoreNul)=>{var idx=ptr>>1;var endIdx=findStringEnd(HEAPU16,idx,maxBytesToRead/2,ignoreNul);if(endIdx-idx>16&&UTF16Decoder)return UTF16Decoder.decode(HEAPU16.subarray(idx,endIdx));var str="";for(var i=idx;i{maxBytesToWrite??=2147483647;if(maxBytesToWrite<2)return 0;maxBytesToWrite-=2;var startPtr=outPtr;var numCharsToWrite=maxBytesToWrite>1]=codeUnit;outPtr+=2}HEAP16[outPtr>>1]=0;return outPtr-startPtr};var lengthBytesUTF16=str=>str.length*2;var UTF32ToString=(ptr,maxBytesToRead,ignoreNul)=>{var str="";var startIdx=ptr>>2;for(var i=0;!(i>=maxBytesToRead/4);i++){var utf32=HEAPU32[startIdx+i];if(!utf32&&!ignoreNul)break;str+=String.fromCodePoint(utf32)}return str};var stringToUTF32=(str,outPtr,maxBytesToWrite)=>{maxBytesToWrite??=2147483647;if(maxBytesToWrite<4)return 0;var startPtr=outPtr;var endPtr=startPtr+maxBytesToWrite-4;for(var i=0;i65535){i++}HEAP32[outPtr>>2]=codePoint;outPtr+=4;if(outPtr+4>endPtr)break}HEAP32[outPtr>>2]=0;return outPtr-startPtr};var lengthBytesUTF32=str=>{var len=0;for(var i=0;i65535){i++}len+=4}return len};var __embind_register_std_wstring=(rawType,charSize,name)=>{name=AsciiToString(name);var 
decodeString,encodeString,lengthBytesUTF;if(charSize===2){decodeString=UTF16ToString;encodeString=stringToUTF16;lengthBytesUTF=lengthBytesUTF16}else{decodeString=UTF32ToString;encodeString=stringToUTF32;lengthBytesUTF=lengthBytesUTF32}registerType(rawType,{name,fromWireType:value=>{var length=HEAPU32[value>>2];var str=decodeString(value+4,length*charSize,true);_free(value);return str},toWireType:(destructors,value)=>{if(!(typeof value=="string")){throwBindingError(`Cannot pass non-string to C++ string type ${name}`)}var length=lengthBytesUTF(value);var ptr=_malloc(4+length+charSize);HEAPU32[ptr>>2]=length/charSize;encodeString(value,ptr+4,length+charSize);if(destructors!==null){destructors.push(_free,ptr)}return ptr},readValueFromPointer:readPointer,destructorFunction(ptr){_free(ptr)}})};var __embind_register_value_object=(rawType,name,constructorSignature,rawConstructor,destructorSignature,rawDestructor)=>{structRegistrations[rawType]={name:AsciiToString(name),rawConstructor:embind__requireFunction(constructorSignature,rawConstructor),rawDestructor:embind__requireFunction(destructorSignature,rawDestructor),fields:[]}};var __embind_register_value_object_field=(structType,fieldName,getterReturnType,getterSignature,getter,getterContext,setterArgumentType,setterSignature,setter,setterContext)=>{structRegistrations[structType].fields.push({fieldName:AsciiToString(fieldName),getterReturnType,getter:embind__requireFunction(getterSignature,getter),getterContext,setterArgumentType,setter:embind__requireFunction(setterSignature,setter),setterContext})};var __embind_register_void=(rawType,name)=>{name=AsciiToString(name);registerType(rawType,{isVoid:true,name,fromWireType:()=>undefined,toWireType:(destructors,o)=>undefined})};var emval_methodCallers=[];var emval_addMethodCaller=caller=>{var id=emval_methodCallers.length;emval_methodCallers.push(caller);return id};var emval_lookupTypes=(argCount,argTypes)=>{var a=new Array(argCount);for(var i=0;i>2],`parameter ${i}`)}return 
a};var emval_returnValue=(toReturnWire,destructorsRef,handle)=>{var destructors=[];var result=toReturnWire(destructors,handle);if(destructors.length){HEAPU32[destructorsRef>>2]=Emval.toHandle(destructors)}return result};var emval_symbols={};var getStringOrSymbol=address=>{var symbol=emval_symbols[address];if(symbol===undefined){return AsciiToString(address)}return symbol};var __emval_create_invoker=(argCount,argTypesPtr,kind)=>{var GenericWireTypeSize=8;var[retType,...argTypes]=emval_lookupTypes(argCount,argTypesPtr);var toReturnWire=retType.toWireType.bind(retType);var argFromPtr=argTypes.map(type=>type.readValueFromPointer.bind(type));argCount--;var captures={toValue:Emval.toValue};var args=argFromPtr.map((argFromPtr,i)=>{var captureName=`argFromPtr${i}`;captures[captureName]=argFromPtr;return`${captureName}(args${i?"+"+i*GenericWireTypeSize:""})`});var functionBody;switch(kind){case 0:functionBody="toValue(handle)";break;case 2:functionBody="new (toValue(handle))";break;case 3:functionBody="";break;case 1:captures["getStringOrSymbol"]=getStringOrSymbol;functionBody="toValue(handle)[getStringOrSymbol(methodName)]";break}functionBody+=`(${args})`;if(!retType.isVoid){captures["toReturnWire"]=toReturnWire;captures["emval_returnValue"]=emval_returnValue;functionBody=`return emval_returnValue(toReturnWire, destructorsRef, ${functionBody})`}functionBody=`return function (handle, methodName, destructorsRef, args) {\n ${functionBody}\n }`;var invokerFunction=new Function(Object.keys(captures),functionBody)(...Object.values(captures));var functionName=`methodCaller<(${argTypes.map(t=>t.name)}) => ${retType.name}>`;return emval_addMethodCaller(createNamedFunction(functionName,invokerFunction))};var __emval_get_global=name=>{if(!name){return Emval.toHandle(globalThis)}name=getStringOrSymbol(name);return Emval.toHandle(globalThis[name])};var __emval_get_module_property=name=>{name=getStringOrSymbol(name);return Emval.toHandle(Module[name])};var 
__emval_get_property=(handle,key)=>{handle=Emval.toValue(handle);key=Emval.toValue(key);return Emval.toHandle(handle[key])};var __emval_incref=handle=>{if(handle>9){emval_handles[handle+1]+=1}};var __emval_invoke=(caller,handle,methodName,destructorsRef,args)=>emval_methodCallers[caller](handle,methodName,destructorsRef,args);var __emval_new_cstring=v=>Emval.toHandle(getStringOrSymbol(v));var __emval_run_destructors=handle=>{var destructors=Emval.toValue(handle);runDestructors(destructors);__emval_decref(handle)};var getHeapMax=()=>2147483648;var alignMemory=(size,alignment)=>Math.ceil(size/alignment)*alignment;var growMemory=size=>{var oldHeapSize=wasmMemory.buffer.byteLength;var pages=(size-oldHeapSize+65535)/65536|0;try{wasmMemory.grow(pages);updateMemoryViews();return 1}catch(e){}};var _emscripten_resize_heap=requestedSize=>{var oldSize=HEAPU8.length;requestedSize>>>=0;var maxHeapSize=getHeapMax();if(requestedSize>maxHeapSize){return false}for(var cutDown=1;cutDown<=4;cutDown*=2){var overGrownHeapSize=oldSize*(1+.2/cutDown);overGrownHeapSize=Math.min(overGrownHeapSize,requestedSize+100663296);var newSize=Math.min(maxHeapSize,alignMemory(Math.max(requestedSize,overGrownHeapSize),65536));var replacement=growMemory(newSize);if(replacement){return true}}return false};var _fd_close=fd=>52;var INT53_MAX=9007199254740992;var INT53_MIN=-9007199254740992;var bigintToI53Checked=num=>numINT53_MAX?NaN:Number(num);function _fd_seek(fd,offset,whence,newOffset){offset=bigintToI53Checked(offset);return 70}var printCharBuffers=[null,[],[]];var printChar=(stream,curr)=>{var buffer=printCharBuffers[stream];if(curr===0||curr===10){(stream===1?out:err)(UTF8ArrayToString(buffer));buffer.length=0}else{buffer.push(curr)}};var _fd_write=(fd,iov,iovcnt,pnum)=>{var num=0;for(var i=0;i>2];var len=HEAPU32[iov+4>>2];iov+=8;for(var j=0;j>2]=num;return 
0};init_ClassHandle();init_RegisteredPointer();{if(Module["noExitRuntime"])noExitRuntime=Module["noExitRuntime"];if(Module["print"])out=Module["print"];if(Module["printErr"])err=Module["printErr"];if(Module["wasmBinary"])wasmBinary=Module["wasmBinary"];if(Module["arguments"])arguments_=Module["arguments"];if(Module["thisProgram"])thisProgram=Module["thisProgram"];if(Module["preInit"]){if(typeof Module["preInit"]=="function")Module["preInit"]=[Module["preInit"]];while(Module["preInit"].length>0){Module["preInit"].shift()()}}}var ___getTypeName,_malloc,_free,memory,__indirect_function_table,wasmMemory,wasmTable;function assignWasmExports(wasmExports){___getTypeName=wasmExports["K"];_malloc=wasmExports["L"];_free=wasmExports["N"];memory=wasmMemory=wasmExports["I"];__indirect_function_table=wasmTable=wasmExports["M"]}var wasmImports={H:___cxa_throw,z:__abort_js,r:__embind_finalize_value_object,y:__embind_register_bigint,F:__embind_register_bool,v:__embind_register_class,u:__embind_register_class_constructor,c:__embind_register_class_function,n:__embind_register_constant,D:__embind_register_emval,p:__embind_register_enum,a:__embind_register_enum_value,x:__embind_register_float,i:__embind_register_function,m:__embind_register_integer,j:__embind_register_memory_view,E:__embind_register_std_string,t:__embind_register_std_wstring,s:__embind_register_value_object,d:__embind_register_value_object_field,G:__embind_register_void,h:__emval_create_invoker,b:__emval_decref,q:__emval_get_global,o:__emval_get_module_property,k:__emval_get_property,g:__emval_incref,f:__emval_invoke,l:__emval_new_cstring,e:__emval_run_destructors,A:_emscripten_resize_heap,C:_fd_close,B:_fd_seek,w:_fd_write};function run(){preRun();function 
doRun(){Module["calledRun"]=true;if(ABORT)return;initRuntime();readyPromiseResolve?.(Module);Module["onRuntimeInitialized"]?.();postRun()}if(Module["setStatus"]){Module["setStatus"]("Running...");setTimeout(()=>{setTimeout(()=>Module["setStatus"](""),1);doRun()},1)}else{doRun()}}var wasmExports;wasmExports=await (createWasm());run();if(runtimeInitialized){moduleRtn=Module}else{moduleRtn=new Promise((resolve,reject)=>{readyPromiseResolve=resolve;readyPromiseReject=reject})} +var BASIS=(()=>{var _scriptName=globalThis.document?.currentScript?.src;return async function(moduleArg={}){var moduleRtn;var Module=moduleArg;var ENVIRONMENT_IS_WEB=!!globalThis.window;var ENVIRONMENT_IS_WORKER=!!globalThis.WorkerGlobalScope;var ENVIRONMENT_IS_NODE=globalThis.process?.versions?.node&&globalThis.process?.type!="renderer";var arguments_=[];var thisProgram="./this.program";var quit_=(status,toThrow)=>{throw toThrow};if(typeof __filename!="undefined"){_scriptName=__filename}else if(ENVIRONMENT_IS_WORKER){_scriptName=self.location.href}var scriptDirectory="";function locateFile(path){if(Module["locateFile"]){return Module["locateFile"](path,scriptDirectory)}return scriptDirectory+path}var readAsync,readBinary;if(ENVIRONMENT_IS_NODE){var fs=require("fs");scriptDirectory=__dirname+"/";readBinary=filename=>{filename=isFileURI(filename)?new URL(filename):filename;var ret=fs.readFileSync(filename);return ret};readAsync=async(filename,binary=true)=>{filename=isFileURI(filename)?new URL(filename):filename;var ret=fs.readFileSync(filename,binary?undefined:"utf8");return ret};if(process.argv.length>1){thisProgram=process.argv[1].replace(/\\/g,"/")}arguments_=process.argv.slice(2);quit_=(status,toThrow)=>{process.exitCode=status;throw toThrow}}else if(ENVIRONMENT_IS_WEB||ENVIRONMENT_IS_WORKER){try{scriptDirectory=new URL(".",_scriptName).href}catch{}{if(ENVIRONMENT_IS_WORKER){readBinary=url=>{var xhr=new 
XMLHttpRequest;xhr.open("GET",url,false);xhr.responseType="arraybuffer";xhr.send(null);return new Uint8Array(xhr.response)}}readAsync=async url=>{if(isFileURI(url)){return new Promise((resolve,reject)=>{var xhr=new XMLHttpRequest;xhr.open("GET",url,true);xhr.responseType="arraybuffer";xhr.onload=()=>{if(xhr.status==200||xhr.status==0&&xhr.response){resolve(xhr.response);return}reject(xhr.status)};xhr.onerror=reject;xhr.send(null)})}var response=await fetch(url,{credentials:"same-origin"});if(response.ok){return response.arrayBuffer()}throw new Error(response.status+" : "+response.url)}}}else{}var out=console.log.bind(console);var err=console.error.bind(console);var wasmBinary;var ABORT=false;var isFileURI=filename=>filename.startsWith("file://");var readyPromiseResolve,readyPromiseReject;var HEAP8,HEAPU8,HEAP16,HEAPU16,HEAP32,HEAPU32,HEAPF32,HEAPF64;var HEAP64,HEAPU64;var runtimeInitialized=false;function updateMemoryViews(){var b=wasmMemory.buffer;Module["HEAP8"]=HEAP8=new Int8Array(b);HEAP16=new Int16Array(b);HEAPU8=new Uint8Array(b);HEAPU16=new Uint16Array(b);HEAP32=new Int32Array(b);HEAPU32=new Uint32Array(b);HEAPF32=new Float32Array(b);HEAPF64=new Float64Array(b);HEAP64=new BigInt64Array(b);HEAPU64=new BigUint64Array(b)}function preRun(){if(Module["preRun"]){if(typeof Module["preRun"]=="function")Module["preRun"]=[Module["preRun"]];while(Module["preRun"].length){addOnPreRun(Module["preRun"].shift())}}callRuntimeCallbacks(onPreRuns)}function initRuntime(){runtimeInitialized=true;wasmExports["H"]()}function postRun(){if(Module["postRun"]){if(typeof Module["postRun"]=="function")Module["postRun"]=[Module["postRun"]];while(Module["postRun"].length){addOnPostRun(Module["postRun"].shift())}}callRuntimeCallbacks(onPostRuns)}function abort(what){Module["onAbort"]?.(what);what="Aborted("+what+")";err(what);ABORT=true;what+=". 
Build with -sASSERTIONS for more info.";var e=new WebAssembly.RuntimeError(what);readyPromiseReject?.(e);throw e}var wasmBinaryFile;function findWasmBinary(){return locateFile("basis_transcoder.wasm")}function getBinarySync(file){if(file==wasmBinaryFile&&wasmBinary){return new Uint8Array(wasmBinary)}if(readBinary){return readBinary(file)}throw"both async and sync fetching of the wasm failed"}async function getWasmBinary(binaryFile){if(!wasmBinary){try{var response=await readAsync(binaryFile);return new Uint8Array(response)}catch{}}return getBinarySync(binaryFile)}async function instantiateArrayBuffer(binaryFile,imports){try{var binary=await getWasmBinary(binaryFile);var instance=await WebAssembly.instantiate(binary,imports);return instance}catch(reason){err(`failed to asynchronously prepare wasm: ${reason}`);abort(reason)}}async function instantiateAsync(binary,binaryFile,imports){if(!binary&&!isFileURI(binaryFile)&&!ENVIRONMENT_IS_NODE){try{var response=fetch(binaryFile,{credentials:"same-origin"});var instantiationResult=await WebAssembly.instantiateStreaming(response,imports);return instantiationResult}catch(reason){err(`wasm streaming compile failed: ${reason}`);err("falling back to ArrayBuffer instantiation")}}return instantiateArrayBuffer(binaryFile,imports)}function getWasmImports(){var imports={a:wasmImports};return imports}async function createWasm(){function receiveInstance(instance,module){wasmExports=instance.exports;assignWasmExports(wasmExports);updateMemoryViews();return wasmExports}function receiveInstantiationResult(result){return receiveInstance(result["instance"])}var info=getWasmImports();if(Module["instantiateWasm"]){return new Promise((resolve,reject)=>{Module["instantiateWasm"](info,(inst,mod)=>{resolve(receiveInstance(inst,mod))})})}wasmBinaryFile??=findWasmBinary();var result=await instantiateAsync(wasmBinary,wasmBinaryFile,info);var exports=receiveInstantiationResult(result);return exports}class 
ExitStatus{name="ExitStatus";constructor(status){this.message=`Program terminated with exit(${status})`;this.status=status}}var callRuntimeCallbacks=callbacks=>{while(callbacks.length>0){callbacks.shift()(Module)}};var onPostRuns=[];var addOnPostRun=cb=>onPostRuns.push(cb);var onPreRuns=[];var addOnPreRun=cb=>onPreRuns.push(cb);var noExitRuntime=true;class ExceptionInfo{constructor(excPtr){this.excPtr=excPtr;this.ptr=excPtr-24}set_type(type){HEAPU32[this.ptr+4>>2]=type}get_type(){return HEAPU32[this.ptr+4>>2]}set_destructor(destructor){HEAPU32[this.ptr+8>>2]=destructor}get_destructor(){return HEAPU32[this.ptr+8>>2]}set_caught(caught){caught=caught?1:0;HEAP8[this.ptr+12]=caught}get_caught(){return HEAP8[this.ptr+12]!=0}set_rethrown(rethrown){rethrown=rethrown?1:0;HEAP8[this.ptr+13]=rethrown}get_rethrown(){return HEAP8[this.ptr+13]!=0}init(type,destructor){this.set_adjusted_ptr(0);this.set_type(type);this.set_destructor(destructor)}set_adjusted_ptr(adjustedPtr){HEAPU32[this.ptr+16>>2]=adjustedPtr}get_adjusted_ptr(){return HEAPU32[this.ptr+16>>2]}}var exceptionLast=0;var uncaughtExceptionCount=0;var ___cxa_throw=(ptr,type,destructor)=>{var info=new ExceptionInfo(ptr);info.init(type,destructor);exceptionLast=ptr;uncaughtExceptionCount++;throw exceptionLast};var __abort_js=()=>abort("");var structRegistrations={};var runDestructors=destructors=>{while(destructors.length){var ptr=destructors.pop();var del=destructors.pop();del(ptr)}};function readPointer(pointer){return this.fromWireType(HEAPU32[pointer>>2])}var awaitingDependencies={};var registeredTypes={};var typeDependencies={};var InternalError=class InternalError extends Error{constructor(message){super(message);this.name="InternalError"}};var throwInternalError=message=>{throw new InternalError(message)};var whenDependentTypesAreResolved=(myTypes,dependentTypes,getTypeConverters)=>{myTypes.forEach(type=>typeDependencies[type]=dependentTypes);function onComplete(typeConverters){var 
myTypeConverters=getTypeConverters(typeConverters);if(myTypeConverters.length!==myTypes.length){throwInternalError("Mismatched type converter count")}for(var i=0;i{typeConverters[i]=registeredTypes[dt];++registered;if(registered===unregisteredTypes.length){onComplete(typeConverters)}})}}if(0===unregisteredTypes.length){onComplete(typeConverters)}};var __embind_finalize_value_object=structType=>{var reg=structRegistrations[structType];delete structRegistrations[structType];var rawConstructor=reg.rawConstructor;var rawDestructor=reg.rawDestructor;var fieldRecords=reg.fields;var fieldTypes=fieldRecords.map(field=>field.getterReturnType).concat(fieldRecords.map(field=>field.setterArgumentType));whenDependentTypesAreResolved([structType],fieldTypes,fieldTypes=>{var fields={};for(var[i,field]of fieldRecords.entries()){const getterReturnType=fieldTypes[i];const getter=field.getter;const getterContext=field.getterContext;const setterArgumentType=fieldTypes[i+fieldRecords.length];const setter=field.setter;const setterContext=field.setterContext;fields[field.fieldName]={read:ptr=>getterReturnType.fromWireType(getter(getterContext,ptr)),write:(ptr,o)=>{var destructors=[];setter(setterContext,ptr,setterArgumentType.toWireType(destructors,o));runDestructors(destructors)},optional:getterReturnType.optional}}return[{name:reg.name,fromWireType:ptr=>{var rv={};for(var i in fields){rv[i]=fields[i].read(ptr)}rawDestructor(ptr);return rv},toWireType:(destructors,o)=>{for(var fieldName in fields){if(!(fieldName in o)&&!fields[fieldName].optional){throw new TypeError(`Missing field: "${fieldName}"`)}}var ptr=rawConstructor();for(fieldName in fields){fields[fieldName].write(ptr,o[fieldName])}if(destructors!==null){destructors.push(rawDestructor,ptr)}return ptr},readValueFromPointer:readPointer,destructorFunction:rawDestructor}]})};var AsciiToString=ptr=>{var str="";while(1){var ch=HEAPU8[ptr++];if(!ch)return str;str+=String.fromCharCode(ch)}};var BindingError=class BindingError extends 
Error{constructor(message){super(message);this.name="BindingError"}};var throwBindingError=message=>{throw new BindingError(message)};function sharedRegisterType(rawType,registeredInstance,options={}){var name=registeredInstance.name;if(!rawType){throwBindingError(`type "${name}" must have a positive integer typeid pointer`)}if(registeredTypes.hasOwnProperty(rawType)){if(options.ignoreDuplicateRegistrations){return}else{throwBindingError(`Cannot register type '${name}' twice`)}}registeredTypes[rawType]=registeredInstance;delete typeDependencies[rawType];if(awaitingDependencies.hasOwnProperty(rawType)){var callbacks=awaitingDependencies[rawType];delete awaitingDependencies[rawType];callbacks.forEach(cb=>cb())}}function registerType(rawType,registeredInstance,options={}){return sharedRegisterType(rawType,registeredInstance,options)}var integerReadValueFromPointer=(name,width,signed)=>{switch(width){case 1:return signed?pointer=>HEAP8[pointer]:pointer=>HEAPU8[pointer];case 2:return signed?pointer=>HEAP16[pointer>>1]:pointer=>HEAPU16[pointer>>1];case 4:return signed?pointer=>HEAP32[pointer>>2]:pointer=>HEAPU32[pointer>>2];case 8:return signed?pointer=>HEAP64[pointer>>3]:pointer=>HEAPU64[pointer>>3];default:throw new TypeError(`invalid integer width (${width}): ${name}`)}};var __embind_register_bigint=(primitiveType,name,size,minRange,maxRange)=>{name=AsciiToString(name);const isUnsignedType=minRange===0n;let fromWireType=value=>value;if(isUnsignedType){const bitSize=size*8;fromWireType=value=>BigInt.asUintN(bitSize,value);maxRange=fromWireType(maxRange)}registerType(primitiveType,{name,fromWireType,toWireType:(destructors,value)=>{if(typeof value=="number"){value=BigInt(value)}return value},readValueFromPointer:integerReadValueFromPointer(name,size,!isUnsignedType),destructorFunction:null})};var 
__embind_register_bool=(rawType,name,trueValue,falseValue)=>{name=AsciiToString(name);registerType(rawType,{name,fromWireType:function(wt){return!!wt},toWireType:function(destructors,o){return o?trueValue:falseValue},readValueFromPointer:function(pointer){return this.fromWireType(HEAPU8[pointer])},destructorFunction:null})};var shallowCopyInternalPointer=o=>({count:o.count,deleteScheduled:o.deleteScheduled,preservePointerOnDelete:o.preservePointerOnDelete,ptr:o.ptr,ptrType:o.ptrType,smartPtr:o.smartPtr,smartPtrType:o.smartPtrType});var throwInstanceAlreadyDeleted=obj=>{function getInstanceTypeName(handle){return handle.$$.ptrType.registeredClass.name}throwBindingError(getInstanceTypeName(obj)+" instance already deleted")};var finalizationRegistry=false;var detachFinalizer=handle=>{};var runDestructor=$$=>{if($$.smartPtr){$$.smartPtrType.rawDestructor($$.smartPtr)}else{$$.ptrType.registeredClass.rawDestructor($$.ptr)}};var releaseClassHandle=$$=>{$$.count.value-=1;var toDelete=0===$$.count.value;if(toDelete){runDestructor($$)}};var attachFinalizer=handle=>{if(!globalThis.FinalizationRegistry){attachFinalizer=handle=>handle;return handle}finalizationRegistry=new FinalizationRegistry(info=>{releaseClassHandle(info.$$)});attachFinalizer=handle=>{var $$=handle.$$;var hasSmartPtr=!!$$.smartPtr;if(hasSmartPtr){var info={$$};finalizationRegistry.register(handle,info,handle)}return handle};detachFinalizer=handle=>finalizationRegistry.unregister(handle);return attachFinalizer(handle)};var deletionQueue=[];var flushPendingDeletes=()=>{while(deletionQueue.length){var obj=deletionQueue.pop();obj.$$.deleteScheduled=false;obj["delete"]()}};var delayFunction;var init_ClassHandle=()=>{let proto=ClassHandle.prototype;Object.assign(proto,{isAliasOf(other){if(!(this instanceof ClassHandle)){return false}if(!(other instanceof ClassHandle)){return false}var leftClass=this.$$.ptrType.registeredClass;var left=this.$$.ptr;other.$$=other.$$;var 
rightClass=other.$$.ptrType.registeredClass;var right=other.$$.ptr;while(leftClass.baseClass){left=leftClass.upcast(left);leftClass=leftClass.baseClass}while(rightClass.baseClass){right=rightClass.upcast(right);rightClass=rightClass.baseClass}return leftClass===rightClass&&left===right},clone(){if(!this.$$.ptr){throwInstanceAlreadyDeleted(this)}if(this.$$.preservePointerOnDelete){this.$$.count.value+=1;return this}else{var clone=attachFinalizer(Object.create(Object.getPrototypeOf(this),{$$:{value:shallowCopyInternalPointer(this.$$)}}));clone.$$.count.value+=1;clone.$$.deleteScheduled=false;return clone}},delete(){if(!this.$$.ptr){throwInstanceAlreadyDeleted(this)}if(this.$$.deleteScheduled&&!this.$$.preservePointerOnDelete){throwBindingError("Object already scheduled for deletion")}detachFinalizer(this);releaseClassHandle(this.$$);if(!this.$$.preservePointerOnDelete){this.$$.smartPtr=undefined;this.$$.ptr=undefined}},isDeleted(){return!this.$$.ptr},deleteLater(){if(!this.$$.ptr){throwInstanceAlreadyDeleted(this)}if(this.$$.deleteScheduled&&!this.$$.preservePointerOnDelete){throwBindingError("Object already scheduled for deletion")}deletionQueue.push(this);if(deletionQueue.length===1&&delayFunction){delayFunction(flushPendingDeletes)}this.$$.deleteScheduled=true;return this}});const symbolDispose=Symbol.dispose;if(symbolDispose){proto[symbolDispose]=proto["delete"]}};function ClassHandle(){}var createNamedFunction=(name,func)=>Object.defineProperty(func,"name",{value:name});var registeredPointers={};var ensureOverloadTable=(proto,methodName,humanName)=>{if(undefined===proto[methodName].overloadTable){var prevFunc=proto[methodName];proto[methodName]=function(...args){if(!proto[methodName].overloadTable.hasOwnProperty(args.length)){throwBindingError(`Function '${humanName}' called with an invalid number of arguments (${args.length}) - expects one of (${proto[methodName].overloadTable})!`)}return 
proto[methodName].overloadTable[args.length].apply(this,args)};proto[methodName].overloadTable=[];proto[methodName].overloadTable[prevFunc.argCount]=prevFunc}};var exposePublicSymbol=(name,value,numArguments)=>{if(Module.hasOwnProperty(name)){if(undefined===numArguments||undefined!==Module[name].overloadTable&&undefined!==Module[name].overloadTable[numArguments]){throwBindingError(`Cannot register public name '${name}' twice`)}ensureOverloadTable(Module,name,name);if(Module[name].overloadTable.hasOwnProperty(numArguments)){throwBindingError(`Cannot register multiple overloads of a function with the same number of arguments (${numArguments})!`)}Module[name].overloadTable[numArguments]=value}else{Module[name]=value;Module[name].argCount=numArguments}};var char_0=48;var char_9=57;var makeLegalFunctionName=name=>{name=name.replace(/[^a-zA-Z0-9_]/g,"$");var f=name.charCodeAt(0);if(f>=char_0&&f<=char_9){return`_${name}`}return name};function RegisteredClass(name,constructor,instancePrototype,rawDestructor,baseClass,getActualType,upcast,downcast){this.name=name;this.constructor=constructor;this.instancePrototype=instancePrototype;this.rawDestructor=rawDestructor;this.baseClass=baseClass;this.getActualType=getActualType;this.upcast=upcast;this.downcast=downcast;this.pureVirtualFunctions=[]}var upcastPointer=(ptr,ptrClass,desiredClass)=>{while(ptrClass!==desiredClass){if(!ptrClass.upcast){throwBindingError(`Expected null or instance of ${desiredClass.name}, got an instance of ${ptrClass.name}`)}ptr=ptrClass.upcast(ptr);ptrClass=ptrClass.baseClass}return ptr};var embindRepr=v=>{if(v===null){return"null"}var t=typeof v;if(t==="object"||t==="array"||t==="function"){return v.toString()}else{return""+v}};function constNoSmartPtrRawPointerToWireType(destructors,handle){if(handle===null){if(this.isReference){throwBindingError(`null is not a valid ${this.name}`)}return 0}if(!handle.$$){throwBindingError(`Cannot pass "${embindRepr(handle)}" as a 
${this.name}`)}if(!handle.$$.ptr){throwBindingError(`Cannot pass deleted object as a pointer of type ${this.name}`)}var handleClass=handle.$$.ptrType.registeredClass;var ptr=upcastPointer(handle.$$.ptr,handleClass,this.registeredClass);return ptr}function genericPointerToWireType(destructors,handle){var ptr;if(handle===null){if(this.isReference){throwBindingError(`null is not a valid ${this.name}`)}if(this.isSmartPointer){ptr=this.rawConstructor();if(destructors!==null){destructors.push(this.rawDestructor,ptr)}return ptr}else{return 0}}if(!handle||!handle.$$){throwBindingError(`Cannot pass "${embindRepr(handle)}" as a ${this.name}`)}if(!handle.$$.ptr){throwBindingError(`Cannot pass deleted object as a pointer of type ${this.name}`)}if(!this.isConst&&handle.$$.ptrType.isConst){throwBindingError(`Cannot convert argument of type ${handle.$$.smartPtrType?handle.$$.smartPtrType.name:handle.$$.ptrType.name} to parameter type ${this.name}`)}var handleClass=handle.$$.ptrType.registeredClass;ptr=upcastPointer(handle.$$.ptr,handleClass,this.registeredClass);if(this.isSmartPointer){if(undefined===handle.$$.smartPtr){throwBindingError("Passing raw pointer to smart pointer is illegal")}switch(this.sharingPolicy){case 0:if(handle.$$.smartPtrType===this){ptr=handle.$$.smartPtr}else{throwBindingError(`Cannot convert argument of type ${handle.$$.smartPtrType?handle.$$.smartPtrType.name:handle.$$.ptrType.name} to parameter type ${this.name}`)}break;case 1:ptr=handle.$$.smartPtr;break;case 2:if(handle.$$.smartPtrType===this){ptr=handle.$$.smartPtr}else{var clonedHandle=handle["clone"]();ptr=this.rawShare(ptr,Emval.toHandle(()=>clonedHandle["delete"]()));if(destructors!==null){destructors.push(this.rawDestructor,ptr)}}break;default:throwBindingError("Unsupporting sharing policy")}}return ptr}function nonConstNoSmartPtrRawPointerToWireType(destructors,handle){if(handle===null){if(this.isReference){throwBindingError(`null is not a valid ${this.name}`)}return 
0}if(!handle.$$){throwBindingError(`Cannot pass "${embindRepr(handle)}" as a ${this.name}`)}if(!handle.$$.ptr){throwBindingError(`Cannot pass deleted object as a pointer of type ${this.name}`)}if(handle.$$.ptrType.isConst){throwBindingError(`Cannot convert argument of type ${handle.$$.ptrType.name} to parameter type ${this.name}`)}var handleClass=handle.$$.ptrType.registeredClass;var ptr=upcastPointer(handle.$$.ptr,handleClass,this.registeredClass);return ptr}var downcastPointer=(ptr,ptrClass,desiredClass)=>{if(ptrClass===desiredClass){return ptr}if(undefined===desiredClass.baseClass){return null}var rv=downcastPointer(ptr,ptrClass,desiredClass.baseClass);if(rv===null){return null}return desiredClass.downcast(rv)};var registeredInstances={};var getBasestPointer=(class_,ptr)=>{if(ptr===undefined){throwBindingError("ptr should not be undefined")}while(class_.baseClass){ptr=class_.upcast(ptr);class_=class_.baseClass}return ptr};var getInheritedInstance=(class_,ptr)=>{ptr=getBasestPointer(class_,ptr);return registeredInstances[ptr]};var makeClassHandle=(prototype,record)=>{if(!record.ptrType||!record.ptr){throwInternalError("makeClassHandle requires ptr and ptrType")}var hasSmartPtrType=!!record.smartPtrType;var hasSmartPtr=!!record.smartPtr;if(hasSmartPtrType!==hasSmartPtr){throwInternalError("Both smartPtrType and smartPtr must be specified")}record.count={value:1};return attachFinalizer(Object.create(prototype,{$$:{value:record,writable:true}}))};function RegisteredPointer_fromWireType(ptr){var rawPointer=this.getPointee(ptr);if(!rawPointer){this.destructor(ptr);return null}var registeredInstance=getInheritedInstance(this.registeredClass,rawPointer);if(undefined!==registeredInstance){if(0===registeredInstance.$$.count.value){registeredInstance.$$.ptr=rawPointer;registeredInstance.$$.smartPtr=ptr;return registeredInstance["clone"]()}else{var rv=registeredInstance["clone"]();this.destructor(ptr);return rv}}function makeDefaultHandle(){if(this.isSmartPointer){return 
makeClassHandle(this.registeredClass.instancePrototype,{ptrType:this.pointeeType,ptr:rawPointer,smartPtrType:this,smartPtr:ptr})}else{return makeClassHandle(this.registeredClass.instancePrototype,{ptrType:this,ptr})}}var actualType=this.registeredClass.getActualType(rawPointer);var registeredPointerRecord=registeredPointers[actualType];if(!registeredPointerRecord){return makeDefaultHandle.call(this)}var toType;if(this.isConst){toType=registeredPointerRecord.constPointerType}else{toType=registeredPointerRecord.pointerType}var dp=downcastPointer(rawPointer,this.registeredClass,toType.registeredClass);if(dp===null){return makeDefaultHandle.call(this)}if(this.isSmartPointer){return makeClassHandle(toType.registeredClass.instancePrototype,{ptrType:toType,ptr:dp,smartPtrType:this,smartPtr:ptr})}else{return makeClassHandle(toType.registeredClass.instancePrototype,{ptrType:toType,ptr:dp})}}var init_RegisteredPointer=()=>{Object.assign(RegisteredPointer.prototype,{getPointee(ptr){if(this.rawGetPointee){ptr=this.rawGetPointee(ptr)}return ptr},destructor(ptr){this.rawDestructor?.(ptr)},readValueFromPointer:readPointer,fromWireType:RegisteredPointer_fromWireType})};function RegisteredPointer(name,registeredClass,isReference,isConst,isSmartPointer,pointeeType,sharingPolicy,rawGetPointee,rawConstructor,rawShare,rawDestructor){this.name=name;this.registeredClass=registeredClass;this.isReference=isReference;this.isConst=isConst;this.isSmartPointer=isSmartPointer;this.pointeeType=pointeeType;this.sharingPolicy=sharingPolicy;this.rawGetPointee=rawGetPointee;this.rawConstructor=rawConstructor;this.rawShare=rawShare;this.rawDestructor=rawDestructor;if(!isSmartPointer&®isteredClass.baseClass===undefined){if(isConst){this.toWireType=constNoSmartPtrRawPointerToWireType;this.destructorFunction=null}else{this.toWireType=nonConstNoSmartPtrRawPointerToWireType;this.destructorFunction=null}}else{this.toWireType=genericPointerToWireType}}var 
replacePublicSymbol=(name,value,numArguments)=>{if(!Module.hasOwnProperty(name)){throwInternalError("Replacing nonexistent public symbol")}if(undefined!==Module[name].overloadTable&&undefined!==numArguments){Module[name].overloadTable[numArguments]=value}else{Module[name]=value;Module[name].argCount=numArguments}};var wasmTableMirror=[];var getWasmTableEntry=funcPtr=>{var func=wasmTableMirror[funcPtr];if(!func){wasmTableMirror[funcPtr]=func=wasmTable.get(funcPtr)}return func};var embind__requireFunction=(signature,rawFunction,isAsync=false)=>{signature=AsciiToString(signature);function makeDynCaller(){var rtn=getWasmTableEntry(rawFunction);return rtn}var fp=makeDynCaller();if(typeof fp!="function"){throwBindingError(`unknown function pointer with signature ${signature}: ${rawFunction}`)}return fp};class UnboundTypeError extends Error{}var getTypeName=type=>{var ptr=___getTypeName(type);var rv=AsciiToString(ptr);_free(ptr);return rv};var throwUnboundTypeError=(message,types)=>{var unboundTypes=[];var seen={};function visit(type){if(seen[type]){return}if(registeredTypes[type]){return}if(typeDependencies[type]){typeDependencies[type].forEach(visit);return}unboundTypes.push(type);seen[type]=true}types.forEach(visit);throw new UnboundTypeError(`${message}: `+unboundTypes.map(getTypeName).join([", "]))};var __embind_register_class=(rawType,rawPointerType,rawConstPointerType,baseClassRawType,getActualTypeSignature,getActualType,upcastSignature,upcast,downcastSignature,downcast,name,destructorSignature,rawDestructor)=>{name=AsciiToString(name);getActualType=embind__requireFunction(getActualTypeSignature,getActualType);upcast&&=embind__requireFunction(upcastSignature,upcast);downcast&&=embind__requireFunction(downcastSignature,downcast);rawDestructor=embind__requireFunction(destructorSignature,rawDestructor);var legalFunctionName=makeLegalFunctionName(name);exposePublicSymbol(legalFunctionName,function(){throwUnboundTypeError(`Cannot construct ${name} due to unbound 
types`,[baseClassRawType])});whenDependentTypesAreResolved([rawType,rawPointerType,rawConstPointerType],baseClassRawType?[baseClassRawType]:[],base=>{base=base[0];var baseClass;var basePrototype;if(baseClassRawType){baseClass=base.registeredClass;basePrototype=baseClass.instancePrototype}else{basePrototype=ClassHandle.prototype}var constructor=createNamedFunction(name,function(...args){if(Object.getPrototypeOf(this)!==instancePrototype){throw new BindingError(`Use 'new' to construct ${name}`)}if(undefined===registeredClass.constructor_body){throw new BindingError(`${name} has no accessible constructor`)}var body=registeredClass.constructor_body[args.length];if(undefined===body){throw new BindingError(`Tried to invoke ctor of ${name} with invalid number of parameters (${args.length}) - expected (${Object.keys(registeredClass.constructor_body).toString()}) parameters instead!`)}return body.apply(this,args)});var instancePrototype=Object.create(basePrototype,{constructor:{value:constructor}});constructor.prototype=instancePrototype;var registeredClass=new RegisteredClass(name,constructor,instancePrototype,rawDestructor,baseClass,getActualType,upcast,downcast);if(registeredClass.baseClass){registeredClass.baseClass.__derivedClasses??=[];registeredClass.baseClass.__derivedClasses.push(registeredClass)}var referenceConverter=new RegisteredPointer(name,registeredClass,true,false,false);var pointerConverter=new RegisteredPointer(name+"*",registeredClass,false,false,false);var constPointerConverter=new RegisteredPointer(name+" const*",registeredClass,false,true,false);registeredPointers[rawType]={pointerType:pointerConverter,constPointerType:constPointerConverter};replacePublicSymbol(legalFunctionName,constructor);return[referenceConverter,pointerConverter,constPointerConverter]})};var heap32VectorToArray=(count,firstElement)=>{var array=[];for(var i=0;i>2])}return array};function usesDestructorStack(argTypes){for(var i=1;i{var 
rawArgTypes=heap32VectorToArray(argCount,rawArgTypesAddr);invoker=embind__requireFunction(invokerSignature,invoker);whenDependentTypesAreResolved([],[rawClassType],classType=>{classType=classType[0];var humanName=`constructor ${classType.name}`;if(undefined===classType.registeredClass.constructor_body){classType.registeredClass.constructor_body=[]}if(undefined!==classType.registeredClass.constructor_body[argCount-1]){throw new BindingError(`Cannot register multiple constructors with identical number of parameters (${argCount-1}) for class '${classType.name}'! Overload resolution is currently only performed using the parameter count, not actual type info!`)}classType.registeredClass.constructor_body[argCount-1]=()=>{throwUnboundTypeError(`Cannot construct ${classType.name} due to unbound types`,rawArgTypes)};whenDependentTypesAreResolved([],rawArgTypes,argTypes=>{argTypes.splice(1,0,null);classType.registeredClass.constructor_body[argCount-1]=craftInvokerFunction(humanName,argTypes,null,invoker,rawConstructor);return[]});return[]})};var getFunctionName=signature=>{signature=signature.trim();const argsIndex=signature.indexOf("(");if(argsIndex===-1)return signature;return signature.slice(0,argsIndex)};var __embind_register_class_function=(rawClassType,methodName,argCount,rawArgTypesAddr,invokerSignature,rawInvoker,context,isPureVirtual,isAsync,isNonnullReturn)=>{var rawArgTypes=heap32VectorToArray(argCount,rawArgTypesAddr);methodName=AsciiToString(methodName);methodName=getFunctionName(methodName);rawInvoker=embind__requireFunction(invokerSignature,rawInvoker,isAsync);whenDependentTypesAreResolved([],[rawClassType],classType=>{classType=classType[0];var humanName=`${classType.name}.${methodName}`;if(methodName.startsWith("@@")){methodName=Symbol[methodName.substring(2)]}if(isPureVirtual){classType.registeredClass.pureVirtualFunctions.push(methodName)}function unboundTypesHandler(){throwUnboundTypeError(`Cannot call ${humanName} due to unbound types`,rawArgTypes)}var 
proto=classType.registeredClass.instancePrototype;var method=proto[methodName];if(undefined===method||undefined===method.overloadTable&&method.className!==classType.name&&method.argCount===argCount-2){unboundTypesHandler.argCount=argCount-2;unboundTypesHandler.className=classType.name;proto[methodName]=unboundTypesHandler}else{ensureOverloadTable(proto,methodName,humanName);proto[methodName].overloadTable[argCount-2]=unboundTypesHandler}whenDependentTypesAreResolved([],rawArgTypes,argTypes=>{var memberFunction=craftInvokerFunction(humanName,argTypes,classType,rawInvoker,context,isAsync);if(undefined===proto[methodName].overloadTable){memberFunction.argCount=argCount-2;proto[methodName]=memberFunction}else{proto[methodName].overloadTable[argCount-2]=memberFunction}return[]});return[]})};var __embind_register_constant=(name,type,value)=>{name=AsciiToString(name);whenDependentTypesAreResolved([],[type],type=>{type=type[0];Module[name]=type.fromWireType(value);return[]})};var emval_freelist=[];var emval_handles=[0,1,,1,null,1,true,1,false,1];var __emval_decref=handle=>{if(handle>9&&0===--emval_handles[handle+1]){emval_handles[handle]=undefined;emval_freelist.push(handle)}};var Emval={toValue:handle=>{if(!handle){throwBindingError(`Cannot use deleted val. 
handle = ${handle}`)}return emval_handles[handle]},toHandle:value=>{switch(value){case undefined:return 2;case null:return 4;case true:return 6;case false:return 8;default:{const handle=emval_freelist.pop()||emval_handles.length;emval_handles[handle]=value;emval_handles[handle+1]=1;return handle}}}};var EmValType={name:"emscripten::val",fromWireType:handle=>{var rv=Emval.toValue(handle);__emval_decref(handle);return rv},toWireType:(destructors,value)=>Emval.toHandle(value),readValueFromPointer:readPointer,destructorFunction:null};var __embind_register_emval=rawType=>registerType(rawType,EmValType);var enumReadValueFromPointer=(name,width,signed)=>{switch(width){case 1:return signed?function(pointer){return this.fromWireType(HEAP8[pointer])}:function(pointer){return this.fromWireType(HEAPU8[pointer])};case 2:return signed?function(pointer){return this.fromWireType(HEAP16[pointer>>1])}:function(pointer){return this.fromWireType(HEAPU16[pointer>>1])};case 4:return signed?function(pointer){return this.fromWireType(HEAP32[pointer>>2])}:function(pointer){return this.fromWireType(HEAPU32[pointer>>2])};default:throw new TypeError(`invalid integer width (${width}): ${name}`)}};var __embind_register_enum=(rawType,name,size,isSigned)=>{name=AsciiToString(name);function ctor(){}ctor.values={};registerType(rawType,{name,constructor:ctor,fromWireType:function(c){return this.constructor.values[c]},toWireType:(destructors,c)=>c.value,readValueFromPointer:enumReadValueFromPointer(name,size,isSigned),destructorFunction:null});exposePublicSymbol(name,ctor)};var requireRegisteredType=(rawType,humanName)=>{var impl=registeredTypes[rawType];if(undefined===impl){throwBindingError(`${humanName} has unknown type ${getTypeName(rawType)}`)}return impl};var __embind_register_enum_value=(rawEnumType,name,enumValue)=>{var enumType=requireRegisteredType(rawEnumType,"enum");name=AsciiToString(name);var Enum=enumType.constructor;var 
Value=Object.create(enumType.constructor.prototype,{value:{value:enumValue},constructor:{value:createNamedFunction(`${enumType.name}_${name}`,function(){})}});Enum.values[enumValue]=Value;Enum[name]=Value};var floatReadValueFromPointer=(name,width)=>{switch(width){case 4:return function(pointer){return this.fromWireType(HEAPF32[pointer>>2])};case 8:return function(pointer){return this.fromWireType(HEAPF64[pointer>>3])};default:throw new TypeError(`invalid float width (${width}): ${name}`)}};var __embind_register_float=(rawType,name,size)=>{name=AsciiToString(name);registerType(rawType,{name,fromWireType:value=>value,toWireType:(destructors,value)=>value,readValueFromPointer:floatReadValueFromPointer(name,size),destructorFunction:null})};var __embind_register_function=(name,argCount,rawArgTypesAddr,signature,rawInvoker,fn,isAsync,isNonnullReturn)=>{var argTypes=heap32VectorToArray(argCount,rawArgTypesAddr);name=AsciiToString(name);name=getFunctionName(name);rawInvoker=embind__requireFunction(signature,rawInvoker,isAsync);exposePublicSymbol(name,function(){throwUnboundTypeError(`Cannot call ${name} due to unbound types`,argTypes)},argCount-1);whenDependentTypesAreResolved([],argTypes,argTypes=>{var invokerArgsArray=[argTypes[0],null].concat(argTypes.slice(1));replacePublicSymbol(name,craftInvokerFunction(name,invokerArgsArray,null,rawInvoker,fn,isAsync),argCount-1);return[]})};var __embind_register_integer=(primitiveType,name,size,minRange,maxRange)=>{name=AsciiToString(name);const isUnsignedType=minRange===0;let fromWireType=value=>value;if(isUnsignedType){var bitshift=32-8*size;fromWireType=value=>value<>>bitshift;maxRange=fromWireType(maxRange)}registerType(primitiveType,{name,fromWireType,toWireType:(destructors,value)=>value,readValueFromPointer:integerReadValueFromPointer(name,size,minRange!==0),destructorFunction:null})};var __embind_register_memory_view=(rawType,dataTypeIndex,name)=>{var 
typeMapping=[Int8Array,Uint8Array,Int16Array,Uint16Array,Int32Array,Uint32Array,Float32Array,Float64Array,BigInt64Array,BigUint64Array];var TA=typeMapping[dataTypeIndex];function decodeMemoryView(handle){var size=HEAPU32[handle>>2];var data=HEAPU32[handle+4>>2];return new TA(HEAP8.buffer,data,size)}name=AsciiToString(name);registerType(rawType,{name,fromWireType:decodeMemoryView,readValueFromPointer:decodeMemoryView},{ignoreDuplicateRegistrations:true})};var stringToUTF8Array=(str,heap,outIdx,maxBytesToWrite)=>{if(!(maxBytesToWrite>0))return 0;var startIdx=outIdx;var endIdx=outIdx+maxBytesToWrite-1;for(var i=0;i=endIdx)break;heap[outIdx++]=u}else if(u<=2047){if(outIdx+1>=endIdx)break;heap[outIdx++]=192|u>>6;heap[outIdx++]=128|u&63}else if(u<=65535){if(outIdx+2>=endIdx)break;heap[outIdx++]=224|u>>12;heap[outIdx++]=128|u>>6&63;heap[outIdx++]=128|u&63}else{if(outIdx+3>=endIdx)break;heap[outIdx++]=240|u>>18;heap[outIdx++]=128|u>>12&63;heap[outIdx++]=128|u>>6&63;heap[outIdx++]=128|u&63;i++}}heap[outIdx]=0;return outIdx-startIdx};var stringToUTF8=(str,outPtr,maxBytesToWrite)=>stringToUTF8Array(str,HEAPU8,outPtr,maxBytesToWrite);var lengthBytesUTF8=str=>{var len=0;for(var i=0;i=55296&&c<=57343){len+=4;++i}else{len+=3}}return len};var UTF8Decoder=globalThis.TextDecoder&&new TextDecoder;var findStringEnd=(heapOrArray,idx,maxBytesToRead,ignoreNul)=>{var maxIdx=idx+maxBytesToRead;if(ignoreNul)return maxIdx;while(heapOrArray[idx]&&!(idx>=maxIdx))++idx;return idx};var UTF8ArrayToString=(heapOrArray,idx=0,maxBytesToRead,ignoreNul)=>{var endPtr=findStringEnd(heapOrArray,idx,maxBytesToRead,ignoreNul);if(endPtr-idx>16&&heapOrArray.buffer&&UTF8Decoder){return UTF8Decoder.decode(heapOrArray.subarray(idx,endPtr))}var str="";while(idx>10,56320|ch&1023)}}return str};var UTF8ToString=(ptr,maxBytesToRead,ignoreNul)=>ptr?UTF8ArrayToString(HEAPU8,ptr,maxBytesToRead,ignoreNul):"";var __embind_register_std_string=(rawType,name)=>{name=AsciiToString(name);var 
stdStringIsUTF8=true;registerType(rawType,{name,fromWireType(value){var length=HEAPU32[value>>2];var payload=value+4;var str;if(stdStringIsUTF8){str=UTF8ToString(payload,length,true)}else{str="";for(var i=0;i>2]=length;if(valueIsOfTypeString){if(stdStringIsUTF8){stringToUTF8(value,ptr,length+1)}else{for(var i=0;i255){_free(base);throwBindingError("String has UTF-16 code units that do not fit in 8 bits")}HEAPU8[ptr+i]=charCode}}}else{HEAPU8.set(value,ptr)}if(destructors!==null){destructors.push(_free,base)}return base},readValueFromPointer:readPointer,destructorFunction(ptr){_free(ptr)}})};var UTF16Decoder=globalThis.TextDecoder?new TextDecoder("utf-16le"):undefined;var UTF16ToString=(ptr,maxBytesToRead,ignoreNul)=>{var idx=ptr>>1;var endIdx=findStringEnd(HEAPU16,idx,maxBytesToRead/2,ignoreNul);if(endIdx-idx>16&&UTF16Decoder)return UTF16Decoder.decode(HEAPU16.subarray(idx,endIdx));var str="";for(var i=idx;i{maxBytesToWrite??=2147483647;if(maxBytesToWrite<2)return 0;maxBytesToWrite-=2;var startPtr=outPtr;var numCharsToWrite=maxBytesToWrite>1]=codeUnit;outPtr+=2}HEAP16[outPtr>>1]=0;return outPtr-startPtr};var lengthBytesUTF16=str=>str.length*2;var UTF32ToString=(ptr,maxBytesToRead,ignoreNul)=>{var str="";var startIdx=ptr>>2;for(var i=0;!(i>=maxBytesToRead/4);i++){var utf32=HEAPU32[startIdx+i];if(!utf32&&!ignoreNul)break;str+=String.fromCodePoint(utf32)}return str};var stringToUTF32=(str,outPtr,maxBytesToWrite)=>{maxBytesToWrite??=2147483647;if(maxBytesToWrite<4)return 0;var startPtr=outPtr;var endPtr=startPtr+maxBytesToWrite-4;for(var i=0;i65535){i++}HEAP32[outPtr>>2]=codePoint;outPtr+=4;if(outPtr+4>endPtr)break}HEAP32[outPtr>>2]=0;return outPtr-startPtr};var lengthBytesUTF32=str=>{var len=0;for(var i=0;i65535){i++}len+=4}return len};var __embind_register_std_wstring=(rawType,charSize,name)=>{name=AsciiToString(name);var 
decodeString,encodeString,lengthBytesUTF;if(charSize===2){decodeString=UTF16ToString;encodeString=stringToUTF16;lengthBytesUTF=lengthBytesUTF16}else{decodeString=UTF32ToString;encodeString=stringToUTF32;lengthBytesUTF=lengthBytesUTF32}registerType(rawType,{name,fromWireType:value=>{var length=HEAPU32[value>>2];var str=decodeString(value+4,length*charSize,true);_free(value);return str},toWireType:(destructors,value)=>{if(!(typeof value=="string")){throwBindingError(`Cannot pass non-string to C++ string type ${name}`)}var length=lengthBytesUTF(value);var ptr=_malloc(4+length+charSize);HEAPU32[ptr>>2]=length/charSize;encodeString(value,ptr+4,length+charSize);if(destructors!==null){destructors.push(_free,ptr)}return ptr},readValueFromPointer:readPointer,destructorFunction(ptr){_free(ptr)}})};var __embind_register_value_object=(rawType,name,constructorSignature,rawConstructor,destructorSignature,rawDestructor)=>{structRegistrations[rawType]={name:AsciiToString(name),rawConstructor:embind__requireFunction(constructorSignature,rawConstructor),rawDestructor:embind__requireFunction(destructorSignature,rawDestructor),fields:[]}};var __embind_register_value_object_field=(structType,fieldName,getterReturnType,getterSignature,getter,getterContext,setterArgumentType,setterSignature,setter,setterContext)=>{structRegistrations[structType].fields.push({fieldName:AsciiToString(fieldName),getterReturnType,getter:embind__requireFunction(getterSignature,getter),getterContext,setterArgumentType,setter:embind__requireFunction(setterSignature,setter),setterContext})};var __embind_register_void=(rawType,name)=>{name=AsciiToString(name);registerType(rawType,{isVoid:true,name,fromWireType:()=>undefined,toWireType:(destructors,o)=>undefined})};var emval_methodCallers=[];var emval_addMethodCaller=caller=>{var id=emval_methodCallers.length;emval_methodCallers.push(caller);return id};var emval_lookupTypes=(argCount,argTypes)=>{var a=new Array(argCount);for(var i=0;i>2],`parameter ${i}`)}return 
a};var emval_returnValue=(toReturnWire,destructorsRef,handle)=>{var destructors=[];var result=toReturnWire(destructors,handle);if(destructors.length){HEAPU32[destructorsRef>>2]=Emval.toHandle(destructors)}return result};var emval_symbols={};var getStringOrSymbol=address=>{var symbol=emval_symbols[address];if(symbol===undefined){return AsciiToString(address)}return symbol};var __emval_create_invoker=(argCount,argTypesPtr,kind)=>{var GenericWireTypeSize=8;var[retType,...argTypes]=emval_lookupTypes(argCount,argTypesPtr);var toReturnWire=retType.toWireType.bind(retType);var argFromPtr=argTypes.map(type=>type.readValueFromPointer.bind(type));argCount--;var captures={toValue:Emval.toValue};var args=argFromPtr.map((argFromPtr,i)=>{var captureName=`argFromPtr${i}`;captures[captureName]=argFromPtr;return`${captureName}(args${i?"+"+i*GenericWireTypeSize:""})`});var functionBody;switch(kind){case 0:functionBody="toValue(handle)";break;case 2:functionBody="new (toValue(handle))";break;case 3:functionBody="";break;case 1:captures["getStringOrSymbol"]=getStringOrSymbol;functionBody="toValue(handle)[getStringOrSymbol(methodName)]";break}functionBody+=`(${args})`;if(!retType.isVoid){captures["toReturnWire"]=toReturnWire;captures["emval_returnValue"]=emval_returnValue;functionBody=`return emval_returnValue(toReturnWire, destructorsRef, ${functionBody})`}functionBody=`return function (handle, methodName, destructorsRef, args) {\n ${functionBody}\n }`;var invokerFunction=new Function(Object.keys(captures),functionBody)(...Object.values(captures));var functionName=`methodCaller<(${argTypes.map(t=>t.name)}) => ${retType.name}>`;return emval_addMethodCaller(createNamedFunction(functionName,invokerFunction))};var __emval_get_property=(handle,key)=>{handle=Emval.toValue(handle);key=Emval.toValue(key);return Emval.toHandle(handle[key])};var __emval_incref=handle=>{if(handle>9){emval_handles[handle+1]+=1}};var 
__emval_invoke=(caller,handle,methodName,destructorsRef,args)=>emval_methodCallers[caller](handle,methodName,destructorsRef,args);var __emval_new_cstring=v=>Emval.toHandle(getStringOrSymbol(v));var __emval_run_destructors=handle=>{var destructors=Emval.toValue(handle);runDestructors(destructors);__emval_decref(handle)};var getHeapMax=()=>2147483648;var alignMemory=(size,alignment)=>Math.ceil(size/alignment)*alignment;var growMemory=size=>{var oldHeapSize=wasmMemory.buffer.byteLength;var pages=(size-oldHeapSize+65535)/65536|0;try{wasmMemory.grow(pages);updateMemoryViews();return 1}catch(e){}};var _emscripten_resize_heap=requestedSize=>{var oldSize=HEAPU8.length;requestedSize>>>=0;var maxHeapSize=getHeapMax();if(requestedSize>maxHeapSize){return false}for(var cutDown=1;cutDown<=4;cutDown*=2){var overGrownHeapSize=oldSize*(1+.2/cutDown);overGrownHeapSize=Math.min(overGrownHeapSize,requestedSize+100663296);var newSize=Math.min(maxHeapSize,alignMemory(Math.max(requestedSize,overGrownHeapSize),65536));var replacement=growMemory(newSize);if(replacement){return true}}return false};var _fd_close=fd=>52;var INT53_MAX=9007199254740992;var INT53_MIN=-9007199254740992;var bigintToI53Checked=num=>numINT53_MAX?NaN:Number(num);function _fd_seek(fd,offset,whence,newOffset){offset=bigintToI53Checked(offset);return 70}var printCharBuffers=[null,[],[]];var printChar=(stream,curr)=>{var buffer=printCharBuffers[stream];if(curr===0||curr===10){(stream===1?out:err)(UTF8ArrayToString(buffer));buffer.length=0}else{buffer.push(curr)}};var _fd_write=(fd,iov,iovcnt,pnum)=>{var num=0;for(var i=0;i>2];var len=HEAPU32[iov+4>>2];iov+=8;for(var j=0;j>2]=num;return 
0};init_ClassHandle();init_RegisteredPointer();{if(Module["noExitRuntime"])noExitRuntime=Module["noExitRuntime"];if(Module["print"])out=Module["print"];if(Module["printErr"])err=Module["printErr"];if(Module["wasmBinary"])wasmBinary=Module["wasmBinary"];if(Module["arguments"])arguments_=Module["arguments"];if(Module["thisProgram"])thisProgram=Module["thisProgram"];if(Module["preInit"]){if(typeof Module["preInit"]=="function")Module["preInit"]=[Module["preInit"]];while(Module["preInit"].length>0){Module["preInit"].shift()()}}}var ___getTypeName,_malloc,_free,memory,__indirect_function_table,wasmMemory,wasmTable;function assignWasmExports(wasmExports){___getTypeName=wasmExports["I"];_malloc=wasmExports["J"];_free=wasmExports["L"];memory=wasmMemory=wasmExports["G"];__indirect_function_table=wasmTable=wasmExports["K"]}var wasmImports={F:___cxa_throw,x:__abort_js,q:__embind_finalize_value_object,w:__embind_register_bigint,D:__embind_register_bool,t:__embind_register_class,s:__embind_register_class_constructor,b:__embind_register_class_function,k:__embind_register_constant,B:__embind_register_emval,o:__embind_register_enum,a:__embind_register_enum_value,v:__embind_register_float,e:__embind_register_function,l:__embind_register_integer,i:__embind_register_memory_view,C:__embind_register_std_string,r:__embind_register_std_wstring,p:__embind_register_value_object,c:__embind_register_value_object_field,E:__embind_register_void,h:__emval_create_invoker,d:__emval_decref,m:__emval_get_property,j:__emval_incref,g:__emval_invoke,n:__emval_new_cstring,f:__emval_run_destructors,y:_emscripten_resize_heap,A:_fd_close,z:_fd_seek,u:_fd_write};function run(){preRun();function doRun(){Module["calledRun"]=true;if(ABORT)return;initRuntime();readyPromiseResolve?.(Module);Module["onRuntimeInitialized"]?.();postRun()}if(Module["setStatus"]){Module["setStatus"]("Running...");setTimeout(()=>{setTimeout(()=>Module["setStatus"](""),1);doRun()},1)}else{doRun()}}var wasmExports;wasmExports=await 
(createWasm());run();if(runtimeInitialized){moduleRtn=Module}else{moduleRtn=new Promise((resolve,reject)=>{readyPromiseResolve=resolve;readyPromiseReject=reject})} ;return moduleRtn}})();if(typeof exports==="object"&&typeof module==="object"){module.exports=BASIS;module.exports.default=BASIS}else if(typeof define==="function"&&define["amd"])define([],()=>BASIS); diff --git a/external/basis_universal/webgl/transcoder/build/basis_transcoder.wasm b/external/basis_universal/webgl/transcoder/build/basis_transcoder.wasm index 28fff9aced..78787d3e05 100644 Binary files a/external/basis_universal/webgl/transcoder/build/basis_transcoder.wasm and b/external/basis_universal/webgl/transcoder/build/basis_transcoder.wasm differ diff --git a/external/basis_universal/webgl/video_test/index.html b/external/basis_universal/webgl/video_test/index.html index 9da0a14b19..114ba01122 100644 --- a/external/basis_universal/webgl/video_test/index.html +++ b/external/basis_universal/webgl/video_test/index.html @@ -1,8 +1,9 @@ @@ -11,33 +12,52 @@ @@ -585,13 +770,12 @@
- Basis Universal ETC1S GPU Texture Video Transcoding Test + Basis Universal - GPU Texture Video Transcoding Test
-
This demo uses the Basis C++ transcoder (compiled to WebAssembly using Emscripten) to transcode a .basis Universal Texture Video file directly to GPU texture data. -
.basis universal GPU texture files can be quickly transcoded directly to any other GPU texture format with little to no quality loss. -
Thanks to Evan Parker for providing webgl-texture-utils and this test bed. Go back. +
This demo uses the Basis C++ transcoder (compiled to WebAssembly using Emscripten) to transcode a .basis Universal Texture file directly to GPU texture data. +
Thanks to Evan Parker for providing webgl-texture-utils and the original test bed. Go back.
Transcode Format: FORMAT
@@ -633,7 +817,8 @@