From ef5bf361c12e55591c81db2de3c799af269ede91 Mon Sep 17 00:00:00 2001 From: Connor Fitzgerald Date: Wed, 19 Feb 2025 21:50:37 -0500 Subject: [PATCH 1/4] Implement F16 Support Co-authored-by: FL33TW00D Co-authored-by: ErichDonGubler --- Cargo.lock | 24 + Cargo.toml | 2 + naga/Cargo.toml | 11 +- naga/src/back/glsl/mod.rs | 3 + naga/src/back/hlsl/writer.rs | 1 + naga/src/back/msl/writer.rs | 34 +- naga/src/back/spv/block.rs | 386 ++++--- naga/src/back/spv/instructions.rs | 4 + naga/src/back/spv/writer.rs | 26 + naga/src/back/wgsl/writer.rs | 40 + naga/src/common/wgsl/to_wgsl.rs | 1 + naga/src/front/glsl/error.rs | 12 +- naga/src/front/glsl/offset.rs | 24 +- naga/src/front/glsl/types.rs | 5 + naga/src/front/spv/mod.rs | 5 + naga/src/front/wgsl/error.rs | 16 +- naga/src/front/wgsl/lower/mod.rs | 1 + naga/src/front/wgsl/parse/conv.rs | 27 +- .../wgsl/parse/directive/enable_extension.rs | 24 +- naga/src/front/wgsl/parse/lexer.rs | 40 +- naga/src/front/wgsl/parse/mod.rs | 189 +++- naga/src/front/wgsl/parse/number.rs | 23 +- naga/src/ir/mod.rs | 2 + naga/src/proc/constant_evaluator.rs | 118 ++- naga/src/proc/mod.rs | 4 + naga/src/proc/type_methods.rs | 4 + naga/src/valid/expression.rs | 2 +- naga/src/valid/interface.rs | 7 +- naga/src/valid/mod.rs | 4 +- naga/src/valid/type.rs | 71 +- naga/tests/in/glsl/f16-glsl.comp | 57 + naga/tests/in/glsl/f16-glsl.toml | 1 + naga/tests/in/spv/f16-spv.comp | 57 + naga/tests/in/spv/f16-spv.spvasm | 130 +++ naga/tests/in/spv/f16-spv.toml | 1 + naga/tests/in/wgsl/extra.wgsl | 2 +- naga/tests/in/wgsl/f16.toml | 15 + naga/tests/in/wgsl/f16.wgsl | 127 +++ naga/tests/out/hlsl/f16.hlsl | 351 +++++++ naga/tests/out/hlsl/f16.ron | 12 + naga/tests/out/msl/extra.msl | 2 +- naga/tests/out/msl/f16.msl | 177 ++++ .../spv/atomicCompareExchange-int64.spvasm | 224 ++-- naga/tests/out/spv/extra.spvasm | 118 ++- naga/tests/out/spv/f16.spvasm | 633 ++++++++++++ naga/tests/out/spv/image.spvasm | 976 +++++++++--------- naga/tests/out/wgsl/extra.wgsl | 2 +- naga/tests/out/wgsl/f16-glsl.comp.wgsl | 47 + naga/tests/out/wgsl/f16-spv.wgsl | 46 + naga/tests/out/wgsl/f16.wgsl | 167 +++ naga/tests/spirv_capabilities.rs | 5 + naga/tests/wgsl_errors.rs | 107 +- tests/Cargo.toml | 1 + tests/gpu-tests/shader/mod.rs | 4 +- tests/gpu-tests/shader/struct_layout.rs | 383 +++++-- wgpu-core/src/device/mod.rs | 4 + wgpu-hal/src/dx12/adapter.rs | 20 +- wgpu-hal/src/dx12/device.rs | 3 + wgpu-hal/src/dx12/mod.rs | 1 + wgpu-hal/src/dx12/shader_compilation.rs | 6 +- wgpu-hal/src/vulkan/adapter.rs | 8 +- 61 files changed, 3807 insertions(+), 990 deletions(-) create mode 100644 naga/tests/in/glsl/f16-glsl.comp create mode 100644 naga/tests/in/glsl/f16-glsl.toml create mode 100644 naga/tests/in/spv/f16-spv.comp create mode 100644 naga/tests/in/spv/f16-spv.spvasm create mode 100644 naga/tests/in/spv/f16-spv.toml create mode 100644 naga/tests/in/wgsl/f16.toml create mode 100644 naga/tests/in/wgsl/f16.wgsl create mode 100644 naga/tests/out/hlsl/f16.hlsl create mode 100644 naga/tests/out/hlsl/f16.ron create mode 100644 naga/tests/out/msl/f16.msl create mode 100644 naga/tests/out/spv/f16.spvasm create mode 100644 naga/tests/out/wgsl/f16-glsl.comp.wgsl create mode 100644 naga/tests/out/wgsl/f16-spv.wgsl create mode 100644 naga/tests/out/wgsl/f16.wgsl diff --git a/Cargo.lock b/Cargo.lock index aae307fb04..a90f06c9a0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1789,6 +1789,20 @@ dependencies = [ "crunchy", ] +[[package]] +name = "half-2" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c36518ae13d88b7cbdc61401256df5d9fc27921ea66353a16660869b47af8864" +dependencies = [ + "arbitrary", + "bytemuck", + "cfg-if", + "crunchy", + "num-traits", + "serde", +] + [[package]] name = "hashbrown" version = "0.14.5" @@ -2193,6 +2207,12 @@ dependencies = [ "windows-targets 0.48.5", ] +[[package]] +name = "libm" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" + [[package]] name = "libredox" version = "0.1.3" @@ -2393,12 +2413,14 @@ dependencies = [ "codespan-reporting", "diff", "env_logger", + "half-2", "hashbrown", "hexf-parse", "hlsl-snapshots", "indexmap", "itertools 0.13.0", "log", + "num-traits", "petgraph 0.7.1", "pp-rs", "ron", @@ -2574,6 +2596,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -4843,6 +4866,7 @@ dependencies = [ "env_logger", "futures-lite", "glam", + "half-2", "image", "itertools 0.13.0", "js-sys", diff --git a/Cargo.toml b/Cargo.toml index c2e4eea54d..b2ccb84973 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -108,6 +108,8 @@ fern = "0.7" flume = "0.11" futures-lite = "2" glam = "0.29" +# TODO: Use `half` directly again after is released upstream. +half = { package = "half-2", version = "2.4.1" } hashbrown = { version = "0.14.5", default-features = false, features = [ "ahash", "inline-more", diff --git a/naga/Cargo.toml b/naga/Cargo.toml index 131e0c6291..b722e7a2cb 100644 --- a/naga/Cargo.toml +++ b/naga/Cargo.toml @@ -41,10 +41,17 @@ msl-out = [] ## If you want to enable MSL output it regardless of the target platform, use `naga/msl-out`. msl-out-if-target-apple = [] -serialize = ["dep:serde", "bitflags/serde", "hashbrown/serde", "indexmap/serde"] +serialize = [ + "dep:serde", + "bitflags/serde", + "half/serde", + "hashbrown/serde", + "indexmap/serde", +] deserialize = [ "dep:serde", "bitflags/serde", + "half/serde", "hashbrown/serde", "indexmap/serde", ] @@ -84,9 +91,11 @@ termcolor = { version = "1.4.1" } # https://github.com/brendanzab/codespan/commit/e99c867339a877731437e7ee6a903a3d03b5439e codespan-reporting = { version = "0.11.0" } hashbrown.workspace = true +half = { workspace = true, features = ["arbitrary", "num-traits"] } rustc-hash.workspace = true indexmap.workspace = true log = "0.4" +num-traits = "0.2" strum = { workspace = true, optional = true } spirv = { version = "0.3", optional = true } thiserror.workspace = true diff --git a/naga/src/back/glsl/mod.rs b/naga/src/back/glsl/mod.rs index e517e84911..403cefd566 100644 --- a/naga/src/back/glsl/mod.rs +++ b/naga/src/back/glsl/mod.rs @@ -2703,6 +2703,9 @@ impl<'a, W: Write> Writer<'a, W> { // decimal part even it's zero which is needed for a valid glsl float constant crate::Literal::F64(value) => write!(self.out, "{value:?}LF")?, crate::Literal::F32(value) => write!(self.out, "{value:?}")?, + crate::Literal::F16(_) => { + return Err(Error::Custom("GLSL has no 16-bit float type".into())); + } // Unsigned integers need a `u` at the end // // While `core` doesn't necessarily need it, it's allowed and since `es` needs it we diff --git a/naga/src/back/hlsl/writer.rs b/naga/src/back/hlsl/writer.rs index 6cbba65f95..c372342449 100644 --- a/naga/src/back/hlsl/writer.rs +++ b/naga/src/back/hlsl/writer.rs @@ -2628,6 +2628,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { // decimal part even it's zero crate::Literal::F64(value) => write!(self.out, "{value:?}L")?, crate::Literal::F32(value) => write!(self.out, "{value:?}")?, + crate::Literal::F16(value) => write!(self.out, "{value:?}h")?, crate::Literal::U32(value) => write!(self.out, "{value}u")?, // HLSL has no suffix for explicit i32 literals, but not using any suffix // makes the type ambiguous which prevents overload resolution from diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs index 8d7fab3dd1..0c4f219a1b 100644 --- a/naga/src/back/msl/writer.rs +++ b/naga/src/back/msl/writer.rs @@ -8,6 +8,9 @@ use core::{ fmt::{Display, Error as FmtError, Formatter, Write}, iter, }; +use num_traits::real::Real as _; + +use half::f16; use super::{sampler as sm, Error, LocationMode, Options, PipelineOptions, TranslationInfo}; use crate::{ @@ -182,9 +185,11 @@ impl Display for TypeContext<'_> { write!(out, "{}::atomic_{}", NAMESPACE, scalar.to_msl_name()) } crate::TypeInner::Vector { size, scalar } => put_numeric_type(out, scalar, &[size]), - crate::TypeInner::Matrix { columns, rows, .. } => { - put_numeric_type(out, crate::Scalar::F32, &[rows, columns]) - } + crate::TypeInner::Matrix { + columns, + rows, + scalar, + } => put_numeric_type(out, scalar, &[rows, columns]), crate::TypeInner::Pointer { base, space } => { let sub = Self { handle: base, @@ -425,8 +430,12 @@ impl crate::Scalar { match self { Self { kind: Sk::Float, - width: _, + width: 4, } => "float", + Self { + kind: Sk::Float, + width: 2, + } => "half", Self { kind: Sk::Sint, width: 4, @@ -483,7 +492,7 @@ fn should_pack_struct_member( match *ty_inner { crate::TypeInner::Vector { size: crate::VectorSize::Tri, - scalar: scalar @ crate::Scalar { width: 4, .. }, + scalar: scalar @ crate::Scalar { width: 4 | 2, .. }, } if is_tight => Some(scalar), _ => None, } @@ -1459,6 +1468,21 @@ impl Writer { crate::Literal::F64(_) => { return Err(Error::CapabilityNotSupported(valid::Capabilities::FLOAT64)) } + crate::Literal::F16(value) => { + if value.is_infinite() { + let sign = if value.is_sign_negative() { "-" } else { "" }; + write!(self.out, "{sign}INFINITY")?; + } else if value.is_nan() { + write!(self.out, "NAN")?; + } else { + let suffix = if value.fract() == f16::from_f32(0.0) { + ".0h" + } else { + "h" + }; + write!(self.out, "{value}{suffix}")?; + } + } crate::Literal::F32(value) => { if value.is_infinite() { let sign = if value.is_sign_negative() { "-" } else { "" }; diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs index 9acb53d90d..858cc2b71a 100644 --- a/naga/src/back/spv/block.rs +++ b/naga/src/back/spv/block.rs @@ -4,6 +4,7 @@ Implementations for `BlockContext` methods. use alloc::vec::Vec; +use arrayvec::ArrayVec; use spirv::Word; use super::{ @@ -1612,159 +1613,7 @@ impl BlockContext<'_> { expr, kind, convert, - } => { - use crate::ScalarKind as Sk; - - let expr_id = self.cached[expr]; - let (src_scalar, src_size, is_matrix) = - match *self.fun_info[expr].ty.inner_with(&self.ir_module.types) { - crate::TypeInner::Scalar(scalar) => (scalar, None, false), - crate::TypeInner::Vector { scalar, size } => (scalar, Some(size), false), - crate::TypeInner::Matrix { scalar, .. } => (scalar, None, true), - ref other => { - log::error!("As source {:?}", other); - return Err(Error::Validation("Unexpected Expression::As source")); - } - }; - - enum Cast { - Identity, - Unary(spirv::Op), - Binary(spirv::Op, Word), - Ternary(spirv::Op, Word, Word), - } - - let cast = if is_matrix { - // we only support identity casts for matrices - Cast::Unary(spirv::Op::CopyObject) - } else { - match (src_scalar.kind, kind, convert) { - // Filter out identity casts. Some Adreno drivers are - // confused by no-op OpBitCast instructions. - (src_kind, kind, convert) - if src_kind == kind - && convert.filter(|&width| width != src_scalar.width).is_none() => - { - Cast::Identity - } - (Sk::Bool, Sk::Bool, _) => Cast::Unary(spirv::Op::CopyObject), - (_, _, None) => Cast::Unary(spirv::Op::Bitcast), - // casting to a bool - generate `OpXxxNotEqual` - (_, Sk::Bool, Some(_)) => { - let op = match src_scalar.kind { - Sk::Sint | Sk::Uint => spirv::Op::INotEqual, - Sk::Float => spirv::Op::FUnordNotEqual, - Sk::Bool | Sk::AbstractInt | Sk::AbstractFloat => unreachable!(), - }; - let zero_scalar_id = - self.writer.get_constant_scalar_with(0, src_scalar)?; - let zero_id = match src_size { - Some(size) => { - let ty = LocalType::Numeric(NumericType::Vector { - size, - scalar: src_scalar, - }) - .into(); - - self.temp_list.clear(); - self.temp_list.resize(size as _, zero_scalar_id); - - self.writer.get_constant_composite(ty, &self.temp_list) - } - None => zero_scalar_id, - }; - - Cast::Binary(op, zero_id) - } - // casting from a bool - generate `OpSelect` - (Sk::Bool, _, Some(dst_width)) => { - let dst_scalar = crate::Scalar { - kind, - width: dst_width, - }; - let zero_scalar_id = - self.writer.get_constant_scalar_with(0, dst_scalar)?; - let one_scalar_id = - self.writer.get_constant_scalar_with(1, dst_scalar)?; - let (accept_id, reject_id) = match src_size { - Some(size) => { - let ty = LocalType::Numeric(NumericType::Vector { - size, - scalar: dst_scalar, - }) - .into(); - - self.temp_list.clear(); - self.temp_list.resize(size as _, zero_scalar_id); - - let vec0_id = - self.writer.get_constant_composite(ty, &self.temp_list); - - self.temp_list.fill(one_scalar_id); - - let vec1_id = - self.writer.get_constant_composite(ty, &self.temp_list); - - (vec1_id, vec0_id) - } - None => (one_scalar_id, zero_scalar_id), - }; - - Cast::Ternary(spirv::Op::Select, accept_id, reject_id) - } - (Sk::Float, Sk::Uint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToU), - (Sk::Float, Sk::Sint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToS), - (Sk::Float, Sk::Float, Some(dst_width)) - if src_scalar.width != dst_width => - { - Cast::Unary(spirv::Op::FConvert) - } - (Sk::Sint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertSToF), - (Sk::Sint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => { - Cast::Unary(spirv::Op::SConvert) - } - (Sk::Uint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertUToF), - (Sk::Uint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => { - Cast::Unary(spirv::Op::UConvert) - } - (Sk::Uint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => { - Cast::Unary(spirv::Op::SConvert) - } - (Sk::Sint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => { - Cast::Unary(spirv::Op::UConvert) - } - // We assume it's either an identity cast, or int-uint. - _ => Cast::Unary(spirv::Op::Bitcast), - } - }; - - let id = self.gen_id(); - let instruction = match cast { - Cast::Identity => None, - Cast::Unary(op) => Some(Instruction::unary(op, result_type_id, id, expr_id)), - Cast::Binary(op, operand) => Some(Instruction::binary( - op, - result_type_id, - id, - expr_id, - operand, - )), - Cast::Ternary(op, op1, op2) => Some(Instruction::ternary( - op, - result_type_id, - id, - expr_id, - op1, - op2, - )), - }; - if let Some(instruction) = instruction { - block.body.push(instruction); - id - } else { - expr_id - } - } + } => self.write_as_expression(expr, convert, kind, block, result_type_id)?, crate::Expression::ImageLoad { image, coordinate, @@ -1925,6 +1774,237 @@ impl BlockContext<'_> { Ok(()) } + /// Helper which focuses on generating the `As` expressions and the various conversions + /// that need to happen because of that. + fn write_as_expression( + &mut self, + expr: Handle, + convert: Option, + kind: crate::ScalarKind, + + block: &mut Block, + result_type_id: u32, + ) -> Result { + use crate::ScalarKind as Sk; + let expr_id = self.cached[expr]; + let ty = self.fun_info[expr].ty.inner_with(&self.ir_module.types); + + // Matrix casts needs special treatment in SPIR-V, as the cast functions + // can take vectors or scalars, but not matrices. In order to cast a matrix + // we need to cast each column of the matrix individually and construct a new + // matrix from the converted columns. + if let crate::TypeInner::Matrix { + columns, + rows, + scalar, + } = *ty + { + let Some(convert) = convert else { + // No conversion needs to be done, passes through. + return Ok(expr_id); + }; + + if convert == scalar.width { + // No conversion needs to be done, passes through. + return Ok(expr_id); + } + + if kind != Sk::Float { + // Only float conversions are supported for matrices. + return Err(Error::Validation("Matrices must be floats")); + } + + // Type of each extracted column + let column_src_ty = + self.get_type_id(LookupType::Local(LocalType::Numeric(NumericType::Vector { + size: rows, + scalar, + }))); + + // Type of the column after conversion + let column_dst_ty = + self.get_type_id(LookupType::Local(LocalType::Numeric(NumericType::Vector { + size: rows, + scalar: crate::Scalar { + kind, + width: convert, + }, + }))); + + let mut components = ArrayVec::::new(); + + for column in 0..columns as usize { + let column_id = self.gen_id(); + block.body.push(Instruction::composite_extract( + column_src_ty, + column_id, + expr_id, + &[column as u32], + )); + + let column_conv_id = self.gen_id(); + block.body.push(Instruction::unary( + spirv::Op::FConvert, + column_dst_ty, + column_conv_id, + column_id, + )); + + components.push(column_conv_id); + } + + let construct_id = self.gen_id(); + + block.body.push(Instruction::composite_construct( + result_type_id, + construct_id, + &components, + )); + + return Ok(construct_id); + } + + let (src_scalar, src_size) = match *ty { + crate::TypeInner::Scalar(scalar) => (scalar, None), + crate::TypeInner::Vector { scalar, size } => (scalar, Some(size)), + ref other => { + log::error!("As source {:?}", other); + return Err(Error::Validation("Unexpected Expression::As source")); + } + }; + + enum Cast { + Identity, + Unary(spirv::Op), + Binary(spirv::Op, Word), + Ternary(spirv::Op, Word, Word), + } + let cast = match (src_scalar.kind, kind, convert) { + // Filter out identity casts. Some Adreno drivers are + // confused by no-op OpBitCast instructions. + (src_kind, kind, convert) + if src_kind == kind + && convert.filter(|&width| width != src_scalar.width).is_none() => + { + Cast::Identity + } + (Sk::Bool, Sk::Bool, _) => Cast::Unary(spirv::Op::CopyObject), + (_, _, None) => Cast::Unary(spirv::Op::Bitcast), + // casting to a bool - generate `OpXxxNotEqual` + (_, Sk::Bool, Some(_)) => { + let op = match src_scalar.kind { + Sk::Sint | Sk::Uint => spirv::Op::INotEqual, + Sk::Float => spirv::Op::FUnordNotEqual, + Sk::Bool | Sk::AbstractInt | Sk::AbstractFloat => unreachable!(), + }; + let zero_scalar_id = self.writer.get_constant_scalar_with(0, src_scalar)?; + let zero_id = match src_size { + Some(size) => { + let ty = LocalType::Numeric(NumericType::Vector { + size, + scalar: src_scalar, + }) + .into(); + + self.temp_list.clear(); + self.temp_list.resize(size as _, zero_scalar_id); + + self.writer.get_constant_composite(ty, &self.temp_list) + } + None => zero_scalar_id, + }; + + Cast::Binary(op, zero_id) + } + // casting from a bool - generate `OpSelect` + (Sk::Bool, _, Some(dst_width)) => { + let dst_scalar = crate::Scalar { + kind, + width: dst_width, + }; + let zero_scalar_id = self.writer.get_constant_scalar_with(0, dst_scalar)?; + let one_scalar_id = self.writer.get_constant_scalar_with(1, dst_scalar)?; + let (accept_id, reject_id) = match src_size { + Some(size) => { + let ty = LocalType::Numeric(NumericType::Vector { + size, + scalar: dst_scalar, + }) + .into(); + + self.temp_list.clear(); + self.temp_list.resize(size as _, zero_scalar_id); + + let vec0_id = self.writer.get_constant_composite(ty, &self.temp_list); + + self.temp_list.fill(one_scalar_id); + + let vec1_id = self.writer.get_constant_composite(ty, &self.temp_list); + + (vec1_id, vec0_id) + } + None => (one_scalar_id, zero_scalar_id), + }; + + Cast::Ternary(spirv::Op::Select, accept_id, reject_id) + } + (Sk::Float, Sk::Uint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToU), + (Sk::Float, Sk::Sint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToS), + (Sk::Float, Sk::Float, Some(dst_width)) if src_scalar.width != dst_width => { + Cast::Unary(spirv::Op::FConvert) + } + (Sk::Sint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertSToF), + (Sk::Sint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => { + Cast::Unary(spirv::Op::SConvert) + } + (Sk::Uint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertUToF), + (Sk::Uint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => { + Cast::Unary(spirv::Op::UConvert) + } + (Sk::Uint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => { + Cast::Unary(spirv::Op::SConvert) + } + (Sk::Sint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => { + Cast::Unary(spirv::Op::UConvert) + } + // We assume it's either an identity cast, or int-uint. + _ => Cast::Unary(spirv::Op::Bitcast), + }; + Ok(match cast { + Cast::Identity => expr_id, + Cast::Unary(op) => { + let id = self.gen_id(); + block + .body + .push(Instruction::unary(op, result_type_id, id, expr_id)); + id + } + Cast::Binary(op, operand) => { + let id = self.gen_id(); + block.body.push(Instruction::binary( + op, + result_type_id, + id, + expr_id, + operand, + )); + id + } + Cast::Ternary(op, op1, op2) => { + let id = self.gen_id(); + block.body.push(Instruction::ternary( + op, + result_type_id, + id, + expr_id, + op1, + op2, + )); + id + } + }) + } + /// Build an `OpAccessChain` instruction. /// /// Emit any needed bounds-checking expressions to `block`. diff --git a/naga/src/back/spv/instructions.rs b/naga/src/back/spv/instructions.rs index 9dd19b59ca..a34790614e 100644 --- a/naga/src/back/spv/instructions.rs +++ b/naga/src/back/spv/instructions.rs @@ -405,6 +405,10 @@ impl super::Instruction { instruction } + pub(super) fn constant_16bit(result_type_id: Word, id: Word, low: Word) -> Self { + Self::constant(result_type_id, id, &[low]) + } + pub(super) fn constant_32bit(result_type_id: Word, id: Word, value: Word) -> Self { Self::constant(result_type_id, id, &[value]) } diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs index d29f684420..7b2f15c367 100644 --- a/naga/src/back/spv/writer.rs +++ b/naga/src/back/spv/writer.rs @@ -1154,6 +1154,15 @@ impl Writer { if bits == 64 { self.capabilities_used.insert(spirv::Capability::Float64); } + if bits == 16 { + self.capabilities_used.insert(spirv::Capability::Float16); + self.capabilities_used + .insert(spirv::Capability::StorageBuffer16BitAccess); + self.capabilities_used + .insert(spirv::Capability::UniformAndStorageBuffer16BitAccess); + self.capabilities_used + .insert(spirv::Capability::StorageInputOutput16); + } Instruction::type_float(id, bits) } Sk::Bool => Instruction::type_bool(id), @@ -1222,6 +1231,19 @@ impl Writer { )?; self.use_extension("SPV_EXT_shader_atomic_float_add"); } + // 16 bit floating-point support requires Float16 capability + crate::TypeInner::Matrix { + scalar: crate::Scalar::F16, + .. + } + | crate::TypeInner::Vector { + scalar: crate::Scalar::F16, + .. + } + | crate::TypeInner::Scalar(crate::Scalar::F16) => { + self.require_any("16 bit floating-point", &[spirv::Capability::Float16])?; + self.use_extension("SPV_KHR_16bit_storage"); + } _ => {} } Ok(()) @@ -1498,6 +1520,10 @@ impl Writer { Instruction::constant_64bit(type_id, id, bits as u32, (bits >> 32) as u32) } crate::Literal::F32(value) => Instruction::constant_32bit(type_id, id, value.to_bits()), + crate::Literal::F16(value) => { + let low = value.to_bits(); + Instruction::constant_16bit(type_id, id, low as u32) + } crate::Literal::U32(value) => Instruction::constant_32bit(type_id, id, value), crate::Literal::I32(value) => Instruction::constant_32bit(type_id, id, value as u32), crate::Literal::U64(value) => { diff --git a/naga/src/back/wgsl/writer.rs b/naga/src/back/wgsl/writer.rs index cab4d1e5a8..62ad3aa644 100644 --- a/naga/src/back/wgsl/writer.rs +++ b/naga/src/back/wgsl/writer.rs @@ -5,6 +5,7 @@ use alloc::{ vec::Vec, }; use core::fmt::Write; +use hashbrown::HashSet; use super::Error; use super::ToWgslIfImplemented as _; @@ -129,6 +130,9 @@ impl Writer { // Write all needed directives. self.write_enable_dual_source_blending_if_needed(module)?; + // Write all `enable` declarations + self.write_enable_declarations(module)?; + // Write all structs for (handle, ty) in module.types.iter() { if let TypeInner::Struct { ref members, .. } = ty.inner { @@ -222,6 +226,41 @@ impl Writer { Ok(()) } + /// Helper method which writes all the `enable` declarations + /// needed for a module. + fn write_enable_declarations(&mut self, module: &Module) -> BackendResult { + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] + enum WrittenDeclarations { + F16, + } + + let mut written_declarations = HashSet::new(); + + // Write all the `enable` declarations + for (_, ty) in module.types.iter() { + match ty.inner { + TypeInner::Scalar(scalar) + | TypeInner::Vector { scalar, .. } + | TypeInner::Matrix { scalar, .. } => { + if scalar == crate::Scalar::F16 + && !written_declarations.contains(&WrittenDeclarations::F16) + { + writeln!(self.out, "enable f16;")?; + written_declarations.insert(WrittenDeclarations::F16); + } + } + _ => {} + } + } + + if !written_declarations.is_empty() { + // Empty line for readability + writeln!(self.out)?; + } + + Ok(()) + } + /// Helper method used to write /// [functions](https://gpuweb.github.io/gpuweb/wgsl/#functions) /// @@ -1092,6 +1131,7 @@ impl Writer { match expressions[expr] { Expression::Literal(literal) => match literal { + crate::Literal::F16(value) => write!(self.out, "{value}h")?, crate::Literal::F32(value) => write!(self.out, "{value}f")?, crate::Literal::U32(value) => write!(self.out, "{value}u")?, crate::Literal::I32(value) => { diff --git a/naga/src/common/wgsl/to_wgsl.rs b/naga/src/common/wgsl/to_wgsl.rs index 08783ed067..73dea711e7 100644 --- a/naga/src/common/wgsl/to_wgsl.rs +++ b/naga/src/common/wgsl/to_wgsl.rs @@ -270,6 +270,7 @@ impl TryToWgsl for crate::Scalar { Some(match self { Scalar::F64 => "f64", Scalar::F32 => "f32", + Scalar::F16 => "f16", Scalar::I32 => "i32", Scalar::U32 => "u32", Scalar::I64 => "i64", diff --git a/naga/src/front/glsl/error.rs b/naga/src/front/glsl/error.rs index c0927e98a4..966c97e51e 100644 --- a/naga/src/front/glsl/error.rs +++ b/naga/src/front/glsl/error.rs @@ -109,9 +109,15 @@ pub enum ErrorKind { /// Unsupported matrix of the form matCx2 /// /// Our IR expects matrices of the form matCx2 to have a stride of 8 however - /// matrices in the std140 layout have a stride of at least 16 - #[error("unsupported matrix of the form matCx2 in std140 block layout")] - UnsupportedMatrixTypeInStd140, + /// matrices in the std140 layout have a stride of at least 16. + #[error("unsupported matrix of the form matCx2 (in this case mat{columns}x2) in std140 block layout. See https://github.com/gfx-rs/wgpu/issues/4375")] + UnsupportedMatrixWithTwoRowsInStd140 { columns: u8 }, + /// Unsupported matrix of the form f16matCxR + /// + /// Our IR expects matrices of the form f16matCxR to have a stride of 4/8/8 depending on row-count, + /// however matrices in the std140 layout have a stride of at least 16. + #[error("unsupported matrix of the form f16matCxR (in this case f16mat{columns}x{rows}) in std140 block layout. See https://github.com/gfx-rs/wgpu/issues/4375")] + UnsupportedF16MatrixInStd140 { columns: u8, rows: u8 }, /// A variable with the same name already exists in the current scope. #[error("Variable already declared: {0}")] VariableAlreadyDeclared(String), diff --git a/naga/src/front/glsl/offset.rs b/naga/src/front/glsl/offset.rs index b17dbbecd0..1a2d909be1 100644 --- a/naga/src/front/glsl/offset.rs +++ b/naga/src/front/glsl/offset.rs @@ -122,11 +122,25 @@ pub fn calculate_offset( } // See comment on the error kind - if StructLayout::Std140 == layout && rows == crate::VectorSize::Bi { - errors.push(Error { - kind: ErrorKind::UnsupportedMatrixTypeInStd140, - meta, - }); + if StructLayout::Std140 == layout { + // Do the f16 test first, as it's more specific + if scalar == Scalar::F16 { + errors.push(Error { + kind: ErrorKind::UnsupportedF16MatrixInStd140 { + columns: columns as u8, + rows: rows as u8, + }, + meta, + }); + } + if rows == crate::VectorSize::Bi { + errors.push(Error { + kind: ErrorKind::UnsupportedMatrixWithTwoRowsInStd140 { + columns: columns as u8, + }, + meta, + }); + } } (align, align * columns as u32) diff --git a/naga/src/front/glsl/types.rs b/naga/src/front/glsl/types.rs index 72b10a62a9..dffdf96e6d 100644 --- a/naga/src/front/glsl/types.rs +++ b/naga/src/front/glsl/types.rs @@ -12,6 +12,10 @@ pub fn parse_type(type_name: &str) -> Option { name: None, inner: TypeInner::Scalar(Scalar::BOOL), }), + "float16_t" => Some(Type { + name: None, + inner: TypeInner::Scalar(Scalar::F16), + }), "float" => Some(Type { name: None, inner: TypeInner::Scalar(Scalar::F32), @@ -42,6 +46,7 @@ pub fn parse_type(type_name: &str) -> Option { "i" => Scalar::I32, "u" => Scalar::U32, "d" => Scalar::F64, + "f16" => Scalar::F16, _ => return None, }) } diff --git a/naga/src/front/spv/mod.rs b/naga/src/front/spv/mod.rs index 0fe1396ccc..ca40b8efd0 100644 --- a/naga/src/front/spv/mod.rs +++ b/naga/src/front/spv/mod.rs @@ -39,6 +39,7 @@ use alloc::{borrow::ToOwned, format, string::String, vec, vec::Vec}; use core::{convert::TryInto, mem, num::NonZeroU32}; use std::path::PathBuf; +use half::f16; use petgraph::graphmap::GraphMap; use super::atomic_upgrade::Upgrades; @@ -82,6 +83,7 @@ pub const SUPPORTED_EXTENSIONS: &[&str] = &[ "SPV_KHR_vulkan_memory_model", "SPV_KHR_multiview", "SPV_EXT_shader_atomic_float_add", + "SPV_KHR_16bit_storage", ]; pub const SUPPORTED_EXT_SETS: &[&str] = &["GLSL.std.450"]; @@ -5604,6 +5606,9 @@ impl> Frontend { }) => { let low = self.next()?; match width { + // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#Literal + // If a numeric type’s bit width is less than 32-bits, the value appears in the low-order bits of the word. + 2 => crate::Literal::F16(f16::from_bits(low as u16)), 4 => crate::Literal::F32(f32::from_bits(low)), 8 => { inst.expect(5)?; diff --git a/naga/src/front/wgsl/error.rs b/naga/src/front/wgsl/error.rs index eed18b9a13..aae67a94b5 100644 --- a/naga/src/front/wgsl/error.rs +++ b/naga/src/front/wgsl/error.rs @@ -153,8 +153,6 @@ pub enum NumberError { Invalid, #[error("numeric literal not representable by target type")] NotRepresentable, - #[error("unimplemented f16 type")] - UnimplementedF16, } #[derive(Copy, Clone, Debug, PartialEq)] @@ -1026,19 +1024,18 @@ impl<'a> Error<'a> { )], }, Error::EnableExtensionNotEnabled { kind, span } => ParseError { - message: format!("`{}` enable-extension is not enabled", kind.to_ident()), + message: format!("the `{}` language extension is not enabled", kind.to_ident()), labels: vec![( span, format!( concat!( - "the `{}` enable-extension is needed for this functionality, ", - "but it is not currently enabled" + "the `{0}` language extension is needed for this functionality, ", + "but it is not currently enabled." ), kind.to_ident() ) .into(), )], - #[allow(irrefutable_let_patterns)] notes: if let EnableExtension::Unimplemented(kind) = kind { vec![format!( concat!( @@ -1050,7 +1047,12 @@ impl<'a> Error<'a> { kind.tracking_issue_num() )] } else { - vec![] + vec![ + format!( + "You can enable this extension by adding `enable {};` at the top of the shader.", + kind.to_ident() + ), + ] }, }, Error::LanguageExtensionNotYetImplemented { kind, span } => ParseError { diff --git a/naga/src/front/wgsl/lower/mod.rs b/naga/src/front/wgsl/lower/mod.rs index 0a13f438f0..d63443739e 100644 --- a/naga/src/front/wgsl/lower/mod.rs +++ b/naga/src/front/wgsl/lower/mod.rs @@ -2013,6 +2013,7 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { let expr: Typed = match *expr { ast::Expression::Literal(literal) => { let literal = match literal { + ast::Literal::Number(Number::F16(f)) => crate::Literal::F16(f), ast::Literal::Number(Number::F32(f)) => crate::Literal::F32(f), ast::Literal::Number(Number::I32(i)) => crate::Literal::I32(i), ast::Literal::Number(Number::U32(u)) => crate::Literal::U32(u), diff --git a/naga/src/front/wgsl/parse/conv.rs b/naga/src/front/wgsl/parse/conv.rs index 93ee0dfed2..e130fb9d0f 100644 --- a/naga/src/front/wgsl/parse/conv.rs +++ b/naga/src/front/wgsl/parse/conv.rs @@ -1,3 +1,6 @@ +use crate::front::wgsl::parse::directive::enable_extension::{ + EnableExtensions, ImplementedEnableExtension, +}; use crate::front::wgsl::{Error, Result, Scalar}; use crate::Span; @@ -113,10 +116,17 @@ pub fn map_storage_format(word: &str, span: Span) -> Result<'_, crate::StorageFo }) } -pub fn get_scalar_type(word: &str) -> Option { +pub fn get_scalar_type( + enable_extensions: &EnableExtensions, + span: Span, + word: &str, +) -> Result<'static, Option> { use crate::ScalarKind as Sk; - match word { - // "f16" => Some(Scalar { kind: Sk::Float, width: 2 }), + let scalar = match word { + "f16" => Some(Scalar { + kind: Sk::Float, + width: 2, + }), "f32" => Some(Scalar { kind: Sk::Float, width: 4, @@ -146,7 +156,18 @@ pub fn get_scalar_type(word: &str) -> Option { width: crate::BOOL_WIDTH, }), _ => None, + }; + + if matches!(scalar, Some(Scalar::F16)) + && !enable_extensions.contains(ImplementedEnableExtension::F16) + { + return Err(Box::new(Error::EnableExtensionNotEnabled { + span, + kind: ImplementedEnableExtension::F16.into(), + })); } + + Ok(scalar) } pub fn map_derivative(word: &str) -> Option<(crate::DerivativeAxis, crate::DerivativeControl)> { diff --git a/naga/src/front/wgsl/parse/directive/enable_extension.rs b/naga/src/front/wgsl/parse/directive/enable_extension.rs index e4ad7e1399..4fa9799882 100644 --- a/naga/src/front/wgsl/parse/directive/enable_extension.rs +++ b/naga/src/front/wgsl/parse/directive/enable_extension.rs @@ -11,19 +11,23 @@ use alloc::boxed::Box; #[derive(Clone, Debug, Eq, PartialEq)] pub struct EnableExtensions { dual_source_blending: bool, + /// Whether `enable f16;` was written earlier in the shader module. + f16: bool, } impl EnableExtensions { pub(crate) const fn empty() -> Self { Self { + f16: false, dual_source_blending: false, } } /// Add an enable-extension to the set requested by a module. pub(crate) fn add(&mut self, ext: ImplementedEnableExtension) { - let field: &mut bool = match ext { + let field = match ext { ImplementedEnableExtension::DualSourceBlending => &mut self.dual_source_blending, + ImplementedEnableExtension::F16 => &mut self.f16, }; *field = true; } @@ -32,6 +36,7 @@ impl EnableExtensions { pub(crate) const fn contains(&self, ext: ImplementedEnableExtension) -> bool { match ext { ImplementedEnableExtension::DualSourceBlending => self.dual_source_blending, + ImplementedEnableExtension::F16 => self.f16, } } } @@ -47,7 +52,6 @@ impl Default for EnableExtensions { /// WGSL spec.: #[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)] pub enum EnableExtension { - #[allow(unused)] Implemented(ImplementedEnableExtension), Unimplemented(UnimplementedEnableExtension), } @@ -66,7 +70,7 @@ impl EnableExtension { /// Convert from a sentinel word in WGSL into its associated [`EnableExtension`], if possible. pub(crate) fn from_ident(word: &str, span: Span) -> Result { Ok(match word { - Self::F16 => Self::Unimplemented(UnimplementedEnableExtension::F16), + Self::F16 => Self::Implemented(ImplementedEnableExtension::F16), Self::CLIP_DISTANCES => { Self::Unimplemented(UnimplementedEnableExtension::ClipDistances) } @@ -82,10 +86,9 @@ impl EnableExtension { match self { Self::Implemented(kind) => match kind { ImplementedEnableExtension::DualSourceBlending => Self::DUAL_SOURCE_BLENDING, + ImplementedEnableExtension::F16 => Self::F16, }, - Self::Unimplemented(kind) => match kind { - UnimplementedEnableExtension::F16 => Self::F16, UnimplementedEnableExtension::ClipDistances => Self::CLIP_DISTANCES, }, } @@ -101,17 +104,17 @@ pub enum ImplementedEnableExtension { /// /// [`enable dual_source_blending;`]: https://www.w3.org/TR/WGSL/#extension-dual_source_blending DualSourceBlending, -} - -/// A variant of [`EnableExtension::Unimplemented`]. -#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)] -pub enum UnimplementedEnableExtension { /// Enables `f16`/`half` primitive support in all shader languages. /// /// In the WGSL standard, this corresponds to [`enable f16;`]. /// /// [`enable f16;`]: https://www.w3.org/TR/WGSL/#extension-f16 F16, +} + +/// A variant of [`EnableExtension::Unimplemented`]. +#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)] +pub enum UnimplementedEnableExtension { /// Enables the `clip_distances` variable in WGSL. /// /// In the WGSL standard, this corresponds to [`enable clip_distances;`]. @@ -123,7 +126,6 @@ pub enum UnimplementedEnableExtension { impl UnimplementedEnableExtension { pub(crate) const fn tracking_issue_num(self) -> u16 { match self { - Self::F16 => 4384, Self::ClipDistances => 6236, } } diff --git a/naga/src/front/wgsl/parse/lexer.rs b/naga/src/front/wgsl/parse/lexer.rs index 374a1dbc98..378af3d935 100644 --- a/naga/src/front/wgsl/parse/lexer.rs +++ b/naga/src/front/wgsl/parse/lexer.rs @@ -413,14 +413,17 @@ impl<'a> Lexer<'a> { /// Parses a generic scalar type, for example ``. pub(in crate::front::wgsl) fn next_scalar_generic(&mut self) -> Result<'a, Scalar> { self.expect_generic_paren('<')?; - let pair = match self.next() { + let (scalar, _span) = match self.next() { (Token::Word(word), span) => { - conv::get_scalar_type(word).ok_or(Error::UnknownScalarType(span)) + conv::get_scalar_type(&self.enable_extensions, span, word)? + .map(|scalar| (scalar, span)) + .ok_or(Error::UnknownScalarType(span))? } - (_, span) => Err(Error::UnknownScalarType(span)), - }?; + (_, span) => return Err(Box::new(Error::UnknownScalarType(span))), + }; + self.expect_generic_paren('>')?; - Ok(pair) + Ok(scalar) } /// Parses a generic scalar type, for example ``. @@ -430,14 +433,18 @@ impl<'a> Lexer<'a> { &mut self, ) -> Result<'a, (Scalar, Span)> { self.expect_generic_paren('<')?; - let pair = match self.next() { - (Token::Word(word), span) => conv::get_scalar_type(word) - .map(|scalar| (scalar, span)) - .ok_or(Error::UnknownScalarType(span)), - (_, span) => Err(Error::UnknownScalarType(span)), - }?; + + let (scalar, span) = match self.next() { + (Token::Word(word), span) => { + conv::get_scalar_type(&self.enable_extensions, span, word)? + .map(|scalar| (scalar, span)) + .ok_or(Error::UnknownScalarType(span))? + } + (_, span) => return Err(Box::new(Error::UnknownScalarType(span))), + }; + self.expect_generic_paren('>')?; - Ok(pair) + Ok((scalar, span)) } pub(in crate::front::wgsl) fn next_storage_access( @@ -518,6 +525,7 @@ fn sub_test(source: &str, expected_tokens: &[Token]) { #[test] fn test_numbers() { + use half::f16; // WGSL spec examples // // decimal integer @@ -542,14 +550,16 @@ fn test_numbers() { Token::Number(Ok(Number::AbstractFloat(0.01))), Token::Number(Ok(Number::AbstractFloat(12.34))), Token::Number(Ok(Number::F32(0.))), - Token::Number(Err(NumberError::UnimplementedF16)), + Token::Number(Ok(Number::F16(f16::from_f32(0.)))), Token::Number(Ok(Number::AbstractFloat(0.001))), Token::Number(Ok(Number::AbstractFloat(43.75))), Token::Number(Ok(Number::F32(16.))), Token::Number(Ok(Number::AbstractFloat(0.1875))), - Token::Number(Err(NumberError::UnimplementedF16)), + // https://github.com/gfx-rs/wgpu/issues/7046 + Token::Number(Err(NumberError::NotRepresentable)), // Should be 0.75 Token::Number(Ok(Number::AbstractFloat(0.12109375))), - Token::Number(Err(NumberError::UnimplementedF16)), + // https://github.com/gfx-rs/wgpu/issues/7046 + Token::Number(Err(NumberError::NotRepresentable)), // Should be 12.5 ], ); diff --git a/naga/src/front/wgsl/parse/mod.rs b/naga/src/front/wgsl/parse/mod.rs index ce02f93b31..cb59a82a24 100644 --- a/naga/src/front/wgsl/parse/mod.rs +++ b/naga/src/front/wgsl/parse/mod.rs @@ -6,9 +6,7 @@ use crate::diagnostic_filter::{ ShouldConflictOnFullDuplicate, StandardFilterableTriggeringRule, }; use crate::front::wgsl::error::{DiagnosticAttributeNotSupportedPosition, Error, ExpectedToken}; -use crate::front::wgsl::parse::directive::enable_extension::{ - EnableExtension, EnableExtensions, UnimplementedEnableExtension, -}; +use crate::front::wgsl::parse::directive::enable_extension::{EnableExtension, EnableExtensions}; use crate::front::wgsl::parse::directive::language_extension::LanguageExtension; use crate::front::wgsl::parse::directive::DirectiveKind; use crate::front::wgsl::parse::lexer::{Lexer, Token}; @@ -364,7 +362,7 @@ impl Parser { span: Span, ctx: &mut ExpressionContext<'a, '_, '_>, ) -> Result<'a, Option>> { - if let Some(scalar) = conv::get_scalar_type(word) { + if let Some(scalar) = conv::get_scalar_type(&lexer.enable_extensions, span, word)? { return Ok(Some(ast::ConstructorType::Scalar(scalar))); } @@ -393,6 +391,13 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "vec2h" => { + return Ok(Some(ast::ConstructorType::Vector { + size: crate::VectorSize::Bi, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "vec3" => ast::ConstructorType::PartialVector { size: crate::VectorSize::Tri, }, @@ -417,6 +422,13 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "vec3h" => { + return Ok(Some(ast::ConstructorType::Vector { + size: crate::VectorSize::Tri, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "vec4" => ast::ConstructorType::PartialVector { size: crate::VectorSize::Quad, }, @@ -441,6 +453,13 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "vec4h" => { + return Ok(Some(ast::ConstructorType::Vector { + size: crate::VectorSize::Quad, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "mat2x2" => ast::ConstructorType::PartialMatrix { columns: crate::VectorSize::Bi, rows: crate::VectorSize::Bi, @@ -453,6 +472,14 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "mat2x2h" => { + return Ok(Some(ast::ConstructorType::Matrix { + columns: crate::VectorSize::Bi, + rows: crate::VectorSize::Bi, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "mat2x3" => ast::ConstructorType::PartialMatrix { columns: crate::VectorSize::Bi, rows: crate::VectorSize::Tri, @@ -465,6 +492,14 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "mat2x3h" => { + return Ok(Some(ast::ConstructorType::Matrix { + columns: crate::VectorSize::Bi, + rows: crate::VectorSize::Tri, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "mat2x4" => ast::ConstructorType::PartialMatrix { columns: crate::VectorSize::Bi, rows: crate::VectorSize::Quad, @@ -477,6 +512,14 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "mat2x4h" => { + return Ok(Some(ast::ConstructorType::Matrix { + columns: crate::VectorSize::Bi, + rows: crate::VectorSize::Quad, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "mat3x2" => ast::ConstructorType::PartialMatrix { columns: crate::VectorSize::Tri, rows: crate::VectorSize::Bi, @@ -489,6 +532,14 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "mat3x2h" => { + return Ok(Some(ast::ConstructorType::Matrix { + columns: crate::VectorSize::Tri, + rows: crate::VectorSize::Bi, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "mat3x3" => ast::ConstructorType::PartialMatrix { columns: crate::VectorSize::Tri, rows: crate::VectorSize::Tri, @@ -501,6 +552,14 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "mat3x3h" => { + return Ok(Some(ast::ConstructorType::Matrix { + columns: crate::VectorSize::Tri, + rows: crate::VectorSize::Tri, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "mat3x4" => ast::ConstructorType::PartialMatrix { columns: crate::VectorSize::Tri, rows: crate::VectorSize::Quad, @@ -513,6 +572,14 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "mat3x4h" => { + return Ok(Some(ast::ConstructorType::Matrix { + columns: crate::VectorSize::Tri, + rows: crate::VectorSize::Quad, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "mat4x2" => ast::ConstructorType::PartialMatrix { columns: crate::VectorSize::Quad, rows: crate::VectorSize::Bi, @@ -525,6 +592,14 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "mat4x2h" => { + return Ok(Some(ast::ConstructorType::Matrix { + columns: crate::VectorSize::Quad, + rows: crate::VectorSize::Bi, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "mat4x3" => ast::ConstructorType::PartialMatrix { columns: crate::VectorSize::Quad, rows: crate::VectorSize::Tri, @@ -537,6 +612,14 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "mat4x3h" => { + return Ok(Some(ast::ConstructorType::Matrix { + columns: crate::VectorSize::Quad, + rows: crate::VectorSize::Tri, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "mat4x4" => ast::ConstructorType::PartialMatrix { columns: crate::VectorSize::Quad, rows: crate::VectorSize::Quad, @@ -549,6 +632,14 @@ impl Parser { ty_span: Span::UNDEFINED, })) } + "mat4x4h" => { + return Ok(Some(ast::ConstructorType::Matrix { + columns: crate::VectorSize::Quad, + rows: crate::VectorSize::Quad, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + })) + } "array" => ast::ConstructorType::PartialArray, "atomic" | "binding_array" @@ -744,15 +835,17 @@ impl Parser { } (Token::Number(res), span) => { let _ = lexer.next(); - let num = res.map_err(|err| match err { - super::error::NumberError::UnimplementedF16 => { - Error::EnableExtensionNotEnabled { - kind: EnableExtension::Unimplemented(UnimplementedEnableExtension::F16), + let num = res.map_err(|err| Error::BadNumber(span, err))?; + + if let Some(enable_extension) = num.required_enable_extension() { + if !lexer.enable_extensions.contains(enable_extension) { + return Err(Box::new(Error::EnableExtensionNotEnabled { + kind: enable_extension.into(), span, - } + })); } - err => Error::BadNumber(span, err), - })?; + } + ast::Expression::Literal(ast::Literal::Number(num)) } (Token::Word("RAY_FLAG_NONE"), _) => { @@ -1340,9 +1433,10 @@ impl Parser { &mut self, lexer: &mut Lexer<'a>, word: &'a str, + span: Span, ctx: &mut ExpressionContext<'a, '_, '_>, ) -> Result<'a, Option>> { - if let Some(scalar) = conv::get_scalar_type(word) { + if let Some(scalar) = conv::get_scalar_type(&lexer.enable_extensions, span, word)? { return Ok(Some(ast::Type::Scalar(scalar))); } @@ -1370,6 +1464,11 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "vec2h" => ast::Type::Vector { + size: crate::VectorSize::Bi, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "vec3" => { let (ty, ty_span) = self.singular_generic(lexer, ctx)?; ast::Type::Vector { @@ -1393,6 +1492,11 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "vec3h" => ast::Type::Vector { + size: crate::VectorSize::Tri, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "vec4" => { let (ty, ty_span) = self.singular_generic(lexer, ctx)?; ast::Type::Vector { @@ -1416,6 +1520,11 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "vec4h" => ast::Type::Vector { + size: crate::VectorSize::Quad, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "mat2x2" => { self.matrix_with_type(lexer, ctx, crate::VectorSize::Bi, crate::VectorSize::Bi)? } @@ -1425,6 +1534,12 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "mat2x2h" => ast::Type::Matrix { + columns: crate::VectorSize::Bi, + rows: crate::VectorSize::Bi, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "mat2x3" => { self.matrix_with_type(lexer, ctx, crate::VectorSize::Bi, crate::VectorSize::Tri)? } @@ -1434,6 +1549,12 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "mat2x3h" => ast::Type::Matrix { + columns: crate::VectorSize::Bi, + rows: crate::VectorSize::Tri, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "mat2x4" => { self.matrix_with_type(lexer, ctx, crate::VectorSize::Bi, crate::VectorSize::Quad)? } @@ -1443,6 +1564,12 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "mat2x4h" => ast::Type::Matrix { + columns: crate::VectorSize::Bi, + rows: crate::VectorSize::Quad, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "mat3x2" => { self.matrix_with_type(lexer, ctx, crate::VectorSize::Tri, crate::VectorSize::Bi)? } @@ -1452,6 +1579,12 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "mat3x2h" => ast::Type::Matrix { + columns: crate::VectorSize::Tri, + rows: crate::VectorSize::Bi, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "mat3x3" => { self.matrix_with_type(lexer, ctx, crate::VectorSize::Tri, crate::VectorSize::Tri)? } @@ -1461,6 +1594,12 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "mat3x3h" => ast::Type::Matrix { + columns: crate::VectorSize::Tri, + rows: crate::VectorSize::Tri, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "mat3x4" => { self.matrix_with_type(lexer, ctx, crate::VectorSize::Tri, crate::VectorSize::Quad)? } @@ -1470,6 +1609,12 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "mat3x4h" => ast::Type::Matrix { + columns: crate::VectorSize::Tri, + rows: crate::VectorSize::Quad, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "mat4x2" => { self.matrix_with_type(lexer, ctx, crate::VectorSize::Quad, crate::VectorSize::Bi)? } @@ -1479,6 +1624,12 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "mat4x2h" => ast::Type::Matrix { + columns: crate::VectorSize::Quad, + rows: crate::VectorSize::Bi, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "mat4x3" => { self.matrix_with_type(lexer, ctx, crate::VectorSize::Quad, crate::VectorSize::Tri)? } @@ -1488,6 +1639,12 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "mat4x3h" => ast::Type::Matrix { + columns: crate::VectorSize::Quad, + rows: crate::VectorSize::Tri, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "mat4x4" => { self.matrix_with_type(lexer, ctx, crate::VectorSize::Quad, crate::VectorSize::Quad)? } @@ -1497,6 +1654,12 @@ impl Parser { ty: ctx.new_scalar(Scalar::F32), ty_span: Span::UNDEFINED, }, + "mat4x4h" => ast::Type::Matrix { + columns: crate::VectorSize::Quad, + rows: crate::VectorSize::Quad, + ty: ctx.new_scalar(Scalar::F16), + ty_span: Span::UNDEFINED, + }, "atomic" => { let scalar = lexer.next_scalar_generic()?; ast::Type::Atomic(scalar) @@ -1763,7 +1926,7 @@ impl Parser { let (name, span) = lexer.next_ident_with_span()?; - let ty = match this.type_decl_impl(lexer, name, ctx)? { + let ty = match this.type_decl_impl(lexer, name, span, ctx)? { Some(ty) => ty, None => { ctx.unresolved.insert(ast::Dependency { diff --git a/naga/src/front/wgsl/parse/number.rs b/naga/src/front/wgsl/parse/number.rs index e447c12c99..776c357343 100644 --- a/naga/src/front/wgsl/parse/number.rs +++ b/naga/src/front/wgsl/parse/number.rs @@ -1,7 +1,9 @@ use alloc::format; use crate::front::wgsl::error::NumberError; +use crate::front::wgsl::parse::directive::enable_extension::ImplementedEnableExtension; use crate::front::wgsl::parse::lexer::Token; +use half::f16; /// When using this type assume no Abstract Int/Float for now #[derive(Copy, Clone, Debug, PartialEq)] @@ -18,12 +20,23 @@ pub enum Number { I64(i64), /// Concrete u64 U64(u64), + /// Concrete f16 + F16(f16), /// Concrete f32 F32(f32), /// Concrete f64 F64(f64), } +impl Number { + pub(super) const fn required_enable_extension(&self) -> Option { + match *self { + Number::F16(_) => Some(ImplementedEnableExtension::F16), + _ => None, + } + } +} + pub(in crate::front::wgsl) fn consume_number(input: &str) -> (Token<'_>, &str) { let (result, rest) = parse(input); (Token::Number(result), rest) @@ -369,7 +382,8 @@ fn parse_hex_float(input: &str, kind: Option) -> Result Err(NumberError::NotRepresentable), }, - Some(FloatKind::F16) => Err(NumberError::UnimplementedF16), + // TODO: f16 is not supported by hexf_parse + Some(FloatKind::F16) => Err(NumberError::NotRepresentable), Some(FloatKind::F32) => match hexf_parse::parse_hexf32(input, false) { Ok(num) => Ok(Number::F32(num)), // can only be ParseHexfErrorKind::Inexact but we can't check since it's private @@ -405,7 +419,12 @@ fn parse_dec_float(input: &str, kind: Option) -> Result Err(NumberError::UnimplementedF16), + Some(FloatKind::F16) => { + let num = input.parse::().unwrap(); // will never fail + num.is_finite() + .then_some(Number::F16(num)) + .ok_or(NumberError::NotRepresentable) + } } } diff --git a/naga/src/ir/mod.rs b/naga/src/ir/mod.rs index fcd152d94e..ed7fd7fece 100644 --- a/naga/src/ir/mod.rs +++ b/naga/src/ir/mod.rs @@ -225,6 +225,7 @@ use alloc::{string::String, vec::Vec}; #[cfg(feature = "arbitrary")] use arbitrary::Arbitrary; +use half::f16; #[cfg(feature = "deserialize")] use serde::Deserialize; #[cfg(feature = "serialize")] @@ -818,6 +819,7 @@ pub enum Literal { F64(f64), /// May not be NaN or infinity. F32(f32), + F16(f16), U32(u32), I32(i32), U64(u64), diff --git a/naga/src/proc/constant_evaluator.rs b/naga/src/proc/constant_evaluator.rs index 38eaabe3ce..b6254a3e10 100644 --- a/naga/src/proc/constant_evaluator.rs +++ b/naga/src/proc/constant_evaluator.rs @@ -7,6 +7,8 @@ use alloc::{ use core::iter; use arrayvec::ArrayVec; +use half::f16; +use num_traits::{real::Real, FromPrimitive, One, ToPrimitive, Zero}; use crate::{ arena::{Arena, Handle, HandleVec, UniqueArena}, @@ -208,6 +210,7 @@ gen_component_wise_extractor! { literals: [ AbstractFloat => AbstractFloat: f64, F32 => F32: f32, + F16 => F16: f16, AbstractInt => AbstractInt: i64, U32 => U32: u32, I32 => I32: i32, @@ -228,6 +231,7 @@ gen_component_wise_extractor! { literals: [ AbstractFloat => Abstract: f64, F32 => F32: f32, + F16 => F16: f16, ], scalar_kinds: [ Float, @@ -253,6 +257,7 @@ gen_component_wise_extractor! { AbstractFloat => AbstractFloat: f64, AbstractInt => AbstractInt: i64, F32 => F32: f32, + F16 => F16: f16, I32 => I32: i32, ], scalar_kinds: [ @@ -1114,6 +1119,7 @@ impl<'a> ConstantEvaluator<'a> { component_wise_scalar(self, span, [arg], |args| match args { Scalar::AbstractFloat([e]) => Ok(Scalar::AbstractFloat([e.abs()])), Scalar::F32([e]) => Ok(Scalar::F32([e.abs()])), + Scalar::F16([e]) => Ok(Scalar::F16([e.abs()])), Scalar::AbstractInt([e]) => Ok(Scalar::AbstractInt([e.abs()])), Scalar::I32([e]) => Ok(Scalar::I32([e.wrapping_abs()])), Scalar::U32([e]) => Ok(Scalar::U32([e])), // TODO: just re-use the expression, ezpz @@ -1145,9 +1151,13 @@ impl<'a> ConstantEvaluator<'a> { } ) } - crate::MathFunction::Saturate => { - component_wise_float!(self, span, [arg], |e| { Ok([e.clamp(0., 1.)]) }) - } + crate::MathFunction::Saturate => component_wise_float(self, span, [arg], |e| match e { + Float::F16([e]) => Ok(Float::F16( + [e.clamp(f16::from_f32(0.0), f16::from_f32(1.0))], + )), + Float::F32([e]) => Ok(Float::F32([e.clamp(0., 1.)])), + Float::Abstract([e]) => Ok(Float::Abstract([e.clamp(0., 1.)])), + }), // trigonometry crate::MathFunction::Cos => { @@ -1201,7 +1211,35 @@ impl<'a> ConstantEvaluator<'a> { component_wise_float!(self, span, [arg], |e| { Ok([e.floor()]) }) } crate::MathFunction::Round => { - component_wise_float!(self, span, [arg], |e| { Ok([e.round_ties_even()]) }) + component_wise_float(self, span, [arg], |e| match e { + Float::Abstract([e]) => Ok(Float::Abstract([e.round_ties_even()])), + Float::F32([e]) => Ok(Float::F32([e.round_ties_even()])), + Float::F16([e]) => { + // TODO: `round_ties_even` is not available on `half::f16` yet. + // + // This polyfill is shamelessly [~~stolen from~~ inspired by `ndarray-image`][polyfill source], + // which has licensing compatible with ours. See also + // . + // + // [polyfill source]: https://github.com/imeka/ndarray-ndimage/blob/8b14b4d6ecfbc96a8a052f802e342a7049c68d8f/src/lib.rs#L98 + fn round_ties_even(x: f64) -> f64 { + let i = x as i64; + let f = (x - i as f64).abs(); + if f == 0.5 { + if i & 1 == 1 { + // -1.5, 1.5, 3.5, ... + (x.abs() + 0.5).copysign(x) + } else { + (x.abs() - 0.5).copysign(x) + } + } else { + x.round() + } + } + + Ok(Float::F16([(f16::from_f64(round_ties_even(f64::from(e))))])) + } + }) } crate::MathFunction::Fract => { component_wise_float!(self, span, [arg], |e| { @@ -1246,15 +1284,27 @@ impl<'a> ConstantEvaluator<'a> { ) } crate::MathFunction::Step => { - component_wise_float!(self, span, [arg, arg1.unwrap()], |edge, x| { - Ok([if edge <= x { 1.0 } else { 0.0 }]) + component_wise_float(self, span, [arg, arg1.unwrap()], |x| match x { + Float::Abstract([edge, x]) => { + Ok(Float::Abstract([if edge <= x { 1.0 } else { 0.0 }])) + } + Float::F32([edge, x]) => Ok(Float::F32([if edge <= x { 1.0 } else { 0.0 }])), + Float::F16([edge, x]) => Ok(Float::F16([if edge <= x { + f16::one() + } else { + f16::zero() + }])), }) } crate::MathFunction::Sqrt => { component_wise_float!(self, span, [arg], |e| { Ok([e.sqrt()]) }) } crate::MathFunction::InverseSqrt => { - component_wise_float!(self, span, [arg], |e| { Ok([1. / e.sqrt()]) }) + component_wise_float(self, span, [arg], |e| match e { + Float::Abstract([e]) => Ok(Float::Abstract([1. / e.sqrt()])), + Float::F32([e]) => Ok(Float::F32([1. / e.sqrt()])), + Float::F16([e]) => Ok(Float::F16([f16::from_f32(1. / f32::from(e).sqrt())])), + }) } // bits @@ -1549,6 +1599,7 @@ impl<'a> ConstantEvaluator<'a> { Literal::I32(v) => v, Literal::U32(v) => v as i32, Literal::F32(v) => v as i32, + Literal::F16(v) => f16::to_i32(&v).unwrap(), //Only None on NaN or Inf Literal::Bool(v) => v as i32, Literal::F64(_) | Literal::I64(_) | Literal::U64(_) => { return make_error(); @@ -1560,6 +1611,7 @@ impl<'a> ConstantEvaluator<'a> { Literal::I32(v) => v as u32, Literal::U32(v) => v, Literal::F32(v) => v as u32, + Literal::F16(v) => f16::to_u32(&v).unwrap(), //Only None on NaN or Inf Literal::Bool(v) => v as u32, Literal::F64(_) | Literal::I64(_) | Literal::U64(_) => { return make_error(); @@ -1575,6 +1627,7 @@ impl<'a> ConstantEvaluator<'a> { Literal::F64(v) => v as i64, Literal::I64(v) => v, Literal::U64(v) => v as i64, + Literal::F16(v) => f16::to_i64(&v).unwrap(), //Only None on NaN or Inf Literal::AbstractInt(v) => i64::try_from_abstract(v)?, Literal::AbstractFloat(v) => i64::try_from_abstract(v)?, }), @@ -1586,9 +1639,22 @@ impl<'a> ConstantEvaluator<'a> { Literal::F64(v) => v as u64, Literal::I64(v) => v as u64, Literal::U64(v) => v, + Literal::F16(v) => f16::to_u64(&v).unwrap(), //Only None on NaN or Inf Literal::AbstractInt(v) => u64::try_from_abstract(v)?, Literal::AbstractFloat(v) => u64::try_from_abstract(v)?, }), + Sc::F16 => Literal::F16(match literal { + Literal::F16(v) => v, + Literal::F32(v) => f16::from_f32(v), + Literal::F64(v) => f16::from_f64(v), + Literal::Bool(v) => f16::from_u32(v as u32).unwrap(), + Literal::I64(v) => f16::from_i64(v).unwrap(), + Literal::U64(v) => f16::from_u64(v).unwrap(), + Literal::I32(v) => f16::from_i32(v).unwrap(), + Literal::U32(v) => f16::from_u32(v).unwrap(), + Literal::AbstractFloat(v) => f16::try_from_abstract(v)?, + Literal::AbstractInt(v) => f16::try_from_abstract(v)?, + }), Sc::F32 => Literal::F32(match literal { Literal::I32(v) => v as f32, Literal::U32(v) => v as f32, @@ -1597,12 +1663,14 @@ impl<'a> ConstantEvaluator<'a> { Literal::F64(_) | Literal::I64(_) | Literal::U64(_) => { return make_error(); } + Literal::F16(v) => f16::to_f32(v), Literal::AbstractInt(v) => f32::try_from_abstract(v)?, Literal::AbstractFloat(v) => f32::try_from_abstract(v)?, }), Sc::F64 => Literal::F64(match literal { Literal::I32(v) => v as f64, Literal::U32(v) => v as f64, + Literal::F16(v) => f16::to_f64(v), Literal::F32(v) => v as f64, Literal::F64(v) => v, Literal::Bool(v) => v as u32 as f64, @@ -1614,6 +1682,7 @@ impl<'a> ConstantEvaluator<'a> { Literal::I32(v) => v != 0, Literal::U32(v) => v != 0, Literal::F32(v) => v != 0.0, + Literal::F16(v) => v != f16::zero(), Literal::Bool(v) => v, Literal::F64(_) | Literal::I64(_) @@ -1770,6 +1839,7 @@ impl<'a> ConstantEvaluator<'a> { Literal::I32(v) => Literal::I32(v.wrapping_neg()), Literal::I64(v) => Literal::I64(v.wrapping_neg()), Literal::F32(v) => Literal::F32(-v), + Literal::F16(v) => Literal::F16(-v), Literal::AbstractInt(v) => Literal::AbstractInt(v.wrapping_neg()), Literal::AbstractFloat(v) => Literal::AbstractFloat(-v), _ => return Err(ConstantEvaluatorError::InvalidUnaryOpArg), @@ -1918,6 +1988,14 @@ impl<'a> ConstantEvaluator<'a> { _ => return Err(ConstantEvaluatorError::InvalidBinaryOpArgs), }) } + (Literal::F16(a), Literal::F16(b)) => Literal::F16(match op { + BinaryOperator::Add => a + b, + BinaryOperator::Subtract => a - b, + BinaryOperator::Multiply => a * b, + BinaryOperator::Divide => a / b, + BinaryOperator::Modulo => a % b, + _ => return Err(ConstantEvaluatorError::InvalidBinaryOpArgs), + }), (Literal::AbstractInt(a), Literal::AbstractInt(b)) => { Literal::AbstractInt(match op { BinaryOperator::Add => a.checked_add(b).ok_or_else(|| { @@ -2522,6 +2600,32 @@ impl TryFromAbstract for u64 { } } +impl TryFromAbstract for f16 { + fn try_from_abstract(value: f64) -> Result { + let f = f16::from_f64(value); + if f.is_infinite() { + return Err(ConstantEvaluatorError::AutomaticConversionLossy { + value: format!("{value:?}"), + to_type: "f16", + }); + } + Ok(f) + } +} + +impl TryFromAbstract for f16 { + fn try_from_abstract(value: i64) -> Result { + let f = f16::from_i64(value); + if f.is_none() { + return Err(ConstantEvaluatorError::AutomaticConversionLossy { + value: format!("{value:?}"), + to_type: "f16", + }); + } + Ok(f.unwrap()) + } +} + #[cfg(test)] mod tests { use alloc::{vec, vec::Vec}; diff --git a/naga/src/proc/mod.rs b/naga/src/proc/mod.rs index 84b4d1af42..3412f9cea4 100644 --- a/naga/src/proc/mod.rs +++ b/naga/src/proc/mod.rs @@ -80,6 +80,7 @@ impl From for super::Scalar { pub enum HashableLiteral { F64(u64), F32(u32), + F16(u16), U32(u32), I32(i32), U64(u64), @@ -94,6 +95,7 @@ impl From for HashableLiteral { match l { crate::Literal::F64(v) => Self::F64(v.to_bits()), crate::Literal::F32(v) => Self::F32(v.to_bits()), + crate::Literal::F16(v) => Self::F16(v.to_bits()), crate::Literal::U32(v) => Self::U32(v), crate::Literal::I32(v) => Self::I32(v), crate::Literal::U64(v) => Self::U64(v), @@ -132,6 +134,7 @@ impl crate::Literal { match *self { Self::F64(_) | Self::I64(_) | Self::U64(_) => 8, Self::F32(_) | Self::U32(_) | Self::I32(_) => 4, + Self::F16(_) => 2, Self::Bool(_) => crate::BOOL_WIDTH, Self::AbstractInt(_) | Self::AbstractFloat(_) => crate::ABSTRACT_WIDTH, } @@ -140,6 +143,7 @@ impl crate::Literal { match *self { Self::F64(_) => crate::Scalar::F64, Self::F32(_) => crate::Scalar::F32, + Self::F16(_) => crate::Scalar::F16, Self::U32(_) => crate::Scalar::U32, Self::I32(_) => crate::Scalar::I32, Self::U64(_) => crate::Scalar::U64, diff --git a/naga/src/proc/type_methods.rs b/naga/src/proc/type_methods.rs index 4eda07c9b7..d4089bc1fa 100644 --- a/naga/src/proc/type_methods.rs +++ b/naga/src/proc/type_methods.rs @@ -20,6 +20,10 @@ impl crate::ScalarKind { } impl crate::Scalar { + pub const F16: Self = Self { + kind: crate::ScalarKind::Float, + width: 2, + }; pub const I32: Self = Self { kind: crate::ScalarKind::Sint, width: 4, diff --git a/naga/src/valid/expression.rs b/naga/src/valid/expression.rs index 9d9e4d2f16..096bd4cae1 100644 --- a/naga/src/valid/expression.rs +++ b/naga/src/valid/expression.rs @@ -1794,7 +1794,7 @@ impl super::Validator { } pub fn validate_literal(&self, literal: crate::Literal) -> Result<(), LiteralError> { - self.check_width(literal.scalar())?; + let _ = self.check_width(literal.scalar())?; check_literal_value(literal)?; Ok(()) diff --git a/naga/src/valid/interface.rs b/naga/src/valid/interface.rs index 4191d16e94..4db7a6354f 100644 --- a/naga/src/valid/interface.rs +++ b/naga/src/valid/interface.rs @@ -4,7 +4,7 @@ use bit_set::BitSet; use super::{ analyzer::{FunctionInfo, GlobalUse}, - Capabilities, Disalignment, FunctionError, ModuleInfo, + Capabilities, Disalignment, FunctionError, ModuleInfo, PushConstantError, }; use crate::arena::{Handle, UniqueArena}; use crate::span::{AddSpan as _, MapErrWithSpan as _, SpanProvider as _, WithSpan}; @@ -41,6 +41,8 @@ pub enum GlobalVariableError { InitializerNotAllowed(crate::AddressSpace), #[error("Storage address space doesn't support write-only access")] StorageAddressSpaceWriteOnlyNotSupported, + #[error("Type is not valid for use as a push constant")] + InvalidPushConstantType(#[source] PushConstantError), } #[derive(Clone, Debug, thiserror::Error)] @@ -596,6 +598,9 @@ impl super::Validator { Capabilities::PUSH_CONSTANT, )); } + if let Err(ref err) = type_info.push_constant_compatibility { + return Err(GlobalVariableError::InvalidPushConstantType(err.clone())); + } ( TypeFlags::DATA | TypeFlags::COPY diff --git a/naga/src/valid/mod.rs b/naga/src/valid/mod.rs index ec596a974b..8f5501bf02 100644 --- a/naga/src/valid/mod.rs +++ b/naga/src/valid/mod.rs @@ -31,7 +31,7 @@ pub use expression::{check_literal_value, LiteralError}; pub use expression::{ConstExpressionError, ExpressionError}; pub use function::{CallError, FunctionError, LocalVariableError}; pub use interface::{EntryPointError, GlobalVariableError, VaryingError}; -pub use r#type::{Disalignment, TypeError, TypeFlags, WidthError}; +pub use r#type::{Disalignment, PushConstantError, TypeError, TypeFlags, WidthError}; use self::handles::InvalidHandleError; @@ -163,6 +163,8 @@ bitflags::bitflags! { const TEXTURE_INT64_ATOMIC = 1 << 24; /// Support for ray queries returning vertex position const RAY_HIT_VERTEX_POSITION = 1 << 25; + /// Support for 16-bit floating-point types. + const SHADER_FLOAT16 = 1 << 26; } } diff --git a/naga/src/valid/type.rs b/naga/src/valid/type.rs index 5863eb813f..b3ae13b7d4 100644 --- a/naga/src/valid/type.rs +++ b/naga/src/valid/type.rs @@ -169,8 +169,16 @@ pub enum WidthError { Abstract, } +#[derive(Clone, Debug, thiserror::Error)] +#[cfg_attr(test, derive(PartialEq))] +pub enum PushConstantError { + #[error("The scalar type {0:?} is not supported in push constants")] + InvalidScalar(crate::Scalar), +} + // Only makes sense if `flags.contains(HOST_SHAREABLE)` type LayoutCompatibility = Result, Disalignment)>; +type PushConstantCompatibility = Result<(), PushConstantError>; fn check_member_layout( accum: &mut LayoutCompatibility, @@ -221,6 +229,7 @@ pub(super) struct TypeInfo { pub flags: TypeFlags, pub uniform_layout: LayoutCompatibility, pub storage_layout: LayoutCompatibility, + pub push_constant_compatibility: PushConstantCompatibility, } impl TypeInfo { @@ -229,6 +238,7 @@ impl TypeInfo { flags: TypeFlags::empty(), uniform_layout: Ok(Alignment::ONE), storage_layout: Ok(Alignment::ONE), + push_constant_compatibility: Ok(()), } } @@ -237,6 +247,7 @@ impl TypeInfo { flags, uniform_layout: Ok(alignment), storage_layout: Ok(alignment), + push_constant_compatibility: Ok(()), } } } @@ -250,11 +261,15 @@ impl super::Validator { } } - pub(super) const fn check_width(&self, scalar: crate::Scalar) -> Result<(), WidthError> { + pub(super) const fn check_width( + &self, + scalar: crate::Scalar, + ) -> Result { + let mut push_constant_compatibility = Ok(()); let good = match scalar.kind { crate::ScalarKind::Bool => scalar.width == crate::BOOL_WIDTH, - crate::ScalarKind::Float => { - if scalar.width == 8 { + crate::ScalarKind::Float => match scalar.width { + 8 => { if !self.capabilities.contains(Capabilities::FLOAT64) { return Err(WidthError::MissingCapability { name: "f64", @@ -262,10 +277,21 @@ impl super::Validator { }); } true - } else { - scalar.width == 4 } - } + 2 => { + if !self.capabilities.contains(Capabilities::SHADER_FLOAT16) { + return Err(WidthError::MissingCapability { + name: "f16", + flag: "FLOAT16", + }); + } + + push_constant_compatibility = Err(PushConstantError::InvalidScalar(scalar)); + + true + } + _ => scalar.width == 4, + }, crate::ScalarKind::Sint => { if scalar.width == 8 { if !self.capabilities.contains(Capabilities::SHADER_INT64) { @@ -297,7 +323,7 @@ impl super::Validator { } }; if good { - Ok(()) + Ok(push_constant_compatibility) } else { Err(WidthError::Invalid(scalar.kind, scalar.width)) } @@ -317,13 +343,13 @@ impl super::Validator { use crate::TypeInner as Ti; Ok(match gctx.types[handle].inner { Ti::Scalar(scalar) => { - self.check_width(scalar)?; + let push_constant_compatibility = self.check_width(scalar)?; let shareable = if scalar.kind.is_numeric() { TypeFlags::IO_SHAREABLE | TypeFlags::HOST_SHAREABLE } else { TypeFlags::empty() }; - TypeInfo::new( + let mut type_info = TypeInfo::new( TypeFlags::DATA | TypeFlags::SIZED | TypeFlags::COPY @@ -332,16 +358,18 @@ impl super::Validator { | TypeFlags::CREATION_RESOLVED | shareable, Alignment::from_width(scalar.width), - ) + ); + type_info.push_constant_compatibility = push_constant_compatibility; + type_info } Ti::Vector { size, scalar } => { - self.check_width(scalar)?; + let push_constant_compatibility = self.check_width(scalar)?; let shareable = if scalar.kind.is_numeric() { TypeFlags::IO_SHAREABLE | TypeFlags::HOST_SHAREABLE } else { TypeFlags::empty() }; - TypeInfo::new( + let mut type_info = TypeInfo::new( TypeFlags::DATA | TypeFlags::SIZED | TypeFlags::COPY @@ -350,7 +378,9 @@ impl super::Validator { | TypeFlags::CREATION_RESOLVED | shareable, Alignment::from(size) * Alignment::from_width(scalar.width), - ) + ); + type_info.push_constant_compatibility = push_constant_compatibility; + type_info } Ti::Matrix { columns: _, @@ -360,8 +390,8 @@ impl super::Validator { if scalar.kind != crate::ScalarKind::Float { return Err(TypeError::MatrixElementNotFloat); } - self.check_width(scalar)?; - TypeInfo::new( + let push_constant_compatibility = self.check_width(scalar)?; + let mut type_info = TypeInfo::new( TypeFlags::DATA | TypeFlags::SIZED | TypeFlags::COPY @@ -370,7 +400,9 @@ impl super::Validator { | TypeFlags::CONSTRUCTIBLE | TypeFlags::CREATION_RESOLVED, Alignment::from(rows) * Alignment::from_width(scalar.width), - ) + ); + type_info.push_constant_compatibility = push_constant_compatibility; + type_info } Ti::Atomic(scalar) => { match scalar { @@ -465,7 +497,7 @@ impl super::Validator { // However, some cases are trivial: All our implicit base types // are DATA and SIZED, so we can never return // `InvalidPointerBase` or `InvalidPointerToUnsized`. - self.check_width(scalar)?; + let _ = self.check_width(scalar)?; // `Validator::validate_function` actually checks the address // space of pointer arguments explicitly before checking the @@ -551,6 +583,7 @@ impl super::Validator { flags: base_info.flags & type_info_mask, uniform_layout, storage_layout, + push_constant_compatibility: base_info.push_constant_compatibility.clone(), } } Ti::Struct { ref members, span } => { @@ -631,6 +664,10 @@ impl super::Validator { base_info.storage_layout, handle, ); + if base_info.push_constant_compatibility.is_err() { + ti.push_constant_compatibility = + base_info.push_constant_compatibility.clone(); + } // Validate rule: If a structure member itself has a structure type S, // then the number of bytes between the start of that member and diff --git a/naga/tests/in/glsl/f16-glsl.comp b/naga/tests/in/glsl/f16-glsl.comp new file mode 100644 index 0000000000..af8c89cb8a --- /dev/null +++ b/naga/tests/in/glsl/f16-glsl.comp @@ -0,0 +1,57 @@ +#version 460 + +#extension GL_AMD_gpu_shader_half_float: enable + +layout(set = 0, binding = 0) uniform A { + float16_t a_1; + f16vec2 a_vec2; + f16vec3 a_vec3; + f16vec4 a_vec4; + // So the rules here are particularly nasty for any f16 matries in uniform buffers + // as the stride is always rounded up to 16, meaning that _every_ f16 matrix in a uniform + // buffer is over-aligned to what naga-ir wants. + // + // This is https://github.com/gfx-rs/wgpu/issues/4375. + + // f16mat2 a_mat2; + // f16mat2x3 a_mat2x3; + // f16mat2x4 a_mat2x4; + // f16mat3x2 a_mat3x2; + // f16mat3 a_mat3; + // f16mat3x4 a_mat3x4; + // f16mat4x2 a_mat4x2; + // f16mat4x3 a_mat4x3; + // f16mat4 a_mat4; +}; + +layout(set = 0, binding = 1) buffer B { + float16_t b_1; + f16vec2 b_vec2; + f16vec3 b_vec3; + f16vec4 b_vec4; + f16mat2 b_mat2; + f16mat2x3 b_mat2x3; + f16mat2x4 b_mat2x4; + f16mat3x2 b_mat3x2; + f16mat3 b_mat3; + f16mat3x4 b_mat3x4; + f16mat4x2 b_mat4x2; + f16mat4x3 b_mat4x3; + f16mat4 b_mat4; +}; + +void main() { + b_1 = a_1; + b_vec2 = a_vec2; + b_vec3 = a_vec3; + b_vec4 = a_vec4; + // b_mat2 = a_mat2; + // b_mat2x3 = a_mat2x3; + // b_mat2x4 = a_mat2x4; + // b_mat3x2 = a_mat3x2; + // b_mat3 = a_mat3; + // b_mat3x4 = a_mat3x4; + // b_mat4x2 = a_mat4x2; + // b_mat4x3 = a_mat4x3; + // b_mat4 = a_mat4; +} diff --git a/naga/tests/in/glsl/f16-glsl.toml b/naga/tests/in/glsl/f16-glsl.toml new file mode 100644 index 0000000000..fd242d5b5b --- /dev/null +++ b/naga/tests/in/glsl/f16-glsl.toml @@ -0,0 +1 @@ +god_mode = true diff --git a/naga/tests/in/spv/f16-spv.comp b/naga/tests/in/spv/f16-spv.comp new file mode 100644 index 0000000000..af8c89cb8a --- /dev/null +++ b/naga/tests/in/spv/f16-spv.comp @@ -0,0 +1,57 @@ +#version 460 + +#extension GL_AMD_gpu_shader_half_float: enable + +layout(set = 0, binding = 0) uniform A { + float16_t a_1; + f16vec2 a_vec2; + f16vec3 a_vec3; + f16vec4 a_vec4; + // So the rules here are particularly nasty for any f16 matries in uniform buffers + // as the stride is always rounded up to 16, meaning that _every_ f16 matrix in a uniform + // buffer is over-aligned to what naga-ir wants. + // + // This is https://github.com/gfx-rs/wgpu/issues/4375. + + // f16mat2 a_mat2; + // f16mat2x3 a_mat2x3; + // f16mat2x4 a_mat2x4; + // f16mat3x2 a_mat3x2; + // f16mat3 a_mat3; + // f16mat3x4 a_mat3x4; + // f16mat4x2 a_mat4x2; + // f16mat4x3 a_mat4x3; + // f16mat4 a_mat4; +}; + +layout(set = 0, binding = 1) buffer B { + float16_t b_1; + f16vec2 b_vec2; + f16vec3 b_vec3; + f16vec4 b_vec4; + f16mat2 b_mat2; + f16mat2x3 b_mat2x3; + f16mat2x4 b_mat2x4; + f16mat3x2 b_mat3x2; + f16mat3 b_mat3; + f16mat3x4 b_mat3x4; + f16mat4x2 b_mat4x2; + f16mat4x3 b_mat4x3; + f16mat4 b_mat4; +}; + +void main() { + b_1 = a_1; + b_vec2 = a_vec2; + b_vec3 = a_vec3; + b_vec4 = a_vec4; + // b_mat2 = a_mat2; + // b_mat2x3 = a_mat2x3; + // b_mat2x4 = a_mat2x4; + // b_mat3x2 = a_mat3x2; + // b_mat3 = a_mat3; + // b_mat3x4 = a_mat3x4; + // b_mat4x2 = a_mat4x2; + // b_mat4x3 = a_mat4x3; + // b_mat4 = a_mat4; +} diff --git a/naga/tests/in/spv/f16-spv.spvasm b/naga/tests/in/spv/f16-spv.spvasm new file mode 100644 index 0000000000..806de32754 --- /dev/null +++ b/naga/tests/in/spv/f16-spv.spvasm @@ -0,0 +1,130 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google Shaderc over Glslang; 11 +; Bound: 46 +; Schema: 0 + OpCapability Shader + OpCapability StorageBuffer16BitAccess + OpCapability UniformAndStorageBuffer16BitAccess + OpExtension "SPV_KHR_16bit_storage" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 460 + OpSourceExtension "GL_AMD_gpu_shader_half_float" + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpName %main "main" + OpName %B "B" + OpMemberName %B 0 "b_1" + OpMemberName %B 1 "b_vec2" + OpMemberName %B 2 "b_vec3" + OpMemberName %B 3 "b_vec4" + OpMemberName %B 4 "b_mat2" + OpMemberName %B 5 "b_mat2x3" + OpMemberName %B 6 "b_mat2x4" + OpMemberName %B 7 "b_mat3x2" + OpMemberName %B 8 "b_mat3" + OpMemberName %B 9 "b_mat3x4" + OpMemberName %B 10 "b_mat4x2" + OpMemberName %B 11 "b_mat4x3" + OpMemberName %B 12 "b_mat4" + OpName %_ "" + OpName %A "A" + OpMemberName %A 0 "a_1" + OpMemberName %A 1 "a_vec2" + OpMemberName %A 2 "a_vec3" + OpMemberName %A 3 "a_vec4" + OpName %__0 "" + OpDecorate %B BufferBlock + OpMemberDecorate %B 0 Offset 0 + OpMemberDecorate %B 1 Offset 4 + OpMemberDecorate %B 2 Offset 8 + OpMemberDecorate %B 3 Offset 16 + OpMemberDecorate %B 4 ColMajor + OpMemberDecorate %B 4 MatrixStride 4 + OpMemberDecorate %B 4 Offset 24 + OpMemberDecorate %B 5 ColMajor + OpMemberDecorate %B 5 MatrixStride 8 + OpMemberDecorate %B 5 Offset 32 + OpMemberDecorate %B 6 ColMajor + OpMemberDecorate %B 6 MatrixStride 8 + OpMemberDecorate %B 6 Offset 48 + OpMemberDecorate %B 7 ColMajor + OpMemberDecorate %B 7 MatrixStride 4 + OpMemberDecorate %B 7 Offset 64 + OpMemberDecorate %B 8 ColMajor + OpMemberDecorate %B 8 MatrixStride 8 + OpMemberDecorate %B 8 Offset 80 + OpMemberDecorate %B 9 ColMajor + OpMemberDecorate %B 9 MatrixStride 8 + OpMemberDecorate %B 9 Offset 104 + OpMemberDecorate %B 10 ColMajor + OpMemberDecorate %B 10 MatrixStride 4 + OpMemberDecorate %B 10 Offset 128 + OpMemberDecorate %B 11 ColMajor + OpMemberDecorate %B 11 MatrixStride 8 + OpMemberDecorate %B 11 Offset 144 + OpMemberDecorate %B 12 ColMajor + OpMemberDecorate %B 12 MatrixStride 8 + OpMemberDecorate %B 12 Offset 176 + OpDecorate %_ Binding 1 + OpDecorate %_ DescriptorSet 0 + OpDecorate %A Block + OpMemberDecorate %A 0 Offset 0 + OpMemberDecorate %A 1 Offset 4 + OpMemberDecorate %A 2 Offset 8 + OpMemberDecorate %A 3 Offset 16 + OpDecorate %__0 Binding 0 + OpDecorate %__0 DescriptorSet 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %half = OpTypeFloat 16 + %v2half = OpTypeVector %half 2 + %v3half = OpTypeVector %half 3 + %v4half = OpTypeVector %half 4 + %mat2v2half = OpTypeMatrix %v2half 2 + %mat2v3half = OpTypeMatrix %v3half 2 + %mat2v4half = OpTypeMatrix %v4half 2 + %mat3v2half = OpTypeMatrix %v2half 3 + %mat3v3half = OpTypeMatrix %v3half 3 + %mat3v4half = OpTypeMatrix %v4half 3 + %mat4v2half = OpTypeMatrix %v2half 4 + %mat4v3half = OpTypeMatrix %v3half 4 + %mat4v4half = OpTypeMatrix %v4half 4 + %B = OpTypeStruct %half %v2half %v3half %v4half %mat2v2half %mat2v3half %mat2v4half %mat3v2half %mat3v3half %mat3v4half %mat4v2half %mat4v3half %mat4v4half +%_ptr_Uniform_B = OpTypePointer Uniform %B + %_ = OpVariable %_ptr_Uniform_B Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %A = OpTypeStruct %half %v2half %v3half %v4half +%_ptr_Uniform_A = OpTypePointer Uniform %A + %__0 = OpVariable %_ptr_Uniform_A Uniform +%_ptr_Uniform_half = OpTypePointer Uniform %half + %int_1 = OpConstant %int 1 +%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half + %int_2 = OpConstant %int 2 +%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half + %int_3 = OpConstant %int 3 +%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half + %main = OpFunction %void None %3 + %5 = OpLabel + %28 = OpAccessChain %_ptr_Uniform_half %__0 %int_0 + %29 = OpLoad %half %28 + %30 = OpAccessChain %_ptr_Uniform_half %_ %int_0 + OpStore %30 %29 + %33 = OpAccessChain %_ptr_Uniform_v2half %__0 %int_1 + %34 = OpLoad %v2half %33 + %35 = OpAccessChain %_ptr_Uniform_v2half %_ %int_1 + OpStore %35 %34 + %38 = OpAccessChain %_ptr_Uniform_v3half %__0 %int_2 + %39 = OpLoad %v3half %38 + %40 = OpAccessChain %_ptr_Uniform_v3half %_ %int_2 + OpStore %40 %39 + %43 = OpAccessChain %_ptr_Uniform_v4half %__0 %int_3 + %44 = OpLoad %v4half %43 + %45 = OpAccessChain %_ptr_Uniform_v4half %_ %int_3 + OpStore %45 %44 + OpReturn + OpFunctionEnd diff --git a/naga/tests/in/spv/f16-spv.toml b/naga/tests/in/spv/f16-spv.toml new file mode 100644 index 0000000000..fd242d5b5b --- /dev/null +++ b/naga/tests/in/spv/f16-spv.toml @@ -0,0 +1 @@ +god_mode = true diff --git a/naga/tests/in/wgsl/extra.wgsl b/naga/tests/in/wgsl/extra.wgsl index ef68f4aa80..1a7ab91ff5 100644 --- a/naga/tests/in/wgsl/extra.wgsl +++ b/naga/tests/in/wgsl/extra.wgsl @@ -1,6 +1,6 @@ struct PushConstants { index: u32, - double: vec2, + double: vec2, } var pc: PushConstants; diff --git a/naga/tests/in/wgsl/f16.toml b/naga/tests/in/wgsl/f16.toml new file mode 100644 index 0000000000..04b61f6b30 --- /dev/null +++ b/naga/tests/in/wgsl/f16.toml @@ -0,0 +1,15 @@ +# No GLSL support for f16 +targets = "SPIRV | METAL | HLSL | WGSL" +god_mode = true + +[spv] +version = [1, 0] + +[hlsl] +shader_model = "V6_2" +special_constants_binding = { space = 1, register = 0 } +push_constants_target = { space = 0, register = 0 } + +lang_version = [1, 0] +per_entry_point_map = {} +zero_initialize_workgroup_memory = true diff --git a/naga/tests/in/wgsl/f16.wgsl b/naga/tests/in/wgsl/f16.wgsl new file mode 100644 index 0000000000..dbef9aedfc --- /dev/null +++ b/naga/tests/in/wgsl/f16.wgsl @@ -0,0 +1,127 @@ +enable f16; + +var private_variable: f16 = 1h; +const constant_variable: f16 = f16(15.2); + +struct UniformCompatible { + // Other types + val_u32: u32, + val_i32: i32, + val_f32: f32, + + // f16 + val_f16: f16, + val_f16_2: vec2, + val_f16_3: vec3, + val_f16_4: vec4, + final_value: f16, + + val_mat2x2: mat2x2, + val_mat2x3: mat2x3, + val_mat2x4: mat2x4, + val_mat3x2: mat3x2, + val_mat3x3: mat3x3, + val_mat3x4: mat3x4, + val_mat4x2: mat4x2, + val_mat4x3: mat4x3, + val_mat4x4: mat4x4, +} + +struct StorageCompatible { + val_f16_array_2: array, +} + +struct LayoutTest { + scalar1: f16, scalar2: f16, v3: vec3, tuck_in: f16, scalar4: f16, larger: u32 +} + +@group(0) @binding(0) +var input_uniform: UniformCompatible; + +@group(0) @binding(1) +var input_storage: UniformCompatible; + +@group(0) @binding(2) +var input_arrays: StorageCompatible; + +@group(0) @binding(3) +var output: UniformCompatible; + +@group(0) @binding(4) +var output_arrays: StorageCompatible; + +fn f16_function(x: f16) -> f16 { + var val: f16 = f16(constant_variable); + // A number too big for f16 + val += 1h - 33333h; + // Constructing an f16 from an AbstractInt + val += val + f16(5.); + // Constructing a f16 from other types and other types from f16. + val += f16(input_uniform.val_f32 + f32(val)); + // Constructing a vec3 from a i64 + val += vec3(input_uniform.val_f16).z; + + // Reading/writing to a uniform/storage buffer + output.val_f16 = input_uniform.val_f16 + input_storage.val_f16; + output.val_f16_2 = input_uniform.val_f16_2 + input_storage.val_f16_2; + output.val_f16_3 = input_uniform.val_f16_3 + input_storage.val_f16_3; + output.val_f16_4 = input_uniform.val_f16_4 + input_storage.val_f16_4; + + output.val_mat2x2 = input_uniform.val_mat2x2 + input_storage.val_mat2x2; + output.val_mat2x3 = input_uniform.val_mat2x3 + input_storage.val_mat2x3; + output.val_mat2x4 = input_uniform.val_mat2x4 + input_storage.val_mat2x4; + output.val_mat3x2 = input_uniform.val_mat3x2 + input_storage.val_mat3x2; + output.val_mat3x3 = input_uniform.val_mat3x3 + input_storage.val_mat3x3; + output.val_mat3x4 = input_uniform.val_mat3x4 + input_storage.val_mat3x4; + output.val_mat4x2 = input_uniform.val_mat4x2 + input_storage.val_mat4x2; + output.val_mat4x3 = input_uniform.val_mat4x3 + input_storage.val_mat4x3; + output.val_mat4x4 = input_uniform.val_mat4x4 + input_storage.val_mat4x4; + + output_arrays.val_f16_array_2 = input_arrays.val_f16_array_2; + + // We make sure not to use 32 in these arguments, so it's clear in the results which are builtin + // constants based on the size of the type, and which are arguments. + + // Numeric functions + val += abs(val); + val += clamp(val, val, val); + val += dot(vec2(val), vec2(val)); + val += max(val, val); + val += min(val, val); + val += sign(val); + + val += f16(1.0); + + // We use the shorthand aliases here to ensure the aliases + // work correctly. + + // Cast vectors to/from f32 + let float_vec2 = vec2f(input_uniform.val_f16_2); + output.val_f16_2 = vec2h(float_vec2); + + let float_vec3 = vec3f(input_uniform.val_f16_3); + output.val_f16_3 = vec3h(float_vec3); + + let float_vec4 = vec4f(input_uniform.val_f16_4); + output.val_f16_4 = vec4h(float_vec4); + + // Cast matrices to/from f32 + output.val_mat2x2 = mat2x2h(mat2x2f(input_uniform.val_mat2x2)); + output.val_mat2x3 = mat2x3h(mat2x3f(input_uniform.val_mat2x3)); + output.val_mat2x4 = mat2x4h(mat2x4f(input_uniform.val_mat2x4)); + output.val_mat3x2 = mat3x2h(mat3x2f(input_uniform.val_mat3x2)); + output.val_mat3x3 = mat3x3h(mat3x3f(input_uniform.val_mat3x3)); + output.val_mat3x4 = mat3x4h(mat3x4f(input_uniform.val_mat3x4)); + output.val_mat4x2 = mat4x2h(mat4x2f(input_uniform.val_mat4x2)); + output.val_mat4x3 = mat4x3h(mat4x3f(input_uniform.val_mat4x3)); + output.val_mat4x4 = mat4x4h(mat4x4f(input_uniform.val_mat4x4)); + + // Make sure all the variables are used. + return val; +} + +@compute @workgroup_size(1) +fn main() { + output.final_value = f16_function(2h); +} + diff --git a/naga/tests/out/hlsl/f16.hlsl b/naga/tests/out/hlsl/f16.hlsl new file mode 100644 index 0000000000..05e2387212 --- /dev/null +++ b/naga/tests/out/hlsl/f16.hlsl @@ -0,0 +1,351 @@ +struct NagaConstants { + int first_vertex; + int first_instance; + uint other; +}; +ConstantBuffer _NagaConstants: register(b0, space1); + +struct UniformCompatible { + uint val_u32_; + int val_i32_; + float val_f32_; + half val_f16_; + half2 val_f16_2_; + int _pad5_0; + half3 val_f16_3_; + half4 val_f16_4_; + half final_value; + half2 val_mat2x2__0; half2 val_mat2x2__1; + int _pad9_0; + row_major half2x3 val_mat2x3_; + row_major half2x4 val_mat2x4_; + half2 val_mat3x2__0; half2 val_mat3x2__1; half2 val_mat3x2__2; + int _pad12_0; + row_major half3x3 val_mat3x3_; + row_major half3x4 val_mat3x4_; + half2 val_mat4x2__0; half2 val_mat4x2__1; half2 val_mat4x2__2; half2 val_mat4x2__3; + row_major half4x3 val_mat4x3_; + row_major half4x4 val_mat4x4_; +}; + +struct StorageCompatible { + half val_f16_array_2_[2]; +}; + +struct LayoutTest { + half scalar1_; + half scalar2_; + int _pad2_0; + half3 v3_; + half tuck_in; + half scalar4_; + uint larger; +}; + +static const half constant_variable = 15.203125h; + +static half private_variable = 1.0h; +cbuffer input_uniform : register(b0) { UniformCompatible input_uniform; } +ByteAddressBuffer input_storage : register(t1); +ByteAddressBuffer input_arrays : register(t2); +RWByteAddressBuffer output : register(u3); +RWByteAddressBuffer output_arrays : register(u4); + +half2x2 GetMatval_mat2x2_OnUniformCompatible(UniformCompatible obj) { + return half2x2(obj.val_mat2x2__0, obj.val_mat2x2__1); +} + +void SetMatval_mat2x2_OnUniformCompatible(UniformCompatible obj, half2x2 mat) { + obj.val_mat2x2__0 = mat[0]; + obj.val_mat2x2__1 = mat[1]; +} + +void SetMatVecval_mat2x2_OnUniformCompatible(UniformCompatible obj, half2 vec, uint mat_idx) { + switch(mat_idx) { + case 0: { obj.val_mat2x2__0 = vec; break; } + case 1: { obj.val_mat2x2__1 = vec; break; } + } +} + +void SetMatScalarval_mat2x2_OnUniformCompatible(UniformCompatible obj, half scalar, uint mat_idx, uint vec_idx) { + switch(mat_idx) { + case 0: { obj.val_mat2x2__0[vec_idx] = scalar; break; } + case 1: { obj.val_mat2x2__1[vec_idx] = scalar; break; } + } +} + +half3x2 GetMatval_mat3x2_OnUniformCompatible(UniformCompatible obj) { + return half3x2(obj.val_mat3x2__0, obj.val_mat3x2__1, obj.val_mat3x2__2); +} + +void SetMatval_mat3x2_OnUniformCompatible(UniformCompatible obj, half3x2 mat) { + obj.val_mat3x2__0 = mat[0]; + obj.val_mat3x2__1 = mat[1]; + obj.val_mat3x2__2 = mat[2]; +} + +void SetMatVecval_mat3x2_OnUniformCompatible(UniformCompatible obj, half2 vec, uint mat_idx) { + switch(mat_idx) { + case 0: { obj.val_mat3x2__0 = vec; break; } + case 1: { obj.val_mat3x2__1 = vec; break; } + case 2: { obj.val_mat3x2__2 = vec; break; } + } +} + +void SetMatScalarval_mat3x2_OnUniformCompatible(UniformCompatible obj, half scalar, uint mat_idx, uint vec_idx) { + switch(mat_idx) { + case 0: { obj.val_mat3x2__0[vec_idx] = scalar; break; } + case 1: { obj.val_mat3x2__1[vec_idx] = scalar; break; } + case 2: { obj.val_mat3x2__2[vec_idx] = scalar; break; } + } +} + +half4x2 GetMatval_mat4x2_OnUniformCompatible(UniformCompatible obj) { + return half4x2(obj.val_mat4x2__0, obj.val_mat4x2__1, obj.val_mat4x2__2, obj.val_mat4x2__3); +} + +void SetMatval_mat4x2_OnUniformCompatible(UniformCompatible obj, half4x2 mat) { + obj.val_mat4x2__0 = mat[0]; + obj.val_mat4x2__1 = mat[1]; + obj.val_mat4x2__2 = mat[2]; + obj.val_mat4x2__3 = mat[3]; +} + +void SetMatVecval_mat4x2_OnUniformCompatible(UniformCompatible obj, half2 vec, uint mat_idx) { + switch(mat_idx) { + case 0: { obj.val_mat4x2__0 = vec; break; } + case 1: { obj.val_mat4x2__1 = vec; break; } + case 2: { obj.val_mat4x2__2 = vec; break; } + case 3: { obj.val_mat4x2__3 = vec; break; } + } +} + +void SetMatScalarval_mat4x2_OnUniformCompatible(UniformCompatible obj, half scalar, uint mat_idx, uint vec_idx) { + switch(mat_idx) { + case 0: { obj.val_mat4x2__0[vec_idx] = scalar; break; } + case 1: { obj.val_mat4x2__1[vec_idx] = scalar; break; } + case 2: { obj.val_mat4x2__2[vec_idx] = scalar; break; } + case 3: { obj.val_mat4x2__3[vec_idx] = scalar; break; } + } +} + +typedef half ret_Constructarray2_half_[2]; +ret_Constructarray2_half_ Constructarray2_half_(half arg0, half arg1) { + half ret[2] = { arg0, arg1 }; + return ret; +} + +half f16_function(half x) +{ + half val = 15.203125h; + + half _e4 = val; + val = (_e4 + -33344.0h); + half _e6 = val; + half _e9 = val; + val = (_e9 + (_e6 + 5.0h)); + float _e13 = input_uniform.val_f32_; + half _e14 = val; + half _e18 = val; + val = (_e18 + half((_e13 + float(_e14)))); + half _e22 = input_uniform.val_f16_; + half _e25 = val; + val = (_e25 + (_e22).xxx.z); + half _e31 = input_uniform.val_f16_; + half _e34 = input_storage.Load(12); + output.Store(12, (_e31 + _e34)); + half2 _e40 = input_uniform.val_f16_2_; + half2 _e43 = input_storage.Load(16); + output.Store(16, (_e40 + _e43)); + half3 _e49 = input_uniform.val_f16_3_; + half3 _e52 = input_storage.Load(24); + output.Store(24, (_e49 + _e52)); + half4 _e58 = input_uniform.val_f16_4_; + half4 _e61 = input_storage.Load(32); + output.Store(32, (_e58 + _e61)); + half2x2 _e67 = GetMatval_mat2x2_OnUniformCompatible(input_uniform); + half2x2 _e70 = half2x2(input_storage.Load(44+0), input_storage.Load(44+4)); + { + half2x2 _value2 = (_e67 + _e70); + output.Store(44+0, _value2[0]); + output.Store(44+4, _value2[1]); + } + half2x3 _e76 = input_uniform.val_mat2x3_; + half2x3 _e79 = half2x3(input_storage.Load(56+0), input_storage.Load(56+8)); + { + half2x3 _value2 = (_e76 + _e79); + output.Store(56+0, _value2[0]); + output.Store(56+8, _value2[1]); + } + half2x4 _e85 = input_uniform.val_mat2x4_; + half2x4 _e88 = half2x4(input_storage.Load(72+0), input_storage.Load(72+8)); + { + half2x4 _value2 = (_e85 + _e88); + output.Store(72+0, _value2[0]); + output.Store(72+8, _value2[1]); + } + half3x2 _e94 = GetMatval_mat3x2_OnUniformCompatible(input_uniform); + half3x2 _e97 = half3x2(input_storage.Load(88+0), input_storage.Load(88+4), input_storage.Load(88+8)); + { + half3x2 _value2 = (_e94 + _e97); + output.Store(88+0, _value2[0]); + output.Store(88+4, _value2[1]); + output.Store(88+8, _value2[2]); + } + half3x3 _e103 = input_uniform.val_mat3x3_; + half3x3 _e106 = half3x3(input_storage.Load(104+0), input_storage.Load(104+8), input_storage.Load(104+16)); + { + half3x3 _value2 = (_e103 + _e106); + output.Store(104+0, _value2[0]); + output.Store(104+8, _value2[1]); + output.Store(104+16, _value2[2]); + } + half3x4 _e112 = input_uniform.val_mat3x4_; + half3x4 _e115 = half3x4(input_storage.Load(128+0), input_storage.Load(128+8), input_storage.Load(128+16)); + { + half3x4 _value2 = (_e112 + _e115); + output.Store(128+0, _value2[0]); + output.Store(128+8, _value2[1]); + output.Store(128+16, _value2[2]); + } + half4x2 _e121 = GetMatval_mat4x2_OnUniformCompatible(input_uniform); + half4x2 _e124 = half4x2(input_storage.Load(152+0), input_storage.Load(152+4), input_storage.Load(152+8), input_storage.Load(152+12)); + { + half4x2 _value2 = (_e121 + _e124); + output.Store(152+0, _value2[0]); + output.Store(152+4, _value2[1]); + output.Store(152+8, _value2[2]); + output.Store(152+12, _value2[3]); + } + half4x3 _e130 = input_uniform.val_mat4x3_; + half4x3 _e133 = half4x3(input_storage.Load(168+0), input_storage.Load(168+8), input_storage.Load(168+16), input_storage.Load(168+24)); + { + half4x3 _value2 = (_e130 + _e133); + output.Store(168+0, _value2[0]); + output.Store(168+8, _value2[1]); + output.Store(168+16, _value2[2]); + output.Store(168+24, _value2[3]); + } + half4x4 _e139 = input_uniform.val_mat4x4_; + half4x4 _e142 = half4x4(input_storage.Load(200+0), input_storage.Load(200+8), input_storage.Load(200+16), input_storage.Load(200+24)); + { + half4x4 _value2 = (_e139 + _e142); + output.Store(200+0, _value2[0]); + output.Store(200+8, _value2[1]); + output.Store(200+16, _value2[2]); + output.Store(200+24, _value2[3]); + } + half _e148[2] = Constructarray2_half_(input_arrays.Load(0+0), input_arrays.Load(0+2)); + { + half _value2[2] = _e148; + output_arrays.Store(0+0, _value2[0]); + output_arrays.Store(0+2, _value2[1]); + } + half _e149 = val; + half _e151 = val; + val = (_e151 + abs(_e149)); + half _e153 = val; + half _e154 = val; + half _e155 = val; + half _e157 = val; + val = (_e157 + clamp(_e153, _e154, _e155)); + half _e159 = val; + half _e161 = val; + half _e164 = val; + val = (_e164 + dot((_e159).xx, (_e161).xx)); + half _e166 = val; + half _e167 = val; + half _e169 = val; + val = (_e169 + max(_e166, _e167)); + half _e171 = val; + half _e172 = val; + half _e174 = val; + val = (_e174 + min(_e171, _e172)); + half _e176 = val; + half _e178 = val; + val = (_e178 + sign(_e176)); + half _e181 = val; + val = (_e181 + 1.0h); + half2 _e185 = input_uniform.val_f16_2_; + float2 float_vec2_ = float2(_e185); + output.Store(16, half2(float_vec2_)); + half3 _e192 = input_uniform.val_f16_3_; + float3 float_vec3_ = float3(_e192); + output.Store(24, half3(float_vec3_)); + half4 _e199 = input_uniform.val_f16_4_; + float4 float_vec4_ = float4(_e199); + output.Store(32, half4(float_vec4_)); + half2x2 _e208 = GetMatval_mat2x2_OnUniformCompatible(input_uniform); + { + half2x2 _value2 = half2x2(float2x2(_e208)); + output.Store(44+0, _value2[0]); + output.Store(44+4, _value2[1]); + } + half2x3 _e215 = input_uniform.val_mat2x3_; + { + half2x3 _value2 = half2x3(float2x3(_e215)); + output.Store(56+0, _value2[0]); + output.Store(56+8, _value2[1]); + } + half2x4 _e222 = input_uniform.val_mat2x4_; + { + half2x4 _value2 = half2x4(float2x4(_e222)); + output.Store(72+0, _value2[0]); + output.Store(72+8, _value2[1]); + } + half3x2 _e229 = GetMatval_mat3x2_OnUniformCompatible(input_uniform); + { + half3x2 _value2 = half3x2(float3x2(_e229)); + output.Store(88+0, _value2[0]); + output.Store(88+4, _value2[1]); + output.Store(88+8, _value2[2]); + } + half3x3 _e236 = input_uniform.val_mat3x3_; + { + half3x3 _value2 = half3x3(float3x3(_e236)); + output.Store(104+0, _value2[0]); + output.Store(104+8, _value2[1]); + output.Store(104+16, _value2[2]); + } + half3x4 _e243 = input_uniform.val_mat3x4_; + { + half3x4 _value2 = half3x4(float3x4(_e243)); + output.Store(128+0, _value2[0]); + output.Store(128+8, _value2[1]); + output.Store(128+16, _value2[2]); + } + half4x2 _e250 = GetMatval_mat4x2_OnUniformCompatible(input_uniform); + { + half4x2 _value2 = half4x2(float4x2(_e250)); + output.Store(152+0, _value2[0]); + output.Store(152+4, _value2[1]); + output.Store(152+8, _value2[2]); + output.Store(152+12, _value2[3]); + } + half4x3 _e257 = input_uniform.val_mat4x3_; + { + half4x3 _value2 = half4x3(float4x3(_e257)); + output.Store(168+0, _value2[0]); + output.Store(168+8, _value2[1]); + output.Store(168+16, _value2[2]); + output.Store(168+24, _value2[3]); + } + half4x4 _e264 = input_uniform.val_mat4x4_; + { + half4x4 _value2 = half4x4(float4x4(_e264)); + output.Store(200+0, _value2[0]); + output.Store(200+8, _value2[1]); + output.Store(200+16, _value2[2]); + output.Store(200+24, _value2[3]); + } + half _e267 = val; + return _e267; +} + +[numthreads(1, 1, 1)] +void main() +{ + const half _e3 = f16_function(2.0h); + output.Store(40, _e3); + return; +} diff --git a/naga/tests/out/hlsl/f16.ron b/naga/tests/out/hlsl/f16.ron new file mode 100644 index 0000000000..b396a4626e --- /dev/null +++ b/naga/tests/out/hlsl/f16.ron @@ -0,0 +1,12 @@ +( + vertex:[ + ], + fragment:[ + ], + compute:[ + ( + entry_point:"main", + target_profile:"cs_6_2", + ), + ], +) diff --git a/naga/tests/out/msl/extra.msl b/naga/tests/out/msl/extra.msl index 8288dfad92..4d6bb568f3 100644 --- a/naga/tests/out/msl/extra.msl +++ b/naga/tests/out/msl/extra.msl @@ -6,7 +6,7 @@ using metal::uint; struct PushConstants { uint index; - char _pad1[12]; + char _pad1[4]; metal::float2 double_; }; struct FragmentIn { diff --git a/naga/tests/out/msl/f16.msl b/naga/tests/out/msl/f16.msl new file mode 100644 index 0000000000..40be58e901 --- /dev/null +++ b/naga/tests/out/msl/f16.msl @@ -0,0 +1,177 @@ +// language: metal1.0 +#include +#include + +using metal::uint; + +struct UniformCompatible { + uint val_u32_; + int val_i32_; + float val_f32_; + half val_f16_; + char _pad4[2]; + metal::half2 val_f16_2_; + char _pad5[4]; + metal::half3 val_f16_3_; + metal::half4 val_f16_4_; + half final_value; + char _pad8[2]; + metal::half2x2 val_mat2x2_; + char _pad9[4]; + metal::half2x3 val_mat2x3_; + metal::half2x4 val_mat2x4_; + metal::half3x2 val_mat3x2_; + char _pad12[4]; + metal::half3x3 val_mat3x3_; + metal::half3x4 val_mat3x4_; + metal::half4x2 val_mat4x2_; + metal::half4x3 val_mat4x3_; + metal::half4x4 val_mat4x4_; +}; +struct type_16 { + half inner[2]; +}; +struct StorageCompatible { + type_16 val_f16_array_2_; +}; +struct LayoutTest { + half scalar1_; + half scalar2_; + char _pad2[4]; + metal::packed_half3 v3_; + half tuck_in; + half scalar4_; + char _pad5[2]; + uint larger; +}; +constant half constant_variable = 15.203125h; + +half f16_function( + half x, + constant UniformCompatible& input_uniform, + device UniformCompatible const& input_storage, + device StorageCompatible const& input_arrays, + device UniformCompatible& output, + device StorageCompatible& output_arrays +) { + half val = 15.203125h; + half _e4 = val; + val = _e4 + -33344.0h; + half _e6 = val; + half _e9 = val; + val = _e9 + (_e6 + 5.0h); + float _e13 = input_uniform.val_f32_; + half _e14 = val; + half _e18 = val; + val = _e18 + static_cast(_e13 + static_cast(_e14)); + half _e22 = input_uniform.val_f16_; + half _e25 = val; + val = _e25 + metal::half3(_e22).z; + half _e31 = input_uniform.val_f16_; + half _e34 = input_storage.val_f16_; + output.val_f16_ = _e31 + _e34; + metal::half2 _e40 = input_uniform.val_f16_2_; + metal::half2 _e43 = input_storage.val_f16_2_; + output.val_f16_2_ = _e40 + _e43; + metal::half3 _e49 = input_uniform.val_f16_3_; + metal::half3 _e52 = input_storage.val_f16_3_; + output.val_f16_3_ = _e49 + _e52; + metal::half4 _e58 = input_uniform.val_f16_4_; + metal::half4 _e61 = input_storage.val_f16_4_; + output.val_f16_4_ = _e58 + _e61; + metal::half2x2 _e67 = input_uniform.val_mat2x2_; + metal::half2x2 _e70 = input_storage.val_mat2x2_; + output.val_mat2x2_ = _e67 + _e70; + metal::half2x3 _e76 = input_uniform.val_mat2x3_; + metal::half2x3 _e79 = input_storage.val_mat2x3_; + output.val_mat2x3_ = _e76 + _e79; + metal::half2x4 _e85 = input_uniform.val_mat2x4_; + metal::half2x4 _e88 = input_storage.val_mat2x4_; + output.val_mat2x4_ = _e85 + _e88; + metal::half3x2 _e94 = input_uniform.val_mat3x2_; + metal::half3x2 _e97 = input_storage.val_mat3x2_; + output.val_mat3x2_ = _e94 + _e97; + metal::half3x3 _e103 = input_uniform.val_mat3x3_; + metal::half3x3 _e106 = input_storage.val_mat3x3_; + output.val_mat3x3_ = _e103 + _e106; + metal::half3x4 _e112 = input_uniform.val_mat3x4_; + metal::half3x4 _e115 = input_storage.val_mat3x4_; + output.val_mat3x4_ = _e112 + _e115; + metal::half4x2 _e121 = input_uniform.val_mat4x2_; + metal::half4x2 _e124 = input_storage.val_mat4x2_; + output.val_mat4x2_ = _e121 + _e124; + metal::half4x3 _e130 = input_uniform.val_mat4x3_; + metal::half4x3 _e133 = input_storage.val_mat4x3_; + output.val_mat4x3_ = _e130 + _e133; + metal::half4x4 _e139 = input_uniform.val_mat4x4_; + metal::half4x4 _e142 = input_storage.val_mat4x4_; + output.val_mat4x4_ = _e139 + _e142; + type_16 _e148 = input_arrays.val_f16_array_2_; + output_arrays.val_f16_array_2_ = _e148; + half _e149 = val; + half _e151 = val; + val = _e151 + metal::abs(_e149); + half _e153 = val; + half _e154 = val; + half _e155 = val; + half _e157 = val; + val = _e157 + metal::clamp(_e153, _e154, _e155); + half _e159 = val; + half _e161 = val; + half _e164 = val; + val = _e164 + metal::dot(metal::half2(_e159), metal::half2(_e161)); + half _e166 = val; + half _e167 = val; + half _e169 = val; + val = _e169 + metal::max(_e166, _e167); + half _e171 = val; + half _e172 = val; + half _e174 = val; + val = _e174 + metal::min(_e171, _e172); + half _e176 = val; + half _e178 = val; + val = _e178 + metal::sign(_e176); + half _e181 = val; + val = _e181 + 1.0h; + metal::half2 _e185 = input_uniform.val_f16_2_; + metal::float2 float_vec2_ = static_cast(_e185); + output.val_f16_2_ = static_cast(float_vec2_); + metal::half3 _e192 = input_uniform.val_f16_3_; + metal::float3 float_vec3_ = static_cast(_e192); + output.val_f16_3_ = static_cast(float_vec3_); + metal::half4 _e199 = input_uniform.val_f16_4_; + metal::float4 float_vec4_ = static_cast(_e199); + output.val_f16_4_ = static_cast(float_vec4_); + metal::half2x2 _e208 = input_uniform.val_mat2x2_; + output.val_mat2x2_ = metal::half2x2(metal::float2x2(_e208)); + metal::half2x3 _e215 = input_uniform.val_mat2x3_; + output.val_mat2x3_ = metal::half2x3(metal::float2x3(_e215)); + metal::half2x4 _e222 = input_uniform.val_mat2x4_; + output.val_mat2x4_ = metal::half2x4(metal::float2x4(_e222)); + metal::half3x2 _e229 = input_uniform.val_mat3x2_; + output.val_mat3x2_ = metal::half3x2(metal::float3x2(_e229)); + metal::half3x3 _e236 = input_uniform.val_mat3x3_; + output.val_mat3x3_ = metal::half3x3(metal::float3x3(_e236)); + metal::half3x4 _e243 = input_uniform.val_mat3x4_; + output.val_mat3x4_ = metal::half3x4(metal::float3x4(_e243)); + metal::half4x2 _e250 = input_uniform.val_mat4x2_; + output.val_mat4x2_ = metal::half4x2(metal::float4x2(_e250)); + metal::half4x3 _e257 = input_uniform.val_mat4x3_; + output.val_mat4x3_ = metal::half4x3(metal::float4x3(_e257)); + metal::half4x4 _e264 = input_uniform.val_mat4x4_; + output.val_mat4x4_ = metal::half4x4(metal::float4x4(_e264)); + half _e267 = val; + return _e267; +} + +kernel void main_( + constant UniformCompatible& input_uniform [[user(fake0)]] +, device UniformCompatible const& input_storage [[user(fake0)]] +, device StorageCompatible const& input_arrays [[user(fake0)]] +, device UniformCompatible& output [[user(fake0)]] +, device StorageCompatible& output_arrays [[user(fake0)]] +) { + half _e3 = f16_function(2.0h, input_uniform, input_storage, input_arrays, output, output_arrays); + output.final_value = _e3; + return; +} diff --git a/naga/tests/out/spv/atomicCompareExchange-int64.spvasm b/naga/tests/out/spv/atomicCompareExchange-int64.spvasm index 2697eee855..b7dbc9e0fc 100644 --- a/naga/tests/out/spv/atomicCompareExchange-int64.spvasm +++ b/naga/tests/out/spv/atomicCompareExchange-int64.spvasm @@ -1,7 +1,7 @@ ; SPIR-V ; Version: 1.0 ; Generator: rspirv -; Bound: 175 +; Bound: 173 OpCapability Shader OpCapability Int64Atomics OpCapability Int64 @@ -9,9 +9,9 @@ OpExtension "SPV_KHR_storage_buffer_storage_class" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 OpEntryPoint GLCompute %19 "test_atomic_compare_exchange_i64" -OpEntryPoint GLCompute %105 "test_atomic_compare_exchange_u64" +OpEntryPoint GLCompute %104 "test_atomic_compare_exchange_u64" OpExecutionMode %19 LocalSize 1 1 1 -OpExecutionMode %105 LocalSize 1 1 1 +OpExecutionMode %104 LocalSize 1 1 1 OpDecorate %5 ArrayStride 8 OpDecorate %8 ArrayStride 8 OpMemberDecorate %10 0 Offset 0 @@ -63,12 +63,12 @@ OpMemberDecorate %16 0 Offset 0 %68 = OpTypeInt 32 1 %67 = OpConstant %68 1 %69 = OpConstant %3 64 -%106 = OpTypePointer StorageBuffer %8 -%108 = OpConstant %7 10 -%111 = OpTypePointer Function %7 -%112 = OpConstantNull %7 -%114 = OpConstantNull %9 -%138 = OpTypePointer StorageBuffer %7 +%105 = OpTypePointer StorageBuffer %8 +%107 = OpConstant %7 10 +%110 = OpTypePointer Function %7 +%111 = OpConstantNull %7 +%113 = OpConstantNull %9 +%137 = OpTypePointer StorageBuffer %7 %19 = OpFunction %2 None %20 %18 = OpLabel %27 = OpVariable %28 Function %22 @@ -142,16 +142,16 @@ OpBranch %89 %89 = OpLabel %91 = OpLoad %4 %29 %92 = OpIAdd %4 %91 %25 -%94 = OpLoad %3 %27 -%95 = OpLoad %4 %29 -%97 = OpAccessChain %64 %23 %94 -%98 = OpAtomicCompareExchange %4 %97 %67 %69 %69 %92 %95 -%99 = OpIEqual %9 %98 %95 -%96 = OpCompositeConstruct %10 %98 %99 -%100 = OpCompositeExtract %4 %96 0 -OpStore %29 %100 -%101 = OpCompositeExtract %9 %96 1 -OpStore %32 %101 +%93 = OpLoad %3 %27 +%94 = OpLoad %4 %29 +%96 = OpAccessChain %64 %23 %93 +%97 = OpAtomicCompareExchange %4 %96 %67 %69 %69 %92 %94 +%98 = OpIEqual %9 %97 %94 +%95 = OpCompositeConstruct %10 %97 %98 +%99 = OpCompositeExtract %4 %95 0 +OpStore %29 %99 +%100 = OpCompositeExtract %9 %95 1 +OpStore %32 %100 OpBranch %90 %90 = OpLabel OpBranch %73 @@ -162,110 +162,110 @@ OpBranch %62 %62 = OpLabel OpBranch %39 %39 = OpLabel -%102 = OpLoad %3 %27 -%103 = OpIAdd %3 %102 %26 -OpStore %27 %103 +%101 = OpLoad %3 %27 +%102 = OpIAdd %3 %101 %26 +OpStore %27 %102 OpBranch %36 %37 = OpLabel OpReturn OpFunctionEnd -%105 = OpFunction %2 None %20 -%104 = OpLabel -%109 = OpVariable %28 Function %22 -%110 = OpVariable %111 Function %112 -%113 = OpVariable %33 Function %114 -%120 = OpVariable %41 Function %43 -%145 = OpVariable %41 Function %43 -%107 = OpAccessChain %106 %15 %22 +%104 = OpFunction %2 None %20 +%103 = OpLabel +%108 = OpVariable %28 Function %22 +%109 = OpVariable %110 Function %111 +%112 = OpVariable %33 Function %113 +%119 = OpVariable %41 Function %43 +%144 = OpVariable %41 Function %43 +%106 = OpAccessChain %105 %15 %22 +OpBranch %114 +%114 = OpLabel OpBranch %115 %115 = OpLabel -OpBranch %116 -%116 = OpLabel -OpLoopMerge %117 %119 None -OpBranch %121 -%121 = OpLabel -%122 = OpLoad %40 %120 -%123 = OpIEqual %42 %45 %122 -%124 = OpAll %9 %123 -OpSelectionMerge %125 None -OpBranchConditional %124 %117 %125 -%125 = OpLabel -%126 = OpCompositeExtract %3 %122 1 -%127 = OpIEqual %9 %126 %44 -%128 = OpSelect %3 %127 %26 %22 -%129 = OpCompositeConstruct %40 %128 %26 -%130 = OpIAdd %40 %122 %129 -OpStore %120 %130 -OpBranch %118 -%118 = OpLabel -%131 = OpLoad %3 %109 -%132 = OpULessThan %9 %131 %6 -OpSelectionMerge %133 None -OpBranchConditional %132 %133 %134 -%134 = OpLabel +OpLoopMerge %116 %118 None +OpBranch %120 +%120 = OpLabel +%121 = OpLoad %40 %119 +%122 = OpIEqual %42 %45 %121 +%123 = OpAll %9 %122 +OpSelectionMerge %124 None +OpBranchConditional %123 %116 %124 +%124 = OpLabel +%125 = OpCompositeExtract %3 %121 1 +%126 = OpIEqual %9 %125 %44 +%127 = OpSelect %3 %126 %26 %22 +%128 = OpCompositeConstruct %40 %127 %26 +%129 = OpIAdd %40 %121 %128 +OpStore %119 %129 OpBranch %117 +%117 = OpLabel +%130 = OpLoad %3 %108 +%131 = OpULessThan %9 %130 %6 +OpSelectionMerge %132 None +OpBranchConditional %131 %132 %133 %133 = OpLabel -OpBranch %135 -%135 = OpLabel -%137 = OpLoad %3 %109 -%139 = OpAccessChain %138 %107 %137 -%140 = OpAtomicLoad %7 %139 %67 %69 -OpStore %110 %140 -OpStore %113 %24 -OpBranch %141 -%141 = OpLabel -OpLoopMerge %142 %144 None -OpBranch %146 -%146 = OpLabel -%147 = OpLoad %40 %145 -%148 = OpIEqual %42 %45 %147 -%149 = OpAll %9 %148 -OpSelectionMerge %150 None -OpBranchConditional %149 %142 %150 -%150 = OpLabel -%151 = OpCompositeExtract %3 %147 1 -%152 = OpIEqual %9 %151 %44 -%153 = OpSelect %3 %152 %26 %22 -%154 = OpCompositeConstruct %40 %153 %26 -%155 = OpIAdd %40 %147 %154 -OpStore %145 %155 -OpBranch %143 -%143 = OpLabel -%156 = OpLoad %9 %113 -%157 = OpLogicalNot %9 %156 -OpSelectionMerge %158 None -OpBranchConditional %157 %158 %159 -%159 = OpLabel +OpBranch %116 +%132 = OpLabel +OpBranch %134 +%134 = OpLabel +%136 = OpLoad %3 %108 +%138 = OpAccessChain %137 %106 %136 +%139 = OpAtomicLoad %7 %138 %67 %69 +OpStore %109 %139 +OpStore %112 %24 +OpBranch %140 +%140 = OpLabel +OpLoopMerge %141 %143 None +OpBranch %145 +%145 = OpLabel +%146 = OpLoad %40 %144 +%147 = OpIEqual %42 %45 %146 +%148 = OpAll %9 %147 +OpSelectionMerge %149 None +OpBranchConditional %148 %141 %149 +%149 = OpLabel +%150 = OpCompositeExtract %3 %146 1 +%151 = OpIEqual %9 %150 %44 +%152 = OpSelect %3 %151 %26 %22 +%153 = OpCompositeConstruct %40 %152 %26 +%154 = OpIAdd %40 %146 %153 +OpStore %144 %154 OpBranch %142 +%142 = OpLabel +%155 = OpLoad %9 %112 +%156 = OpLogicalNot %9 %155 +OpSelectionMerge %157 None +OpBranchConditional %156 %157 %158 %158 = OpLabel +OpBranch %141 +%157 = OpLabel +OpBranch %159 +%159 = OpLabel +%161 = OpLoad %7 %109 +%162 = OpIAdd %7 %161 %107 +%163 = OpLoad %3 %108 +%164 = OpLoad %7 %109 +%166 = OpAccessChain %137 %106 %163 +%167 = OpAtomicCompareExchange %7 %166 %67 %69 %69 %162 %164 +%168 = OpIEqual %9 %167 %164 +%165 = OpCompositeConstruct %11 %167 %168 +%169 = OpCompositeExtract %7 %165 0 +OpStore %109 %169 +%170 = OpCompositeExtract %9 %165 1 +OpStore %112 %170 OpBranch %160 %160 = OpLabel -%162 = OpLoad %7 %110 -%163 = OpIAdd %7 %162 %108 -%165 = OpLoad %3 %109 -%166 = OpLoad %7 %110 -%168 = OpAccessChain %138 %107 %165 -%169 = OpAtomicCompareExchange %7 %168 %67 %69 %69 %163 %166 -%170 = OpIEqual %9 %169 %166 -%167 = OpCompositeConstruct %11 %169 %170 -%171 = OpCompositeExtract %7 %167 0 -OpStore %110 %171 -%172 = OpCompositeExtract %9 %167 1 -OpStore %113 %172 -OpBranch %161 -%161 = OpLabel -OpBranch %144 -%144 = OpLabel -OpBranch %141 -%142 = OpLabel -OpBranch %136 -%136 = OpLabel -OpBranch %119 -%119 = OpLabel -%173 = OpLoad %3 %109 -%174 = OpIAdd %3 %173 %26 -OpStore %109 %174 -OpBranch %116 -%117 = OpLabel +OpBranch %143 +%143 = OpLabel +OpBranch %140 +%141 = OpLabel +OpBranch %135 +%135 = OpLabel +OpBranch %118 +%118 = OpLabel +%171 = OpLoad %3 %108 +%172 = OpIAdd %3 %171 %26 +OpStore %108 %172 +OpBranch %115 +%116 = OpLabel OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/extra.spvasm b/naga/tests/out/spv/extra.spvasm index 9c434a8ce2..0e84427bad 100644 --- a/naga/tests/out/spv/extra.spvasm +++ b/naga/tests/out/spv/extra.spvasm @@ -1,76 +1,74 @@ ; SPIR-V ; Version: 1.2 ; Generator: rspirv -; Bound: 48 +; Bound: 47 OpCapability Shader -OpCapability Float64 OpCapability Geometry %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint Fragment %23 "main" %15 %18 %21 -OpExecutionMode %23 OriginUpperLeft +OpEntryPoint Fragment %22 "main" %14 %17 %20 +OpExecutionMode %22 OriginUpperLeft OpMemberDecorate %6 0 Offset 0 -OpMemberDecorate %6 1 Offset 16 -OpMemberDecorate %9 0 Offset 0 -OpMemberDecorate %9 1 Offset 16 -OpDecorate %11 Block -OpMemberDecorate %11 0 Offset 0 -OpDecorate %15 Location 0 -OpDecorate %18 BuiltIn PrimitiveId -OpDecorate %18 Flat -OpDecorate %21 Location 0 +OpMemberDecorate %6 1 Offset 8 +OpMemberDecorate %8 0 Offset 0 +OpMemberDecorate %8 1 Offset 16 +OpDecorate %10 Block +OpMemberDecorate %10 0 Offset 0 +OpDecorate %14 Location 0 +OpDecorate %17 BuiltIn PrimitiveId +OpDecorate %17 Flat +OpDecorate %20 Location 0 %2 = OpTypeVoid %3 = OpTypeInt 32 0 -%5 = OpTypeFloat 64 +%5 = OpTypeFloat 32 %4 = OpTypeVector %5 2 %6 = OpTypeStruct %3 %4 -%8 = OpTypeFloat 32 -%7 = OpTypeVector %8 4 -%9 = OpTypeStruct %7 %3 -%11 = OpTypeStruct %6 -%12 = OpTypePointer PushConstant %11 -%10 = OpVariable %12 PushConstant -%16 = OpTypePointer Input %7 -%15 = OpVariable %16 Input -%19 = OpTypePointer Input %3 -%18 = OpVariable %19 Input -%22 = OpTypePointer Output %7 -%21 = OpVariable %22 Output -%24 = OpTypeFunction %2 -%25 = OpTypePointer PushConstant %6 -%26 = OpConstant %3 0 -%28 = OpConstant %8 1.0 -%29 = OpTypeVector %8 3 -%30 = OpConstantComposite %29 %28 %28 %28 -%33 = OpTypePointer PushConstant %3 -%36 = OpTypeBool -%23 = OpFunction %2 None %24 -%13 = OpLabel -%17 = OpLoad %7 %15 -%20 = OpLoad %3 %18 -%14 = OpCompositeConstruct %9 %17 %20 -%27 = OpAccessChain %25 %10 %26 -OpBranch %31 -%31 = OpLabel -%32 = OpCompositeExtract %3 %14 1 -%34 = OpAccessChain %33 %27 %26 -%35 = OpLoad %3 %34 -%37 = OpIEqual %36 %32 %35 -OpSelectionMerge %38 None -OpBranchConditional %37 %39 %40 -%39 = OpLabel -%41 = OpCompositeExtract %7 %14 0 -OpStore %21 %41 +%7 = OpTypeVector %5 4 +%8 = OpTypeStruct %7 %3 +%10 = OpTypeStruct %6 +%11 = OpTypePointer PushConstant %10 +%9 = OpVariable %11 PushConstant +%15 = OpTypePointer Input %7 +%14 = OpVariable %15 Input +%18 = OpTypePointer Input %3 +%17 = OpVariable %18 Input +%21 = OpTypePointer Output %7 +%20 = OpVariable %21 Output +%23 = OpTypeFunction %2 +%24 = OpTypePointer PushConstant %6 +%25 = OpConstant %3 0 +%27 = OpConstant %5 1.0 +%28 = OpTypeVector %5 3 +%29 = OpConstantComposite %28 %27 %27 %27 +%32 = OpTypePointer PushConstant %3 +%35 = OpTypeBool +%22 = OpFunction %2 None %23 +%12 = OpLabel +%16 = OpLoad %7 %14 +%19 = OpLoad %3 %17 +%13 = OpCompositeConstruct %8 %16 %19 +%26 = OpAccessChain %24 %9 %25 +OpBranch %30 +%30 = OpLabel +%31 = OpCompositeExtract %3 %13 1 +%33 = OpAccessChain %32 %26 %25 +%34 = OpLoad %3 %33 +%36 = OpIEqual %35 %31 %34 +OpSelectionMerge %37 None +OpBranchConditional %36 %38 %39 +%38 = OpLabel +%40 = OpCompositeExtract %7 %13 0 +OpStore %20 %40 OpReturn -%40 = OpLabel -%42 = OpCompositeExtract %7 %14 0 -%43 = OpVectorShuffle %29 %42 %42 0 1 2 -%44 = OpFSub %29 %30 %43 -%45 = OpCompositeExtract %7 %14 0 -%46 = OpCompositeExtract %8 %45 3 -%47 = OpCompositeConstruct %7 %44 %46 -OpStore %21 %47 +%39 = OpLabel +%41 = OpCompositeExtract %7 %13 0 +%42 = OpVectorShuffle %28 %41 %41 0 1 2 +%43 = OpFSub %28 %29 %42 +%44 = OpCompositeExtract %7 %13 0 +%45 = OpCompositeExtract %5 %44 3 +%46 = OpCompositeConstruct %7 %43 %45 +OpStore %20 %46 OpReturn -%38 = OpLabel +%37 = OpLabel OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/f16.spvasm b/naga/tests/out/spv/f16.spvasm new file mode 100644 index 0000000000..3d8fcea7a7 --- /dev/null +++ b/naga/tests/out/spv/f16.spvasm @@ -0,0 +1,633 @@ +; SPIR-V +; Version: 1.0 +; Generator: rspirv +; Bound: 511 +OpCapability Shader +OpCapability Float16 +OpCapability StorageBuffer16BitAccess +OpCapability UniformAndStorageBuffer16BitAccess +OpCapability StorageInputOutput16 +OpExtension "SPV_KHR_storage_buffer_storage_class" +OpExtension "SPV_KHR_16bit_storage" +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %499 "main" +OpExecutionMode %499 LocalSize 1 1 1 +OpMemberDecorate %19 0 Offset 0 +OpMemberDecorate %19 1 Offset 4 +OpMemberDecorate %19 2 Offset 8 +OpMemberDecorate %19 3 Offset 12 +OpMemberDecorate %19 4 Offset 16 +OpMemberDecorate %19 5 Offset 24 +OpMemberDecorate %19 6 Offset 32 +OpMemberDecorate %19 7 Offset 40 +OpMemberDecorate %19 8 Offset 44 +OpMemberDecorate %19 8 ColMajor +OpMemberDecorate %19 8 MatrixStride 4 +OpMemberDecorate %19 9 Offset 56 +OpMemberDecorate %19 9 ColMajor +OpMemberDecorate %19 9 MatrixStride 8 +OpMemberDecorate %19 10 Offset 72 +OpMemberDecorate %19 10 ColMajor +OpMemberDecorate %19 10 MatrixStride 8 +OpMemberDecorate %19 11 Offset 88 +OpMemberDecorate %19 11 ColMajor +OpMemberDecorate %19 11 MatrixStride 4 +OpMemberDecorate %19 12 Offset 104 +OpMemberDecorate %19 12 ColMajor +OpMemberDecorate %19 12 MatrixStride 8 +OpMemberDecorate %19 13 Offset 128 +OpMemberDecorate %19 13 ColMajor +OpMemberDecorate %19 13 MatrixStride 8 +OpMemberDecorate %19 14 Offset 152 +OpMemberDecorate %19 14 ColMajor +OpMemberDecorate %19 14 MatrixStride 4 +OpMemberDecorate %19 15 Offset 168 +OpMemberDecorate %19 15 ColMajor +OpMemberDecorate %19 15 MatrixStride 8 +OpMemberDecorate %19 16 Offset 200 +OpMemberDecorate %19 16 ColMajor +OpMemberDecorate %19 16 MatrixStride 8 +OpDecorate %20 ArrayStride 2 +OpMemberDecorate %22 0 Offset 0 +OpMemberDecorate %23 0 Offset 0 +OpMemberDecorate %23 1 Offset 2 +OpMemberDecorate %23 2 Offset 8 +OpMemberDecorate %23 3 Offset 14 +OpMemberDecorate %23 4 Offset 16 +OpMemberDecorate %23 5 Offset 20 +OpDecorate %28 DescriptorSet 0 +OpDecorate %28 Binding 0 +OpDecorate %29 Block +OpMemberDecorate %29 0 Offset 0 +OpDecorate %31 NonWritable +OpDecorate %31 DescriptorSet 0 +OpDecorate %31 Binding 1 +OpDecorate %32 Block +OpMemberDecorate %32 0 Offset 0 +OpDecorate %34 NonWritable +OpDecorate %34 DescriptorSet 0 +OpDecorate %34 Binding 2 +OpDecorate %35 Block +OpMemberDecorate %35 0 Offset 0 +OpDecorate %37 DescriptorSet 0 +OpDecorate %37 Binding 3 +OpDecorate %38 Block +OpMemberDecorate %38 0 Offset 0 +OpDecorate %40 DescriptorSet 0 +OpDecorate %40 Binding 4 +OpDecorate %41 Block +OpMemberDecorate %41 0 Offset 0 +%2 = OpTypeVoid +%3 = OpTypeFloat 16 +%4 = OpTypeInt 32 0 +%5 = OpTypeInt 32 1 +%6 = OpTypeFloat 32 +%7 = OpTypeVector %3 2 +%8 = OpTypeVector %3 3 +%9 = OpTypeVector %3 4 +%10 = OpTypeMatrix %7 2 +%11 = OpTypeMatrix %8 2 +%12 = OpTypeMatrix %9 2 +%13 = OpTypeMatrix %7 3 +%14 = OpTypeMatrix %8 3 +%15 = OpTypeMatrix %9 3 +%16 = OpTypeMatrix %7 4 +%17 = OpTypeMatrix %8 4 +%18 = OpTypeMatrix %9 4 +%19 = OpTypeStruct %4 %5 %6 %3 %7 %8 %9 %3 %10 %11 %12 %13 %14 %15 %16 %17 %18 +%21 = OpConstant %4 2 +%20 = OpTypeArray %3 %21 +%22 = OpTypeStruct %20 +%23 = OpTypeStruct %3 %3 %8 %3 %3 %4 +%24 = OpConstant %3 2.1524e-41 +%25 = OpConstant %3 2.7121e-41 +%27 = OpTypePointer Private %3 +%26 = OpVariable %27 Private %24 +%29 = OpTypeStruct %19 +%30 = OpTypePointer Uniform %29 +%28 = OpVariable %30 Uniform +%32 = OpTypeStruct %19 +%33 = OpTypePointer StorageBuffer %32 +%31 = OpVariable %33 StorageBuffer +%35 = OpTypeStruct %22 +%36 = OpTypePointer StorageBuffer %35 +%34 = OpVariable %36 StorageBuffer +%38 = OpTypeStruct %19 +%39 = OpTypePointer StorageBuffer %38 +%37 = OpVariable %39 StorageBuffer +%41 = OpTypeStruct %22 +%42 = OpTypePointer StorageBuffer %41 +%40 = OpVariable %42 StorageBuffer +%46 = OpTypeFunction %3 %3 +%47 = OpTypePointer Uniform %19 +%48 = OpConstant %4 0 +%50 = OpTypePointer StorageBuffer %19 +%52 = OpTypePointer StorageBuffer %22 +%56 = OpConstant %3 8.8991e-41 +%57 = OpConstant %3 2.4753e-41 +%59 = OpTypePointer Function %3 +%67 = OpTypePointer Uniform %6 +%76 = OpTypePointer Uniform %3 +%77 = OpConstant %4 3 +%84 = OpTypePointer StorageBuffer %3 +%91 = OpTypePointer StorageBuffer %7 +%92 = OpTypePointer Uniform %7 +%93 = OpConstant %4 4 +%100 = OpTypePointer StorageBuffer %8 +%101 = OpTypePointer Uniform %8 +%102 = OpConstant %4 5 +%109 = OpTypePointer StorageBuffer %9 +%110 = OpTypePointer Uniform %9 +%111 = OpConstant %4 6 +%118 = OpTypePointer StorageBuffer %10 +%119 = OpTypePointer Uniform %10 +%120 = OpConstant %4 8 +%133 = OpTypePointer StorageBuffer %11 +%134 = OpTypePointer Uniform %11 +%135 = OpConstant %4 9 +%148 = OpTypePointer StorageBuffer %12 +%149 = OpTypePointer Uniform %12 +%150 = OpConstant %4 10 +%163 = OpTypePointer StorageBuffer %13 +%164 = OpTypePointer Uniform %13 +%165 = OpConstant %4 11 +%181 = OpTypePointer StorageBuffer %14 +%182 = OpTypePointer Uniform %14 +%183 = OpConstant %4 12 +%199 = OpTypePointer StorageBuffer %15 +%200 = OpTypePointer Uniform %15 +%201 = OpConstant %4 13 +%217 = OpTypePointer StorageBuffer %16 +%218 = OpTypePointer Uniform %16 +%219 = OpConstant %4 14 +%238 = OpTypePointer StorageBuffer %17 +%239 = OpTypePointer Uniform %17 +%240 = OpConstant %4 15 +%259 = OpTypePointer StorageBuffer %18 +%260 = OpTypePointer Uniform %18 +%261 = OpConstant %4 16 +%280 = OpTypePointer StorageBuffer %20 +%319 = OpTypeVector %6 2 +%325 = OpTypeVector %6 3 +%331 = OpTypeVector %6 4 +%337 = OpTypeMatrix %319 2 +%351 = OpTypeMatrix %325 2 +%365 = OpTypeMatrix %331 2 +%379 = OpTypeMatrix %319 3 +%397 = OpTypeMatrix %325 3 +%415 = OpTypeMatrix %331 3 +%433 = OpTypeMatrix %319 4 +%455 = OpTypeMatrix %325 4 +%477 = OpTypeMatrix %331 4 +%500 = OpTypeFunction %2 +%506 = OpConstant %3 2.2959e-41 +%509 = OpConstant %4 7 +%45 = OpFunction %3 None %46 +%44 = OpFunctionParameter %3 +%43 = OpLabel +%58 = OpVariable %59 Function %25 +%49 = OpAccessChain %47 %28 %48 +%51 = OpAccessChain %50 %31 %48 +%53 = OpAccessChain %52 %34 %48 +%54 = OpAccessChain %50 %37 %48 +%55 = OpAccessChain %52 %40 %48 +OpBranch %60 +%60 = OpLabel +%61 = OpLoad %3 %58 +%62 = OpFAdd %3 %61 %56 +OpStore %58 %62 +%63 = OpLoad %3 %58 +%64 = OpFAdd %3 %63 %57 +%65 = OpLoad %3 %58 +%66 = OpFAdd %3 %65 %64 +OpStore %58 %66 +%68 = OpAccessChain %67 %49 %21 +%69 = OpLoad %6 %68 +%70 = OpLoad %3 %58 +%71 = OpFConvert %6 %70 +%72 = OpFAdd %6 %69 %71 +%73 = OpFConvert %3 %72 +%74 = OpLoad %3 %58 +%75 = OpFAdd %3 %74 %73 +OpStore %58 %75 +%78 = OpAccessChain %76 %49 %77 +%79 = OpLoad %3 %78 +%80 = OpCompositeConstruct %8 %79 %79 %79 +%81 = OpCompositeExtract %3 %80 2 +%82 = OpLoad %3 %58 +%83 = OpFAdd %3 %82 %81 +OpStore %58 %83 +%85 = OpAccessChain %76 %49 %77 +%86 = OpLoad %3 %85 +%87 = OpAccessChain %84 %51 %77 +%88 = OpLoad %3 %87 +%89 = OpFAdd %3 %86 %88 +%90 = OpAccessChain %84 %54 %77 +OpStore %90 %89 +%94 = OpAccessChain %92 %49 %93 +%95 = OpLoad %7 %94 +%96 = OpAccessChain %91 %51 %93 +%97 = OpLoad %7 %96 +%98 = OpFAdd %7 %95 %97 +%99 = OpAccessChain %91 %54 %93 +OpStore %99 %98 +%103 = OpAccessChain %101 %49 %102 +%104 = OpLoad %8 %103 +%105 = OpAccessChain %100 %51 %102 +%106 = OpLoad %8 %105 +%107 = OpFAdd %8 %104 %106 +%108 = OpAccessChain %100 %54 %102 +OpStore %108 %107 +%112 = OpAccessChain %110 %49 %111 +%113 = OpLoad %9 %112 +%114 = OpAccessChain %109 %51 %111 +%115 = OpLoad %9 %114 +%116 = OpFAdd %9 %113 %115 +%117 = OpAccessChain %109 %54 %111 +OpStore %117 %116 +%121 = OpAccessChain %119 %49 %120 +%122 = OpLoad %10 %121 +%123 = OpAccessChain %118 %51 %120 +%124 = OpLoad %10 %123 +%126 = OpCompositeExtract %7 %122 0 +%127 = OpCompositeExtract %7 %124 0 +%128 = OpFAdd %7 %126 %127 +%129 = OpCompositeExtract %7 %122 1 +%130 = OpCompositeExtract %7 %124 1 +%131 = OpFAdd %7 %129 %130 +%125 = OpCompositeConstruct %10 %128 %131 +%132 = OpAccessChain %118 %54 %120 +OpStore %132 %125 +%136 = OpAccessChain %134 %49 %135 +%137 = OpLoad %11 %136 +%138 = OpAccessChain %133 %51 %135 +%139 = OpLoad %11 %138 +%141 = OpCompositeExtract %8 %137 0 +%142 = OpCompositeExtract %8 %139 0 +%143 = OpFAdd %8 %141 %142 +%144 = OpCompositeExtract %8 %137 1 +%145 = OpCompositeExtract %8 %139 1 +%146 = OpFAdd %8 %144 %145 +%140 = OpCompositeConstruct %11 %143 %146 +%147 = OpAccessChain %133 %54 %135 +OpStore %147 %140 +%151 = OpAccessChain %149 %49 %150 +%152 = OpLoad %12 %151 +%153 = OpAccessChain %148 %51 %150 +%154 = OpLoad %12 %153 +%156 = OpCompositeExtract %9 %152 0 +%157 = OpCompositeExtract %9 %154 0 +%158 = OpFAdd %9 %156 %157 +%159 = OpCompositeExtract %9 %152 1 +%160 = OpCompositeExtract %9 %154 1 +%161 = OpFAdd %9 %159 %160 +%155 = OpCompositeConstruct %12 %158 %161 +%162 = OpAccessChain %148 %54 %150 +OpStore %162 %155 +%166 = OpAccessChain %164 %49 %165 +%167 = OpLoad %13 %166 +%168 = OpAccessChain %163 %51 %165 +%169 = OpLoad %13 %168 +%171 = OpCompositeExtract %7 %167 0 +%172 = OpCompositeExtract %7 %169 0 +%173 = OpFAdd %7 %171 %172 +%174 = OpCompositeExtract %7 %167 1 +%175 = OpCompositeExtract %7 %169 1 +%176 = OpFAdd %7 %174 %175 +%177 = OpCompositeExtract %7 %167 2 +%178 = OpCompositeExtract %7 %169 2 +%179 = OpFAdd %7 %177 %178 +%170 = OpCompositeConstruct %13 %173 %176 %179 +%180 = OpAccessChain %163 %54 %165 +OpStore %180 %170 +%184 = OpAccessChain %182 %49 %183 +%185 = OpLoad %14 %184 +%186 = OpAccessChain %181 %51 %183 +%187 = OpLoad %14 %186 +%189 = OpCompositeExtract %8 %185 0 +%190 = OpCompositeExtract %8 %187 0 +%191 = OpFAdd %8 %189 %190 +%192 = OpCompositeExtract %8 %185 1 +%193 = OpCompositeExtract %8 %187 1 +%194 = OpFAdd %8 %192 %193 +%195 = OpCompositeExtract %8 %185 2 +%196 = OpCompositeExtract %8 %187 2 +%197 = OpFAdd %8 %195 %196 +%188 = OpCompositeConstruct %14 %191 %194 %197 +%198 = OpAccessChain %181 %54 %183 +OpStore %198 %188 +%202 = OpAccessChain %200 %49 %201 +%203 = OpLoad %15 %202 +%204 = OpAccessChain %199 %51 %201 +%205 = OpLoad %15 %204 +%207 = OpCompositeExtract %9 %203 0 +%208 = OpCompositeExtract %9 %205 0 +%209 = OpFAdd %9 %207 %208 +%210 = OpCompositeExtract %9 %203 1 +%211 = OpCompositeExtract %9 %205 1 +%212 = OpFAdd %9 %210 %211 +%213 = OpCompositeExtract %9 %203 2 +%214 = OpCompositeExtract %9 %205 2 +%215 = OpFAdd %9 %213 %214 +%206 = OpCompositeConstruct %15 %209 %212 %215 +%216 = OpAccessChain %199 %54 %201 +OpStore %216 %206 +%220 = OpAccessChain %218 %49 %219 +%221 = OpLoad %16 %220 +%222 = OpAccessChain %217 %51 %219 +%223 = OpLoad %16 %222 +%225 = OpCompositeExtract %7 %221 0 +%226 = OpCompositeExtract %7 %223 0 +%227 = OpFAdd %7 %225 %226 +%228 = OpCompositeExtract %7 %221 1 +%229 = OpCompositeExtract %7 %223 1 +%230 = OpFAdd %7 %228 %229 +%231 = OpCompositeExtract %7 %221 2 +%232 = OpCompositeExtract %7 %223 2 +%233 = OpFAdd %7 %231 %232 +%234 = OpCompositeExtract %7 %221 3 +%235 = OpCompositeExtract %7 %223 3 +%236 = OpFAdd %7 %234 %235 +%224 = OpCompositeConstruct %16 %227 %230 %233 %236 +%237 = OpAccessChain %217 %54 %219 +OpStore %237 %224 +%241 = OpAccessChain %239 %49 %240 +%242 = OpLoad %17 %241 +%243 = OpAccessChain %238 %51 %240 +%244 = OpLoad %17 %243 +%246 = OpCompositeExtract %8 %242 0 +%247 = OpCompositeExtract %8 %244 0 +%248 = OpFAdd %8 %246 %247 +%249 = OpCompositeExtract %8 %242 1 +%250 = OpCompositeExtract %8 %244 1 +%251 = OpFAdd %8 %249 %250 +%252 = OpCompositeExtract %8 %242 2 +%253 = OpCompositeExtract %8 %244 2 +%254 = OpFAdd %8 %252 %253 +%255 = OpCompositeExtract %8 %242 3 +%256 = OpCompositeExtract %8 %244 3 +%257 = OpFAdd %8 %255 %256 +%245 = OpCompositeConstruct %17 %248 %251 %254 %257 +%258 = OpAccessChain %238 %54 %240 +OpStore %258 %245 +%262 = OpAccessChain %260 %49 %261 +%263 = OpLoad %18 %262 +%264 = OpAccessChain %259 %51 %261 +%265 = OpLoad %18 %264 +%267 = OpCompositeExtract %9 %263 0 +%268 = OpCompositeExtract %9 %265 0 +%269 = OpFAdd %9 %267 %268 +%270 = OpCompositeExtract %9 %263 1 +%271 = OpCompositeExtract %9 %265 1 +%272 = OpFAdd %9 %270 %271 +%273 = OpCompositeExtract %9 %263 2 +%274 = OpCompositeExtract %9 %265 2 +%275 = OpFAdd %9 %273 %274 +%276 = OpCompositeExtract %9 %263 3 +%277 = OpCompositeExtract %9 %265 3 +%278 = OpFAdd %9 %276 %277 +%266 = OpCompositeConstruct %18 %269 %272 %275 %278 +%279 = OpAccessChain %259 %54 %261 +OpStore %279 %266 +%281 = OpAccessChain %280 %53 %48 +%282 = OpLoad %20 %281 +%283 = OpAccessChain %280 %55 %48 +OpStore %283 %282 +%284 = OpLoad %3 %58 +%285 = OpExtInst %3 %1 FAbs %284 +%286 = OpLoad %3 %58 +%287 = OpFAdd %3 %286 %285 +OpStore %58 %287 +%288 = OpLoad %3 %58 +%289 = OpLoad %3 %58 +%290 = OpLoad %3 %58 +%291 = OpExtInst %3 %1 FClamp %288 %289 %290 +%292 = OpLoad %3 %58 +%293 = OpFAdd %3 %292 %291 +OpStore %58 %293 +%294 = OpLoad %3 %58 +%295 = OpCompositeConstruct %7 %294 %294 +%296 = OpLoad %3 %58 +%297 = OpCompositeConstruct %7 %296 %296 +%298 = OpDot %3 %295 %297 +%299 = OpLoad %3 %58 +%300 = OpFAdd %3 %299 %298 +OpStore %58 %300 +%301 = OpLoad %3 %58 +%302 = OpLoad %3 %58 +%303 = OpExtInst %3 %1 FMax %301 %302 +%304 = OpLoad %3 %58 +%305 = OpFAdd %3 %304 %303 +OpStore %58 %305 +%306 = OpLoad %3 %58 +%307 = OpLoad %3 %58 +%308 = OpExtInst %3 %1 FMin %306 %307 +%309 = OpLoad %3 %58 +%310 = OpFAdd %3 %309 %308 +OpStore %58 %310 +%311 = OpLoad %3 %58 +%312 = OpExtInst %3 %1 FSign %311 +%313 = OpLoad %3 %58 +%314 = OpFAdd %3 %313 %312 +OpStore %58 %314 +%315 = OpLoad %3 %58 +%316 = OpFAdd %3 %315 %24 +OpStore %58 %316 +%317 = OpAccessChain %92 %49 %93 +%318 = OpLoad %7 %317 +%320 = OpFConvert %319 %318 +%321 = OpFConvert %7 %320 +%322 = OpAccessChain %91 %54 %93 +OpStore %322 %321 +%323 = OpAccessChain %101 %49 %102 +%324 = OpLoad %8 %323 +%326 = OpFConvert %325 %324 +%327 = OpFConvert %8 %326 +%328 = OpAccessChain %100 %54 %102 +OpStore %328 %327 +%329 = OpAccessChain %110 %49 %111 +%330 = OpLoad %9 %329 +%332 = OpFConvert %331 %330 +%333 = OpFConvert %9 %332 +%334 = OpAccessChain %109 %54 %111 +OpStore %334 %333 +%335 = OpAccessChain %119 %49 %120 +%336 = OpLoad %10 %335 +%338 = OpCompositeExtract %7 %336 0 +%339 = OpFConvert %319 %338 +%340 = OpCompositeExtract %7 %336 1 +%341 = OpFConvert %319 %340 +%342 = OpCompositeConstruct %337 %339 %341 +%343 = OpCompositeExtract %319 %342 0 +%344 = OpFConvert %7 %343 +%345 = OpCompositeExtract %319 %342 1 +%346 = OpFConvert %7 %345 +%347 = OpCompositeConstruct %10 %344 %346 +%348 = OpAccessChain %118 %54 %120 +OpStore %348 %347 +%349 = OpAccessChain %134 %49 %135 +%350 = OpLoad %11 %349 +%352 = OpCompositeExtract %8 %350 0 +%353 = OpFConvert %325 %352 +%354 = OpCompositeExtract %8 %350 1 +%355 = OpFConvert %325 %354 +%356 = OpCompositeConstruct %351 %353 %355 +%357 = OpCompositeExtract %325 %356 0 +%358 = OpFConvert %8 %357 +%359 = OpCompositeExtract %325 %356 1 +%360 = OpFConvert %8 %359 +%361 = OpCompositeConstruct %11 %358 %360 +%362 = OpAccessChain %133 %54 %135 +OpStore %362 %361 +%363 = OpAccessChain %149 %49 %150 +%364 = OpLoad %12 %363 +%366 = OpCompositeExtract %9 %364 0 +%367 = OpFConvert %331 %366 +%368 = OpCompositeExtract %9 %364 1 +%369 = OpFConvert %331 %368 +%370 = OpCompositeConstruct %365 %367 %369 +%371 = OpCompositeExtract %331 %370 0 +%372 = OpFConvert %9 %371 +%373 = OpCompositeExtract %331 %370 1 +%374 = OpFConvert %9 %373 +%375 = OpCompositeConstruct %12 %372 %374 +%376 = OpAccessChain %148 %54 %150 +OpStore %376 %375 +%377 = OpAccessChain %164 %49 %165 +%378 = OpLoad %13 %377 +%380 = OpCompositeExtract %7 %378 0 +%381 = OpFConvert %319 %380 +%382 = OpCompositeExtract %7 %378 1 +%383 = OpFConvert %319 %382 +%384 = OpCompositeExtract %7 %378 2 +%385 = OpFConvert %319 %384 +%386 = OpCompositeConstruct %379 %381 %383 %385 +%387 = OpCompositeExtract %319 %386 0 +%388 = OpFConvert %7 %387 +%389 = OpCompositeExtract %319 %386 1 +%390 = OpFConvert %7 %389 +%391 = OpCompositeExtract %319 %386 2 +%392 = OpFConvert %7 %391 +%393 = OpCompositeConstruct %13 %388 %390 %392 +%394 = OpAccessChain %163 %54 %165 +OpStore %394 %393 +%395 = OpAccessChain %182 %49 %183 +%396 = OpLoad %14 %395 +%398 = OpCompositeExtract %8 %396 0 +%399 = OpFConvert %325 %398 +%400 = OpCompositeExtract %8 %396 1 +%401 = OpFConvert %325 %400 +%402 = OpCompositeExtract %8 %396 2 +%403 = OpFConvert %325 %402 +%404 = OpCompositeConstruct %397 %399 %401 %403 +%405 = OpCompositeExtract %325 %404 0 +%406 = OpFConvert %8 %405 +%407 = OpCompositeExtract %325 %404 1 +%408 = OpFConvert %8 %407 +%409 = OpCompositeExtract %325 %404 2 +%410 = OpFConvert %8 %409 +%411 = OpCompositeConstruct %14 %406 %408 %410 +%412 = OpAccessChain %181 %54 %183 +OpStore %412 %411 +%413 = OpAccessChain %200 %49 %201 +%414 = OpLoad %15 %413 +%416 = OpCompositeExtract %9 %414 0 +%417 = OpFConvert %331 %416 +%418 = OpCompositeExtract %9 %414 1 +%419 = OpFConvert %331 %418 +%420 = OpCompositeExtract %9 %414 2 +%421 = OpFConvert %331 %420 +%422 = OpCompositeConstruct %415 %417 %419 %421 +%423 = OpCompositeExtract %331 %422 0 +%424 = OpFConvert %9 %423 +%425 = OpCompositeExtract %331 %422 1 +%426 = OpFConvert %9 %425 +%427 = OpCompositeExtract %331 %422 2 +%428 = OpFConvert %9 %427 +%429 = OpCompositeConstruct %15 %424 %426 %428 +%430 = OpAccessChain %199 %54 %201 +OpStore %430 %429 +%431 = OpAccessChain %218 %49 %219 +%432 = OpLoad %16 %431 +%434 = OpCompositeExtract %7 %432 0 +%435 = OpFConvert %319 %434 +%436 = OpCompositeExtract %7 %432 1 +%437 = OpFConvert %319 %436 +%438 = OpCompositeExtract %7 %432 2 +%439 = OpFConvert %319 %438 +%440 = OpCompositeExtract %7 %432 3 +%441 = OpFConvert %319 %440 +%442 = OpCompositeConstruct %433 %435 %437 %439 %441 +%443 = OpCompositeExtract %319 %442 0 +%444 = OpFConvert %7 %443 +%445 = OpCompositeExtract %319 %442 1 +%446 = OpFConvert %7 %445 +%447 = OpCompositeExtract %319 %442 2 +%448 = OpFConvert %7 %447 +%449 = OpCompositeExtract %319 %442 3 +%450 = OpFConvert %7 %449 +%451 = OpCompositeConstruct %16 %444 %446 %448 %450 +%452 = OpAccessChain %217 %54 %219 +OpStore %452 %451 +%453 = OpAccessChain %239 %49 %240 +%454 = OpLoad %17 %453 +%456 = OpCompositeExtract %8 %454 0 +%457 = OpFConvert %325 %456 +%458 = OpCompositeExtract %8 %454 1 +%459 = OpFConvert %325 %458 +%460 = OpCompositeExtract %8 %454 2 +%461 = OpFConvert %325 %460 +%462 = OpCompositeExtract %8 %454 3 +%463 = OpFConvert %325 %462 +%464 = OpCompositeConstruct %455 %457 %459 %461 %463 +%465 = OpCompositeExtract %325 %464 0 +%466 = OpFConvert %8 %465 +%467 = OpCompositeExtract %325 %464 1 +%468 = OpFConvert %8 %467 +%469 = OpCompositeExtract %325 %464 2 +%470 = OpFConvert %8 %469 +%471 = OpCompositeExtract %325 %464 3 +%472 = OpFConvert %8 %471 +%473 = OpCompositeConstruct %17 %466 %468 %470 %472 +%474 = OpAccessChain %238 %54 %240 +OpStore %474 %473 +%475 = OpAccessChain %260 %49 %261 +%476 = OpLoad %18 %475 +%478 = OpCompositeExtract %9 %476 0 +%479 = OpFConvert %331 %478 +%480 = OpCompositeExtract %9 %476 1 +%481 = OpFConvert %331 %480 +%482 = OpCompositeExtract %9 %476 2 +%483 = OpFConvert %331 %482 +%484 = OpCompositeExtract %9 %476 3 +%485 = OpFConvert %331 %484 +%486 = OpCompositeConstruct %477 %479 %481 %483 %485 +%487 = OpCompositeExtract %331 %486 0 +%488 = OpFConvert %9 %487 +%489 = OpCompositeExtract %331 %486 1 +%490 = OpFConvert %9 %489 +%491 = OpCompositeExtract %331 %486 2 +%492 = OpFConvert %9 %491 +%493 = OpCompositeExtract %331 %486 3 +%494 = OpFConvert %9 %493 +%495 = OpCompositeConstruct %18 %488 %490 %492 %494 +%496 = OpAccessChain %259 %54 %261 +OpStore %496 %495 +%497 = OpLoad %3 %58 +OpReturnValue %497 +OpFunctionEnd +%499 = OpFunction %2 None %500 +%498 = OpLabel +%501 = OpAccessChain %47 %28 %48 +%502 = OpAccessChain %50 %31 %48 +%503 = OpAccessChain %52 %34 %48 +%504 = OpAccessChain %50 %37 %48 +%505 = OpAccessChain %52 %40 %48 +OpBranch %507 +%507 = OpLabel +%508 = OpFunctionCall %3 %45 %506 +%510 = OpAccessChain %84 %504 %509 +OpStore %510 %508 +OpReturn +OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/image.spvasm b/naga/tests/out/spv/image.spvasm index 152ede488f..f1303d23ab 100644 --- a/naga/tests/out/spv/image.spvasm +++ b/naga/tests/out/spv/image.spvasm @@ -1,7 +1,7 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 547 +; Bound: 545 OpCapability Shader OpCapability Image1D OpCapability Sampled1D @@ -10,19 +10,19 @@ OpCapability ImageQuery %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 OpEntryPoint GLCompute %96 "main" %93 -OpEntryPoint GLCompute %190 "depth_load" %188 -OpEntryPoint Vertex %210 "queries" %208 -OpEntryPoint Vertex %262 "levels_queries" %261 -OpEntryPoint Fragment %293 "texture_sample" %292 -OpEntryPoint Fragment %440 "texture_sample_comparison" %438 -OpEntryPoint Fragment %496 "gather" %495 -OpEntryPoint Fragment %530 "depth_no_comparison" %529 +OpEntryPoint GLCompute %188 "depth_load" %186 +OpEntryPoint Vertex %208 "queries" %206 +OpEntryPoint Vertex %260 "levels_queries" %259 +OpEntryPoint Fragment %291 "texture_sample" %290 +OpEntryPoint Fragment %438 "texture_sample_comparison" %436 +OpEntryPoint Fragment %494 "gather" %493 +OpEntryPoint Fragment %528 "depth_no_comparison" %527 OpExecutionMode %96 LocalSize 16 1 1 -OpExecutionMode %190 LocalSize 16 1 1 -OpExecutionMode %293 OriginUpperLeft -OpExecutionMode %440 OriginUpperLeft -OpExecutionMode %496 OriginUpperLeft -OpExecutionMode %530 OriginUpperLeft +OpExecutionMode %188 LocalSize 16 1 1 +OpExecutionMode %291 OriginUpperLeft +OpExecutionMode %438 OriginUpperLeft +OpExecutionMode %494 OriginUpperLeft +OpExecutionMode %528 OriginUpperLeft %3 = OpString "image.wgsl" OpSource Unknown 0 %3 "@group(0) @binding(0) var image_mipmapped_src: texture_2d; @@ -248,16 +248,16 @@ OpName %72 "lhs" OpName %73 "rhs" OpName %93 "local_id" OpName %96 "main" -OpName %188 "local_id" -OpName %190 "depth_load" -OpName %210 "queries" -OpName %262 "levels_queries" -OpName %293 "texture_sample" -OpName %308 "a" -OpName %440 "texture_sample_comparison" -OpName %445 "a" -OpName %496 "gather" -OpName %530 "depth_no_comparison" +OpName %186 "local_id" +OpName %188 "depth_load" +OpName %208 "queries" +OpName %260 "levels_queries" +OpName %291 "texture_sample" +OpName %306 "a" +OpName %438 "texture_sample_comparison" +OpName %443 "a" +OpName %494 "gather" +OpName %528 "depth_no_comparison" OpDecorate %29 DescriptorSet 0 OpDecorate %29 Binding 0 OpDecorate %31 DescriptorSet 0 @@ -306,13 +306,13 @@ OpDecorate %66 Binding 3 OpDecorate %68 DescriptorSet 1 OpDecorate %68 Binding 4 OpDecorate %93 BuiltIn LocalInvocationId -OpDecorate %188 BuiltIn LocalInvocationId -OpDecorate %208 BuiltIn Position -OpDecorate %261 BuiltIn Position -OpDecorate %292 Location 0 -OpDecorate %438 Location 0 -OpDecorate %495 Location 0 -OpDecorate %529 Location 0 +OpDecorate %186 BuiltIn LocalInvocationId +OpDecorate %206 BuiltIn Position +OpDecorate %259 BuiltIn Position +OpDecorate %290 Location 0 +OpDecorate %436 Location 0 +OpDecorate %493 Location 0 +OpDecorate %527 Location 0 %2 = OpTypeVoid %5 = OpTypeInt 32 0 %4 = OpTypeImage %5 2D 0 0 0 1 Unknown @@ -399,43 +399,43 @@ OpDecorate %529 Location 0 %106 = OpConstantComposite %14 %104 %105 %108 = OpTypeVector %5 2 %116 = OpTypeVector %5 4 -%130 = OpTypeVector %15 3 -%188 = OpVariable %94 Input -%209 = OpTypePointer Output %24 -%208 = OpVariable %209 Output -%219 = OpConstant %5 0 -%261 = OpVariable %209 Output -%292 = OpVariable %209 Output -%299 = OpConstant %8 0.5 -%300 = OpTypeVector %8 2 -%301 = OpConstantComposite %300 %299 %299 -%302 = OpTypeVector %8 3 -%303 = OpConstantComposite %302 %299 %299 %299 -%304 = OpConstant %15 3 -%305 = OpConstantComposite %14 %304 %88 -%306 = OpConstant %8 2.3 -%307 = OpConstant %8 2.0 -%309 = OpTypePointer Function %24 -%310 = OpConstantNull %24 -%312 = OpTypeSampledImage %16 -%317 = OpTypeSampledImage %17 -%338 = OpTypeSampledImage %19 -%399 = OpTypeSampledImage %21 -%439 = OpTypePointer Output %8 -%438 = OpVariable %439 Output -%446 = OpTypePointer Function %8 -%447 = OpConstantNull %8 -%449 = OpTypeSampledImage %26 -%454 = OpTypeSampledImage %27 -%467 = OpTypeSampledImage %28 -%474 = OpConstant %8 0.0 -%495 = OpVariable %209 Output -%506 = OpConstant %5 1 -%509 = OpConstant %5 3 -%514 = OpTypeSampledImage %4 -%517 = OpTypeVector %15 4 -%518 = OpTypeSampledImage %18 -%529 = OpVariable %209 Output +%129 = OpTypeVector %15 3 +%186 = OpVariable %94 Input +%207 = OpTypePointer Output %24 +%206 = OpVariable %207 Output +%217 = OpConstant %5 0 +%259 = OpVariable %207 Output +%290 = OpVariable %207 Output +%297 = OpConstant %8 0.5 +%298 = OpTypeVector %8 2 +%299 = OpConstantComposite %298 %297 %297 +%300 = OpTypeVector %8 3 +%301 = OpConstantComposite %300 %297 %297 %297 +%302 = OpConstant %15 3 +%303 = OpConstantComposite %14 %302 %88 +%304 = OpConstant %8 2.3 +%305 = OpConstant %8 2.0 +%307 = OpTypePointer Function %24 +%308 = OpConstantNull %24 +%310 = OpTypeSampledImage %16 +%315 = OpTypeSampledImage %17 +%336 = OpTypeSampledImage %19 +%397 = OpTypeSampledImage %21 +%437 = OpTypePointer Output %8 +%436 = OpVariable %437 Output +%444 = OpTypePointer Function %8 +%445 = OpConstantNull %8 +%447 = OpTypeSampledImage %26 +%452 = OpTypeSampledImage %27 +%465 = OpTypeSampledImage %28 +%472 = OpConstant %8 0.0 +%493 = OpVariable %207 Output +%504 = OpConstant %5 1 +%507 = OpConstant %5 3 +%512 = OpTypeSampledImage %4 +%515 = OpTypeVector %15 4 +%516 = OpTypeSampledImage %18 +%527 = OpVariable %207 Output %70 = OpFunction %14 None %71 %72 = OpFunctionParameter %14 %73 = OpFunctionParameter %14 @@ -474,586 +474,586 @@ OpLine %3 23 18 %117 = OpImageFetch %116 %98 %113 Lod %115 OpLine %3 25 20 %118 = OpCompositeExtract %5 %95 2 -%120 = OpImageFetch %116 %98 %113 Lod %118 +%119 = OpImageFetch %116 %98 %113 Lod %118 OpLine %3 26 18 -%121 = OpCompositeExtract %5 %95 2 -%122 = OpBitcast %15 %121 -%123 = OpImageFetch %116 %99 %113 Sample %122 +%120 = OpCompositeExtract %5 %95 2 +%121 = OpBitcast %15 %120 +%122 = OpImageFetch %116 %99 %113 Sample %121 OpLine %3 27 18 -%124 = OpImageRead %116 %100 %113 +%123 = OpImageRead %116 %100 %113 OpLine %3 28 52 +%124 = OpCompositeExtract %5 %95 2 %125 = OpCompositeExtract %5 %95 2 -%126 = OpCompositeExtract %5 %95 2 -%127 = OpBitcast %15 %126 +%126 = OpBitcast %15 %125 OpLine %3 28 18 -%128 = OpIAdd %15 %127 %88 -%129 = OpBitcast %15 %125 -%131 = OpCompositeConstruct %130 %113 %129 -%132 = OpImageFetch %116 %101 %131 Lod %128 +%127 = OpIAdd %15 %126 %88 +%128 = OpBitcast %15 %124 +%130 = OpCompositeConstruct %129 %113 %128 +%131 = OpImageFetch %116 %101 %130 Lod %127 OpLine %3 29 52 -%133 = OpCompositeExtract %5 %95 2 -%134 = OpBitcast %15 %133 -%135 = OpCompositeExtract %5 %95 2 -%136 = OpBitcast %15 %135 +%132 = OpCompositeExtract %5 %95 2 +%133 = OpBitcast %15 %132 +%134 = OpCompositeExtract %5 %95 2 +%135 = OpBitcast %15 %134 OpLine %3 29 18 -%137 = OpIAdd %15 %136 %88 -%138 = OpCompositeConstruct %130 %113 %134 -%139 = OpImageFetch %116 %101 %138 Lod %137 +%136 = OpIAdd %15 %135 %88 +%137 = OpCompositeConstruct %129 %113 %133 +%138 = OpImageFetch %116 %101 %137 Lod %136 OpLine %3 30 18 -%140 = OpCompositeExtract %5 %95 0 -%141 = OpBitcast %15 %140 -%142 = OpCompositeExtract %5 %95 2 -%143 = OpBitcast %15 %142 -%144 = OpImageFetch %116 %102 %141 Lod %143 +%139 = OpCompositeExtract %5 %95 0 +%140 = OpBitcast %15 %139 +%141 = OpCompositeExtract %5 %95 2 +%142 = OpBitcast %15 %141 +%143 = OpImageFetch %116 %102 %140 Lod %142 OpLine %3 32 19 -%145 = OpBitcast %108 %113 -%146 = OpCompositeExtract %5 %95 2 -%147 = OpBitcast %15 %146 -%148 = OpImageFetch %116 %98 %145 Lod %147 +%144 = OpBitcast %108 %113 +%145 = OpCompositeExtract %5 %95 2 +%146 = OpBitcast %15 %145 +%147 = OpImageFetch %116 %98 %144 Lod %146 OpLine %3 33 19 -%149 = OpBitcast %108 %113 -%150 = OpCompositeExtract %5 %95 2 -%151 = OpBitcast %15 %150 -%152 = OpImageFetch %116 %99 %149 Sample %151 +%148 = OpBitcast %108 %113 +%149 = OpCompositeExtract %5 %95 2 +%150 = OpBitcast %15 %149 +%151 = OpImageFetch %116 %99 %148 Sample %150 OpLine %3 34 19 -%153 = OpBitcast %108 %113 -%154 = OpImageRead %116 %100 %153 +%152 = OpBitcast %108 %113 +%153 = OpImageRead %116 %100 %152 OpLine %3 35 48 -%155 = OpBitcast %108 %113 +%154 = OpBitcast %108 %113 +%155 = OpCompositeExtract %5 %95 2 %156 = OpCompositeExtract %5 %95 2 -%157 = OpCompositeExtract %5 %95 2 -%158 = OpBitcast %15 %157 +%157 = OpBitcast %15 %156 OpLine %3 35 19 -%159 = OpIAdd %15 %158 %88 -%160 = OpCompositeConstruct %13 %155 %156 -%161 = OpImageFetch %116 %101 %160 Lod %159 +%158 = OpIAdd %15 %157 %88 +%159 = OpCompositeConstruct %13 %154 %155 +%160 = OpImageFetch %116 %101 %159 Lod %158 OpLine %3 36 48 -%162 = OpBitcast %108 %113 -%163 = OpCompositeExtract %5 %95 2 -%164 = OpBitcast %15 %163 -%165 = OpCompositeExtract %5 %95 2 -%166 = OpBitcast %15 %165 +%161 = OpBitcast %108 %113 +%162 = OpCompositeExtract %5 %95 2 +%163 = OpBitcast %15 %162 +%164 = OpCompositeExtract %5 %95 2 +%165 = OpBitcast %15 %164 OpLine %3 36 19 -%167 = OpIAdd %15 %166 %88 -%168 = OpBitcast %5 %164 -%169 = OpCompositeConstruct %13 %162 %168 -%170 = OpImageFetch %116 %101 %169 Lod %167 +%166 = OpIAdd %15 %165 %88 +%167 = OpBitcast %5 %163 +%168 = OpCompositeConstruct %13 %161 %167 +%169 = OpImageFetch %116 %101 %168 Lod %166 OpLine %3 37 19 -%171 = OpCompositeExtract %5 %95 0 -%173 = OpCompositeExtract %5 %95 2 -%174 = OpBitcast %15 %173 -%175 = OpImageFetch %116 %102 %171 Lod %174 +%170 = OpCompositeExtract %5 %95 0 +%171 = OpCompositeExtract %5 %95 2 +%172 = OpBitcast %15 %171 +%173 = OpImageFetch %116 %102 %170 Lod %172 OpLine %3 39 29 -%176 = OpCompositeExtract %15 %113 0 -%177 = OpIAdd %116 %117 %123 -%178 = OpIAdd %116 %177 %124 -%179 = OpIAdd %116 %178 %132 -%180 = OpIAdd %116 %179 %139 +%174 = OpCompositeExtract %15 %113 0 +%175 = OpIAdd %116 %117 %122 +%176 = OpIAdd %116 %175 %123 +%177 = OpIAdd %116 %176 %131 +%178 = OpIAdd %116 %177 %138 OpLine %3 39 5 -OpImageWrite %103 %176 %180 +OpImageWrite %103 %174 %178 OpLine %3 41 29 -%181 = OpCompositeExtract %15 %113 0 -%182 = OpBitcast %5 %181 -%183 = OpIAdd %116 %148 %152 -%184 = OpIAdd %116 %183 %154 -%185 = OpIAdd %116 %184 %161 -%186 = OpIAdd %116 %185 %170 +%179 = OpCompositeExtract %15 %113 0 +%180 = OpBitcast %5 %179 +%181 = OpIAdd %116 %147 %151 +%182 = OpIAdd %116 %181 %153 +%183 = OpIAdd %116 %182 %160 +%184 = OpIAdd %116 %183 %169 OpLine %3 41 5 -OpImageWrite %103 %182 %186 +OpImageWrite %103 %180 %184 OpReturn OpFunctionEnd -%190 = OpFunction %2 None %97 -%187 = OpLabel -%189 = OpLoad %13 %188 -%191 = OpLoad %7 %33 -%192 = OpLoad %9 %35 -%193 = OpLoad %11 %43 -OpBranch %194 -%194 = OpLabel +%188 = OpFunction %2 None %97 +%185 = OpLabel +%187 = OpLoad %13 %186 +%189 = OpLoad %7 %33 +%190 = OpLoad %9 %35 +%191 = OpLoad %11 %43 +OpBranch %192 +%192 = OpLabel OpLine %3 46 26 -%195 = OpImageQuerySize %108 %192 +%193 = OpImageQuerySize %108 %190 OpLine %3 47 27 -%196 = OpVectorShuffle %108 %189 %189 0 1 -%197 = OpIMul %108 %195 %196 -%198 = OpBitcast %14 %197 +%194 = OpVectorShuffle %108 %187 %187 0 1 +%195 = OpIMul %108 %193 %194 +%196 = OpBitcast %14 %195 OpLine %3 47 27 -%199 = OpFunctionCall %14 %70 %198 %106 +%197 = OpFunctionCall %14 %70 %196 %106 OpLine %3 48 20 -%200 = OpCompositeExtract %5 %189 2 -%201 = OpBitcast %15 %200 -%202 = OpImageFetch %24 %191 %199 Sample %201 -%203 = OpCompositeExtract %8 %202 0 +%198 = OpCompositeExtract %5 %187 2 +%199 = OpBitcast %15 %198 +%200 = OpImageFetch %24 %189 %197 Sample %199 +%201 = OpCompositeExtract %8 %200 0 OpLine %3 49 29 -%204 = OpCompositeExtract %15 %199 0 -%205 = OpConvertFToU %5 %203 -%206 = OpCompositeConstruct %116 %205 %205 %205 %205 +%202 = OpCompositeExtract %15 %197 0 +%203 = OpConvertFToU %5 %201 +%204 = OpCompositeConstruct %116 %203 %203 %203 %203 OpLine %3 49 5 -OpImageWrite %193 %204 %206 +OpImageWrite %191 %202 %204 OpReturn OpFunctionEnd -%210 = OpFunction %2 None %97 -%207 = OpLabel -%211 = OpLoad %16 %44 -%212 = OpLoad %17 %46 -%213 = OpLoad %19 %51 -%214 = OpLoad %20 %53 -%215 = OpLoad %21 %55 -%216 = OpLoad %22 %57 -%217 = OpLoad %23 %59 -OpBranch %218 -%218 = OpLabel +%208 = OpFunction %2 None %97 +%205 = OpLabel +%209 = OpLoad %16 %44 +%210 = OpLoad %17 %46 +%211 = OpLoad %19 %51 +%212 = OpLoad %20 %53 +%213 = OpLoad %21 %55 +%214 = OpLoad %22 %57 +%215 = OpLoad %23 %59 +OpBranch %216 +%216 = OpLabel OpLine %3 74 18 -%220 = OpImageQuerySizeLod %5 %211 %219 +%218 = OpImageQuerySizeLod %5 %209 %217 OpLine %3 75 22 -%221 = OpBitcast %15 %220 -%222 = OpImageQuerySizeLod %5 %211 %221 +%219 = OpBitcast %15 %218 +%220 = OpImageQuerySizeLod %5 %209 %219 OpLine %3 76 18 -%223 = OpImageQuerySizeLod %108 %212 %219 +%221 = OpImageQuerySizeLod %108 %210 %217 OpLine %3 77 22 -%224 = OpImageQuerySizeLod %108 %212 %88 +%222 = OpImageQuerySizeLod %108 %210 %88 OpLine %3 78 24 -%225 = OpImageQuerySizeLod %13 %213 %219 -%226 = OpVectorShuffle %108 %225 %225 0 1 +%223 = OpImageQuerySizeLod %13 %211 %217 +%224 = OpVectorShuffle %108 %223 %223 0 1 OpLine %3 79 28 -%227 = OpImageQuerySizeLod %13 %213 %88 -%228 = OpVectorShuffle %108 %227 %227 0 1 +%225 = OpImageQuerySizeLod %13 %211 %88 +%226 = OpVectorShuffle %108 %225 %225 0 1 OpLine %3 80 20 -%229 = OpImageQuerySizeLod %108 %214 %219 +%227 = OpImageQuerySizeLod %108 %212 %217 OpLine %3 81 24 -%230 = OpImageQuerySizeLod %108 %214 %88 +%228 = OpImageQuerySizeLod %108 %212 %88 OpLine %3 82 26 -%231 = OpImageQuerySizeLod %13 %215 %219 -%232 = OpVectorShuffle %108 %231 %231 0 0 +%229 = OpImageQuerySizeLod %13 %213 %217 +%230 = OpVectorShuffle %108 %229 %229 0 0 OpLine %3 83 30 -%233 = OpImageQuerySizeLod %13 %215 %88 -%234 = OpVectorShuffle %108 %233 %233 0 0 +%231 = OpImageQuerySizeLod %13 %213 %88 +%232 = OpVectorShuffle %108 %231 %231 0 0 OpLine %3 84 18 -%235 = OpImageQuerySizeLod %13 %216 %219 +%233 = OpImageQuerySizeLod %13 %214 %217 OpLine %3 85 22 -%236 = OpImageQuerySizeLod %13 %216 %88 +%234 = OpImageQuerySizeLod %13 %214 %88 OpLine %3 86 21 -%237 = OpImageQuerySize %108 %217 +%235 = OpImageQuerySize %108 %215 OpLine %3 88 15 -%238 = OpCompositeExtract %5 %223 1 -%239 = OpIAdd %5 %220 %238 +%236 = OpCompositeExtract %5 %221 1 +%237 = OpIAdd %5 %218 %236 +%238 = OpCompositeExtract %5 %222 1 +%239 = OpIAdd %5 %237 %238 %240 = OpCompositeExtract %5 %224 1 %241 = OpIAdd %5 %239 %240 %242 = OpCompositeExtract %5 %226 1 %243 = OpIAdd %5 %241 %242 -%244 = OpCompositeExtract %5 %228 1 +%244 = OpCompositeExtract %5 %227 1 %245 = OpIAdd %5 %243 %244 -%246 = OpCompositeExtract %5 %229 1 +%246 = OpCompositeExtract %5 %228 1 %247 = OpIAdd %5 %245 %246 %248 = OpCompositeExtract %5 %230 1 %249 = OpIAdd %5 %247 %248 %250 = OpCompositeExtract %5 %232 1 %251 = OpIAdd %5 %249 %250 -%252 = OpCompositeExtract %5 %234 1 +%252 = OpCompositeExtract %5 %233 2 %253 = OpIAdd %5 %251 %252 -%254 = OpCompositeExtract %5 %235 2 +%254 = OpCompositeExtract %5 %234 2 %255 = OpIAdd %5 %253 %254 -%256 = OpCompositeExtract %5 %236 2 -%257 = OpIAdd %5 %255 %256 OpLine %3 91 12 -%258 = OpConvertUToF %8 %257 -%259 = OpCompositeConstruct %24 %258 %258 %258 %258 -OpStore %208 %259 +%256 = OpConvertUToF %8 %255 +%257 = OpCompositeConstruct %24 %256 %256 %256 %256 +OpStore %206 %257 OpReturn OpFunctionEnd -%262 = OpFunction %2 None %97 -%260 = OpLabel -%263 = OpLoad %17 %46 -%264 = OpLoad %19 %51 -%265 = OpLoad %20 %53 -%266 = OpLoad %21 %55 -%267 = OpLoad %22 %57 -%268 = OpLoad %23 %59 -OpBranch %269 -%269 = OpLabel +%260 = OpFunction %2 None %97 +%258 = OpLabel +%261 = OpLoad %17 %46 +%262 = OpLoad %19 %51 +%263 = OpLoad %20 %53 +%264 = OpLoad %21 %55 +%265 = OpLoad %22 %57 +%266 = OpLoad %23 %59 +OpBranch %267 +%267 = OpLabel OpLine %3 96 25 -%270 = OpImageQueryLevels %5 %263 +%268 = OpImageQueryLevels %5 %261 OpLine %3 97 25 -%271 = OpImageQuerySizeLod %13 %264 %219 -%272 = OpCompositeExtract %5 %271 2 +%269 = OpImageQuerySizeLod %13 %262 %217 +%270 = OpCompositeExtract %5 %269 2 OpLine %3 98 31 -%273 = OpImageQueryLevels %5 %264 +%271 = OpImageQueryLevels %5 %262 OpLine %3 99 31 -%274 = OpImageQuerySizeLod %13 %264 %219 -%275 = OpCompositeExtract %5 %274 2 +%272 = OpImageQuerySizeLod %13 %262 %217 +%273 = OpCompositeExtract %5 %272 2 OpLine %3 100 27 -%276 = OpImageQueryLevels %5 %265 +%274 = OpImageQueryLevels %5 %263 OpLine %3 101 33 -%277 = OpImageQueryLevels %5 %266 +%275 = OpImageQueryLevels %5 %264 OpLine %3 102 27 -%278 = OpImageQuerySizeLod %13 %266 %219 -%279 = OpCompositeExtract %5 %278 2 +%276 = OpImageQuerySizeLod %13 %264 %217 +%277 = OpCompositeExtract %5 %276 2 OpLine %3 103 25 -%280 = OpImageQueryLevels %5 %267 +%278 = OpImageQueryLevels %5 %265 OpLine %3 104 26 -%281 = OpImageQuerySamples %5 %268 +%279 = OpImageQuerySamples %5 %266 OpLine %3 106 15 -%282 = OpIAdd %5 %272 %279 -%283 = OpIAdd %5 %282 %281 -%284 = OpIAdd %5 %283 %270 -%285 = OpIAdd %5 %284 %273 -%286 = OpIAdd %5 %285 %280 -%287 = OpIAdd %5 %286 %276 -%288 = OpIAdd %5 %287 %277 +%280 = OpIAdd %5 %270 %277 +%281 = OpIAdd %5 %280 %279 +%282 = OpIAdd %5 %281 %268 +%283 = OpIAdd %5 %282 %271 +%284 = OpIAdd %5 %283 %278 +%285 = OpIAdd %5 %284 %274 +%286 = OpIAdd %5 %285 %275 OpLine %3 108 12 -%289 = OpConvertUToF %8 %288 -%290 = OpCompositeConstruct %24 %289 %289 %289 %289 -OpStore %261 %290 +%287 = OpConvertUToF %8 %286 +%288 = OpCompositeConstruct %24 %287 %287 %287 %287 +OpStore %259 %288 OpReturn OpFunctionEnd -%293 = OpFunction %2 None %97 -%291 = OpLabel -%308 = OpVariable %309 Function %310 -%294 = OpLoad %16 %44 -%295 = OpLoad %17 %46 -%296 = OpLoad %19 %51 -%297 = OpLoad %21 %55 -%298 = OpLoad %25 %61 -OpBranch %311 -%311 = OpLabel +%291 = OpFunction %2 None %97 +%289 = OpLabel +%306 = OpVariable %307 Function %308 +%292 = OpLoad %16 %44 +%293 = OpLoad %17 %46 +%294 = OpLoad %19 %51 +%295 = OpLoad %21 %55 +%296 = OpLoad %25 %61 +OpBranch %309 +%309 = OpLabel OpLine %3 116 16 OpLine %3 117 17 OpLine %3 118 20 OpLine %3 121 5 -%313 = OpSampledImage %312 %294 %298 -%314 = OpImageSampleImplicitLod %24 %313 %299 -%315 = OpLoad %24 %308 -%316 = OpFAdd %24 %315 %314 +%311 = OpSampledImage %310 %292 %296 +%312 = OpImageSampleImplicitLod %24 %311 %297 +%313 = OpLoad %24 %306 +%314 = OpFAdd %24 %313 %312 OpLine %3 121 5 -OpStore %308 %316 +OpStore %306 %314 OpLine %3 122 5 -%318 = OpSampledImage %317 %295 %298 -%319 = OpImageSampleImplicitLod %24 %318 %301 -%320 = OpLoad %24 %308 -%321 = OpFAdd %24 %320 %319 +%316 = OpSampledImage %315 %293 %296 +%317 = OpImageSampleImplicitLod %24 %316 %299 +%318 = OpLoad %24 %306 +%319 = OpFAdd %24 %318 %317 OpLine %3 122 5 -OpStore %308 %321 +OpStore %306 %319 OpLine %3 123 5 -%322 = OpSampledImage %317 %295 %298 -%323 = OpImageSampleImplicitLod %24 %322 %301 ConstOffset %305 -%324 = OpLoad %24 %308 -%325 = OpFAdd %24 %324 %323 +%320 = OpSampledImage %315 %293 %296 +%321 = OpImageSampleImplicitLod %24 %320 %299 ConstOffset %303 +%322 = OpLoad %24 %306 +%323 = OpFAdd %24 %322 %321 OpLine %3 123 5 -OpStore %308 %325 +OpStore %306 %323 OpLine %3 124 5 -%326 = OpSampledImage %317 %295 %298 -%327 = OpImageSampleExplicitLod %24 %326 %301 Lod %306 -%328 = OpLoad %24 %308 -%329 = OpFAdd %24 %328 %327 +%324 = OpSampledImage %315 %293 %296 +%325 = OpImageSampleExplicitLod %24 %324 %299 Lod %304 +%326 = OpLoad %24 %306 +%327 = OpFAdd %24 %326 %325 OpLine %3 124 5 -OpStore %308 %329 +OpStore %306 %327 OpLine %3 125 5 -%330 = OpSampledImage %317 %295 %298 -%331 = OpImageSampleExplicitLod %24 %330 %301 Lod|ConstOffset %306 %305 -%332 = OpLoad %24 %308 -%333 = OpFAdd %24 %332 %331 +%328 = OpSampledImage %315 %293 %296 +%329 = OpImageSampleExplicitLod %24 %328 %299 Lod|ConstOffset %304 %303 +%330 = OpLoad %24 %306 +%331 = OpFAdd %24 %330 %329 OpLine %3 125 5 -OpStore %308 %333 +OpStore %306 %331 OpLine %3 126 5 -%334 = OpSampledImage %317 %295 %298 -%335 = OpImageSampleImplicitLod %24 %334 %301 Bias|ConstOffset %307 %305 -%336 = OpLoad %24 %308 -%337 = OpFAdd %24 %336 %335 +%332 = OpSampledImage %315 %293 %296 +%333 = OpImageSampleImplicitLod %24 %332 %299 Bias|ConstOffset %305 %303 +%334 = OpLoad %24 %306 +%335 = OpFAdd %24 %334 %333 OpLine %3 126 5 -OpStore %308 %337 +OpStore %306 %335 OpLine %3 127 5 -%339 = OpConvertUToF %8 %219 -%340 = OpCompositeConstruct %302 %301 %339 -%341 = OpSampledImage %338 %296 %298 -%342 = OpImageSampleImplicitLod %24 %341 %340 -%343 = OpLoad %24 %308 -%344 = OpFAdd %24 %343 %342 +%337 = OpConvertUToF %8 %217 +%338 = OpCompositeConstruct %300 %299 %337 +%339 = OpSampledImage %336 %294 %296 +%340 = OpImageSampleImplicitLod %24 %339 %338 +%341 = OpLoad %24 %306 +%342 = OpFAdd %24 %341 %340 OpLine %3 127 5 -OpStore %308 %344 +OpStore %306 %342 OpLine %3 128 5 -%345 = OpConvertUToF %8 %219 -%346 = OpCompositeConstruct %302 %301 %345 -%347 = OpSampledImage %338 %296 %298 -%348 = OpImageSampleImplicitLod %24 %347 %346 ConstOffset %305 -%349 = OpLoad %24 %308 -%350 = OpFAdd %24 %349 %348 +%343 = OpConvertUToF %8 %217 +%344 = OpCompositeConstruct %300 %299 %343 +%345 = OpSampledImage %336 %294 %296 +%346 = OpImageSampleImplicitLod %24 %345 %344 ConstOffset %303 +%347 = OpLoad %24 %306 +%348 = OpFAdd %24 %347 %346 OpLine %3 128 5 -OpStore %308 %350 +OpStore %306 %348 OpLine %3 129 5 -%351 = OpConvertUToF %8 %219 -%352 = OpCompositeConstruct %302 %301 %351 -%353 = OpSampledImage %338 %296 %298 -%354 = OpImageSampleExplicitLod %24 %353 %352 Lod %306 -%355 = OpLoad %24 %308 -%356 = OpFAdd %24 %355 %354 +%349 = OpConvertUToF %8 %217 +%350 = OpCompositeConstruct %300 %299 %349 +%351 = OpSampledImage %336 %294 %296 +%352 = OpImageSampleExplicitLod %24 %351 %350 Lod %304 +%353 = OpLoad %24 %306 +%354 = OpFAdd %24 %353 %352 OpLine %3 129 5 -OpStore %308 %356 +OpStore %306 %354 OpLine %3 130 5 -%357 = OpConvertUToF %8 %219 -%358 = OpCompositeConstruct %302 %301 %357 -%359 = OpSampledImage %338 %296 %298 -%360 = OpImageSampleExplicitLod %24 %359 %358 Lod|ConstOffset %306 %305 -%361 = OpLoad %24 %308 -%362 = OpFAdd %24 %361 %360 +%355 = OpConvertUToF %8 %217 +%356 = OpCompositeConstruct %300 %299 %355 +%357 = OpSampledImage %336 %294 %296 +%358 = OpImageSampleExplicitLod %24 %357 %356 Lod|ConstOffset %304 %303 +%359 = OpLoad %24 %306 +%360 = OpFAdd %24 %359 %358 OpLine %3 130 5 -OpStore %308 %362 +OpStore %306 %360 OpLine %3 131 5 -%363 = OpConvertUToF %8 %219 -%364 = OpCompositeConstruct %302 %301 %363 -%365 = OpSampledImage %338 %296 %298 -%366 = OpImageSampleImplicitLod %24 %365 %364 Bias|ConstOffset %307 %305 -%367 = OpLoad %24 %308 -%368 = OpFAdd %24 %367 %366 +%361 = OpConvertUToF %8 %217 +%362 = OpCompositeConstruct %300 %299 %361 +%363 = OpSampledImage %336 %294 %296 +%364 = OpImageSampleImplicitLod %24 %363 %362 Bias|ConstOffset %305 %303 +%365 = OpLoad %24 %306 +%366 = OpFAdd %24 %365 %364 OpLine %3 131 5 -OpStore %308 %368 +OpStore %306 %366 OpLine %3 132 5 -%369 = OpConvertSToF %8 %77 -%370 = OpCompositeConstruct %302 %301 %369 -%371 = OpSampledImage %338 %296 %298 -%372 = OpImageSampleImplicitLod %24 %371 %370 -%373 = OpLoad %24 %308 -%374 = OpFAdd %24 %373 %372 +%367 = OpConvertSToF %8 %77 +%368 = OpCompositeConstruct %300 %299 %367 +%369 = OpSampledImage %336 %294 %296 +%370 = OpImageSampleImplicitLod %24 %369 %368 +%371 = OpLoad %24 %306 +%372 = OpFAdd %24 %371 %370 OpLine %3 132 5 -OpStore %308 %374 +OpStore %306 %372 OpLine %3 133 5 -%375 = OpConvertSToF %8 %77 -%376 = OpCompositeConstruct %302 %301 %375 -%377 = OpSampledImage %338 %296 %298 -%378 = OpImageSampleImplicitLod %24 %377 %376 ConstOffset %305 -%379 = OpLoad %24 %308 -%380 = OpFAdd %24 %379 %378 +%373 = OpConvertSToF %8 %77 +%374 = OpCompositeConstruct %300 %299 %373 +%375 = OpSampledImage %336 %294 %296 +%376 = OpImageSampleImplicitLod %24 %375 %374 ConstOffset %303 +%377 = OpLoad %24 %306 +%378 = OpFAdd %24 %377 %376 OpLine %3 133 5 -OpStore %308 %380 +OpStore %306 %378 OpLine %3 134 5 -%381 = OpConvertSToF %8 %77 -%382 = OpCompositeConstruct %302 %301 %381 -%383 = OpSampledImage %338 %296 %298 -%384 = OpImageSampleExplicitLod %24 %383 %382 Lod %306 -%385 = OpLoad %24 %308 -%386 = OpFAdd %24 %385 %384 +%379 = OpConvertSToF %8 %77 +%380 = OpCompositeConstruct %300 %299 %379 +%381 = OpSampledImage %336 %294 %296 +%382 = OpImageSampleExplicitLod %24 %381 %380 Lod %304 +%383 = OpLoad %24 %306 +%384 = OpFAdd %24 %383 %382 OpLine %3 134 5 -OpStore %308 %386 +OpStore %306 %384 OpLine %3 135 5 -%387 = OpConvertSToF %8 %77 -%388 = OpCompositeConstruct %302 %301 %387 -%389 = OpSampledImage %338 %296 %298 -%390 = OpImageSampleExplicitLod %24 %389 %388 Lod|ConstOffset %306 %305 -%391 = OpLoad %24 %308 -%392 = OpFAdd %24 %391 %390 +%385 = OpConvertSToF %8 %77 +%386 = OpCompositeConstruct %300 %299 %385 +%387 = OpSampledImage %336 %294 %296 +%388 = OpImageSampleExplicitLod %24 %387 %386 Lod|ConstOffset %304 %303 +%389 = OpLoad %24 %306 +%390 = OpFAdd %24 %389 %388 OpLine %3 135 5 -OpStore %308 %392 +OpStore %306 %390 OpLine %3 136 5 -%393 = OpConvertSToF %8 %77 -%394 = OpCompositeConstruct %302 %301 %393 -%395 = OpSampledImage %338 %296 %298 -%396 = OpImageSampleImplicitLod %24 %395 %394 Bias|ConstOffset %307 %305 -%397 = OpLoad %24 %308 -%398 = OpFAdd %24 %397 %396 +%391 = OpConvertSToF %8 %77 +%392 = OpCompositeConstruct %300 %299 %391 +%393 = OpSampledImage %336 %294 %296 +%394 = OpImageSampleImplicitLod %24 %393 %392 Bias|ConstOffset %305 %303 +%395 = OpLoad %24 %306 +%396 = OpFAdd %24 %395 %394 OpLine %3 136 5 -OpStore %308 %398 +OpStore %306 %396 OpLine %3 137 5 -%400 = OpConvertUToF %8 %219 -%401 = OpCompositeConstruct %24 %303 %400 -%402 = OpSampledImage %399 %297 %298 -%403 = OpImageSampleImplicitLod %24 %402 %401 -%404 = OpLoad %24 %308 -%405 = OpFAdd %24 %404 %403 +%398 = OpConvertUToF %8 %217 +%399 = OpCompositeConstruct %24 %301 %398 +%400 = OpSampledImage %397 %295 %296 +%401 = OpImageSampleImplicitLod %24 %400 %399 +%402 = OpLoad %24 %306 +%403 = OpFAdd %24 %402 %401 OpLine %3 137 5 -OpStore %308 %405 +OpStore %306 %403 OpLine %3 138 5 -%406 = OpConvertUToF %8 %219 -%407 = OpCompositeConstruct %24 %303 %406 -%408 = OpSampledImage %399 %297 %298 -%409 = OpImageSampleExplicitLod %24 %408 %407 Lod %306 -%410 = OpLoad %24 %308 -%411 = OpFAdd %24 %410 %409 +%404 = OpConvertUToF %8 %217 +%405 = OpCompositeConstruct %24 %301 %404 +%406 = OpSampledImage %397 %295 %296 +%407 = OpImageSampleExplicitLod %24 %406 %405 Lod %304 +%408 = OpLoad %24 %306 +%409 = OpFAdd %24 %408 %407 OpLine %3 138 5 -OpStore %308 %411 +OpStore %306 %409 OpLine %3 139 5 -%412 = OpConvertUToF %8 %219 -%413 = OpCompositeConstruct %24 %303 %412 -%414 = OpSampledImage %399 %297 %298 -%415 = OpImageSampleImplicitLod %24 %414 %413 Bias %307 -%416 = OpLoad %24 %308 -%417 = OpFAdd %24 %416 %415 +%410 = OpConvertUToF %8 %217 +%411 = OpCompositeConstruct %24 %301 %410 +%412 = OpSampledImage %397 %295 %296 +%413 = OpImageSampleImplicitLod %24 %412 %411 Bias %305 +%414 = OpLoad %24 %306 +%415 = OpFAdd %24 %414 %413 OpLine %3 139 5 -OpStore %308 %417 +OpStore %306 %415 OpLine %3 140 5 -%418 = OpConvertSToF %8 %77 -%419 = OpCompositeConstruct %24 %303 %418 -%420 = OpSampledImage %399 %297 %298 -%421 = OpImageSampleImplicitLod %24 %420 %419 -%422 = OpLoad %24 %308 -%423 = OpFAdd %24 %422 %421 +%416 = OpConvertSToF %8 %77 +%417 = OpCompositeConstruct %24 %301 %416 +%418 = OpSampledImage %397 %295 %296 +%419 = OpImageSampleImplicitLod %24 %418 %417 +%420 = OpLoad %24 %306 +%421 = OpFAdd %24 %420 %419 OpLine %3 140 5 -OpStore %308 %423 +OpStore %306 %421 OpLine %3 141 5 -%424 = OpConvertSToF %8 %77 -%425 = OpCompositeConstruct %24 %303 %424 -%426 = OpSampledImage %399 %297 %298 -%427 = OpImageSampleExplicitLod %24 %426 %425 Lod %306 -%428 = OpLoad %24 %308 -%429 = OpFAdd %24 %428 %427 +%422 = OpConvertSToF %8 %77 +%423 = OpCompositeConstruct %24 %301 %422 +%424 = OpSampledImage %397 %295 %296 +%425 = OpImageSampleExplicitLod %24 %424 %423 Lod %304 +%426 = OpLoad %24 %306 +%427 = OpFAdd %24 %426 %425 OpLine %3 141 5 -OpStore %308 %429 +OpStore %306 %427 OpLine %3 142 5 -%430 = OpConvertSToF %8 %77 -%431 = OpCompositeConstruct %24 %303 %430 -%432 = OpSampledImage %399 %297 %298 -%433 = OpImageSampleImplicitLod %24 %432 %431 Bias %307 -%434 = OpLoad %24 %308 -%435 = OpFAdd %24 %434 %433 +%428 = OpConvertSToF %8 %77 +%429 = OpCompositeConstruct %24 %301 %428 +%430 = OpSampledImage %397 %295 %296 +%431 = OpImageSampleImplicitLod %24 %430 %429 Bias %305 +%432 = OpLoad %24 %306 +%433 = OpFAdd %24 %432 %431 OpLine %3 142 5 -OpStore %308 %435 +OpStore %306 %433 OpLine %3 1 1 -%436 = OpLoad %24 %308 -OpStore %292 %436 +%434 = OpLoad %24 %306 +OpStore %290 %434 OpReturn OpFunctionEnd -%440 = OpFunction %2 None %97 -%437 = OpLabel -%445 = OpVariable %446 Function %447 -%441 = OpLoad %25 %63 -%442 = OpLoad %26 %64 -%443 = OpLoad %27 %66 -%444 = OpLoad %28 %68 -OpBranch %448 -%448 = OpLabel +%438 = OpFunction %2 None %97 +%435 = OpLabel +%443 = OpVariable %444 Function %445 +%439 = OpLoad %25 %63 +%440 = OpLoad %26 %64 +%441 = OpLoad %27 %66 +%442 = OpLoad %28 %68 +OpBranch %446 +%446 = OpLabel OpLine %3 157 14 OpLine %3 158 15 OpLine %3 161 5 -%450 = OpSampledImage %449 %442 %441 -%451 = OpImageSampleDrefImplicitLod %8 %450 %301 %299 -%452 = OpLoad %8 %445 -%453 = OpFAdd %8 %452 %451 +%448 = OpSampledImage %447 %440 %439 +%449 = OpImageSampleDrefImplicitLod %8 %448 %299 %297 +%450 = OpLoad %8 %443 +%451 = OpFAdd %8 %450 %449 OpLine %3 161 5 -OpStore %445 %453 +OpStore %443 %451 OpLine %3 162 5 -%455 = OpConvertUToF %8 %219 -%456 = OpCompositeConstruct %302 %301 %455 -%457 = OpSampledImage %454 %443 %441 -%458 = OpImageSampleDrefImplicitLod %8 %457 %456 %299 -%459 = OpLoad %8 %445 -%460 = OpFAdd %8 %459 %458 +%453 = OpConvertUToF %8 %217 +%454 = OpCompositeConstruct %300 %299 %453 +%455 = OpSampledImage %452 %441 %439 +%456 = OpImageSampleDrefImplicitLod %8 %455 %454 %297 +%457 = OpLoad %8 %443 +%458 = OpFAdd %8 %457 %456 OpLine %3 162 5 -OpStore %445 %460 +OpStore %443 %458 OpLine %3 163 5 -%461 = OpConvertSToF %8 %77 -%462 = OpCompositeConstruct %302 %301 %461 -%463 = OpSampledImage %454 %443 %441 -%464 = OpImageSampleDrefImplicitLod %8 %463 %462 %299 -%465 = OpLoad %8 %445 -%466 = OpFAdd %8 %465 %464 +%459 = OpConvertSToF %8 %77 +%460 = OpCompositeConstruct %300 %299 %459 +%461 = OpSampledImage %452 %441 %439 +%462 = OpImageSampleDrefImplicitLod %8 %461 %460 %297 +%463 = OpLoad %8 %443 +%464 = OpFAdd %8 %463 %462 OpLine %3 163 5 -OpStore %445 %466 +OpStore %443 %464 OpLine %3 164 5 -%468 = OpSampledImage %467 %444 %441 -%469 = OpImageSampleDrefImplicitLod %8 %468 %303 %299 -%470 = OpLoad %8 %445 -%471 = OpFAdd %8 %470 %469 +%466 = OpSampledImage %465 %442 %439 +%467 = OpImageSampleDrefImplicitLod %8 %466 %301 %297 +%468 = OpLoad %8 %443 +%469 = OpFAdd %8 %468 %467 OpLine %3 164 5 -OpStore %445 %471 +OpStore %443 %469 OpLine %3 165 5 -%472 = OpSampledImage %449 %442 %441 -%473 = OpImageSampleDrefExplicitLod %8 %472 %301 %299 Lod %474 -%475 = OpLoad %8 %445 -%476 = OpFAdd %8 %475 %473 +%470 = OpSampledImage %447 %440 %439 +%471 = OpImageSampleDrefExplicitLod %8 %470 %299 %297 Lod %472 +%473 = OpLoad %8 %443 +%474 = OpFAdd %8 %473 %471 OpLine %3 165 5 -OpStore %445 %476 +OpStore %443 %474 OpLine %3 166 5 -%477 = OpConvertUToF %8 %219 -%478 = OpCompositeConstruct %302 %301 %477 -%479 = OpSampledImage %454 %443 %441 -%480 = OpImageSampleDrefExplicitLod %8 %479 %478 %299 Lod %474 -%481 = OpLoad %8 %445 -%482 = OpFAdd %8 %481 %480 +%475 = OpConvertUToF %8 %217 +%476 = OpCompositeConstruct %300 %299 %475 +%477 = OpSampledImage %452 %441 %439 +%478 = OpImageSampleDrefExplicitLod %8 %477 %476 %297 Lod %472 +%479 = OpLoad %8 %443 +%480 = OpFAdd %8 %479 %478 OpLine %3 166 5 -OpStore %445 %482 +OpStore %443 %480 OpLine %3 167 5 -%483 = OpConvertSToF %8 %77 -%484 = OpCompositeConstruct %302 %301 %483 -%485 = OpSampledImage %454 %443 %441 -%486 = OpImageSampleDrefExplicitLod %8 %485 %484 %299 Lod %474 -%487 = OpLoad %8 %445 -%488 = OpFAdd %8 %487 %486 +%481 = OpConvertSToF %8 %77 +%482 = OpCompositeConstruct %300 %299 %481 +%483 = OpSampledImage %452 %441 %439 +%484 = OpImageSampleDrefExplicitLod %8 %483 %482 %297 Lod %472 +%485 = OpLoad %8 %443 +%486 = OpFAdd %8 %485 %484 OpLine %3 167 5 -OpStore %445 %488 +OpStore %443 %486 OpLine %3 168 5 -%489 = OpSampledImage %467 %444 %441 -%490 = OpImageSampleDrefExplicitLod %8 %489 %303 %299 Lod %474 -%491 = OpLoad %8 %445 -%492 = OpFAdd %8 %491 %490 +%487 = OpSampledImage %465 %442 %439 +%488 = OpImageSampleDrefExplicitLod %8 %487 %301 %297 Lod %472 +%489 = OpLoad %8 %443 +%490 = OpFAdd %8 %489 %488 OpLine %3 168 5 -OpStore %445 %492 +OpStore %443 %490 OpLine %3 1 1 -%493 = OpLoad %8 %445 -OpStore %438 %493 +%491 = OpLoad %8 %443 +OpStore %436 %491 OpReturn OpFunctionEnd -%496 = OpFunction %2 None %97 -%494 = OpLabel -%497 = OpLoad %17 %46 -%498 = OpLoad %4 %48 -%499 = OpLoad %18 %49 -%500 = OpLoad %25 %61 -%501 = OpLoad %25 %63 -%502 = OpLoad %26 %64 -OpBranch %503 -%503 = OpLabel +%494 = OpFunction %2 None %97 +%492 = OpLabel +%495 = OpLoad %17 %46 +%496 = OpLoad %4 %48 +%497 = OpLoad %18 %49 +%498 = OpLoad %25 %61 +%499 = OpLoad %25 %63 +%500 = OpLoad %26 %64 +OpBranch %501 +%501 = OpLabel OpLine %3 174 14 OpLine %3 176 15 -%504 = OpSampledImage %317 %497 %500 -%505 = OpImageGather %24 %504 %301 %506 +%502 = OpSampledImage %315 %495 %498 +%503 = OpImageGather %24 %502 %299 %504 OpLine %3 177 22 -%507 = OpSampledImage %317 %497 %500 -%508 = OpImageGather %24 %507 %301 %509 ConstOffset %305 +%505 = OpSampledImage %315 %495 %498 +%506 = OpImageGather %24 %505 %299 %507 ConstOffset %303 OpLine %3 178 21 -%510 = OpSampledImage %449 %502 %501 -%511 = OpImageDrefGather %24 %510 %301 %299 +%508 = OpSampledImage %447 %500 %499 +%509 = OpImageDrefGather %24 %508 %299 %297 OpLine %3 179 28 -%512 = OpSampledImage %449 %502 %501 -%513 = OpImageDrefGather %24 %512 %301 %299 ConstOffset %305 +%510 = OpSampledImage %447 %500 %499 +%511 = OpImageDrefGather %24 %510 %299 %297 ConstOffset %303 OpLine %3 181 13 -%515 = OpSampledImage %514 %498 %500 -%516 = OpImageGather %116 %515 %301 %219 +%513 = OpSampledImage %512 %496 %498 +%514 = OpImageGather %116 %513 %299 %217 OpLine %3 182 13 -%519 = OpSampledImage %518 %499 %500 -%520 = OpImageGather %517 %519 %301 %219 +%517 = OpSampledImage %516 %497 %498 +%518 = OpImageGather %515 %517 %299 %217 OpLine %3 183 13 -%521 = OpConvertUToF %24 %516 -%522 = OpConvertSToF %24 %520 -%523 = OpFAdd %24 %521 %522 +%519 = OpConvertUToF %24 %514 +%520 = OpConvertSToF %24 %518 +%521 = OpFAdd %24 %519 %520 OpLine %3 185 12 -%524 = OpFAdd %24 %505 %508 -%525 = OpFAdd %24 %524 %511 -%526 = OpFAdd %24 %525 %513 -%527 = OpFAdd %24 %526 %523 -OpStore %495 %527 +%522 = OpFAdd %24 %503 %506 +%523 = OpFAdd %24 %522 %509 +%524 = OpFAdd %24 %523 %511 +%525 = OpFAdd %24 %524 %521 +OpStore %493 %525 OpReturn OpFunctionEnd -%530 = OpFunction %2 None %97 -%528 = OpLabel -%531 = OpLoad %25 %61 -%532 = OpLoad %26 %64 -OpBranch %533 -%533 = OpLabel +%528 = OpFunction %2 None %97 +%526 = OpLabel +%529 = OpLoad %25 %61 +%530 = OpLoad %26 %64 +OpBranch %531 +%531 = OpLabel OpLine %3 190 14 OpLine %3 192 15 -%534 = OpSampledImage %449 %532 %531 -%535 = OpImageSampleImplicitLod %24 %534 %301 -%536 = OpCompositeExtract %8 %535 0 +%532 = OpSampledImage %447 %530 %529 +%533 = OpImageSampleImplicitLod %24 %532 %299 +%534 = OpCompositeExtract %8 %533 0 OpLine %3 193 22 -%537 = OpSampledImage %449 %532 %531 -%538 = OpImageGather %24 %537 %301 %219 +%535 = OpSampledImage %447 %530 %529 +%536 = OpImageGather %24 %535 %299 %217 OpLine %3 194 21 -%539 = OpSampledImage %449 %532 %531 -%541 = OpConvertSToF %8 %88 -%540 = OpImageSampleExplicitLod %24 %539 %301 Lod %541 -%542 = OpCompositeExtract %8 %540 0 +%537 = OpSampledImage %447 %530 %529 +%539 = OpConvertSToF %8 %88 +%538 = OpImageSampleExplicitLod %24 %537 %299 Lod %539 +%540 = OpCompositeExtract %8 %538 0 OpLine %3 192 15 -%543 = OpCompositeConstruct %24 %536 %536 %536 %536 -%544 = OpFAdd %24 %543 %538 -%545 = OpCompositeConstruct %24 %542 %542 %542 %542 -%546 = OpFAdd %24 %544 %545 -OpStore %529 %546 +%541 = OpCompositeConstruct %24 %534 %534 %534 %534 +%542 = OpFAdd %24 %541 %536 +%543 = OpCompositeConstruct %24 %540 %540 %540 %540 +%544 = OpFAdd %24 %542 %543 +OpStore %527 %544 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/wgsl/extra.wgsl b/naga/tests/out/wgsl/extra.wgsl index 25f894e7f5..574c35ab0b 100644 --- a/naga/tests/out/wgsl/extra.wgsl +++ b/naga/tests/out/wgsl/extra.wgsl @@ -1,6 +1,6 @@ struct PushConstants { index: u32, - double: vec2, + double: vec2, } struct FragmentIn { diff --git a/naga/tests/out/wgsl/f16-glsl.comp.wgsl b/naga/tests/out/wgsl/f16-glsl.comp.wgsl new file mode 100644 index 0000000000..e12dcb1408 --- /dev/null +++ b/naga/tests/out/wgsl/f16-glsl.comp.wgsl @@ -0,0 +1,47 @@ +enable f16; + +struct A { + a_1_: f16, + a_vec2_: vec2, + a_vec3_: vec3, + a_vec4_: vec4, +} + +struct B { + b_1_: f16, + b_vec2_: vec2, + b_vec3_: vec3, + b_vec4_: vec4, + b_mat2_: mat2x2, + b_mat2x3_: mat2x3, + b_mat2x4_: mat2x4, + b_mat3x2_: mat3x2, + b_mat3_: mat3x3, + b_mat3x4_: mat3x4, + b_mat4x2_: mat4x2, + b_mat4x3_: mat4x3, + b_mat4_: mat4x4, +} + +@group(0) @binding(0) +var global: A; +@group(0) @binding(1) +var global_1: B; + +fn main_1() { + let _e16 = global.a_1_; + global_1.b_1_ = _e16; + let _e17 = global.a_vec2_; + global_1.b_vec2_ = _e17; + let _e18 = global.a_vec3_; + global_1.b_vec3_ = _e18; + let _e19 = global.a_vec4_; + global_1.b_vec4_ = _e19; + return; +} + +@compute @workgroup_size(1, 1, 1) +fn main() { + main_1(); + return; +} diff --git a/naga/tests/out/wgsl/f16-spv.wgsl b/naga/tests/out/wgsl/f16-spv.wgsl new file mode 100644 index 0000000000..4c29ace8c3 --- /dev/null +++ b/naga/tests/out/wgsl/f16-spv.wgsl @@ -0,0 +1,46 @@ +enable f16; + +struct B { + b_1_: f16, + b_vec2_: vec2, + b_vec3_: vec3, + b_vec4_: vec4, + b_mat2_: mat2x2, + b_mat2x3_: mat2x3, + b_mat2x4_: mat2x4, + b_mat3x2_: mat3x2, + b_mat3_: mat3x3, + b_mat3x4_: mat3x4, + b_mat4x2_: mat4x2, + b_mat4x3_: mat4x3, + b_mat4_: mat4x4, +} + +struct A { + a_1_: f16, + a_vec2_: vec2, + a_vec3_: vec3, + a_vec4_: vec4, +} + +@group(0) @binding(1) +var unnamed: B; +@group(0) @binding(0) +var unnamed_1: A; + +fn main_1() { + let _e3 = unnamed_1.a_1_; + unnamed.b_1_ = _e3; + let _e6 = unnamed_1.a_vec2_; + unnamed.b_vec2_ = _e6; + let _e9 = unnamed_1.a_vec3_; + unnamed.b_vec3_ = _e9; + let _e12 = unnamed_1.a_vec4_; + unnamed.b_vec4_ = _e12; + return; +} + +@compute @workgroup_size(1, 1, 1) +fn main() { + main_1(); +} diff --git a/naga/tests/out/wgsl/f16.wgsl b/naga/tests/out/wgsl/f16.wgsl new file mode 100644 index 0000000000..a45edce3b5 --- /dev/null +++ b/naga/tests/out/wgsl/f16.wgsl @@ -0,0 +1,167 @@ +enable f16; + +struct UniformCompatible { + val_u32_: u32, + val_i32_: i32, + val_f32_: f32, + val_f16_: f16, + val_f16_2_: vec2, + val_f16_3_: vec3, + val_f16_4_: vec4, + final_value: f16, + val_mat2x2_: mat2x2, + val_mat2x3_: mat2x3, + val_mat2x4_: mat2x4, + val_mat3x2_: mat3x2, + val_mat3x3_: mat3x3, + val_mat3x4_: mat3x4, + val_mat4x2_: mat4x2, + val_mat4x3_: mat4x3, + val_mat4x4_: mat4x4, +} + +struct StorageCompatible { + val_f16_array_2_: array, +} + +struct LayoutTest { + scalar1_: f16, + scalar2_: f16, + v3_: vec3, + tuck_in: f16, + scalar4_: f16, + larger: u32, +} + +const constant_variable: f16 = 15.203125h; + +var private_variable: f16 = 1h; +@group(0) @binding(0) +var input_uniform: UniformCompatible; +@group(0) @binding(1) +var input_storage: UniformCompatible; +@group(0) @binding(2) +var input_arrays: StorageCompatible; +@group(0) @binding(3) +var output: UniformCompatible; +@group(0) @binding(4) +var output_arrays: StorageCompatible; + +fn f16_function(x: f16) -> f16 { + var val: f16 = 15.203125h; + + let _e4 = val; + val = (_e4 + -33344h); + let _e6 = val; + let _e9 = val; + val = (_e9 + (_e6 + 5h)); + let _e13 = input_uniform.val_f32_; + let _e14 = val; + let _e18 = val; + val = (_e18 + f16((_e13 + f32(_e14)))); + let _e22 = input_uniform.val_f16_; + let _e25 = val; + val = (_e25 + vec3(_e22).z); + let _e31 = input_uniform.val_f16_; + let _e34 = input_storage.val_f16_; + output.val_f16_ = (_e31 + _e34); + let _e40 = input_uniform.val_f16_2_; + let _e43 = input_storage.val_f16_2_; + output.val_f16_2_ = (_e40 + _e43); + let _e49 = input_uniform.val_f16_3_; + let _e52 = input_storage.val_f16_3_; + output.val_f16_3_ = (_e49 + _e52); + let _e58 = input_uniform.val_f16_4_; + let _e61 = input_storage.val_f16_4_; + output.val_f16_4_ = (_e58 + _e61); + let _e67 = input_uniform.val_mat2x2_; + let _e70 = input_storage.val_mat2x2_; + output.val_mat2x2_ = (_e67 + _e70); + let _e76 = input_uniform.val_mat2x3_; + let _e79 = input_storage.val_mat2x3_; + output.val_mat2x3_ = (_e76 + _e79); + let _e85 = input_uniform.val_mat2x4_; + let _e88 = input_storage.val_mat2x4_; + output.val_mat2x4_ = (_e85 + _e88); + let _e94 = input_uniform.val_mat3x2_; + let _e97 = input_storage.val_mat3x2_; + output.val_mat3x2_ = (_e94 + _e97); + let _e103 = input_uniform.val_mat3x3_; + let _e106 = input_storage.val_mat3x3_; + output.val_mat3x3_ = (_e103 + _e106); + let _e112 = input_uniform.val_mat3x4_; + let _e115 = input_storage.val_mat3x4_; + output.val_mat3x4_ = (_e112 + _e115); + let _e121 = input_uniform.val_mat4x2_; + let _e124 = input_storage.val_mat4x2_; + output.val_mat4x2_ = (_e121 + _e124); + let _e130 = input_uniform.val_mat4x3_; + let _e133 = input_storage.val_mat4x3_; + output.val_mat4x3_ = (_e130 + _e133); + let _e139 = input_uniform.val_mat4x4_; + let _e142 = input_storage.val_mat4x4_; + output.val_mat4x4_ = (_e139 + _e142); + let _e148 = input_arrays.val_f16_array_2_; + output_arrays.val_f16_array_2_ = _e148; + let _e149 = val; + let _e151 = val; + val = (_e151 + abs(_e149)); + let _e153 = val; + let _e154 = val; + let _e155 = val; + let _e157 = val; + val = (_e157 + clamp(_e153, _e154, _e155)); + let _e159 = val; + let _e161 = val; + let _e164 = val; + val = (_e164 + dot(vec2(_e159), vec2(_e161))); + let _e166 = val; + let _e167 = val; + let _e169 = val; + val = (_e169 + max(_e166, _e167)); + let _e171 = val; + let _e172 = val; + let _e174 = val; + val = (_e174 + min(_e171, _e172)); + let _e176 = val; + let _e178 = val; + val = (_e178 + sign(_e176)); + let _e181 = val; + val = (_e181 + 1h); + let _e185 = input_uniform.val_f16_2_; + let float_vec2_ = vec2(_e185); + output.val_f16_2_ = vec2(float_vec2_); + let _e192 = input_uniform.val_f16_3_; + let float_vec3_ = vec3(_e192); + output.val_f16_3_ = vec3(float_vec3_); + let _e199 = input_uniform.val_f16_4_; + let float_vec4_ = vec4(_e199); + output.val_f16_4_ = vec4(float_vec4_); + let _e208 = input_uniform.val_mat2x2_; + output.val_mat2x2_ = mat2x2(mat2x2(_e208)); + let _e215 = input_uniform.val_mat2x3_; + output.val_mat2x3_ = mat2x3(mat2x3(_e215)); + let _e222 = input_uniform.val_mat2x4_; + output.val_mat2x4_ = mat2x4(mat2x4(_e222)); + let _e229 = input_uniform.val_mat3x2_; + output.val_mat3x2_ = mat3x2(mat3x2(_e229)); + let _e236 = input_uniform.val_mat3x3_; + output.val_mat3x3_ = mat3x3(mat3x3(_e236)); + let _e243 = input_uniform.val_mat3x4_; + output.val_mat3x4_ = mat3x4(mat3x4(_e243)); + let _e250 = input_uniform.val_mat4x2_; + output.val_mat4x2_ = mat4x2(mat4x2(_e250)); + let _e257 = input_uniform.val_mat4x3_; + output.val_mat4x3_ = mat4x3(mat4x3(_e257)); + let _e264 = input_uniform.val_mat4x4_; + output.val_mat4x4_ = mat4x4(mat4x4(_e264)); + let _e267 = val; + return _e267; +} + +@compute @workgroup_size(1, 1, 1) +fn main() { + let _e3 = f16_function(2h); + output.final_value = _e3; + return; +} diff --git a/naga/tests/spirv_capabilities.rs b/naga/tests/spirv_capabilities.rs index f221c7896e..2d46e37f72 100644 --- a/naga/tests/spirv_capabilities.rs +++ b/naga/tests/spirv_capabilities.rs @@ -208,3 +208,8 @@ fn int64() { "#, ); } + +#[test] +fn float16() { + require(&[Ca::Float16], "enable f16; fn f(x: f16) { }"); +} diff --git a/naga/tests/wgsl_errors.rs b/naga/tests/wgsl_errors.rs index 2447208b2a..e1d0a42234 100644 --- a/naga/tests/wgsl_errors.rs +++ b/naga/tests/wgsl_errors.rs @@ -8,7 +8,8 @@ use naga::valid::Capabilities; #[track_caller] fn check(input: &str, snapshot: &str) { let output = naga::front::wgsl::parse_str(input) - .expect_err("expected parser error") + .map(|_| panic!("expected parser error, but parsing succeeded!")) + .unwrap_err() .emit_to_string(input); if output != snapshot { for diff in diff::lines(snapshot, &output) { @@ -22,6 +23,19 @@ fn check(input: &str, snapshot: &str) { } } +#[track_caller] +fn check_success(input: &str) { + match naga::front::wgsl::parse_str(input) { + Ok(_) => {} + Err(err) => { + panic!( + "expected success, but parsing failed with:\n{}", + err.emit_to_string(input) + ); + } + } +} + #[test] fn very_negative_integers() { // wgpu#4492 @@ -827,6 +841,50 @@ fn matrix_constructor_inferred() { ); } +#[test] +fn float16_requires_enable() { + check( + r#" + const a: f16 = 1.0; + "#, + r#"error: the `f16` language extension is not enabled + ┌─ wgsl:2:22 + │ +2 │ const a: f16 = 1.0; + │ ^^^ the `f16` language extension is needed for this functionality, but it is not currently enabled. + │ + = note: You can enable this extension by adding `enable f16;` at the top of the shader. + +"#, + ); + + check( + r#" + const a = 1.0h; + "#, + r#"error: the `f16` language extension is not enabled + ┌─ wgsl:2:23 + │ +2 │ const a = 1.0h; + │ ^^^^ the `f16` language extension is needed for this functionality, but it is not currently enabled. + │ + = note: You can enable this extension by adding `enable f16;` at the top of the shader. + +"#, + ); +} + +#[test] +fn multiple_enables_valid() { + check_success( + r#" + enable f16; + enable f16; + const a: f16 = 1.0h; + "#, + ); +} + /// Check the result of validating a WGSL program against a pattern. /// /// Unless you are generating code programmatically, the @@ -932,6 +990,53 @@ fn int64_capability() { } } +#[test] +fn float16_capability() { + check_validation! { + "enable f16; var input: f16;", + "enable f16; var input: vec2;": + Err(naga::valid::ValidationError::Type { + source: naga::valid::TypeError::WidthError(naga::valid::WidthError::MissingCapability {flag: "FLOAT16",..}), + .. + }) + } +} + +#[test] +fn float16_in_push_constant() { + check_validation! { + "enable f16; var input: f16;", + "enable f16; var input: vec2;", + "enable f16; var input: mat4x4;", + "enable f16; struct S { a: f16 }; var input: S;", + "enable f16; struct S1 { a: f16 }; struct S2 { a : S1 } var input: S2;": + Err(naga::valid::ValidationError::GlobalVariable { + source: naga::valid::GlobalVariableError::InvalidPushConstantType( + naga::valid::PushConstantError::InvalidScalar( + naga::Scalar::F16 + ) + ), + .. + }), + naga::valid::Capabilities::SHADER_FLOAT16 | naga::valid::Capabilities::PUSH_CONSTANT + } +} + +#[test] +fn float16_in_atomic() { + check_validation! { + "enable f16; var a: atomic;": + Err(naga::valid::ValidationError::Type { + source: naga::valid::TypeError::InvalidAtomicWidth( + naga::ScalarKind::Float, + 2 + ), + .. + }), + naga::valid::Capabilities::SHADER_FLOAT16 + } +} + #[test] fn invalid_arrays() { check_validation! { diff --git a/tests/Cargo.toml b/tests/Cargo.toml index abdab2b852..fe1f98a050 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -49,6 +49,7 @@ cfg-if.workspace = true ctor.workspace = true futures-lite.workspace = true glam.workspace = true +half = { workspace = true, features = ["bytemuck"] } itertools.workspace = true image.workspace = true libtest-mimic.workspace = true diff --git a/tests/gpu-tests/shader/mod.rs b/tests/gpu-tests/shader/mod.rs index 9a3bae0d40..15d4c5e5cf 100644 --- a/tests/gpu-tests/shader/mod.rs +++ b/tests/gpu-tests/shader/mod.rs @@ -143,8 +143,8 @@ impl ShaderTest { body, input_type: String::from("CustomStruct"), output_type: String::from("array"), - input_values: bytemuck::cast_slice(input_values).to_vec(), - output_values: vec![bytemuck::cast_slice(output_values).to_vec()], + input_values: bytemuck::pod_collect_to_vec(input_values), + output_values: vec![bytemuck::pod_collect_to_vec(output_values)], output_comparison_fn: Self::default_comparison_function::, output_initialization: u32::MAX, failures: Backends::empty(), diff --git a/tests/gpu-tests/shader/struct_layout.rs b/tests/gpu-tests/shader/struct_layout.rs index 38a040fcad..25bd32bf77 100644 --- a/tests/gpu-tests/shader/struct_layout.rs +++ b/tests/gpu-tests/shader/struct_layout.rs @@ -5,6 +5,60 @@ use wgpu::{Backends, DownlevelFlags, Features, Limits}; use crate::shader::{shader_input_output_test, InputStorageType, ShaderTest, MAX_BUFFER_SIZE}; use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration, TestParameters}; +#[gpu_test] +static UNIFORM_INPUT: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) + // Validation errors thrown by the SPIR-V validator https://github.com/gfx-rs/wgpu/issues/4371 + .expect_fail( + FailureCase::backend(wgpu::Backends::VULKAN) + .validation_error("a matrix with stride 8 not satisfying alignment to 16"), + ) + .limits(Limits::downlevel_defaults()), + ) + .run_async(|ctx| { + shader_input_output_test( + ctx, + InputStorageType::Uniform, + create_struct_layout_tests(InputStorageType::Uniform), + ) + }); + +#[gpu_test] +static STORAGE_INPUT: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) + .limits(Limits::downlevel_defaults()), + ) + .run_async(|ctx| { + shader_input_output_test( + ctx, + InputStorageType::Storage, + create_struct_layout_tests(InputStorageType::Storage), + ) + }); + +#[gpu_test] +static PUSH_CONSTANT_INPUT: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .features(Features::PUSH_CONSTANTS) + .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) + .limits(Limits { + max_push_constant_size: MAX_BUFFER_SIZE as u32, + ..Limits::downlevel_defaults() + }), + ) + .run_async(|ctx| { + shader_input_output_test( + ctx, + InputStorageType::PushConstant, + create_struct_layout_tests(InputStorageType::PushConstant), + ) + }); + fn create_struct_layout_tests(storage_type: InputStorageType) -> Vec { let input_values: Vec<_> = (0..(MAX_BUFFER_SIZE as u32 / 4)).collect(); @@ -253,6 +307,57 @@ fn create_struct_layout_tests(storage_type: InputStorageType) -> Vec tests } +#[gpu_test] +static UNIFORM_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .features(Features::SHADER_INT64) + .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) + .limits(Limits::downlevel_defaults()), + ) + .run_async(|ctx| { + shader_input_output_test( + ctx, + InputStorageType::Storage, + create_64bit_struct_layout_tests(), + ) + }); + +#[gpu_test] +static STORAGE_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .features(Features::SHADER_INT64) + .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) + .limits(Limits::downlevel_defaults()), + ) + .run_async(|ctx| { + shader_input_output_test( + ctx, + InputStorageType::Storage, + create_64bit_struct_layout_tests(), + ) + }); + +#[gpu_test] +static PUSH_CONSTANT_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .features(Features::SHADER_INT64 | Features::PUSH_CONSTANTS) + .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) + .limits(Limits { + max_push_constant_size: MAX_BUFFER_SIZE as u32, + ..Limits::downlevel_defaults() + }), + ) + .run_async(|ctx| { + shader_input_output_test( + ctx, + InputStorageType::PushConstant, + create_64bit_struct_layout_tests(), + ) + }); + fn create_64bit_struct_layout_tests() -> Vec { let input_values: Vec<_> = (0..(MAX_BUFFER_SIZE as u32 / 4)).collect(); @@ -356,29 +461,26 @@ fn create_64bit_struct_layout_tests() -> Vec { } #[gpu_test] -static UNIFORM_INPUT: GpuTestConfiguration = GpuTestConfiguration::new() +static UNIFORM_INPUT_F16: GpuTestConfiguration = GpuTestConfiguration::new() .parameters( TestParameters::default() + .features(Features::SHADER_F16) .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) - // Validation errors thrown by the SPIR-V validator https://github.com/gfx-rs/wgpu/issues/4371 - .expect_fail( - FailureCase::backend(wgpu::Backends::VULKAN) - .validation_error("a matrix with stride 8 not satisfying alignment to 16"), - ) .limits(Limits::downlevel_defaults()), ) .run_async(|ctx| { shader_input_output_test( ctx, - InputStorageType::Uniform, - create_struct_layout_tests(InputStorageType::Uniform), + InputStorageType::Storage, + create_16bit_struct_layout_test(), ) }); #[gpu_test] -static STORAGE_INPUT: GpuTestConfiguration = GpuTestConfiguration::new() +static STORAGE_INPUT_F16: GpuTestConfiguration = GpuTestConfiguration::new() .parameters( TestParameters::default() + .features(Features::SHADER_F16) .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) .limits(Limits::downlevel_defaults()), ) @@ -386,76 +488,205 @@ static STORAGE_INPUT: GpuTestConfiguration = GpuTestConfiguration::new() shader_input_output_test( ctx, InputStorageType::Storage, - create_struct_layout_tests(InputStorageType::Storage), + create_16bit_struct_layout_test(), ) }); -#[gpu_test] -static PUSH_CONSTANT_INPUT: GpuTestConfiguration = GpuTestConfiguration::new() - .parameters( - TestParameters::default() - .features(Features::PUSH_CONSTANTS) - .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) - .limits(Limits { - max_push_constant_size: MAX_BUFFER_SIZE as u32, - ..Limits::downlevel_defaults() - }), - ) - .run_async(|ctx| { - shader_input_output_test( - ctx, - InputStorageType::PushConstant, - create_struct_layout_tests(InputStorageType::PushConstant), - ) - }); +fn create_16bit_struct_layout_test() -> Vec { + let mut tests = Vec::new(); -#[gpu_test] -static UNIFORM_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new() - .parameters( - TestParameters::default() - .features(Features::SHADER_INT64) - .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) - .limits(Limits::downlevel_defaults()), - ) - .run_async(|ctx| { - shader_input_output_test( - ctx, - InputStorageType::Storage, - create_64bit_struct_layout_tests(), - ) - }); + fn f16asu16(f32: f32) -> u16 { + half::f16::from_f32(f32).to_bits() + } -#[gpu_test] -static STORAGE_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new() - .parameters( - TestParameters::default() - .features(Features::SHADER_INT64) - .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) - .limits(Limits::downlevel_defaults()), - ) - .run_async(|ctx| { - shader_input_output_test( - ctx, - InputStorageType::Storage, - create_64bit_struct_layout_tests(), - ) - }); + // 16 bit alignment tests + { + let members = + "scalar1: f16, scalar2: f16, v3: vec3, tuck_in: f16, scalar4: f16, larger: u32"; + let direct = String::from( + "\ + output[0] = u32(input.scalar1); + output[1] = u32(input.scalar2); + output[2] = u32(input.v3.x); + output[3] = u32(input.v3.y); + output[4] = u32(input.v3.z); + output[5] = u32(input.tuck_in); + output[6] = u32(input.scalar4); + output[7] = u32(extractBits(input.larger, 0u, 16u)); + output[8] = u32(extractBits(input.larger, 16u, 16u)); + ", + ); -#[gpu_test] -static PUSH_CONSTANT_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new() - .parameters( - TestParameters::default() - .features(Features::SHADER_INT64 | Features::PUSH_CONSTANTS) - .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) - .limits(Limits { - max_push_constant_size: MAX_BUFFER_SIZE as u32, - ..Limits::downlevel_defaults() - }), - ) - .run_async(|ctx| { - shader_input_output_test( - ctx, - InputStorageType::PushConstant, - create_64bit_struct_layout_tests(), - ) - }); + tests.push(ShaderTest::new( + "f16 alignment".into(), + members.into(), + direct, + &[ + f16asu16(0.0), + f16asu16(1.0), + f16asu16(2.0), + f16asu16(3.0), + f16asu16(4.0), + f16asu16(5.0), + f16asu16(6.0), + f16asu16(7.0), + f16asu16(8.0), + f16asu16(9.0), + 10_u16, + 11_u16, + // Some extra values to help debug if the test fails. + 12_u16, + 13_u16, + 14_u16, + 15_u16, + 16_u16, + 17_u16, + 18_u16, + 19_u16, + 20_u16, + ], + &[ + 0, // scalar1 + 1, // scalar2 + 4, 5, 6, // v3 + 7, // tuck_in + 8, // scalar4 + 10, // larger[0..16] + 11, // larger[16..32] + ], + )); + } + + // Matrix tests + { + let members = "m2: mat2x2h, m3: mat3x3h, m4: mat4x4h"; + let direct = String::from( + "\ + output[0] = u32(input.m2[0].x); + output[1] = u32(input.m2[0].y); + output[2] = u32(input.m2[1].x); + output[3] = u32(input.m2[1].y); + + output[4] = u32(input.m3[0].x); + output[5] = u32(input.m3[0].y); + output[6] = u32(input.m3[0].z); + output[7] = u32(input.m3[1].x); + output[8] = u32(input.m3[1].y); + output[9] = u32(input.m3[1].z); + output[10] = u32(input.m3[2].x); + output[11] = u32(input.m3[2].y); + output[12] = u32(input.m3[2].z); + + output[13] = u32(input.m4[0].x); + output[14] = u32(input.m4[0].y); + output[15] = u32(input.m4[0].z); + output[16] = u32(input.m4[0].w); + output[17] = u32(input.m4[1].x); + output[18] = u32(input.m4[1].y); + output[19] = u32(input.m4[1].z); + output[20] = u32(input.m4[1].w); + output[21] = u32(input.m4[2].x); + output[22] = u32(input.m4[2].y); + output[23] = u32(input.m4[2].z); + output[24] = u32(input.m4[2].w); + output[25] = u32(input.m4[3].x); + output[26] = u32(input.m4[3].y); + output[27] = u32(input.m4[3].z); + output[28] = u32(input.m4[3].w); + ", + ); + + tests.push(ShaderTest::new( + "f16 matrix alignment".into(), + members.into(), + direct, + &(0..32).map(|x| f16asu16(x as f32)).collect::>(), + &[ + 0, 1, // m2[0] + 2, 3, // m2[1] + // + 4, 5, 6, // m3[0] + 8, 9, 10, // m3[1] + 12, 13, 14, // m3[2] + // + 16, 17, 18, 19, // m4[0] + 20, 21, 22, 23, // m4[1] + 24, 25, 26, 27, // m4[2] + 28, 29, 30, 31, // m4[3] + ], + )); + } + + // // Nested struct and array test. + // // + // // This tries to exploit all the weird edge cases of the struct layout algorithm. + // // We dont go as all-out as the other nested struct test because + // // all our primitives are twice as wide and we have only so much buffer to spare. + // { + // let header = String::from( + // "struct Inner { scalar: u64, scalar32: u32, member: array, 2> }", + // ); + // let members = String::from("inner: Inner"); + // let direct = String::from( + // "\ + + // ", + // ); + + // tests.push( + // ShaderTest::new( + // String::from("nested struct and array"), + // members, + // direct, + // &input_values, + // &[ + // 0, 1, // inner.scalar + // 2, // inner.scalar32 + // 8, 9, 10, 11, 12, 13, // inner.member[0] + // 16, 17, 18, 19, 20, 21, // inner.member[1] + // ], + // ) + // .header(header), + // ); + // } + // { + // let header = String::from("struct Inner { scalar32: u32, scalar: u64, scalar32_2: u32 }"); + // let members = String::from("inner: Inner, vector: vec3"); + // let direct = String::from( + // "\ + // output[0] = bitcast(input.inner.scalar32); + // output[1] = u32(bitcast(input.inner.scalar) & 0xFFFFFFFF); + // output[2] = u32((bitcast(input.inner.scalar) >> 32) & 0xFFFFFFFF); + // output[3] = bitcast(input.inner.scalar32_2); + // output[4] = u32(bitcast(input.vector.x) & 0xFFFFFFFF); + // output[5] = u32((bitcast(input.vector.x) >> 32) & 0xFFFFFFFF); + // output[6] = u32(bitcast(input.vector.y) & 0xFFFFFFFF); + // output[7] = u32((bitcast(input.vector.y) >> 32) & 0xFFFFFFFF); + // output[8] = u32(bitcast(input.vector.z) & 0xFFFFFFFF); + // output[9] = u32((bitcast(input.vector.z) >> 32) & 0xFFFFFFFF); + // ", + // ); + + // tests.push( + // ShaderTest::new( + // String::from("nested struct and array"), + // members, + // direct, + // &input_values, + // &[ + // 0, // inner.scalar32 + // 2, 3, // inner.scalar + // 4, // inner.scalar32_2 + // 8, 9, 10, 11, 12, 13, // vector + // ], + // ) + // .header(header), + // ); + // } + + // Insert `enable f16;` header + tests + .into_iter() + .map(|test| test.header("enable f16;".into())) + .collect() +} diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index 30c4daad6d..a8d1329fac 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -383,6 +383,10 @@ pub fn create_validator( features.contains(wgt::Features::PUSH_CONSTANTS), ); caps.set(Caps::FLOAT64, features.contains(wgt::Features::SHADER_F64)); + caps.set( + Caps::SHADER_FLOAT16, + features.contains(wgt::Features::SHADER_F16), + ); caps.set( Caps::PRIMITIVE_INDEX, features.contains(wgt::Features::SHADER_PRIMITIVE_INDEX), diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 82b62c5161..50aa80ad28 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -425,6 +425,23 @@ impl super::Adapter { && features1.Int64ShaderOps.as_bool(), ); + let float16_supported = { + let mut features4 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS4::default(); + let hr = unsafe { + device.CheckFeatureSupport( + Direct3D12::D3D12_FEATURE_D3D12_OPTIONS4, // https://learn.microsoft.com/en-us/windows/win32/api/d3d12/ne-d3d12-d3d12_feature#syntax + ptr::from_mut(&mut features4).cast(), + size_of::() as _, + ) + }; + hr.is_ok() && features4.Native16BitShaderOpsSupported.as_bool() + }; + + features.set( + wgt::Features::SHADER_F16, + shader_model >= naga::back::hlsl::ShaderModel::V6_2 && float16_supported, + ); + features.set( wgt::Features::TEXTURE_INT64_ATOMIC, shader_model >= naga::back::hlsl::ShaderModel::V6_6 @@ -612,7 +629,7 @@ impl crate::Adapter for super::Adapter { unsafe fn open( &self, - _features: wgt::Features, + features: wgt::Features, limits: &wgt::Limits, memory_hints: &wgt::MemoryHints, ) -> Result, crate::DeviceError> { @@ -633,6 +650,7 @@ impl crate::Adapter for super::Adapter { let device = super::Device::new( self.device.clone(), queue.clone(), + features, limits, memory_hints, self.private_caps, diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 8b03df0e97..a3bfad3359 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -33,9 +33,11 @@ use crate::{ const NAGA_LOCATION_SEMANTIC: &[u8] = c"LOC".to_bytes(); impl super::Device { + #[allow(clippy::too_many_arguments)] pub(super) fn new( raw: Direct3D12::ID3D12Device, present_queue: Direct3D12::ID3D12CommandQueue, + features: wgt::Features, limits: &wgt::Limits, memory_hints: &wgt::MemoryHints, private_caps: super::PrivateCapabilities, @@ -178,6 +180,7 @@ impl super::Device { event: Event::create(false, false)?, }, private_caps, + features, shared: Arc::new(shared), rtv_pool: Mutex::new(rtv_pool), dsv_pool: Mutex::new(descriptor::CpuPool::new( diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 1df4886028..48fcbee6eb 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -634,6 +634,7 @@ pub struct Device { present_queue: Direct3D12::ID3D12CommandQueue, idler: Idler, private_caps: PrivateCapabilities, + features: wgt::Features, shared: Arc, // CPU only pools rtv_pool: Mutex, diff --git a/wgpu-hal/src/dx12/shader_compilation.rs b/wgpu-hal/src/dx12/shader_compilation.rs index 4574fac5d4..834409eefc 100644 --- a/wgpu-hal/src/dx12/shader_compilation.rs +++ b/wgpu-hal/src/dx12/shader_compilation.rs @@ -268,7 +268,7 @@ pub(super) fn compile_dxc( let raw_ep = OPCWSTR::new(raw_ep); let full_stage = OPCWSTR::new(full_stage); - let mut compile_args = arrayvec::ArrayVec::::new_const(); + let mut compile_args = arrayvec::ArrayVec::::new_const(); if let Some(source_name) = source_name.as_ref() { compile_args.push(source_name.ptr()) @@ -298,6 +298,10 @@ pub(super) fn compile_dxc( compile_args.push(Dxc::DXC_ARG_SKIP_OPTIMIZATIONS); } + if device.features.contains(wgt::Features::SHADER_F16) { + compile_args.push(windows::core::w!("-enable-16bit-types")); + } + let buffer = Dxc::DxcBuffer { Ptr: source.as_ptr().cast(), Size: source.len(), diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 8d315f042b..a438401277 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -399,6 +399,7 @@ impl PhysicalDeviceFeatures { vk::PhysicalDeviceShaderFloat16Int8Features::default().shader_float16(true), vk::PhysicalDevice16BitStorageFeatures::default() .storage_buffer16_bit_access(true) + .storage_input_output16(true) .uniform_and_storage_buffer16_bit_access(true), )) } else { @@ -720,7 +721,8 @@ impl PhysicalDeviceFeatures { F::SHADER_F16, f16_i8.shader_float16 != 0 && bit16.storage_buffer16_bit_access != 0 - && bit16.uniform_and_storage_buffer16_bit_access != 0, + && bit16.uniform_and_storage_buffer16_bit_access != 0 + && bit16.storage_input_output16 != 0, ); } @@ -1968,6 +1970,10 @@ impl super::Adapter { capabilities.push(spv::Capability::Int64); } + if features.contains(wgt::Features::SHADER_F16) { + capabilities.push(spv::Capability::Float16); + } + if features.intersects( wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS | wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX From c4e49c67fd7309d0c33287b9a53a9b5703eb82ee Mon Sep 17 00:00:00 2001 From: Connor Fitzgerald Date: Tue, 18 Mar 2025 13:53:24 -0400 Subject: [PATCH 2/4] Comments --- Cargo.lock | 14 +----- Cargo.toml | 3 +- naga/src/front/wgsl/error.rs | 8 +-- naga/src/front/wgsl/parse/mod.rs | 2 +- naga/src/front/wgsl/parse/number.rs | 2 +- naga/src/ir/mod.rs | 1 + naga/tests/wgsl_errors.rs | 18 ++++--- tests/gpu-tests/shader/struct_layout.rs | 67 ------------------------- wgpu-types/src/features.rs | 8 +-- 9 files changed, 25 insertions(+), 98 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a90f06c9a0..0a3ee03339 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1784,16 +1784,6 @@ name = "half" version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7db2ff139bba50379da6aa0766b52fdcb62cb5b263009b09ed58ba604e14bbd1" -dependencies = [ - "cfg-if", - "crunchy", -] - -[[package]] -name = "half-2" -version = "2.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c36518ae13d88b7cbdc61401256df5d9fc27921ea66353a16660869b47af8864" dependencies = [ "arbitrary", "bytemuck", @@ -2413,7 +2403,7 @@ dependencies = [ "codespan-reporting", "diff", "env_logger", - "half-2", + "half", "hashbrown", "hexf-parse", "hlsl-snapshots", @@ -4866,7 +4856,7 @@ dependencies = [ "env_logger", "futures-lite", "glam", - "half-2", + "half", "image", "itertools 0.13.0", "js-sys", diff --git a/Cargo.toml b/Cargo.toml index b2ccb84973..d8ad7a0381 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -108,8 +108,7 @@ fern = "0.7" flume = "0.11" futures-lite = "2" glam = "0.29" -# TODO: Use `half` directly again after is released upstream. -half = { package = "half-2", version = "2.4.1" } +half = "2.5" # We require 2.5 to have `Arbitrary` support. hashbrown = { version = "0.14.5", default-features = false, features = [ "ahash", "inline-more", diff --git a/naga/src/front/wgsl/error.rs b/naga/src/front/wgsl/error.rs index aae67a94b5..346cd8e6ce 100644 --- a/naga/src/front/wgsl/error.rs +++ b/naga/src/front/wgsl/error.rs @@ -1024,12 +1024,12 @@ impl<'a> Error<'a> { )], }, Error::EnableExtensionNotEnabled { kind, span } => ParseError { - message: format!("the `{}` language extension is not enabled", kind.to_ident()), + message: format!("the `{}` enable extension is not enabled", kind.to_ident()), labels: vec![( span, format!( concat!( - "the `{0}` language extension is needed for this functionality, ", + "the `{}` \"Enable Extension\" is needed for this functionality, ", "but it is not currently enabled." ), kind.to_ident() @@ -1039,7 +1039,7 @@ impl<'a> Error<'a> { notes: if let EnableExtension::Unimplemented(kind) = kind { vec![format!( concat!( - "This enable-extension is not yet implemented. ", + "This \"Enable Extension\" is not yet implemented. ", "Let Naga maintainers know that you ran into this at ", ", ", "so they can prioritize it!" @@ -1049,7 +1049,7 @@ impl<'a> Error<'a> { } else { vec![ format!( - "You can enable this extension by adding `enable {};` at the top of the shader.", + "You can enable this extension by adding `enable {};` at the top of the shader, before any other items.", kind.to_ident() ), ] diff --git a/naga/src/front/wgsl/parse/mod.rs b/naga/src/front/wgsl/parse/mod.rs index cb59a82a24..fe878969b5 100644 --- a/naga/src/front/wgsl/parse/mod.rs +++ b/naga/src/front/wgsl/parse/mod.rs @@ -837,7 +837,7 @@ impl Parser { let _ = lexer.next(); let num = res.map_err(|err| Error::BadNumber(span, err))?; - if let Some(enable_extension) = num.required_enable_extension() { + if let Some(enable_extension) = num.requires_enable_extension() { if !lexer.enable_extensions.contains(enable_extension) { return Err(Box::new(Error::EnableExtensionNotEnabled { kind: enable_extension.into(), diff --git a/naga/src/front/wgsl/parse/number.rs b/naga/src/front/wgsl/parse/number.rs index 776c357343..ea3e43e3dd 100644 --- a/naga/src/front/wgsl/parse/number.rs +++ b/naga/src/front/wgsl/parse/number.rs @@ -29,7 +29,7 @@ pub enum Number { } impl Number { - pub(super) const fn required_enable_extension(&self) -> Option { + pub(super) const fn requires_enable_extension(&self) -> Option { match *self { Number::F16(_) => Some(ImplementedEnableExtension::F16), _ => None, diff --git a/naga/src/ir/mod.rs b/naga/src/ir/mod.rs index ed7fd7fece..167871a8c0 100644 --- a/naga/src/ir/mod.rs +++ b/naga/src/ir/mod.rs @@ -819,6 +819,7 @@ pub enum Literal { F64(f64), /// May not be NaN or infinity. F32(f32), + /// May not be NaN or infinity. F16(f16), U32(u32), I32(i32), diff --git a/naga/tests/wgsl_errors.rs b/naga/tests/wgsl_errors.rs index e1d0a42234..a9cdfbb25a 100644 --- a/naga/tests/wgsl_errors.rs +++ b/naga/tests/wgsl_errors.rs @@ -847,13 +847,13 @@ fn float16_requires_enable() { r#" const a: f16 = 1.0; "#, - r#"error: the `f16` language extension is not enabled + r#"error: the `f16` enable extension is not enabled ┌─ wgsl:2:22 │ 2 │ const a: f16 = 1.0; - │ ^^^ the `f16` language extension is needed for this functionality, but it is not currently enabled. + │ ^^^ the `f16` "Enable Extension" is needed for this functionality, but it is not currently enabled. │ - = note: You can enable this extension by adding `enable f16;` at the top of the shader. + = note: You can enable this extension by adding `enable f16;` at the top of the shader, before any other items. "#, ); @@ -862,13 +862,13 @@ fn float16_requires_enable() { r#" const a = 1.0h; "#, - r#"error: the `f16` language extension is not enabled + r#"error: the `f16` enable extension is not enabled ┌─ wgsl:2:23 │ 2 │ const a = 1.0h; - │ ^^^^ the `f16` language extension is needed for this functionality, but it is not currently enabled. + │ ^^^^ the `f16` "Enable Extension" is needed for this functionality, but it is not currently enabled. │ - = note: You can enable this extension by adding `enable f16;` at the top of the shader. + = note: You can enable this extension by adding `enable f16;` at the top of the shader, before any other items. "#, ); @@ -1461,11 +1461,13 @@ fn invalid_blend_src() { @fragment fn main(@builtin(position) position: vec4) -> FragmentOutput { return FragmentOutput(vec4(0.0), vec4(0.0)); } ", - r###"error: `dual_source_blending` enable-extension is not enabled + r###"error: the `dual_source_blending` enable extension is not enabled ┌─ wgsl:3:27 │ 3 │ @location(0) @blend_src(0) output0: vec4, - │ ^^^^^^^^^ the `dual_source_blending` enable-extension is needed for this functionality, but it is not currently enabled + │ ^^^^^^^^^ the `dual_source_blending` "Enable Extension" is needed for this functionality, but it is not currently enabled. + │ + = note: You can enable this extension by adding `enable dual_source_blending;` at the top of the shader, before any other items. "###, ); diff --git a/tests/gpu-tests/shader/struct_layout.rs b/tests/gpu-tests/shader/struct_layout.rs index 25bd32bf77..cf57e2a04e 100644 --- a/tests/gpu-tests/shader/struct_layout.rs +++ b/tests/gpu-tests/shader/struct_layout.rs @@ -617,73 +617,6 @@ fn create_16bit_struct_layout_test() -> Vec { )); } - // // Nested struct and array test. - // // - // // This tries to exploit all the weird edge cases of the struct layout algorithm. - // // We dont go as all-out as the other nested struct test because - // // all our primitives are twice as wide and we have only so much buffer to spare. - // { - // let header = String::from( - // "struct Inner { scalar: u64, scalar32: u32, member: array, 2> }", - // ); - // let members = String::from("inner: Inner"); - // let direct = String::from( - // "\ - - // ", - // ); - - // tests.push( - // ShaderTest::new( - // String::from("nested struct and array"), - // members, - // direct, - // &input_values, - // &[ - // 0, 1, // inner.scalar - // 2, // inner.scalar32 - // 8, 9, 10, 11, 12, 13, // inner.member[0] - // 16, 17, 18, 19, 20, 21, // inner.member[1] - // ], - // ) - // .header(header), - // ); - // } - // { - // let header = String::from("struct Inner { scalar32: u32, scalar: u64, scalar32_2: u32 }"); - // let members = String::from("inner: Inner, vector: vec3"); - // let direct = String::from( - // "\ - // output[0] = bitcast(input.inner.scalar32); - // output[1] = u32(bitcast(input.inner.scalar) & 0xFFFFFFFF); - // output[2] = u32((bitcast(input.inner.scalar) >> 32) & 0xFFFFFFFF); - // output[3] = bitcast(input.inner.scalar32_2); - // output[4] = u32(bitcast(input.vector.x) & 0xFFFFFFFF); - // output[5] = u32((bitcast(input.vector.x) >> 32) & 0xFFFFFFFF); - // output[6] = u32(bitcast(input.vector.y) & 0xFFFFFFFF); - // output[7] = u32((bitcast(input.vector.y) >> 32) & 0xFFFFFFFF); - // output[8] = u32(bitcast(input.vector.z) & 0xFFFFFFFF); - // output[9] = u32((bitcast(input.vector.z) >> 32) & 0xFFFFFFFF); - // ", - // ); - - // tests.push( - // ShaderTest::new( - // String::from("nested struct and array"), - // members, - // direct, - // &input_values, - // &[ - // 0, // inner.scalar32 - // 2, 3, // inner.scalar - // 4, // inner.scalar32_2 - // 8, 9, 10, 11, 12, 13, // vector - // ], - // ) - // .header(header), - // ); - // } - // Insert `enable f16;` header tests .into_iter() diff --git a/wgpu-types/src/features.rs b/wgpu-types/src/features.rs index c6e98e6566..b375b3c26c 100644 --- a/wgpu-types/src/features.rs +++ b/wgpu-types/src/features.rs @@ -1340,18 +1340,20 @@ bitflags_array! { /// This is a web and native feature. const INDIRECT_FIRST_INSTANCE = WEBGPU_FEATURE_INDIRECT_FIRST_INSTANCE; - /// Allows shaders to acquire the FP16 ability + /// Allows shaders to use 16-bit floating point types. You may use them uniform buffers, + /// storage buffers, and local variables. You may not use them in push constants. /// - /// Note: this is not supported in `naga` yet, only through `spirv-passthrough` right now. + /// In order to use this in WGSL shaders, you must add `enable f16;` to the top of your shader, + /// before any global items. /// /// Supported Platforms: /// - Vulkan /// - Metal + /// - DX12 /// /// This is a web and native feature. const SHADER_F16 = WEBGPU_FEATURE_SHADER_F16; - /// Allows for usage of textures of format [`TextureFormat::Rg11b10Ufloat`] as a render target /// /// Supported platforms: From 136180582c3ab120554851461de9029eb1b8e00d Mon Sep 17 00:00:00 2001 From: Connor Fitzgerald Date: Tue, 18 Mar 2025 14:18:34 -0400 Subject: [PATCH 3/4] Bump version of num-traits to 0.2.16 --- naga/Cargo.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/naga/Cargo.toml b/naga/Cargo.toml index b722e7a2cb..2f57895e11 100644 --- a/naga/Cargo.toml +++ b/naga/Cargo.toml @@ -95,7 +95,8 @@ half = { workspace = true, features = ["arbitrary", "num-traits"] } rustc-hash.workspace = true indexmap.workspace = true log = "0.4" -num-traits = "0.2" +# We require 0.2.16 to have `FromBytes` and `ToBytes` which `half` require. +num-traits = "0.2.16" strum = { workspace = true, optional = true } spirv = { version = "0.3", optional = true } thiserror.workspace = true From 04dfdd5f74fec9940e45307c1677bf43282dce66 Mon Sep 17 00:00:00 2001 From: Erich Gubler Date: Wed, 19 Mar 2025 09:53:47 -0400 Subject: [PATCH 4/4] fixup! Bump version of num-traits to 0.2.16 --- naga/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/naga/Cargo.toml b/naga/Cargo.toml index 2f57895e11..445ad73c17 100644 --- a/naga/Cargo.toml +++ b/naga/Cargo.toml @@ -95,7 +95,7 @@ half = { workspace = true, features = ["arbitrary", "num-traits"] } rustc-hash.workspace = true indexmap.workspace = true log = "0.4" -# We require 0.2.16 to have `FromBytes` and `ToBytes` which `half` require. +# `half` requires 0.2.16 for `FromBytes` and `ToBytes`. num-traits = "0.2.16" strum = { workspace = true, optional = true } spirv = { version = "0.3", optional = true }