From ef5bf361c12e55591c81db2de3c799af269ede91 Mon Sep 17 00:00:00 2001
From: Connor Fitzgerald <connorwadefitzgerald@gmail.com>
Date: Wed, 19 Feb 2025 21:50:37 -0500
Subject: [PATCH 1/4] Implement F16 Support

Co-authored-by: FL33TW00D <fleetwoodpersonal@gmail.com>
Co-authored-by: ErichDonGubler <erichdongubler@gmail.com>
---
 Cargo.lock                                    |  24 +
 Cargo.toml                                    |   2 +
 naga/Cargo.toml                               |  11 +-
 naga/src/back/glsl/mod.rs                     |   3 +
 naga/src/back/hlsl/writer.rs                  |   1 +
 naga/src/back/msl/writer.rs                   |  34 +-
 naga/src/back/spv/block.rs                    | 386 ++++---
 naga/src/back/spv/instructions.rs             |   4 +
 naga/src/back/spv/writer.rs                   |  26 +
 naga/src/back/wgsl/writer.rs                  |  40 +
 naga/src/common/wgsl/to_wgsl.rs               |   1 +
 naga/src/front/glsl/error.rs                  |  12 +-
 naga/src/front/glsl/offset.rs                 |  24 +-
 naga/src/front/glsl/types.rs                  |   5 +
 naga/src/front/spv/mod.rs                     |   5 +
 naga/src/front/wgsl/error.rs                  |  16 +-
 naga/src/front/wgsl/lower/mod.rs              |   1 +
 naga/src/front/wgsl/parse/conv.rs             |  27 +-
 .../wgsl/parse/directive/enable_extension.rs  |  24 +-
 naga/src/front/wgsl/parse/lexer.rs            |  40 +-
 naga/src/front/wgsl/parse/mod.rs              | 189 +++-
 naga/src/front/wgsl/parse/number.rs           |  23 +-
 naga/src/ir/mod.rs                            |   2 +
 naga/src/proc/constant_evaluator.rs           | 118 ++-
 naga/src/proc/mod.rs                          |   4 +
 naga/src/proc/type_methods.rs                 |   4 +
 naga/src/valid/expression.rs                  |   2 +-
 naga/src/valid/interface.rs                   |   7 +-
 naga/src/valid/mod.rs                         |   4 +-
 naga/src/valid/type.rs                        |  71 +-
 naga/tests/in/glsl/f16-glsl.comp              |  57 +
 naga/tests/in/glsl/f16-glsl.toml              |   1 +
 naga/tests/in/spv/f16-spv.comp                |  57 +
 naga/tests/in/spv/f16-spv.spvasm              | 130 +++
 naga/tests/in/spv/f16-spv.toml                |   1 +
 naga/tests/in/wgsl/extra.wgsl                 |   2 +-
 naga/tests/in/wgsl/f16.toml                   |  15 +
 naga/tests/in/wgsl/f16.wgsl                   | 127 +++
 naga/tests/out/hlsl/f16.hlsl                  | 351 +++++++
 naga/tests/out/hlsl/f16.ron                   |  12 +
 naga/tests/out/msl/extra.msl                  |   2 +-
 naga/tests/out/msl/f16.msl                    | 177 ++++
 .../spv/atomicCompareExchange-int64.spvasm    | 224 ++--
 naga/tests/out/spv/extra.spvasm               | 118 ++-
 naga/tests/out/spv/f16.spvasm                 | 633 ++++++++++++
 naga/tests/out/spv/image.spvasm               | 976 +++++++++---------
 naga/tests/out/wgsl/extra.wgsl                |   2 +-
 naga/tests/out/wgsl/f16-glsl.comp.wgsl        |  47 +
 naga/tests/out/wgsl/f16-spv.wgsl              |  46 +
 naga/tests/out/wgsl/f16.wgsl                  | 167 +++
 naga/tests/spirv_capabilities.rs              |   5 +
 naga/tests/wgsl_errors.rs                     | 107 +-
 tests/Cargo.toml                              |   1 +
 tests/gpu-tests/shader/mod.rs                 |   4 +-
 tests/gpu-tests/shader/struct_layout.rs       | 383 +++++--
 wgpu-core/src/device/mod.rs                   |   4 +
 wgpu-hal/src/dx12/adapter.rs                  |  20 +-
 wgpu-hal/src/dx12/device.rs                   |   3 +
 wgpu-hal/src/dx12/mod.rs                      |   1 +
 wgpu-hal/src/dx12/shader_compilation.rs       |   6 +-
 wgpu-hal/src/vulkan/adapter.rs                |   8 +-
 61 files changed, 3807 insertions(+), 990 deletions(-)
 create mode 100644 naga/tests/in/glsl/f16-glsl.comp
 create mode 100644 naga/tests/in/glsl/f16-glsl.toml
 create mode 100644 naga/tests/in/spv/f16-spv.comp
 create mode 100644 naga/tests/in/spv/f16-spv.spvasm
 create mode 100644 naga/tests/in/spv/f16-spv.toml
 create mode 100644 naga/tests/in/wgsl/f16.toml
 create mode 100644 naga/tests/in/wgsl/f16.wgsl
 create mode 100644 naga/tests/out/hlsl/f16.hlsl
 create mode 100644 naga/tests/out/hlsl/f16.ron
 create mode 100644 naga/tests/out/msl/f16.msl
 create mode 100644 naga/tests/out/spv/f16.spvasm
 create mode 100644 naga/tests/out/wgsl/f16-glsl.comp.wgsl
 create mode 100644 naga/tests/out/wgsl/f16-spv.wgsl
 create mode 100644 naga/tests/out/wgsl/f16.wgsl

diff --git a/Cargo.lock b/Cargo.lock
index aae307fb04..a90f06c9a0 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1789,6 +1789,20 @@ dependencies = [
  "crunchy",
 ]
 
+[[package]]
+name = "half-2"
+version = "2.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c36518ae13d88b7cbdc61401256df5d9fc27921ea66353a16660869b47af8864"
+dependencies = [
+ "arbitrary",
+ "bytemuck",
+ "cfg-if",
+ "crunchy",
+ "num-traits",
+ "serde",
+]
+
 [[package]]
 name = "hashbrown"
 version = "0.14.5"
@@ -2193,6 +2207,12 @@ dependencies = [
  "windows-targets 0.48.5",
 ]
 
+[[package]]
+name = "libm"
+version = "0.2.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
+
 [[package]]
 name = "libredox"
 version = "0.1.3"
@@ -2393,12 +2413,14 @@ dependencies = [
  "codespan-reporting",
  "diff",
  "env_logger",
+ "half-2",
  "hashbrown",
  "hexf-parse",
  "hlsl-snapshots",
  "indexmap",
  "itertools 0.13.0",
  "log",
+ "num-traits",
  "petgraph 0.7.1",
  "pp-rs",
  "ron",
@@ -2574,6 +2596,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
 dependencies = [
  "autocfg",
+ "libm",
 ]
 
 [[package]]
@@ -4843,6 +4866,7 @@ dependencies = [
  "env_logger",
  "futures-lite",
  "glam",
+ "half-2",
  "image",
  "itertools 0.13.0",
  "js-sys",
diff --git a/Cargo.toml b/Cargo.toml
index c2e4eea54d..b2ccb84973 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -108,6 +108,8 @@ fern = "0.7"
 flume = "0.11"
 futures-lite = "2"
 glam = "0.29"
+# TODO: Use `half` directly again after <https://github.com/starkat99/half-rs/pull/111> is released upstream.
+half = { package = "half-2", version = "2.4.1" }
 hashbrown = { version = "0.14.5", default-features = false, features = [
     "ahash",
     "inline-more",
diff --git a/naga/Cargo.toml b/naga/Cargo.toml
index 131e0c6291..b722e7a2cb 100644
--- a/naga/Cargo.toml
+++ b/naga/Cargo.toml
@@ -41,10 +41,17 @@ msl-out = []
 ## If you want to enable MSL output it regardless of the target platform, use `naga/msl-out`.
 msl-out-if-target-apple = []
 
-serialize = ["dep:serde", "bitflags/serde", "hashbrown/serde", "indexmap/serde"]
+serialize = [
+    "dep:serde",
+    "bitflags/serde",
+    "half/serde",
+    "hashbrown/serde",
+    "indexmap/serde",
+]
 deserialize = [
     "dep:serde",
     "bitflags/serde",
+    "half/serde",
     "hashbrown/serde",
     "indexmap/serde",
 ]
@@ -84,9 +91,11 @@ termcolor = { version = "1.4.1" }
 # https://github.com/brendanzab/codespan/commit/e99c867339a877731437e7ee6a903a3d03b5439e
 codespan-reporting = { version = "0.11.0" }
 hashbrown.workspace = true
+half = { workspace = true, features = ["arbitrary", "num-traits"] }
 rustc-hash.workspace = true
 indexmap.workspace = true
 log = "0.4"
+num-traits = "0.2"
 strum = { workspace = true, optional = true }
 spirv = { version = "0.3", optional = true }
 thiserror.workspace = true
diff --git a/naga/src/back/glsl/mod.rs b/naga/src/back/glsl/mod.rs
index e517e84911..403cefd566 100644
--- a/naga/src/back/glsl/mod.rs
+++ b/naga/src/back/glsl/mod.rs
@@ -2703,6 +2703,9 @@ impl<'a, W: Write> Writer<'a, W> {
                     // decimal part even it's zero which is needed for a valid glsl float constant
                     crate::Literal::F64(value) => write!(self.out, "{value:?}LF")?,
                     crate::Literal::F32(value) => write!(self.out, "{value:?}")?,
+                    crate::Literal::F16(_) => {
+                        return Err(Error::Custom("GLSL has no 16-bit float type".into()));
+                    }
                     // Unsigned integers need a `u` at the end
                     //
                     // While `core` doesn't necessarily need it, it's allowed and since `es` needs it we
diff --git a/naga/src/back/hlsl/writer.rs b/naga/src/back/hlsl/writer.rs
index 6cbba65f95..c372342449 100644
--- a/naga/src/back/hlsl/writer.rs
+++ b/naga/src/back/hlsl/writer.rs
@@ -2628,6 +2628,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
                 // decimal part even it's zero
                 crate::Literal::F64(value) => write!(self.out, "{value:?}L")?,
                 crate::Literal::F32(value) => write!(self.out, "{value:?}")?,
+                crate::Literal::F16(value) => write!(self.out, "{value:?}h")?,
                 crate::Literal::U32(value) => write!(self.out, "{value}u")?,
                 // HLSL has no suffix for explicit i32 literals, but not using any suffix
                 // makes the type ambiguous which prevents overload resolution from
diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs
index 8d7fab3dd1..0c4f219a1b 100644
--- a/naga/src/back/msl/writer.rs
+++ b/naga/src/back/msl/writer.rs
@@ -8,6 +8,9 @@ use core::{
     fmt::{Display, Error as FmtError, Formatter, Write},
     iter,
 };
+use num_traits::real::Real as _;
+
+use half::f16;
 
 use super::{sampler as sm, Error, LocationMode, Options, PipelineOptions, TranslationInfo};
 use crate::{
@@ -182,9 +185,11 @@ impl Display for TypeContext<'_> {
                 write!(out, "{}::atomic_{}", NAMESPACE, scalar.to_msl_name())
             }
             crate::TypeInner::Vector { size, scalar } => put_numeric_type(out, scalar, &[size]),
-            crate::TypeInner::Matrix { columns, rows, .. } => {
-                put_numeric_type(out, crate::Scalar::F32, &[rows, columns])
-            }
+            crate::TypeInner::Matrix {
+                columns,
+                rows,
+                scalar,
+            } => put_numeric_type(out, scalar, &[rows, columns]),
             crate::TypeInner::Pointer { base, space } => {
                 let sub = Self {
                     handle: base,
@@ -425,8 +430,12 @@ impl crate::Scalar {
         match self {
             Self {
                 kind: Sk::Float,
-                width: _,
+                width: 4,
             } => "float",
+            Self {
+                kind: Sk::Float,
+                width: 2,
+            } => "half",
             Self {
                 kind: Sk::Sint,
                 width: 4,
@@ -483,7 +492,7 @@ fn should_pack_struct_member(
     match *ty_inner {
         crate::TypeInner::Vector {
             size: crate::VectorSize::Tri,
-            scalar: scalar @ crate::Scalar { width: 4, .. },
+            scalar: scalar @ crate::Scalar { width: 4 | 2, .. },
         } if is_tight => Some(scalar),
         _ => None,
     }
@@ -1459,6 +1468,21 @@ impl<W: Write> Writer<W> {
                 crate::Literal::F64(_) => {
                     return Err(Error::CapabilityNotSupported(valid::Capabilities::FLOAT64))
                 }
+                crate::Literal::F16(value) => {
+                    if value.is_infinite() {
+                        let sign = if value.is_sign_negative() { "-" } else { "" };
+                        write!(self.out, "{sign}INFINITY")?;
+                    } else if value.is_nan() {
+                        write!(self.out, "NAN")?;
+                    } else {
+                        let suffix = if value.fract() == f16::from_f32(0.0) {
+                            ".0h"
+                        } else {
+                            "h"
+                        };
+                        write!(self.out, "{value}{suffix}")?;
+                    }
+                }
                 crate::Literal::F32(value) => {
                     if value.is_infinite() {
                         let sign = if value.is_sign_negative() { "-" } else { "" };
diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs
index 9acb53d90d..858cc2b71a 100644
--- a/naga/src/back/spv/block.rs
+++ b/naga/src/back/spv/block.rs
@@ -4,6 +4,7 @@ Implementations for `BlockContext` methods.
 
 use alloc::vec::Vec;
 
+use arrayvec::ArrayVec;
 use spirv::Word;
 
 use super::{
@@ -1612,159 +1613,7 @@ impl BlockContext<'_> {
                 expr,
                 kind,
                 convert,
-            } => {
-                use crate::ScalarKind as Sk;
-
-                let expr_id = self.cached[expr];
-                let (src_scalar, src_size, is_matrix) =
-                    match *self.fun_info[expr].ty.inner_with(&self.ir_module.types) {
-                        crate::TypeInner::Scalar(scalar) => (scalar, None, false),
-                        crate::TypeInner::Vector { scalar, size } => (scalar, Some(size), false),
-                        crate::TypeInner::Matrix { scalar, .. } => (scalar, None, true),
-                        ref other => {
-                            log::error!("As source {:?}", other);
-                            return Err(Error::Validation("Unexpected Expression::As source"));
-                        }
-                    };
-
-                enum Cast {
-                    Identity,
-                    Unary(spirv::Op),
-                    Binary(spirv::Op, Word),
-                    Ternary(spirv::Op, Word, Word),
-                }
-
-                let cast = if is_matrix {
-                    // we only support identity casts for matrices
-                    Cast::Unary(spirv::Op::CopyObject)
-                } else {
-                    match (src_scalar.kind, kind, convert) {
-                        // Filter out identity casts. Some Adreno drivers are
-                        // confused by no-op OpBitCast instructions.
-                        (src_kind, kind, convert)
-                            if src_kind == kind
-                                && convert.filter(|&width| width != src_scalar.width).is_none() =>
-                        {
-                            Cast::Identity
-                        }
-                        (Sk::Bool, Sk::Bool, _) => Cast::Unary(spirv::Op::CopyObject),
-                        (_, _, None) => Cast::Unary(spirv::Op::Bitcast),
-                        // casting to a bool - generate `OpXxxNotEqual`
-                        (_, Sk::Bool, Some(_)) => {
-                            let op = match src_scalar.kind {
-                                Sk::Sint | Sk::Uint => spirv::Op::INotEqual,
-                                Sk::Float => spirv::Op::FUnordNotEqual,
-                                Sk::Bool | Sk::AbstractInt | Sk::AbstractFloat => unreachable!(),
-                            };
-                            let zero_scalar_id =
-                                self.writer.get_constant_scalar_with(0, src_scalar)?;
-                            let zero_id = match src_size {
-                                Some(size) => {
-                                    let ty = LocalType::Numeric(NumericType::Vector {
-                                        size,
-                                        scalar: src_scalar,
-                                    })
-                                    .into();
-
-                                    self.temp_list.clear();
-                                    self.temp_list.resize(size as _, zero_scalar_id);
-
-                                    self.writer.get_constant_composite(ty, &self.temp_list)
-                                }
-                                None => zero_scalar_id,
-                            };
-
-                            Cast::Binary(op, zero_id)
-                        }
-                        // casting from a bool - generate `OpSelect`
-                        (Sk::Bool, _, Some(dst_width)) => {
-                            let dst_scalar = crate::Scalar {
-                                kind,
-                                width: dst_width,
-                            };
-                            let zero_scalar_id =
-                                self.writer.get_constant_scalar_with(0, dst_scalar)?;
-                            let one_scalar_id =
-                                self.writer.get_constant_scalar_with(1, dst_scalar)?;
-                            let (accept_id, reject_id) = match src_size {
-                                Some(size) => {
-                                    let ty = LocalType::Numeric(NumericType::Vector {
-                                        size,
-                                        scalar: dst_scalar,
-                                    })
-                                    .into();
-
-                                    self.temp_list.clear();
-                                    self.temp_list.resize(size as _, zero_scalar_id);
-
-                                    let vec0_id =
-                                        self.writer.get_constant_composite(ty, &self.temp_list);
-
-                                    self.temp_list.fill(one_scalar_id);
-
-                                    let vec1_id =
-                                        self.writer.get_constant_composite(ty, &self.temp_list);
-
-                                    (vec1_id, vec0_id)
-                                }
-                                None => (one_scalar_id, zero_scalar_id),
-                            };
-
-                            Cast::Ternary(spirv::Op::Select, accept_id, reject_id)
-                        }
-                        (Sk::Float, Sk::Uint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToU),
-                        (Sk::Float, Sk::Sint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToS),
-                        (Sk::Float, Sk::Float, Some(dst_width))
-                            if src_scalar.width != dst_width =>
-                        {
-                            Cast::Unary(spirv::Op::FConvert)
-                        }
-                        (Sk::Sint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertSToF),
-                        (Sk::Sint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => {
-                            Cast::Unary(spirv::Op::SConvert)
-                        }
-                        (Sk::Uint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertUToF),
-                        (Sk::Uint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => {
-                            Cast::Unary(spirv::Op::UConvert)
-                        }
-                        (Sk::Uint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => {
-                            Cast::Unary(spirv::Op::SConvert)
-                        }
-                        (Sk::Sint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => {
-                            Cast::Unary(spirv::Op::UConvert)
-                        }
-                        // We assume it's either an identity cast, or int-uint.
-                        _ => Cast::Unary(spirv::Op::Bitcast),
-                    }
-                };
-
-                let id = self.gen_id();
-                let instruction = match cast {
-                    Cast::Identity => None,
-                    Cast::Unary(op) => Some(Instruction::unary(op, result_type_id, id, expr_id)),
-                    Cast::Binary(op, operand) => Some(Instruction::binary(
-                        op,
-                        result_type_id,
-                        id,
-                        expr_id,
-                        operand,
-                    )),
-                    Cast::Ternary(op, op1, op2) => Some(Instruction::ternary(
-                        op,
-                        result_type_id,
-                        id,
-                        expr_id,
-                        op1,
-                        op2,
-                    )),
-                };
-                if let Some(instruction) = instruction {
-                    block.body.push(instruction);
-                    id
-                } else {
-                    expr_id
-                }
-            }
+            } => self.write_as_expression(expr, convert, kind, block, result_type_id)?,
             crate::Expression::ImageLoad {
                 image,
                 coordinate,
@@ -1925,6 +1774,237 @@ impl BlockContext<'_> {
         Ok(())
     }
 
+    /// Helper which focuses on generating the `As` expressions and the various conversions
+    /// that need to happen because of that.
+    fn write_as_expression(
+        &mut self,
+        expr: Handle<crate::Expression>,
+        convert: Option<u8>,
+        kind: crate::ScalarKind,
+
+        block: &mut Block,
+        result_type_id: u32,
+    ) -> Result<u32, Error> {
+        use crate::ScalarKind as Sk;
+        let expr_id = self.cached[expr];
+        let ty = self.fun_info[expr].ty.inner_with(&self.ir_module.types);
+
+        // Matrix casts needs special treatment in SPIR-V, as the cast functions
+        // can take vectors or scalars, but not matrices. In order to cast a matrix
+        // we need to cast each column of the matrix individually and construct a new
+        // matrix from the converted columns.
+        if let crate::TypeInner::Matrix {
+            columns,
+            rows,
+            scalar,
+        } = *ty
+        {
+            let Some(convert) = convert else {
+                // No conversion needs to be done, passes through.
+                return Ok(expr_id);
+            };
+
+            if convert == scalar.width {
+                // No conversion needs to be done, passes through.
+                return Ok(expr_id);
+            }
+
+            if kind != Sk::Float {
+                // Only float conversions are supported for matrices.
+                return Err(Error::Validation("Matrices must be floats"));
+            }
+
+            // Type of each extracted column
+            let column_src_ty =
+                self.get_type_id(LookupType::Local(LocalType::Numeric(NumericType::Vector {
+                    size: rows,
+                    scalar,
+                })));
+
+            // Type of the column after conversion
+            let column_dst_ty =
+                self.get_type_id(LookupType::Local(LocalType::Numeric(NumericType::Vector {
+                    size: rows,
+                    scalar: crate::Scalar {
+                        kind,
+                        width: convert,
+                    },
+                })));
+
+            let mut components = ArrayVec::<Word, 4>::new();
+
+            for column in 0..columns as usize {
+                let column_id = self.gen_id();
+                block.body.push(Instruction::composite_extract(
+                    column_src_ty,
+                    column_id,
+                    expr_id,
+                    &[column as u32],
+                ));
+
+                let column_conv_id = self.gen_id();
+                block.body.push(Instruction::unary(
+                    spirv::Op::FConvert,
+                    column_dst_ty,
+                    column_conv_id,
+                    column_id,
+                ));
+
+                components.push(column_conv_id);
+            }
+
+            let construct_id = self.gen_id();
+
+            block.body.push(Instruction::composite_construct(
+                result_type_id,
+                construct_id,
+                &components,
+            ));
+
+            return Ok(construct_id);
+        }
+
+        let (src_scalar, src_size) = match *ty {
+            crate::TypeInner::Scalar(scalar) => (scalar, None),
+            crate::TypeInner::Vector { scalar, size } => (scalar, Some(size)),
+            ref other => {
+                log::error!("As source {:?}", other);
+                return Err(Error::Validation("Unexpected Expression::As source"));
+            }
+        };
+
+        enum Cast {
+            Identity,
+            Unary(spirv::Op),
+            Binary(spirv::Op, Word),
+            Ternary(spirv::Op, Word, Word),
+        }
+        let cast = match (src_scalar.kind, kind, convert) {
+            // Filter out identity casts. Some Adreno drivers are
+            // confused by no-op OpBitCast instructions.
+            (src_kind, kind, convert)
+                if src_kind == kind
+                    && convert.filter(|&width| width != src_scalar.width).is_none() =>
+            {
+                Cast::Identity
+            }
+            (Sk::Bool, Sk::Bool, _) => Cast::Unary(spirv::Op::CopyObject),
+            (_, _, None) => Cast::Unary(spirv::Op::Bitcast),
+            // casting to a bool - generate `OpXxxNotEqual`
+            (_, Sk::Bool, Some(_)) => {
+                let op = match src_scalar.kind {
+                    Sk::Sint | Sk::Uint => spirv::Op::INotEqual,
+                    Sk::Float => spirv::Op::FUnordNotEqual,
+                    Sk::Bool | Sk::AbstractInt | Sk::AbstractFloat => unreachable!(),
+                };
+                let zero_scalar_id = self.writer.get_constant_scalar_with(0, src_scalar)?;
+                let zero_id = match src_size {
+                    Some(size) => {
+                        let ty = LocalType::Numeric(NumericType::Vector {
+                            size,
+                            scalar: src_scalar,
+                        })
+                        .into();
+
+                        self.temp_list.clear();
+                        self.temp_list.resize(size as _, zero_scalar_id);
+
+                        self.writer.get_constant_composite(ty, &self.temp_list)
+                    }
+                    None => zero_scalar_id,
+                };
+
+                Cast::Binary(op, zero_id)
+            }
+            // casting from a bool - generate `OpSelect`
+            (Sk::Bool, _, Some(dst_width)) => {
+                let dst_scalar = crate::Scalar {
+                    kind,
+                    width: dst_width,
+                };
+                let zero_scalar_id = self.writer.get_constant_scalar_with(0, dst_scalar)?;
+                let one_scalar_id = self.writer.get_constant_scalar_with(1, dst_scalar)?;
+                let (accept_id, reject_id) = match src_size {
+                    Some(size) => {
+                        let ty = LocalType::Numeric(NumericType::Vector {
+                            size,
+                            scalar: dst_scalar,
+                        })
+                        .into();
+
+                        self.temp_list.clear();
+                        self.temp_list.resize(size as _, zero_scalar_id);
+
+                        let vec0_id = self.writer.get_constant_composite(ty, &self.temp_list);
+
+                        self.temp_list.fill(one_scalar_id);
+
+                        let vec1_id = self.writer.get_constant_composite(ty, &self.temp_list);
+
+                        (vec1_id, vec0_id)
+                    }
+                    None => (one_scalar_id, zero_scalar_id),
+                };
+
+                Cast::Ternary(spirv::Op::Select, accept_id, reject_id)
+            }
+            (Sk::Float, Sk::Uint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToU),
+            (Sk::Float, Sk::Sint, Some(_)) => Cast::Unary(spirv::Op::ConvertFToS),
+            (Sk::Float, Sk::Float, Some(dst_width)) if src_scalar.width != dst_width => {
+                Cast::Unary(spirv::Op::FConvert)
+            }
+            (Sk::Sint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertSToF),
+            (Sk::Sint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => {
+                Cast::Unary(spirv::Op::SConvert)
+            }
+            (Sk::Uint, Sk::Float, Some(_)) => Cast::Unary(spirv::Op::ConvertUToF),
+            (Sk::Uint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => {
+                Cast::Unary(spirv::Op::UConvert)
+            }
+            (Sk::Uint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => {
+                Cast::Unary(spirv::Op::SConvert)
+            }
+            (Sk::Sint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => {
+                Cast::Unary(spirv::Op::UConvert)
+            }
+            // We assume it's either an identity cast, or int-uint.
+            _ => Cast::Unary(spirv::Op::Bitcast),
+        };
+        Ok(match cast {
+            Cast::Identity => expr_id,
+            Cast::Unary(op) => {
+                let id = self.gen_id();
+                block
+                    .body
+                    .push(Instruction::unary(op, result_type_id, id, expr_id));
+                id
+            }
+            Cast::Binary(op, operand) => {
+                let id = self.gen_id();
+                block.body.push(Instruction::binary(
+                    op,
+                    result_type_id,
+                    id,
+                    expr_id,
+                    operand,
+                ));
+                id
+            }
+            Cast::Ternary(op, op1, op2) => {
+                let id = self.gen_id();
+                block.body.push(Instruction::ternary(
+                    op,
+                    result_type_id,
+                    id,
+                    expr_id,
+                    op1,
+                    op2,
+                ));
+                id
+            }
+        })
+    }
+
     /// Build an `OpAccessChain` instruction.
     ///
     /// Emit any needed bounds-checking expressions to `block`.
diff --git a/naga/src/back/spv/instructions.rs b/naga/src/back/spv/instructions.rs
index 9dd19b59ca..a34790614e 100644
--- a/naga/src/back/spv/instructions.rs
+++ b/naga/src/back/spv/instructions.rs
@@ -405,6 +405,10 @@ impl super::Instruction {
         instruction
     }
 
+    pub(super) fn constant_16bit(result_type_id: Word, id: Word, low: Word) -> Self {
+        Self::constant(result_type_id, id, &[low])
+    }
+
     pub(super) fn constant_32bit(result_type_id: Word, id: Word, value: Word) -> Self {
         Self::constant(result_type_id, id, &[value])
     }
diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs
index d29f684420..7b2f15c367 100644
--- a/naga/src/back/spv/writer.rs
+++ b/naga/src/back/spv/writer.rs
@@ -1154,6 +1154,15 @@ impl Writer {
                 if bits == 64 {
                     self.capabilities_used.insert(spirv::Capability::Float64);
                 }
+                if bits == 16 {
+                    self.capabilities_used.insert(spirv::Capability::Float16);
+                    self.capabilities_used
+                        .insert(spirv::Capability::StorageBuffer16BitAccess);
+                    self.capabilities_used
+                        .insert(spirv::Capability::UniformAndStorageBuffer16BitAccess);
+                    self.capabilities_used
+                        .insert(spirv::Capability::StorageInputOutput16);
+                }
                 Instruction::type_float(id, bits)
             }
             Sk::Bool => Instruction::type_bool(id),
@@ -1222,6 +1231,19 @@ impl Writer {
                 )?;
                 self.use_extension("SPV_EXT_shader_atomic_float_add");
             }
+            // 16 bit floating-point support requires Float16 capability
+            crate::TypeInner::Matrix {
+                scalar: crate::Scalar::F16,
+                ..
+            }
+            | crate::TypeInner::Vector {
+                scalar: crate::Scalar::F16,
+                ..
+            }
+            | crate::TypeInner::Scalar(crate::Scalar::F16) => {
+                self.require_any("16 bit floating-point", &[spirv::Capability::Float16])?;
+                self.use_extension("SPV_KHR_16bit_storage");
+            }
             _ => {}
         }
         Ok(())
@@ -1498,6 +1520,10 @@ impl Writer {
                 Instruction::constant_64bit(type_id, id, bits as u32, (bits >> 32) as u32)
             }
             crate::Literal::F32(value) => Instruction::constant_32bit(type_id, id, value.to_bits()),
+            crate::Literal::F16(value) => {
+                let low = value.to_bits();
+                Instruction::constant_16bit(type_id, id, low as u32)
+            }
             crate::Literal::U32(value) => Instruction::constant_32bit(type_id, id, value),
             crate::Literal::I32(value) => Instruction::constant_32bit(type_id, id, value as u32),
             crate::Literal::U64(value) => {
diff --git a/naga/src/back/wgsl/writer.rs b/naga/src/back/wgsl/writer.rs
index cab4d1e5a8..62ad3aa644 100644
--- a/naga/src/back/wgsl/writer.rs
+++ b/naga/src/back/wgsl/writer.rs
@@ -5,6 +5,7 @@ use alloc::{
     vec::Vec,
 };
 use core::fmt::Write;
+use hashbrown::HashSet;
 
 use super::Error;
 use super::ToWgslIfImplemented as _;
@@ -129,6 +130,9 @@ impl<W: Write> Writer<W> {
         // Write all needed directives.
         self.write_enable_dual_source_blending_if_needed(module)?;
 
+        // Write all `enable` declarations
+        self.write_enable_declarations(module)?;
+
         // Write all structs
         for (handle, ty) in module.types.iter() {
             if let TypeInner::Struct { ref members, .. } = ty.inner {
@@ -222,6 +226,41 @@ impl<W: Write> Writer<W> {
         Ok(())
     }
 
+    /// Helper method which writes all the `enable` declarations
+    /// needed for a module.
+    fn write_enable_declarations(&mut self, module: &Module) -> BackendResult {
+        #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+        enum WrittenDeclarations {
+            F16,
+        }
+
+        let mut written_declarations = HashSet::new();
+
+        // Write all the `enable` declarations
+        for (_, ty) in module.types.iter() {
+            match ty.inner {
+                TypeInner::Scalar(scalar)
+                | TypeInner::Vector { scalar, .. }
+                | TypeInner::Matrix { scalar, .. } => {
+                    if scalar == crate::Scalar::F16
+                        && !written_declarations.contains(&WrittenDeclarations::F16)
+                    {
+                        writeln!(self.out, "enable f16;")?;
+                        written_declarations.insert(WrittenDeclarations::F16);
+                    }
+                }
+                _ => {}
+            }
+        }
+
+        if !written_declarations.is_empty() {
+            // Empty line for readability
+            writeln!(self.out)?;
+        }
+
+        Ok(())
+    }
+
     /// Helper method used to write
     /// [functions](https://gpuweb.github.io/gpuweb/wgsl/#functions)
     ///
@@ -1092,6 +1131,7 @@ impl<W: Write> Writer<W> {
 
         match expressions[expr] {
             Expression::Literal(literal) => match literal {
+                crate::Literal::F16(value) => write!(self.out, "{value}h")?,
                 crate::Literal::F32(value) => write!(self.out, "{value}f")?,
                 crate::Literal::U32(value) => write!(self.out, "{value}u")?,
                 crate::Literal::I32(value) => {
diff --git a/naga/src/common/wgsl/to_wgsl.rs b/naga/src/common/wgsl/to_wgsl.rs
index 08783ed067..73dea711e7 100644
--- a/naga/src/common/wgsl/to_wgsl.rs
+++ b/naga/src/common/wgsl/to_wgsl.rs
@@ -270,6 +270,7 @@ impl TryToWgsl for crate::Scalar {
         Some(match self {
             Scalar::F64 => "f64",
             Scalar::F32 => "f32",
+            Scalar::F16 => "f16",
             Scalar::I32 => "i32",
             Scalar::U32 => "u32",
             Scalar::I64 => "i64",
diff --git a/naga/src/front/glsl/error.rs b/naga/src/front/glsl/error.rs
index c0927e98a4..966c97e51e 100644
--- a/naga/src/front/glsl/error.rs
+++ b/naga/src/front/glsl/error.rs
@@ -109,9 +109,15 @@ pub enum ErrorKind {
     /// Unsupported matrix of the form matCx2
     ///
     /// Our IR expects matrices of the form matCx2 to have a stride of 8 however
-    /// matrices in the std140 layout have a stride of at least 16
-    #[error("unsupported matrix of the form matCx2 in std140 block layout")]
-    UnsupportedMatrixTypeInStd140,
+    /// matrices in the std140 layout have a stride of at least 16.
+    #[error("unsupported matrix of the form matCx2 (in this case mat{columns}x2) in std140 block layout. See https://github.com/gfx-rs/wgpu/issues/4375")]
+    UnsupportedMatrixWithTwoRowsInStd140 { columns: u8 },
+    /// Unsupported matrix of the form f16matCxR
+    ///
+    /// Our IR expects matrices of the form f16matCxR to have a stride of 4/8/8 depending on row-count,
+    /// however matrices in the std140 layout have a stride of at least 16.
+    #[error("unsupported matrix of the form f16matCxR (in this case f16mat{columns}x{rows}) in std140 block layout. See https://github.com/gfx-rs/wgpu/issues/4375")]
+    UnsupportedF16MatrixInStd140 { columns: u8, rows: u8 },
     /// A variable with the same name already exists in the current scope.
     #[error("Variable already declared: {0}")]
     VariableAlreadyDeclared(String),
diff --git a/naga/src/front/glsl/offset.rs b/naga/src/front/glsl/offset.rs
index b17dbbecd0..1a2d909be1 100644
--- a/naga/src/front/glsl/offset.rs
+++ b/naga/src/front/glsl/offset.rs
@@ -122,11 +122,25 @@ pub fn calculate_offset(
             }
 
             // See comment on the error kind
-            if StructLayout::Std140 == layout && rows == crate::VectorSize::Bi {
-                errors.push(Error {
-                    kind: ErrorKind::UnsupportedMatrixTypeInStd140,
-                    meta,
-                });
+            if StructLayout::Std140 == layout {
+                // Do the f16 test first, as it's more specific
+                if scalar == Scalar::F16 {
+                    errors.push(Error {
+                        kind: ErrorKind::UnsupportedF16MatrixInStd140 {
+                            columns: columns as u8,
+                            rows: rows as u8,
+                        },
+                        meta,
+                    });
+                }
+                if rows == crate::VectorSize::Bi {
+                    errors.push(Error {
+                        kind: ErrorKind::UnsupportedMatrixWithTwoRowsInStd140 {
+                            columns: columns as u8,
+                        },
+                        meta,
+                    });
+                }
             }
 
             (align, align * columns as u32)
diff --git a/naga/src/front/glsl/types.rs b/naga/src/front/glsl/types.rs
index 72b10a62a9..dffdf96e6d 100644
--- a/naga/src/front/glsl/types.rs
+++ b/naga/src/front/glsl/types.rs
@@ -12,6 +12,10 @@ pub fn parse_type(type_name: &str) -> Option<Type> {
             name: None,
             inner: TypeInner::Scalar(Scalar::BOOL),
         }),
+        "float16_t" => Some(Type {
+            name: None,
+            inner: TypeInner::Scalar(Scalar::F16),
+        }),
         "float" => Some(Type {
             name: None,
             inner: TypeInner::Scalar(Scalar::F32),
@@ -42,6 +46,7 @@ pub fn parse_type(type_name: &str) -> Option<Type> {
                     "i" => Scalar::I32,
                     "u" => Scalar::U32,
                     "d" => Scalar::F64,
+                    "f16" => Scalar::F16,
                     _ => return None,
                 })
             }
diff --git a/naga/src/front/spv/mod.rs b/naga/src/front/spv/mod.rs
index 0fe1396ccc..ca40b8efd0 100644
--- a/naga/src/front/spv/mod.rs
+++ b/naga/src/front/spv/mod.rs
@@ -39,6 +39,7 @@ use alloc::{borrow::ToOwned, format, string::String, vec, vec::Vec};
 use core::{convert::TryInto, mem, num::NonZeroU32};
 use std::path::PathBuf;
 
+use half::f16;
 use petgraph::graphmap::GraphMap;
 
 use super::atomic_upgrade::Upgrades;
@@ -82,6 +83,7 @@ pub const SUPPORTED_EXTENSIONS: &[&str] = &[
     "SPV_KHR_vulkan_memory_model",
     "SPV_KHR_multiview",
     "SPV_EXT_shader_atomic_float_add",
+    "SPV_KHR_16bit_storage",
 ];
 pub const SUPPORTED_EXT_SETS: &[&str] = &["GLSL.std.450"];
 
@@ -5604,6 +5606,9 @@ impl<I: Iterator<Item = u32>> Frontend<I> {
             }) => {
                 let low = self.next()?;
                 match width {
+                    // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#Literal
+                    // If a numeric type’s bit width is less than 32-bits, the value appears in the low-order bits of the word.
+                    2 => crate::Literal::F16(f16::from_bits(low as u16)),
                     4 => crate::Literal::F32(f32::from_bits(low)),
                     8 => {
                         inst.expect(5)?;
diff --git a/naga/src/front/wgsl/error.rs b/naga/src/front/wgsl/error.rs
index eed18b9a13..aae67a94b5 100644
--- a/naga/src/front/wgsl/error.rs
+++ b/naga/src/front/wgsl/error.rs
@@ -153,8 +153,6 @@ pub enum NumberError {
     Invalid,
     #[error("numeric literal not representable by target type")]
     NotRepresentable,
-    #[error("unimplemented f16 type")]
-    UnimplementedF16,
 }
 
 #[derive(Copy, Clone, Debug, PartialEq)]
@@ -1026,19 +1024,18 @@ impl<'a> Error<'a> {
                 )],
             },
             Error::EnableExtensionNotEnabled { kind, span } => ParseError {
-                message: format!("`{}` enable-extension is not enabled", kind.to_ident()),
+                message: format!("the `{}` language extension is not enabled", kind.to_ident()),
                 labels: vec![(
                     span,
                     format!(
                         concat!(
-                            "the `{}` enable-extension is needed for this functionality, ",
-                            "but it is not currently enabled"
+                            "the `{0}` language extension is needed for this functionality, ",
+                            "but it is not currently enabled."
                         ),
                         kind.to_ident()
                     )
                     .into(),
                 )],
-                #[allow(irrefutable_let_patterns)]
                 notes: if let EnableExtension::Unimplemented(kind) = kind {
                     vec![format!(
                         concat!(
@@ -1050,7 +1047,12 @@ impl<'a> Error<'a> {
                         kind.tracking_issue_num()
                     )]
                 } else {
-                    vec![]
+                    vec![
+                        format!(
+                            "You can enable this extension by adding `enable {};` at the top of the shader.",
+                            kind.to_ident()
+                        ),
+                    ]
                 },
             },
             Error::LanguageExtensionNotYetImplemented { kind, span } => ParseError {
diff --git a/naga/src/front/wgsl/lower/mod.rs b/naga/src/front/wgsl/lower/mod.rs
index 0a13f438f0..d63443739e 100644
--- a/naga/src/front/wgsl/lower/mod.rs
+++ b/naga/src/front/wgsl/lower/mod.rs
@@ -2013,6 +2013,7 @@ impl<'source, 'temp> Lowerer<'source, 'temp> {
         let expr: Typed<crate::Expression> = match *expr {
             ast::Expression::Literal(literal) => {
                 let literal = match literal {
+                    ast::Literal::Number(Number::F16(f)) => crate::Literal::F16(f),
                     ast::Literal::Number(Number::F32(f)) => crate::Literal::F32(f),
                     ast::Literal::Number(Number::I32(i)) => crate::Literal::I32(i),
                     ast::Literal::Number(Number::U32(u)) => crate::Literal::U32(u),
diff --git a/naga/src/front/wgsl/parse/conv.rs b/naga/src/front/wgsl/parse/conv.rs
index 93ee0dfed2..e130fb9d0f 100644
--- a/naga/src/front/wgsl/parse/conv.rs
+++ b/naga/src/front/wgsl/parse/conv.rs
@@ -1,3 +1,6 @@
+use crate::front::wgsl::parse::directive::enable_extension::{
+    EnableExtensions, ImplementedEnableExtension,
+};
 use crate::front::wgsl::{Error, Result, Scalar};
 use crate::Span;
 
@@ -113,10 +116,17 @@ pub fn map_storage_format(word: &str, span: Span) -> Result<'_, crate::StorageFo
     })
 }
 
-pub fn get_scalar_type(word: &str) -> Option<Scalar> {
+pub fn get_scalar_type(
+    enable_extensions: &EnableExtensions,
+    span: Span,
+    word: &str,
+) -> Result<'static, Option<Scalar>> {
     use crate::ScalarKind as Sk;
-    match word {
-        // "f16" => Some(Scalar { kind: Sk::Float, width: 2 }),
+    let scalar = match word {
+        "f16" => Some(Scalar {
+            kind: Sk::Float,
+            width: 2,
+        }),
         "f32" => Some(Scalar {
             kind: Sk::Float,
             width: 4,
@@ -146,7 +156,18 @@ pub fn get_scalar_type(word: &str) -> Option<Scalar> {
             width: crate::BOOL_WIDTH,
         }),
         _ => None,
+    };
+
+    if matches!(scalar, Some(Scalar::F16))
+        && !enable_extensions.contains(ImplementedEnableExtension::F16)
+    {
+        return Err(Box::new(Error::EnableExtensionNotEnabled {
+            span,
+            kind: ImplementedEnableExtension::F16.into(),
+        }));
     }
+
+    Ok(scalar)
 }
 
 pub fn map_derivative(word: &str) -> Option<(crate::DerivativeAxis, crate::DerivativeControl)> {
diff --git a/naga/src/front/wgsl/parse/directive/enable_extension.rs b/naga/src/front/wgsl/parse/directive/enable_extension.rs
index e4ad7e1399..4fa9799882 100644
--- a/naga/src/front/wgsl/parse/directive/enable_extension.rs
+++ b/naga/src/front/wgsl/parse/directive/enable_extension.rs
@@ -11,19 +11,23 @@ use alloc::boxed::Box;
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct EnableExtensions {
     dual_source_blending: bool,
+    /// Whether `enable f16;` was written earlier in the shader module.
+    f16: bool,
 }
 
 impl EnableExtensions {
     pub(crate) const fn empty() -> Self {
         Self {
+            f16: false,
             dual_source_blending: false,
         }
     }
 
     /// Add an enable-extension to the set requested by a module.
     pub(crate) fn add(&mut self, ext: ImplementedEnableExtension) {
-        let field: &mut bool = match ext {
+        let field = match ext {
             ImplementedEnableExtension::DualSourceBlending => &mut self.dual_source_blending,
+            ImplementedEnableExtension::F16 => &mut self.f16,
         };
         *field = true;
     }
@@ -32,6 +36,7 @@ impl EnableExtensions {
     pub(crate) const fn contains(&self, ext: ImplementedEnableExtension) -> bool {
         match ext {
             ImplementedEnableExtension::DualSourceBlending => self.dual_source_blending,
+            ImplementedEnableExtension::F16 => self.f16,
         }
     }
 }
@@ -47,7 +52,6 @@ impl Default for EnableExtensions {
 /// WGSL spec.: <https://www.w3.org/TR/WGSL/#enable-extensions-sec>
 #[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
 pub enum EnableExtension {
-    #[allow(unused)]
     Implemented(ImplementedEnableExtension),
     Unimplemented(UnimplementedEnableExtension),
 }
@@ -66,7 +70,7 @@ impl EnableExtension {
     /// Convert from a sentinel word in WGSL into its associated [`EnableExtension`], if possible.
     pub(crate) fn from_ident(word: &str, span: Span) -> Result<Self> {
         Ok(match word {
-            Self::F16 => Self::Unimplemented(UnimplementedEnableExtension::F16),
+            Self::F16 => Self::Implemented(ImplementedEnableExtension::F16),
             Self::CLIP_DISTANCES => {
                 Self::Unimplemented(UnimplementedEnableExtension::ClipDistances)
             }
@@ -82,10 +86,9 @@ impl EnableExtension {
         match self {
             Self::Implemented(kind) => match kind {
                 ImplementedEnableExtension::DualSourceBlending => Self::DUAL_SOURCE_BLENDING,
+                ImplementedEnableExtension::F16 => Self::F16,
             },
-
             Self::Unimplemented(kind) => match kind {
-                UnimplementedEnableExtension::F16 => Self::F16,
                 UnimplementedEnableExtension::ClipDistances => Self::CLIP_DISTANCES,
             },
         }
@@ -101,17 +104,17 @@ pub enum ImplementedEnableExtension {
     ///
     /// [`enable dual_source_blending;`]: https://www.w3.org/TR/WGSL/#extension-dual_source_blending
     DualSourceBlending,
-}
-
-/// A variant of [`EnableExtension::Unimplemented`].
-#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
-pub enum UnimplementedEnableExtension {
     /// Enables `f16`/`half` primitive support in all shader languages.
     ///
     /// In the WGSL standard, this corresponds to [`enable f16;`].
     ///
     /// [`enable f16;`]: https://www.w3.org/TR/WGSL/#extension-f16
     F16,
+}
+
+/// A variant of [`EnableExtension::Unimplemented`].
+#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
+pub enum UnimplementedEnableExtension {
     /// Enables the `clip_distances` variable in WGSL.
     ///
     /// In the WGSL standard, this corresponds to [`enable clip_distances;`].
@@ -123,7 +126,6 @@ pub enum UnimplementedEnableExtension {
 impl UnimplementedEnableExtension {
     pub(crate) const fn tracking_issue_num(self) -> u16 {
         match self {
-            Self::F16 => 4384,
             Self::ClipDistances => 6236,
         }
     }
diff --git a/naga/src/front/wgsl/parse/lexer.rs b/naga/src/front/wgsl/parse/lexer.rs
index 374a1dbc98..378af3d935 100644
--- a/naga/src/front/wgsl/parse/lexer.rs
+++ b/naga/src/front/wgsl/parse/lexer.rs
@@ -413,14 +413,17 @@ impl<'a> Lexer<'a> {
     /// Parses a generic scalar type, for example `<f32>`.
     pub(in crate::front::wgsl) fn next_scalar_generic(&mut self) -> Result<'a, Scalar> {
         self.expect_generic_paren('<')?;
-        let pair = match self.next() {
+        let (scalar, _span) = match self.next() {
             (Token::Word(word), span) => {
-                conv::get_scalar_type(word).ok_or(Error::UnknownScalarType(span))
+                conv::get_scalar_type(&self.enable_extensions, span, word)?
+                    .map(|scalar| (scalar, span))
+                    .ok_or(Error::UnknownScalarType(span))?
             }
-            (_, span) => Err(Error::UnknownScalarType(span)),
-        }?;
+            (_, span) => return Err(Box::new(Error::UnknownScalarType(span))),
+        };
+
         self.expect_generic_paren('>')?;
-        Ok(pair)
+        Ok(scalar)
     }
 
     /// Parses a generic scalar type, for example `<f32>`.
@@ -430,14 +433,18 @@ impl<'a> Lexer<'a> {
         &mut self,
     ) -> Result<'a, (Scalar, Span)> {
         self.expect_generic_paren('<')?;
-        let pair = match self.next() {
-            (Token::Word(word), span) => conv::get_scalar_type(word)
-                .map(|scalar| (scalar, span))
-                .ok_or(Error::UnknownScalarType(span)),
-            (_, span) => Err(Error::UnknownScalarType(span)),
-        }?;
+
+        let (scalar, span) = match self.next() {
+            (Token::Word(word), span) => {
+                conv::get_scalar_type(&self.enable_extensions, span, word)?
+                    .map(|scalar| (scalar, span))
+                    .ok_or(Error::UnknownScalarType(span))?
+            }
+            (_, span) => return Err(Box::new(Error::UnknownScalarType(span))),
+        };
+
         self.expect_generic_paren('>')?;
-        Ok(pair)
+        Ok((scalar, span))
     }
 
     pub(in crate::front::wgsl) fn next_storage_access(
@@ -518,6 +525,7 @@ fn sub_test(source: &str, expected_tokens: &[Token]) {
 
 #[test]
 fn test_numbers() {
+    use half::f16;
     // WGSL spec examples //
 
     // decimal integer
@@ -542,14 +550,16 @@ fn test_numbers() {
             Token::Number(Ok(Number::AbstractFloat(0.01))),
             Token::Number(Ok(Number::AbstractFloat(12.34))),
             Token::Number(Ok(Number::F32(0.))),
-            Token::Number(Err(NumberError::UnimplementedF16)),
+            Token::Number(Ok(Number::F16(f16::from_f32(0.)))),
             Token::Number(Ok(Number::AbstractFloat(0.001))),
             Token::Number(Ok(Number::AbstractFloat(43.75))),
             Token::Number(Ok(Number::F32(16.))),
             Token::Number(Ok(Number::AbstractFloat(0.1875))),
-            Token::Number(Err(NumberError::UnimplementedF16)),
+            // https://github.com/gfx-rs/wgpu/issues/7046
+            Token::Number(Err(NumberError::NotRepresentable)), // Should be 0.75
             Token::Number(Ok(Number::AbstractFloat(0.12109375))),
-            Token::Number(Err(NumberError::UnimplementedF16)),
+            // https://github.com/gfx-rs/wgpu/issues/7046
+            Token::Number(Err(NumberError::NotRepresentable)), // Should be 12.5
         ],
     );
 
diff --git a/naga/src/front/wgsl/parse/mod.rs b/naga/src/front/wgsl/parse/mod.rs
index ce02f93b31..cb59a82a24 100644
--- a/naga/src/front/wgsl/parse/mod.rs
+++ b/naga/src/front/wgsl/parse/mod.rs
@@ -6,9 +6,7 @@ use crate::diagnostic_filter::{
     ShouldConflictOnFullDuplicate, StandardFilterableTriggeringRule,
 };
 use crate::front::wgsl::error::{DiagnosticAttributeNotSupportedPosition, Error, ExpectedToken};
-use crate::front::wgsl::parse::directive::enable_extension::{
-    EnableExtension, EnableExtensions, UnimplementedEnableExtension,
-};
+use crate::front::wgsl::parse::directive::enable_extension::{EnableExtension, EnableExtensions};
 use crate::front::wgsl::parse::directive::language_extension::LanguageExtension;
 use crate::front::wgsl::parse::directive::DirectiveKind;
 use crate::front::wgsl::parse::lexer::{Lexer, Token};
@@ -364,7 +362,7 @@ impl Parser {
         span: Span,
         ctx: &mut ExpressionContext<'a, '_, '_>,
     ) -> Result<'a, Option<ast::ConstructorType<'a>>> {
-        if let Some(scalar) = conv::get_scalar_type(word) {
+        if let Some(scalar) = conv::get_scalar_type(&lexer.enable_extensions, span, word)? {
             return Ok(Some(ast::ConstructorType::Scalar(scalar)));
         }
 
@@ -393,6 +391,13 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "vec2h" => {
+                return Ok(Some(ast::ConstructorType::Vector {
+                    size: crate::VectorSize::Bi,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "vec3" => ast::ConstructorType::PartialVector {
                 size: crate::VectorSize::Tri,
             },
@@ -417,6 +422,13 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "vec3h" => {
+                return Ok(Some(ast::ConstructorType::Vector {
+                    size: crate::VectorSize::Tri,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "vec4" => ast::ConstructorType::PartialVector {
                 size: crate::VectorSize::Quad,
             },
@@ -441,6 +453,13 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "vec4h" => {
+                return Ok(Some(ast::ConstructorType::Vector {
+                    size: crate::VectorSize::Quad,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "mat2x2" => ast::ConstructorType::PartialMatrix {
                 columns: crate::VectorSize::Bi,
                 rows: crate::VectorSize::Bi,
@@ -453,6 +472,14 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "mat2x2h" => {
+                return Ok(Some(ast::ConstructorType::Matrix {
+                    columns: crate::VectorSize::Bi,
+                    rows: crate::VectorSize::Bi,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "mat2x3" => ast::ConstructorType::PartialMatrix {
                 columns: crate::VectorSize::Bi,
                 rows: crate::VectorSize::Tri,
@@ -465,6 +492,14 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "mat2x3h" => {
+                return Ok(Some(ast::ConstructorType::Matrix {
+                    columns: crate::VectorSize::Bi,
+                    rows: crate::VectorSize::Tri,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "mat2x4" => ast::ConstructorType::PartialMatrix {
                 columns: crate::VectorSize::Bi,
                 rows: crate::VectorSize::Quad,
@@ -477,6 +512,14 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "mat2x4h" => {
+                return Ok(Some(ast::ConstructorType::Matrix {
+                    columns: crate::VectorSize::Bi,
+                    rows: crate::VectorSize::Quad,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "mat3x2" => ast::ConstructorType::PartialMatrix {
                 columns: crate::VectorSize::Tri,
                 rows: crate::VectorSize::Bi,
@@ -489,6 +532,14 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "mat3x2h" => {
+                return Ok(Some(ast::ConstructorType::Matrix {
+                    columns: crate::VectorSize::Tri,
+                    rows: crate::VectorSize::Bi,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "mat3x3" => ast::ConstructorType::PartialMatrix {
                 columns: crate::VectorSize::Tri,
                 rows: crate::VectorSize::Tri,
@@ -501,6 +552,14 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "mat3x3h" => {
+                return Ok(Some(ast::ConstructorType::Matrix {
+                    columns: crate::VectorSize::Tri,
+                    rows: crate::VectorSize::Tri,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "mat3x4" => ast::ConstructorType::PartialMatrix {
                 columns: crate::VectorSize::Tri,
                 rows: crate::VectorSize::Quad,
@@ -513,6 +572,14 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "mat3x4h" => {
+                return Ok(Some(ast::ConstructorType::Matrix {
+                    columns: crate::VectorSize::Tri,
+                    rows: crate::VectorSize::Quad,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "mat4x2" => ast::ConstructorType::PartialMatrix {
                 columns: crate::VectorSize::Quad,
                 rows: crate::VectorSize::Bi,
@@ -525,6 +592,14 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "mat4x2h" => {
+                return Ok(Some(ast::ConstructorType::Matrix {
+                    columns: crate::VectorSize::Quad,
+                    rows: crate::VectorSize::Bi,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "mat4x3" => ast::ConstructorType::PartialMatrix {
                 columns: crate::VectorSize::Quad,
                 rows: crate::VectorSize::Tri,
@@ -537,6 +612,14 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "mat4x3h" => {
+                return Ok(Some(ast::ConstructorType::Matrix {
+                    columns: crate::VectorSize::Quad,
+                    rows: crate::VectorSize::Tri,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "mat4x4" => ast::ConstructorType::PartialMatrix {
                 columns: crate::VectorSize::Quad,
                 rows: crate::VectorSize::Quad,
@@ -549,6 +632,14 @@ impl Parser {
                     ty_span: Span::UNDEFINED,
                 }))
             }
+            "mat4x4h" => {
+                return Ok(Some(ast::ConstructorType::Matrix {
+                    columns: crate::VectorSize::Quad,
+                    rows: crate::VectorSize::Quad,
+                    ty: ctx.new_scalar(Scalar::F16),
+                    ty_span: Span::UNDEFINED,
+                }))
+            }
             "array" => ast::ConstructorType::PartialArray,
             "atomic"
             | "binding_array"
@@ -744,15 +835,17 @@ impl Parser {
             }
             (Token::Number(res), span) => {
                 let _ = lexer.next();
-                let num = res.map_err(|err| match err {
-                    super::error::NumberError::UnimplementedF16 => {
-                        Error::EnableExtensionNotEnabled {
-                            kind: EnableExtension::Unimplemented(UnimplementedEnableExtension::F16),
+                let num = res.map_err(|err| Error::BadNumber(span, err))?;
+
+                if let Some(enable_extension) = num.required_enable_extension() {
+                    if !lexer.enable_extensions.contains(enable_extension) {
+                        return Err(Box::new(Error::EnableExtensionNotEnabled {
+                            kind: enable_extension.into(),
                             span,
-                        }
+                        }));
                     }
-                    err => Error::BadNumber(span, err),
-                })?;
+                }
+
                 ast::Expression::Literal(ast::Literal::Number(num))
             }
             (Token::Word("RAY_FLAG_NONE"), _) => {
@@ -1340,9 +1433,10 @@ impl Parser {
         &mut self,
         lexer: &mut Lexer<'a>,
         word: &'a str,
+        span: Span,
         ctx: &mut ExpressionContext<'a, '_, '_>,
     ) -> Result<'a, Option<ast::Type<'a>>> {
-        if let Some(scalar) = conv::get_scalar_type(word) {
+        if let Some(scalar) = conv::get_scalar_type(&lexer.enable_extensions, span, word)? {
             return Ok(Some(ast::Type::Scalar(scalar)));
         }
 
@@ -1370,6 +1464,11 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "vec2h" => ast::Type::Vector {
+                size: crate::VectorSize::Bi,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "vec3" => {
                 let (ty, ty_span) = self.singular_generic(lexer, ctx)?;
                 ast::Type::Vector {
@@ -1393,6 +1492,11 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "vec3h" => ast::Type::Vector {
+                size: crate::VectorSize::Tri,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "vec4" => {
                 let (ty, ty_span) = self.singular_generic(lexer, ctx)?;
                 ast::Type::Vector {
@@ -1416,6 +1520,11 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "vec4h" => ast::Type::Vector {
+                size: crate::VectorSize::Quad,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "mat2x2" => {
                 self.matrix_with_type(lexer, ctx, crate::VectorSize::Bi, crate::VectorSize::Bi)?
             }
@@ -1425,6 +1534,12 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "mat2x2h" => ast::Type::Matrix {
+                columns: crate::VectorSize::Bi,
+                rows: crate::VectorSize::Bi,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "mat2x3" => {
                 self.matrix_with_type(lexer, ctx, crate::VectorSize::Bi, crate::VectorSize::Tri)?
             }
@@ -1434,6 +1549,12 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "mat2x3h" => ast::Type::Matrix {
+                columns: crate::VectorSize::Bi,
+                rows: crate::VectorSize::Tri,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "mat2x4" => {
                 self.matrix_with_type(lexer, ctx, crate::VectorSize::Bi, crate::VectorSize::Quad)?
             }
@@ -1443,6 +1564,12 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "mat2x4h" => ast::Type::Matrix {
+                columns: crate::VectorSize::Bi,
+                rows: crate::VectorSize::Quad,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "mat3x2" => {
                 self.matrix_with_type(lexer, ctx, crate::VectorSize::Tri, crate::VectorSize::Bi)?
             }
@@ -1452,6 +1579,12 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "mat3x2h" => ast::Type::Matrix {
+                columns: crate::VectorSize::Tri,
+                rows: crate::VectorSize::Bi,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "mat3x3" => {
                 self.matrix_with_type(lexer, ctx, crate::VectorSize::Tri, crate::VectorSize::Tri)?
             }
@@ -1461,6 +1594,12 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "mat3x3h" => ast::Type::Matrix {
+                columns: crate::VectorSize::Tri,
+                rows: crate::VectorSize::Tri,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "mat3x4" => {
                 self.matrix_with_type(lexer, ctx, crate::VectorSize::Tri, crate::VectorSize::Quad)?
             }
@@ -1470,6 +1609,12 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "mat3x4h" => ast::Type::Matrix {
+                columns: crate::VectorSize::Tri,
+                rows: crate::VectorSize::Quad,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "mat4x2" => {
                 self.matrix_with_type(lexer, ctx, crate::VectorSize::Quad, crate::VectorSize::Bi)?
             }
@@ -1479,6 +1624,12 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "mat4x2h" => ast::Type::Matrix {
+                columns: crate::VectorSize::Quad,
+                rows: crate::VectorSize::Bi,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "mat4x3" => {
                 self.matrix_with_type(lexer, ctx, crate::VectorSize::Quad, crate::VectorSize::Tri)?
             }
@@ -1488,6 +1639,12 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "mat4x3h" => ast::Type::Matrix {
+                columns: crate::VectorSize::Quad,
+                rows: crate::VectorSize::Tri,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "mat4x4" => {
                 self.matrix_with_type(lexer, ctx, crate::VectorSize::Quad, crate::VectorSize::Quad)?
             }
@@ -1497,6 +1654,12 @@ impl Parser {
                 ty: ctx.new_scalar(Scalar::F32),
                 ty_span: Span::UNDEFINED,
             },
+            "mat4x4h" => ast::Type::Matrix {
+                columns: crate::VectorSize::Quad,
+                rows: crate::VectorSize::Quad,
+                ty: ctx.new_scalar(Scalar::F16),
+                ty_span: Span::UNDEFINED,
+            },
             "atomic" => {
                 let scalar = lexer.next_scalar_generic()?;
                 ast::Type::Atomic(scalar)
@@ -1763,7 +1926,7 @@ impl Parser {
 
             let (name, span) = lexer.next_ident_with_span()?;
 
-            let ty = match this.type_decl_impl(lexer, name, ctx)? {
+            let ty = match this.type_decl_impl(lexer, name, span, ctx)? {
                 Some(ty) => ty,
                 None => {
                     ctx.unresolved.insert(ast::Dependency {
diff --git a/naga/src/front/wgsl/parse/number.rs b/naga/src/front/wgsl/parse/number.rs
index e447c12c99..776c357343 100644
--- a/naga/src/front/wgsl/parse/number.rs
+++ b/naga/src/front/wgsl/parse/number.rs
@@ -1,7 +1,9 @@
 use alloc::format;
 
 use crate::front::wgsl::error::NumberError;
+use crate::front::wgsl::parse::directive::enable_extension::ImplementedEnableExtension;
 use crate::front::wgsl::parse::lexer::Token;
+use half::f16;
 
 /// When using this type assume no Abstract Int/Float for now
 #[derive(Copy, Clone, Debug, PartialEq)]
@@ -18,12 +20,23 @@ pub enum Number {
     I64(i64),
     /// Concrete u64
     U64(u64),
+    /// Concrete f16
+    F16(f16),
     /// Concrete f32
     F32(f32),
     /// Concrete f64
     F64(f64),
 }
 
+impl Number {
+    pub(super) const fn required_enable_extension(&self) -> Option<ImplementedEnableExtension> {
+        match *self {
+            Number::F16(_) => Some(ImplementedEnableExtension::F16),
+            _ => None,
+        }
+    }
+}
+
 pub(in crate::front::wgsl) fn consume_number(input: &str) -> (Token<'_>, &str) {
     let (result, rest) = parse(input);
     (Token::Number(result), rest)
@@ -369,7 +382,8 @@ fn parse_hex_float(input: &str, kind: Option<FloatKind>) -> Result<Number, Numbe
             // can only be ParseHexfErrorKind::Inexact but we can't check since it's private
             _ => Err(NumberError::NotRepresentable),
         },
-        Some(FloatKind::F16) => Err(NumberError::UnimplementedF16),
+        // TODO: f16 is not supported by hexf_parse
+        Some(FloatKind::F16) => Err(NumberError::NotRepresentable),
         Some(FloatKind::F32) => match hexf_parse::parse_hexf32(input, false) {
             Ok(num) => Ok(Number::F32(num)),
             // can only be ParseHexfErrorKind::Inexact but we can't check since it's private
@@ -405,7 +419,12 @@ fn parse_dec_float(input: &str, kind: Option<FloatKind>) -> Result<Number, Numbe
                 .then_some(Number::F64(num))
                 .ok_or(NumberError::NotRepresentable)
         }
-        Some(FloatKind::F16) => Err(NumberError::UnimplementedF16),
+        Some(FloatKind::F16) => {
+            let num = input.parse::<f16>().unwrap(); // will never fail
+            num.is_finite()
+                .then_some(Number::F16(num))
+                .ok_or(NumberError::NotRepresentable)
+        }
     }
 }
 
diff --git a/naga/src/ir/mod.rs b/naga/src/ir/mod.rs
index fcd152d94e..ed7fd7fece 100644
--- a/naga/src/ir/mod.rs
+++ b/naga/src/ir/mod.rs
@@ -225,6 +225,7 @@ use alloc::{string::String, vec::Vec};
 
 #[cfg(feature = "arbitrary")]
 use arbitrary::Arbitrary;
+use half::f16;
 #[cfg(feature = "deserialize")]
 use serde::Deserialize;
 #[cfg(feature = "serialize")]
@@ -818,6 +819,7 @@ pub enum Literal {
     F64(f64),
     /// May not be NaN or infinity.
     F32(f32),
+    F16(f16),
     U32(u32),
     I32(i32),
     U64(u64),
diff --git a/naga/src/proc/constant_evaluator.rs b/naga/src/proc/constant_evaluator.rs
index 38eaabe3ce..b6254a3e10 100644
--- a/naga/src/proc/constant_evaluator.rs
+++ b/naga/src/proc/constant_evaluator.rs
@@ -7,6 +7,8 @@ use alloc::{
 use core::iter;
 
 use arrayvec::ArrayVec;
+use half::f16;
+use num_traits::{real::Real, FromPrimitive, One, ToPrimitive, Zero};
 
 use crate::{
     arena::{Arena, Handle, HandleVec, UniqueArena},
@@ -208,6 +210,7 @@ gen_component_wise_extractor! {
     literals: [
         AbstractFloat => AbstractFloat: f64,
         F32 => F32: f32,
+        F16 => F16: f16,
         AbstractInt => AbstractInt: i64,
         U32 => U32: u32,
         I32 => I32: i32,
@@ -228,6 +231,7 @@ gen_component_wise_extractor! {
     literals: [
         AbstractFloat => Abstract: f64,
         F32 => F32: f32,
+        F16 => F16: f16,
     ],
     scalar_kinds: [
         Float,
@@ -253,6 +257,7 @@ gen_component_wise_extractor! {
         AbstractFloat => AbstractFloat: f64,
         AbstractInt => AbstractInt: i64,
         F32 => F32: f32,
+        F16 => F16: f16,
         I32 => I32: i32,
     ],
     scalar_kinds: [
@@ -1114,6 +1119,7 @@ impl<'a> ConstantEvaluator<'a> {
                 component_wise_scalar(self, span, [arg], |args| match args {
                     Scalar::AbstractFloat([e]) => Ok(Scalar::AbstractFloat([e.abs()])),
                     Scalar::F32([e]) => Ok(Scalar::F32([e.abs()])),
+                    Scalar::F16([e]) => Ok(Scalar::F16([e.abs()])),
                     Scalar::AbstractInt([e]) => Ok(Scalar::AbstractInt([e.abs()])),
                     Scalar::I32([e]) => Ok(Scalar::I32([e.wrapping_abs()])),
                     Scalar::U32([e]) => Ok(Scalar::U32([e])), // TODO: just re-use the expression, ezpz
@@ -1145,9 +1151,13 @@ impl<'a> ConstantEvaluator<'a> {
                     }
                 )
             }
-            crate::MathFunction::Saturate => {
-                component_wise_float!(self, span, [arg], |e| { Ok([e.clamp(0., 1.)]) })
-            }
+            crate::MathFunction::Saturate => component_wise_float(self, span, [arg], |e| match e {
+                Float::F16([e]) => Ok(Float::F16(
+                    [e.clamp(f16::from_f32(0.0), f16::from_f32(1.0))],
+                )),
+                Float::F32([e]) => Ok(Float::F32([e.clamp(0., 1.)])),
+                Float::Abstract([e]) => Ok(Float::Abstract([e.clamp(0., 1.)])),
+            }),
 
             // trigonometry
             crate::MathFunction::Cos => {
@@ -1201,7 +1211,35 @@ impl<'a> ConstantEvaluator<'a> {
                 component_wise_float!(self, span, [arg], |e| { Ok([e.floor()]) })
             }
             crate::MathFunction::Round => {
-                component_wise_float!(self, span, [arg], |e| { Ok([e.round_ties_even()]) })
+                component_wise_float(self, span, [arg], |e| match e {
+                    Float::Abstract([e]) => Ok(Float::Abstract([e.round_ties_even()])),
+                    Float::F32([e]) => Ok(Float::F32([e.round_ties_even()])),
+                    Float::F16([e]) => {
+                        // TODO: `round_ties_even` is not available on `half::f16` yet.
+                        //
+                        // This polyfill is shamelessly [~~stolen from~~ inspired by `ndarray-image`][polyfill source],
+                        // which has licensing compatible with ours. See also
+                        // <https://github.com/rust-lang/rust/issues/96710>.
+                        //
+                        // [polyfill source]: https://github.com/imeka/ndarray-ndimage/blob/8b14b4d6ecfbc96a8a052f802e342a7049c68d8f/src/lib.rs#L98
+                        fn round_ties_even(x: f64) -> f64 {
+                            let i = x as i64;
+                            let f = (x - i as f64).abs();
+                            if f == 0.5 {
+                                if i & 1 == 1 {
+                                    // -1.5, 1.5, 3.5, ...
+                                    (x.abs() + 0.5).copysign(x)
+                                } else {
+                                    (x.abs() - 0.5).copysign(x)
+                                }
+                            } else {
+                                x.round()
+                            }
+                        }
+
+                        Ok(Float::F16([(f16::from_f64(round_ties_even(f64::from(e))))]))
+                    }
+                })
             }
             crate::MathFunction::Fract => {
                 component_wise_float!(self, span, [arg], |e| {
@@ -1246,15 +1284,27 @@ impl<'a> ConstantEvaluator<'a> {
                 )
             }
             crate::MathFunction::Step => {
-                component_wise_float!(self, span, [arg, arg1.unwrap()], |edge, x| {
-                    Ok([if edge <= x { 1.0 } else { 0.0 }])
+                component_wise_float(self, span, [arg, arg1.unwrap()], |x| match x {
+                    Float::Abstract([edge, x]) => {
+                        Ok(Float::Abstract([if edge <= x { 1.0 } else { 0.0 }]))
+                    }
+                    Float::F32([edge, x]) => Ok(Float::F32([if edge <= x { 1.0 } else { 0.0 }])),
+                    Float::F16([edge, x]) => Ok(Float::F16([if edge <= x {
+                        f16::one()
+                    } else {
+                        f16::zero()
+                    }])),
                 })
             }
             crate::MathFunction::Sqrt => {
                 component_wise_float!(self, span, [arg], |e| { Ok([e.sqrt()]) })
             }
             crate::MathFunction::InverseSqrt => {
-                component_wise_float!(self, span, [arg], |e| { Ok([1. / e.sqrt()]) })
+                component_wise_float(self, span, [arg], |e| match e {
+                    Float::Abstract([e]) => Ok(Float::Abstract([1. / e.sqrt()])),
+                    Float::F32([e]) => Ok(Float::F32([1. / e.sqrt()])),
+                    Float::F16([e]) => Ok(Float::F16([f16::from_f32(1. / f32::from(e).sqrt())])),
+                })
             }
 
             // bits
@@ -1549,6 +1599,7 @@ impl<'a> ConstantEvaluator<'a> {
                         Literal::I32(v) => v,
                         Literal::U32(v) => v as i32,
                         Literal::F32(v) => v as i32,
+                        Literal::F16(v) => f16::to_i32(&v).unwrap(), //Only None on NaN or Inf
                         Literal::Bool(v) => v as i32,
                         Literal::F64(_) | Literal::I64(_) | Literal::U64(_) => {
                             return make_error();
@@ -1560,6 +1611,7 @@ impl<'a> ConstantEvaluator<'a> {
                         Literal::I32(v) => v as u32,
                         Literal::U32(v) => v,
                         Literal::F32(v) => v as u32,
+                        Literal::F16(v) => f16::to_u32(&v).unwrap(), //Only None on NaN or Inf
                         Literal::Bool(v) => v as u32,
                         Literal::F64(_) | Literal::I64(_) | Literal::U64(_) => {
                             return make_error();
@@ -1575,6 +1627,7 @@ impl<'a> ConstantEvaluator<'a> {
                         Literal::F64(v) => v as i64,
                         Literal::I64(v) => v,
                         Literal::U64(v) => v as i64,
+                        Literal::F16(v) => f16::to_i64(&v).unwrap(), //Only None on NaN or Inf
                         Literal::AbstractInt(v) => i64::try_from_abstract(v)?,
                         Literal::AbstractFloat(v) => i64::try_from_abstract(v)?,
                     }),
@@ -1586,9 +1639,22 @@ impl<'a> ConstantEvaluator<'a> {
                         Literal::F64(v) => v as u64,
                         Literal::I64(v) => v as u64,
                         Literal::U64(v) => v,
+                        Literal::F16(v) => f16::to_u64(&v).unwrap(), //Only None on NaN or Inf
                         Literal::AbstractInt(v) => u64::try_from_abstract(v)?,
                         Literal::AbstractFloat(v) => u64::try_from_abstract(v)?,
                     }),
+                    Sc::F16 => Literal::F16(match literal {
+                        Literal::F16(v) => v,
+                        Literal::F32(v) => f16::from_f32(v),
+                        Literal::F64(v) => f16::from_f64(v),
+                        Literal::Bool(v) => f16::from_u32(v as u32).unwrap(),
+                        Literal::I64(v) => f16::from_i64(v).unwrap(),
+                        Literal::U64(v) => f16::from_u64(v).unwrap(),
+                        Literal::I32(v) => f16::from_i32(v).unwrap(),
+                        Literal::U32(v) => f16::from_u32(v).unwrap(),
+                        Literal::AbstractFloat(v) => f16::try_from_abstract(v)?,
+                        Literal::AbstractInt(v) => f16::try_from_abstract(v)?,
+                    }),
                     Sc::F32 => Literal::F32(match literal {
                         Literal::I32(v) => v as f32,
                         Literal::U32(v) => v as f32,
@@ -1597,12 +1663,14 @@ impl<'a> ConstantEvaluator<'a> {
                         Literal::F64(_) | Literal::I64(_) | Literal::U64(_) => {
                             return make_error();
                         }
+                        Literal::F16(v) => f16::to_f32(v),
                         Literal::AbstractInt(v) => f32::try_from_abstract(v)?,
                         Literal::AbstractFloat(v) => f32::try_from_abstract(v)?,
                     }),
                     Sc::F64 => Literal::F64(match literal {
                         Literal::I32(v) => v as f64,
                         Literal::U32(v) => v as f64,
+                        Literal::F16(v) => f16::to_f64(v),
                         Literal::F32(v) => v as f64,
                         Literal::F64(v) => v,
                         Literal::Bool(v) => v as u32 as f64,
@@ -1614,6 +1682,7 @@ impl<'a> ConstantEvaluator<'a> {
                         Literal::I32(v) => v != 0,
                         Literal::U32(v) => v != 0,
                         Literal::F32(v) => v != 0.0,
+                        Literal::F16(v) => v != f16::zero(),
                         Literal::Bool(v) => v,
                         Literal::F64(_)
                         | Literal::I64(_)
@@ -1770,6 +1839,7 @@ impl<'a> ConstantEvaluator<'a> {
                     Literal::I32(v) => Literal::I32(v.wrapping_neg()),
                     Literal::I64(v) => Literal::I64(v.wrapping_neg()),
                     Literal::F32(v) => Literal::F32(-v),
+                    Literal::F16(v) => Literal::F16(-v),
                     Literal::AbstractInt(v) => Literal::AbstractInt(v.wrapping_neg()),
                     Literal::AbstractFloat(v) => Literal::AbstractFloat(-v),
                     _ => return Err(ConstantEvaluatorError::InvalidUnaryOpArg),
@@ -1918,6 +1988,14 @@ impl<'a> ConstantEvaluator<'a> {
                                 _ => return Err(ConstantEvaluatorError::InvalidBinaryOpArgs),
                             })
                         }
+                        (Literal::F16(a), Literal::F16(b)) => Literal::F16(match op {
+                            BinaryOperator::Add => a + b,
+                            BinaryOperator::Subtract => a - b,
+                            BinaryOperator::Multiply => a * b,
+                            BinaryOperator::Divide => a / b,
+                            BinaryOperator::Modulo => a % b,
+                            _ => return Err(ConstantEvaluatorError::InvalidBinaryOpArgs),
+                        }),
                         (Literal::AbstractInt(a), Literal::AbstractInt(b)) => {
                             Literal::AbstractInt(match op {
                                 BinaryOperator::Add => a.checked_add(b).ok_or_else(|| {
@@ -2522,6 +2600,32 @@ impl TryFromAbstract<f64> for u64 {
     }
 }
 
+impl TryFromAbstract<f64> for f16 {
+    fn try_from_abstract(value: f64) -> Result<f16, ConstantEvaluatorError> {
+        let f = f16::from_f64(value);
+        if f.is_infinite() {
+            return Err(ConstantEvaluatorError::AutomaticConversionLossy {
+                value: format!("{value:?}"),
+                to_type: "f16",
+            });
+        }
+        Ok(f)
+    }
+}
+
+impl TryFromAbstract<i64> for f16 {
+    fn try_from_abstract(value: i64) -> Result<f16, ConstantEvaluatorError> {
+        let f = f16::from_i64(value);
+        if f.is_none() {
+            return Err(ConstantEvaluatorError::AutomaticConversionLossy {
+                value: format!("{value:?}"),
+                to_type: "f16",
+            });
+        }
+        Ok(f.unwrap())
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use alloc::{vec, vec::Vec};
diff --git a/naga/src/proc/mod.rs b/naga/src/proc/mod.rs
index 84b4d1af42..3412f9cea4 100644
--- a/naga/src/proc/mod.rs
+++ b/naga/src/proc/mod.rs
@@ -80,6 +80,7 @@ impl From<super::StorageFormat> for super::Scalar {
 pub enum HashableLiteral {
     F64(u64),
     F32(u32),
+    F16(u16),
     U32(u32),
     I32(i32),
     U64(u64),
@@ -94,6 +95,7 @@ impl From<crate::Literal> for HashableLiteral {
         match l {
             crate::Literal::F64(v) => Self::F64(v.to_bits()),
             crate::Literal::F32(v) => Self::F32(v.to_bits()),
+            crate::Literal::F16(v) => Self::F16(v.to_bits()),
             crate::Literal::U32(v) => Self::U32(v),
             crate::Literal::I32(v) => Self::I32(v),
             crate::Literal::U64(v) => Self::U64(v),
@@ -132,6 +134,7 @@ impl crate::Literal {
         match *self {
             Self::F64(_) | Self::I64(_) | Self::U64(_) => 8,
             Self::F32(_) | Self::U32(_) | Self::I32(_) => 4,
+            Self::F16(_) => 2,
             Self::Bool(_) => crate::BOOL_WIDTH,
             Self::AbstractInt(_) | Self::AbstractFloat(_) => crate::ABSTRACT_WIDTH,
         }
@@ -140,6 +143,7 @@ impl crate::Literal {
         match *self {
             Self::F64(_) => crate::Scalar::F64,
             Self::F32(_) => crate::Scalar::F32,
+            Self::F16(_) => crate::Scalar::F16,
             Self::U32(_) => crate::Scalar::U32,
             Self::I32(_) => crate::Scalar::I32,
             Self::U64(_) => crate::Scalar::U64,
diff --git a/naga/src/proc/type_methods.rs b/naga/src/proc/type_methods.rs
index 4eda07c9b7..d4089bc1fa 100644
--- a/naga/src/proc/type_methods.rs
+++ b/naga/src/proc/type_methods.rs
@@ -20,6 +20,10 @@ impl crate::ScalarKind {
 }
 
 impl crate::Scalar {
+    pub const F16: Self = Self {
+        kind: crate::ScalarKind::Float,
+        width: 2,
+    };
     pub const I32: Self = Self {
         kind: crate::ScalarKind::Sint,
         width: 4,
diff --git a/naga/src/valid/expression.rs b/naga/src/valid/expression.rs
index 9d9e4d2f16..096bd4cae1 100644
--- a/naga/src/valid/expression.rs
+++ b/naga/src/valid/expression.rs
@@ -1794,7 +1794,7 @@ impl super::Validator {
     }
 
     pub fn validate_literal(&self, literal: crate::Literal) -> Result<(), LiteralError> {
-        self.check_width(literal.scalar())?;
+        let _ = self.check_width(literal.scalar())?;
         check_literal_value(literal)?;
 
         Ok(())
diff --git a/naga/src/valid/interface.rs b/naga/src/valid/interface.rs
index 4191d16e94..4db7a6354f 100644
--- a/naga/src/valid/interface.rs
+++ b/naga/src/valid/interface.rs
@@ -4,7 +4,7 @@ use bit_set::BitSet;
 
 use super::{
     analyzer::{FunctionInfo, GlobalUse},
-    Capabilities, Disalignment, FunctionError, ModuleInfo,
+    Capabilities, Disalignment, FunctionError, ModuleInfo, PushConstantError,
 };
 use crate::arena::{Handle, UniqueArena};
 use crate::span::{AddSpan as _, MapErrWithSpan as _, SpanProvider as _, WithSpan};
@@ -41,6 +41,8 @@ pub enum GlobalVariableError {
     InitializerNotAllowed(crate::AddressSpace),
     #[error("Storage address space doesn't support write-only access")]
     StorageAddressSpaceWriteOnlyNotSupported,
+    #[error("Type is not valid for use as a push constant")]
+    InvalidPushConstantType(#[source] PushConstantError),
 }
 
 #[derive(Clone, Debug, thiserror::Error)]
@@ -596,6 +598,9 @@ impl super::Validator {
                         Capabilities::PUSH_CONSTANT,
                     ));
                 }
+                if let Err(ref err) = type_info.push_constant_compatibility {
+                    return Err(GlobalVariableError::InvalidPushConstantType(err.clone()));
+                }
                 (
                     TypeFlags::DATA
                         | TypeFlags::COPY
diff --git a/naga/src/valid/mod.rs b/naga/src/valid/mod.rs
index ec596a974b..8f5501bf02 100644
--- a/naga/src/valid/mod.rs
+++ b/naga/src/valid/mod.rs
@@ -31,7 +31,7 @@ pub use expression::{check_literal_value, LiteralError};
 pub use expression::{ConstExpressionError, ExpressionError};
 pub use function::{CallError, FunctionError, LocalVariableError};
 pub use interface::{EntryPointError, GlobalVariableError, VaryingError};
-pub use r#type::{Disalignment, TypeError, TypeFlags, WidthError};
+pub use r#type::{Disalignment, PushConstantError, TypeError, TypeFlags, WidthError};
 
 use self::handles::InvalidHandleError;
 
@@ -163,6 +163,8 @@ bitflags::bitflags! {
         const TEXTURE_INT64_ATOMIC = 1 << 24;
         /// Support for ray queries returning vertex position
         const RAY_HIT_VERTEX_POSITION = 1 << 25;
+        /// Support for 16-bit floating-point types.
+        const SHADER_FLOAT16 = 1 << 26;
     }
 }
 
diff --git a/naga/src/valid/type.rs b/naga/src/valid/type.rs
index 5863eb813f..b3ae13b7d4 100644
--- a/naga/src/valid/type.rs
+++ b/naga/src/valid/type.rs
@@ -169,8 +169,16 @@ pub enum WidthError {
     Abstract,
 }
 
+#[derive(Clone, Debug, thiserror::Error)]
+#[cfg_attr(test, derive(PartialEq))]
+pub enum PushConstantError {
+    #[error("The scalar type {0:?} is not supported in push constants")]
+    InvalidScalar(crate::Scalar),
+}
+
 // Only makes sense if `flags.contains(HOST_SHAREABLE)`
 type LayoutCompatibility = Result<Alignment, (Handle<crate::Type>, Disalignment)>;
+type PushConstantCompatibility = Result<(), PushConstantError>;
 
 fn check_member_layout(
     accum: &mut LayoutCompatibility,
@@ -221,6 +229,7 @@ pub(super) struct TypeInfo {
     pub flags: TypeFlags,
     pub uniform_layout: LayoutCompatibility,
     pub storage_layout: LayoutCompatibility,
+    pub push_constant_compatibility: PushConstantCompatibility,
 }
 
 impl TypeInfo {
@@ -229,6 +238,7 @@ impl TypeInfo {
             flags: TypeFlags::empty(),
             uniform_layout: Ok(Alignment::ONE),
             storage_layout: Ok(Alignment::ONE),
+            push_constant_compatibility: Ok(()),
         }
     }
 
@@ -237,6 +247,7 @@ impl TypeInfo {
             flags,
             uniform_layout: Ok(alignment),
             storage_layout: Ok(alignment),
+            push_constant_compatibility: Ok(()),
         }
     }
 }
@@ -250,11 +261,15 @@ impl super::Validator {
         }
     }
 
-    pub(super) const fn check_width(&self, scalar: crate::Scalar) -> Result<(), WidthError> {
+    pub(super) const fn check_width(
+        &self,
+        scalar: crate::Scalar,
+    ) -> Result<PushConstantCompatibility, WidthError> {
+        let mut push_constant_compatibility = Ok(());
         let good = match scalar.kind {
             crate::ScalarKind::Bool => scalar.width == crate::BOOL_WIDTH,
-            crate::ScalarKind::Float => {
-                if scalar.width == 8 {
+            crate::ScalarKind::Float => match scalar.width {
+                8 => {
                     if !self.capabilities.contains(Capabilities::FLOAT64) {
                         return Err(WidthError::MissingCapability {
                             name: "f64",
@@ -262,10 +277,21 @@ impl super::Validator {
                         });
                     }
                     true
-                } else {
-                    scalar.width == 4
                 }
-            }
+                2 => {
+                    if !self.capabilities.contains(Capabilities::SHADER_FLOAT16) {
+                        return Err(WidthError::MissingCapability {
+                            name: "f16",
+                            flag: "FLOAT16",
+                        });
+                    }
+
+                    push_constant_compatibility = Err(PushConstantError::InvalidScalar(scalar));
+
+                    true
+                }
+                _ => scalar.width == 4,
+            },
             crate::ScalarKind::Sint => {
                 if scalar.width == 8 {
                     if !self.capabilities.contains(Capabilities::SHADER_INT64) {
@@ -297,7 +323,7 @@ impl super::Validator {
             }
         };
         if good {
-            Ok(())
+            Ok(push_constant_compatibility)
         } else {
             Err(WidthError::Invalid(scalar.kind, scalar.width))
         }
@@ -317,13 +343,13 @@ impl super::Validator {
         use crate::TypeInner as Ti;
         Ok(match gctx.types[handle].inner {
             Ti::Scalar(scalar) => {
-                self.check_width(scalar)?;
+                let push_constant_compatibility = self.check_width(scalar)?;
                 let shareable = if scalar.kind.is_numeric() {
                     TypeFlags::IO_SHAREABLE | TypeFlags::HOST_SHAREABLE
                 } else {
                     TypeFlags::empty()
                 };
-                TypeInfo::new(
+                let mut type_info = TypeInfo::new(
                     TypeFlags::DATA
                         | TypeFlags::SIZED
                         | TypeFlags::COPY
@@ -332,16 +358,18 @@ impl super::Validator {
                         | TypeFlags::CREATION_RESOLVED
                         | shareable,
                     Alignment::from_width(scalar.width),
-                )
+                );
+                type_info.push_constant_compatibility = push_constant_compatibility;
+                type_info
             }
             Ti::Vector { size, scalar } => {
-                self.check_width(scalar)?;
+                let push_constant_compatibility = self.check_width(scalar)?;
                 let shareable = if scalar.kind.is_numeric() {
                     TypeFlags::IO_SHAREABLE | TypeFlags::HOST_SHAREABLE
                 } else {
                     TypeFlags::empty()
                 };
-                TypeInfo::new(
+                let mut type_info = TypeInfo::new(
                     TypeFlags::DATA
                         | TypeFlags::SIZED
                         | TypeFlags::COPY
@@ -350,7 +378,9 @@ impl super::Validator {
                         | TypeFlags::CREATION_RESOLVED
                         | shareable,
                     Alignment::from(size) * Alignment::from_width(scalar.width),
-                )
+                );
+                type_info.push_constant_compatibility = push_constant_compatibility;
+                type_info
             }
             Ti::Matrix {
                 columns: _,
@@ -360,8 +390,8 @@ impl super::Validator {
                 if scalar.kind != crate::ScalarKind::Float {
                     return Err(TypeError::MatrixElementNotFloat);
                 }
-                self.check_width(scalar)?;
-                TypeInfo::new(
+                let push_constant_compatibility = self.check_width(scalar)?;
+                let mut type_info = TypeInfo::new(
                     TypeFlags::DATA
                         | TypeFlags::SIZED
                         | TypeFlags::COPY
@@ -370,7 +400,9 @@ impl super::Validator {
                         | TypeFlags::CONSTRUCTIBLE
                         | TypeFlags::CREATION_RESOLVED,
                     Alignment::from(rows) * Alignment::from_width(scalar.width),
-                )
+                );
+                type_info.push_constant_compatibility = push_constant_compatibility;
+                type_info
             }
             Ti::Atomic(scalar) => {
                 match scalar {
@@ -465,7 +497,7 @@ impl super::Validator {
                 // However, some cases are trivial: All our implicit base types
                 // are DATA and SIZED, so we can never return
                 // `InvalidPointerBase` or `InvalidPointerToUnsized`.
-                self.check_width(scalar)?;
+                let _ = self.check_width(scalar)?;
 
                 // `Validator::validate_function` actually checks the address
                 // space of pointer arguments explicitly before checking the
@@ -551,6 +583,7 @@ impl super::Validator {
                     flags: base_info.flags & type_info_mask,
                     uniform_layout,
                     storage_layout,
+                    push_constant_compatibility: base_info.push_constant_compatibility.clone(),
                 }
             }
             Ti::Struct { ref members, span } => {
@@ -631,6 +664,10 @@ impl super::Validator {
                         base_info.storage_layout,
                         handle,
                     );
+                    if base_info.push_constant_compatibility.is_err() {
+                        ti.push_constant_compatibility =
+                            base_info.push_constant_compatibility.clone();
+                    }
 
                     // Validate rule: If a structure member itself has a structure type S,
                     // then the number of bytes between the start of that member and
diff --git a/naga/tests/in/glsl/f16-glsl.comp b/naga/tests/in/glsl/f16-glsl.comp
new file mode 100644
index 0000000000..af8c89cb8a
--- /dev/null
+++ b/naga/tests/in/glsl/f16-glsl.comp
@@ -0,0 +1,57 @@
+#version 460
+
+#extension GL_AMD_gpu_shader_half_float: enable 
+
+layout(set = 0, binding = 0) uniform A {
+    float16_t a_1;
+    f16vec2 a_vec2;
+    f16vec3 a_vec3;
+    f16vec4 a_vec4;
+    // So the rules here are particularly nasty for any f16 matries in uniform buffers
+    // as the stride is always rounded up to 16, meaning that _every_ f16 matrix in a uniform
+    // buffer is over-aligned to what naga-ir wants.
+    // 
+    // This is https://github.com/gfx-rs/wgpu/issues/4375.
+
+    // f16mat2 a_mat2;
+    // f16mat2x3 a_mat2x3;
+    // f16mat2x4 a_mat2x4;
+    // f16mat3x2 a_mat3x2;
+    // f16mat3 a_mat3;
+    // f16mat3x4 a_mat3x4;
+    // f16mat4x2 a_mat4x2;
+    // f16mat4x3 a_mat4x3;
+    // f16mat4 a_mat4;
+};
+
+layout(set = 0, binding = 1) buffer B {
+    float16_t b_1;
+    f16vec2 b_vec2;
+    f16vec3 b_vec3;
+    f16vec4 b_vec4;
+    f16mat2 b_mat2;
+    f16mat2x3 b_mat2x3;
+    f16mat2x4 b_mat2x4;
+    f16mat3x2 b_mat3x2;
+    f16mat3 b_mat3;
+    f16mat3x4 b_mat3x4;
+    f16mat4x2 b_mat4x2;
+    f16mat4x3 b_mat4x3;
+    f16mat4 b_mat4;
+};
+
+void main() {
+    b_1 = a_1;
+    b_vec2 = a_vec2;
+    b_vec3 = a_vec3;
+    b_vec4 = a_vec4;
+    // b_mat2 = a_mat2;
+    // b_mat2x3 = a_mat2x3;
+    // b_mat2x4 = a_mat2x4;
+    // b_mat3x2 = a_mat3x2;
+    // b_mat3 = a_mat3;
+    // b_mat3x4 = a_mat3x4;
+    // b_mat4x2 = a_mat4x2;
+    // b_mat4x3 = a_mat4x3;
+    // b_mat4 = a_mat4;
+}
diff --git a/naga/tests/in/glsl/f16-glsl.toml b/naga/tests/in/glsl/f16-glsl.toml
new file mode 100644
index 0000000000..fd242d5b5b
--- /dev/null
+++ b/naga/tests/in/glsl/f16-glsl.toml
@@ -0,0 +1 @@
+god_mode = true
diff --git a/naga/tests/in/spv/f16-spv.comp b/naga/tests/in/spv/f16-spv.comp
new file mode 100644
index 0000000000..af8c89cb8a
--- /dev/null
+++ b/naga/tests/in/spv/f16-spv.comp
@@ -0,0 +1,57 @@
+#version 460
+
+#extension GL_AMD_gpu_shader_half_float: enable 
+
+layout(set = 0, binding = 0) uniform A {
+    float16_t a_1;
+    f16vec2 a_vec2;
+    f16vec3 a_vec3;
+    f16vec4 a_vec4;
+    // So the rules here are particularly nasty for any f16 matries in uniform buffers
+    // as the stride is always rounded up to 16, meaning that _every_ f16 matrix in a uniform
+    // buffer is over-aligned to what naga-ir wants.
+    // 
+    // This is https://github.com/gfx-rs/wgpu/issues/4375.
+
+    // f16mat2 a_mat2;
+    // f16mat2x3 a_mat2x3;
+    // f16mat2x4 a_mat2x4;
+    // f16mat3x2 a_mat3x2;
+    // f16mat3 a_mat3;
+    // f16mat3x4 a_mat3x4;
+    // f16mat4x2 a_mat4x2;
+    // f16mat4x3 a_mat4x3;
+    // f16mat4 a_mat4;
+};
+
+layout(set = 0, binding = 1) buffer B {
+    float16_t b_1;
+    f16vec2 b_vec2;
+    f16vec3 b_vec3;
+    f16vec4 b_vec4;
+    f16mat2 b_mat2;
+    f16mat2x3 b_mat2x3;
+    f16mat2x4 b_mat2x4;
+    f16mat3x2 b_mat3x2;
+    f16mat3 b_mat3;
+    f16mat3x4 b_mat3x4;
+    f16mat4x2 b_mat4x2;
+    f16mat4x3 b_mat4x3;
+    f16mat4 b_mat4;
+};
+
+void main() {
+    b_1 = a_1;
+    b_vec2 = a_vec2;
+    b_vec3 = a_vec3;
+    b_vec4 = a_vec4;
+    // b_mat2 = a_mat2;
+    // b_mat2x3 = a_mat2x3;
+    // b_mat2x4 = a_mat2x4;
+    // b_mat3x2 = a_mat3x2;
+    // b_mat3 = a_mat3;
+    // b_mat3x4 = a_mat3x4;
+    // b_mat4x2 = a_mat4x2;
+    // b_mat4x3 = a_mat4x3;
+    // b_mat4 = a_mat4;
+}
diff --git a/naga/tests/in/spv/f16-spv.spvasm b/naga/tests/in/spv/f16-spv.spvasm
new file mode 100644
index 0000000000..806de32754
--- /dev/null
+++ b/naga/tests/in/spv/f16-spv.spvasm
@@ -0,0 +1,130 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Google Shaderc over Glslang; 11
+; Bound: 46
+; Schema: 0
+               OpCapability Shader
+               OpCapability StorageBuffer16BitAccess
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpExtension "SPV_KHR_16bit_storage"
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpSource GLSL 460
+               OpSourceExtension "GL_AMD_gpu_shader_half_float"
+               OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+               OpSourceExtension "GL_GOOGLE_include_directive"
+               OpName %main "main"
+               OpName %B "B"
+               OpMemberName %B 0 "b_1"
+               OpMemberName %B 1 "b_vec2"
+               OpMemberName %B 2 "b_vec3"
+               OpMemberName %B 3 "b_vec4"
+               OpMemberName %B 4 "b_mat2"
+               OpMemberName %B 5 "b_mat2x3"
+               OpMemberName %B 6 "b_mat2x4"
+               OpMemberName %B 7 "b_mat3x2"
+               OpMemberName %B 8 "b_mat3"
+               OpMemberName %B 9 "b_mat3x4"
+               OpMemberName %B 10 "b_mat4x2"
+               OpMemberName %B 11 "b_mat4x3"
+               OpMemberName %B 12 "b_mat4"
+               OpName %_ ""
+               OpName %A "A"
+               OpMemberName %A 0 "a_1"
+               OpMemberName %A 1 "a_vec2"
+               OpMemberName %A 2 "a_vec3"
+               OpMemberName %A 3 "a_vec4"
+               OpName %__0 ""
+               OpDecorate %B BufferBlock
+               OpMemberDecorate %B 0 Offset 0
+               OpMemberDecorate %B 1 Offset 4
+               OpMemberDecorate %B 2 Offset 8
+               OpMemberDecorate %B 3 Offset 16
+               OpMemberDecorate %B 4 ColMajor
+               OpMemberDecorate %B 4 MatrixStride 4
+               OpMemberDecorate %B 4 Offset 24
+               OpMemberDecorate %B 5 ColMajor
+               OpMemberDecorate %B 5 MatrixStride 8
+               OpMemberDecorate %B 5 Offset 32
+               OpMemberDecorate %B 6 ColMajor
+               OpMemberDecorate %B 6 MatrixStride 8
+               OpMemberDecorate %B 6 Offset 48
+               OpMemberDecorate %B 7 ColMajor
+               OpMemberDecorate %B 7 MatrixStride 4
+               OpMemberDecorate %B 7 Offset 64
+               OpMemberDecorate %B 8 ColMajor
+               OpMemberDecorate %B 8 MatrixStride 8
+               OpMemberDecorate %B 8 Offset 80
+               OpMemberDecorate %B 9 ColMajor
+               OpMemberDecorate %B 9 MatrixStride 8
+               OpMemberDecorate %B 9 Offset 104
+               OpMemberDecorate %B 10 ColMajor
+               OpMemberDecorate %B 10 MatrixStride 4
+               OpMemberDecorate %B 10 Offset 128
+               OpMemberDecorate %B 11 ColMajor
+               OpMemberDecorate %B 11 MatrixStride 8
+               OpMemberDecorate %B 11 Offset 144
+               OpMemberDecorate %B 12 ColMajor
+               OpMemberDecorate %B 12 MatrixStride 8
+               OpMemberDecorate %B 12 Offset 176
+               OpDecorate %_ Binding 1
+               OpDecorate %_ DescriptorSet 0
+               OpDecorate %A Block
+               OpMemberDecorate %A 0 Offset 0
+               OpMemberDecorate %A 1 Offset 4
+               OpMemberDecorate %A 2 Offset 8
+               OpMemberDecorate %A 3 Offset 16
+               OpDecorate %__0 Binding 0
+               OpDecorate %__0 DescriptorSet 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+     %v3half = OpTypeVector %half 3
+     %v4half = OpTypeVector %half 4
+ %mat2v2half = OpTypeMatrix %v2half 2
+ %mat2v3half = OpTypeMatrix %v3half 2
+ %mat2v4half = OpTypeMatrix %v4half 2
+ %mat3v2half = OpTypeMatrix %v2half 3
+ %mat3v3half = OpTypeMatrix %v3half 3
+ %mat3v4half = OpTypeMatrix %v4half 3
+ %mat4v2half = OpTypeMatrix %v2half 4
+ %mat4v3half = OpTypeMatrix %v3half 4
+ %mat4v4half = OpTypeMatrix %v4half 4
+          %B = OpTypeStruct %half %v2half %v3half %v4half %mat2v2half %mat2v3half %mat2v4half %mat3v2half %mat3v3half %mat3v4half %mat4v2half %mat4v3half %mat4v4half
+%_ptr_Uniform_B = OpTypePointer Uniform %B
+          %_ = OpVariable %_ptr_Uniform_B Uniform
+        %int = OpTypeInt 32 1
+      %int_0 = OpConstant %int 0
+          %A = OpTypeStruct %half %v2half %v3half %v4half
+%_ptr_Uniform_A = OpTypePointer Uniform %A
+        %__0 = OpVariable %_ptr_Uniform_A Uniform
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+      %int_3 = OpConstant %int 3
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %28 = OpAccessChain %_ptr_Uniform_half %__0 %int_0
+         %29 = OpLoad %half %28
+         %30 = OpAccessChain %_ptr_Uniform_half %_ %int_0
+               OpStore %30 %29
+         %33 = OpAccessChain %_ptr_Uniform_v2half %__0 %int_1
+         %34 = OpLoad %v2half %33
+         %35 = OpAccessChain %_ptr_Uniform_v2half %_ %int_1
+               OpStore %35 %34
+         %38 = OpAccessChain %_ptr_Uniform_v3half %__0 %int_2
+         %39 = OpLoad %v3half %38
+         %40 = OpAccessChain %_ptr_Uniform_v3half %_ %int_2
+               OpStore %40 %39
+         %43 = OpAccessChain %_ptr_Uniform_v4half %__0 %int_3
+         %44 = OpLoad %v4half %43
+         %45 = OpAccessChain %_ptr_Uniform_v4half %_ %int_3
+               OpStore %45 %44
+               OpReturn
+               OpFunctionEnd
diff --git a/naga/tests/in/spv/f16-spv.toml b/naga/tests/in/spv/f16-spv.toml
new file mode 100644
index 0000000000..fd242d5b5b
--- /dev/null
+++ b/naga/tests/in/spv/f16-spv.toml
@@ -0,0 +1 @@
+god_mode = true
diff --git a/naga/tests/in/wgsl/extra.wgsl b/naga/tests/in/wgsl/extra.wgsl
index ef68f4aa80..1a7ab91ff5 100644
--- a/naga/tests/in/wgsl/extra.wgsl
+++ b/naga/tests/in/wgsl/extra.wgsl
@@ -1,6 +1,6 @@
 struct PushConstants {
     index: u32,
-    double: vec2<f64>,
+    double: vec2<f32>,
 }
 var<push_constant> pc: PushConstants;
 
diff --git a/naga/tests/in/wgsl/f16.toml b/naga/tests/in/wgsl/f16.toml
new file mode 100644
index 0000000000..04b61f6b30
--- /dev/null
+++ b/naga/tests/in/wgsl/f16.toml
@@ -0,0 +1,15 @@
+# No GLSL support for f16
+targets = "SPIRV | METAL | HLSL | WGSL"
+god_mode = true
+
+[spv]
+version = [1, 0]
+
+[hlsl]
+shader_model = "V6_2"
+special_constants_binding = { space = 1, register = 0 }
+push_constants_target = { space = 0, register = 0 }
+
+lang_version = [1, 0]
+per_entry_point_map = {}
+zero_initialize_workgroup_memory = true
diff --git a/naga/tests/in/wgsl/f16.wgsl b/naga/tests/in/wgsl/f16.wgsl
new file mode 100644
index 0000000000..dbef9aedfc
--- /dev/null
+++ b/naga/tests/in/wgsl/f16.wgsl
@@ -0,0 +1,127 @@
+enable f16;
+
+var<private> private_variable: f16 = 1h;
+const constant_variable: f16 = f16(15.2);
+
+struct UniformCompatible {
+   // Other types
+   val_u32: u32,
+   val_i32: i32,
+   val_f32: f32,
+
+   // f16
+   val_f16: f16,
+   val_f16_2: vec2<f16>,
+   val_f16_3: vec3<f16>,
+   val_f16_4: vec4<f16>,
+   final_value: f16,
+
+   val_mat2x2: mat2x2<f16>,
+   val_mat2x3: mat2x3<f16>,
+   val_mat2x4: mat2x4<f16>,
+   val_mat3x2: mat3x2<f16>,
+   val_mat3x3: mat3x3<f16>,
+   val_mat3x4: mat3x4<f16>,
+   val_mat4x2: mat4x2<f16>,
+   val_mat4x3: mat4x3<f16>,
+   val_mat4x4: mat4x4<f16>,
+}
+
+struct StorageCompatible {
+   val_f16_array_2: array<f16, 2>,
+}
+
+struct LayoutTest {
+   scalar1: f16, scalar2: f16, v3: vec3<f16>, tuck_in: f16, scalar4: f16, larger: u32
+}
+
+@group(0) @binding(0)
+var<uniform> input_uniform: UniformCompatible;
+
+@group(0) @binding(1)
+var<storage> input_storage: UniformCompatible;
+
+@group(0) @binding(2)
+var<storage> input_arrays: StorageCompatible;
+
+@group(0) @binding(3)
+var<storage, read_write> output: UniformCompatible;
+
+@group(0) @binding(4)
+var<storage, read_write> output_arrays: StorageCompatible;
+
+fn f16_function(x: f16) -> f16 {
+   var val: f16 = f16(constant_variable);
+   // A number too big for f16
+   val += 1h - 33333h;
+   // Constructing an f16 from an AbstractInt
+   val += val + f16(5.);
+   // Constructing a f16 from other types and other types from f16.
+   val += f16(input_uniform.val_f32 + f32(val));
+   // Constructing a vec3<i64> from a i64
+   val += vec3<f16>(input_uniform.val_f16).z;
+
+   // Reading/writing to a uniform/storage buffer
+   output.val_f16 = input_uniform.val_f16 + input_storage.val_f16;
+   output.val_f16_2 = input_uniform.val_f16_2 + input_storage.val_f16_2;
+   output.val_f16_3 = input_uniform.val_f16_3 + input_storage.val_f16_3;
+   output.val_f16_4 = input_uniform.val_f16_4 + input_storage.val_f16_4;
+
+   output.val_mat2x2 = input_uniform.val_mat2x2 + input_storage.val_mat2x2;
+   output.val_mat2x3 = input_uniform.val_mat2x3 + input_storage.val_mat2x3;
+   output.val_mat2x4 = input_uniform.val_mat2x4 + input_storage.val_mat2x4;
+   output.val_mat3x2 = input_uniform.val_mat3x2 + input_storage.val_mat3x2;
+   output.val_mat3x3 = input_uniform.val_mat3x3 + input_storage.val_mat3x3;
+   output.val_mat3x4 = input_uniform.val_mat3x4 + input_storage.val_mat3x4;
+   output.val_mat4x2 = input_uniform.val_mat4x2 + input_storage.val_mat4x2;
+   output.val_mat4x3 = input_uniform.val_mat4x3 + input_storage.val_mat4x3;
+   output.val_mat4x4 = input_uniform.val_mat4x4 + input_storage.val_mat4x4;
+
+   output_arrays.val_f16_array_2 = input_arrays.val_f16_array_2;
+
+   // We make sure not to use 32 in these arguments, so it's clear in the results which are builtin
+   // constants based on the size of the type, and which are arguments.
+
+   // Numeric functions
+   val += abs(val);
+   val += clamp(val, val, val);
+   val += dot(vec2(val), vec2(val));
+   val += max(val, val);
+   val += min(val, val);
+   val += sign(val);
+   
+   val += f16(1.0);
+
+   // We use the shorthand aliases here to ensure the aliases
+   // work correctly.
+
+   // Cast vectors to/from f32
+   let float_vec2 = vec2f(input_uniform.val_f16_2);
+   output.val_f16_2 = vec2h(float_vec2);
+
+   let float_vec3 = vec3f(input_uniform.val_f16_3);
+   output.val_f16_3 = vec3h(float_vec3);
+
+   let float_vec4 = vec4f(input_uniform.val_f16_4);
+   output.val_f16_4 = vec4h(float_vec4);
+
+   // Cast matrices to/from f32
+   output.val_mat2x2 = mat2x2h(mat2x2f(input_uniform.val_mat2x2));
+   output.val_mat2x3 = mat2x3h(mat2x3f(input_uniform.val_mat2x3));
+   output.val_mat2x4 = mat2x4h(mat2x4f(input_uniform.val_mat2x4));
+   output.val_mat3x2 = mat3x2h(mat3x2f(input_uniform.val_mat3x2));
+   output.val_mat3x3 = mat3x3h(mat3x3f(input_uniform.val_mat3x3));
+   output.val_mat3x4 = mat3x4h(mat3x4f(input_uniform.val_mat3x4));
+   output.val_mat4x2 = mat4x2h(mat4x2f(input_uniform.val_mat4x2));
+   output.val_mat4x3 = mat4x3h(mat4x3f(input_uniform.val_mat4x3));
+   output.val_mat4x4 = mat4x4h(mat4x4f(input_uniform.val_mat4x4));
+
+   // Make sure all the variables are used.
+   return val;
+}
+
+@compute @workgroup_size(1)
+fn main() {
+   output.final_value = f16_function(2h);
+}
+
diff --git a/naga/tests/out/hlsl/f16.hlsl b/naga/tests/out/hlsl/f16.hlsl
new file mode 100644
index 0000000000..05e2387212
--- /dev/null
+++ b/naga/tests/out/hlsl/f16.hlsl
@@ -0,0 +1,351 @@
+struct NagaConstants {
+    int first_vertex;
+    int first_instance;
+    uint other;
+};
+ConstantBuffer<NagaConstants> _NagaConstants: register(b0, space1);
+
+struct UniformCompatible {
+    uint val_u32_;
+    int val_i32_;
+    float val_f32_;
+    half val_f16_;
+    half2 val_f16_2_;
+    int _pad5_0;
+    half3 val_f16_3_;
+    half4 val_f16_4_;
+    half final_value;
+    half2 val_mat2x2__0; half2 val_mat2x2__1;
+    int _pad9_0;
+    row_major half2x3 val_mat2x3_;
+    row_major half2x4 val_mat2x4_;
+    half2 val_mat3x2__0; half2 val_mat3x2__1; half2 val_mat3x2__2;
+    int _pad12_0;
+    row_major half3x3 val_mat3x3_;
+    row_major half3x4 val_mat3x4_;
+    half2 val_mat4x2__0; half2 val_mat4x2__1; half2 val_mat4x2__2; half2 val_mat4x2__3;
+    row_major half4x3 val_mat4x3_;
+    row_major half4x4 val_mat4x4_;
+};
+
+struct StorageCompatible {
+    half val_f16_array_2_[2];
+};
+
+struct LayoutTest {
+    half scalar1_;
+    half scalar2_;
+    int _pad2_0;
+    half3 v3_;
+    half tuck_in;
+    half scalar4_;
+    uint larger;
+};
+
+static const half constant_variable = 15.203125h;
+
+static half private_variable = 1.0h;
+cbuffer input_uniform : register(b0) { UniformCompatible input_uniform; }
+ByteAddressBuffer input_storage : register(t1);
+ByteAddressBuffer input_arrays : register(t2);
+RWByteAddressBuffer output : register(u3);
+RWByteAddressBuffer output_arrays : register(u4);
+
+half2x2 GetMatval_mat2x2_OnUniformCompatible(UniformCompatible obj) {
+    return half2x2(obj.val_mat2x2__0, obj.val_mat2x2__1);
+}
+
+void SetMatval_mat2x2_OnUniformCompatible(UniformCompatible obj, half2x2 mat) {
+    obj.val_mat2x2__0 = mat[0];
+    obj.val_mat2x2__1 = mat[1];
+}
+
+void SetMatVecval_mat2x2_OnUniformCompatible(UniformCompatible obj, half2 vec, uint mat_idx) {
+    switch(mat_idx) {
+    case 0: { obj.val_mat2x2__0 = vec; break; }
+    case 1: { obj.val_mat2x2__1 = vec; break; }
+    }
+}
+
+void SetMatScalarval_mat2x2_OnUniformCompatible(UniformCompatible obj, half scalar, uint mat_idx, uint vec_idx) {
+    switch(mat_idx) {
+    case 0: { obj.val_mat2x2__0[vec_idx] = scalar; break; }
+    case 1: { obj.val_mat2x2__1[vec_idx] = scalar; break; }
+    }
+}
+
+half3x2 GetMatval_mat3x2_OnUniformCompatible(UniformCompatible obj) {
+    return half3x2(obj.val_mat3x2__0, obj.val_mat3x2__1, obj.val_mat3x2__2);
+}
+
+void SetMatval_mat3x2_OnUniformCompatible(UniformCompatible obj, half3x2 mat) {
+    obj.val_mat3x2__0 = mat[0];
+    obj.val_mat3x2__1 = mat[1];
+    obj.val_mat3x2__2 = mat[2];
+}
+
+void SetMatVecval_mat3x2_OnUniformCompatible(UniformCompatible obj, half2 vec, uint mat_idx) {
+    switch(mat_idx) {
+    case 0: { obj.val_mat3x2__0 = vec; break; }
+    case 1: { obj.val_mat3x2__1 = vec; break; }
+    case 2: { obj.val_mat3x2__2 = vec; break; }
+    }
+}
+
+void SetMatScalarval_mat3x2_OnUniformCompatible(UniformCompatible obj, half scalar, uint mat_idx, uint vec_idx) {
+    switch(mat_idx) {
+    case 0: { obj.val_mat3x2__0[vec_idx] = scalar; break; }
+    case 1: { obj.val_mat3x2__1[vec_idx] = scalar; break; }
+    case 2: { obj.val_mat3x2__2[vec_idx] = scalar; break; }
+    }
+}
+
+half4x2 GetMatval_mat4x2_OnUniformCompatible(UniformCompatible obj) {
+    return half4x2(obj.val_mat4x2__0, obj.val_mat4x2__1, obj.val_mat4x2__2, obj.val_mat4x2__3);
+}
+
+void SetMatval_mat4x2_OnUniformCompatible(UniformCompatible obj, half4x2 mat) {
+    obj.val_mat4x2__0 = mat[0];
+    obj.val_mat4x2__1 = mat[1];
+    obj.val_mat4x2__2 = mat[2];
+    obj.val_mat4x2__3 = mat[3];
+}
+
+void SetMatVecval_mat4x2_OnUniformCompatible(UniformCompatible obj, half2 vec, uint mat_idx) {
+    switch(mat_idx) {
+    case 0: { obj.val_mat4x2__0 = vec; break; }
+    case 1: { obj.val_mat4x2__1 = vec; break; }
+    case 2: { obj.val_mat4x2__2 = vec; break; }
+    case 3: { obj.val_mat4x2__3 = vec; break; }
+    }
+}
+
+void SetMatScalarval_mat4x2_OnUniformCompatible(UniformCompatible obj, half scalar, uint mat_idx, uint vec_idx) {
+    switch(mat_idx) {
+    case 0: { obj.val_mat4x2__0[vec_idx] = scalar; break; }
+    case 1: { obj.val_mat4x2__1[vec_idx] = scalar; break; }
+    case 2: { obj.val_mat4x2__2[vec_idx] = scalar; break; }
+    case 3: { obj.val_mat4x2__3[vec_idx] = scalar; break; }
+    }
+}
+
+typedef half ret_Constructarray2_half_[2];
+ret_Constructarray2_half_ Constructarray2_half_(half arg0, half arg1) {
+    half ret[2] = { arg0, arg1 };
+    return ret;
+}
+
+half f16_function(half x)
+{
+    half val = 15.203125h;
+
+    half _e4 = val;
+    val = (_e4 + -33344.0h);
+    half _e6 = val;
+    half _e9 = val;
+    val = (_e9 + (_e6 + 5.0h));
+    float _e13 = input_uniform.val_f32_;
+    half _e14 = val;
+    half _e18 = val;
+    val = (_e18 + half((_e13 + float(_e14))));
+    half _e22 = input_uniform.val_f16_;
+    half _e25 = val;
+    val = (_e25 + (_e22).xxx.z);
+    half _e31 = input_uniform.val_f16_;
+    half _e34 = input_storage.Load<half>(12);
+    output.Store(12, (_e31 + _e34));
+    half2 _e40 = input_uniform.val_f16_2_;
+    half2 _e43 = input_storage.Load<half2>(16);
+    output.Store(16, (_e40 + _e43));
+    half3 _e49 = input_uniform.val_f16_3_;
+    half3 _e52 = input_storage.Load<half3>(24);
+    output.Store(24, (_e49 + _e52));
+    half4 _e58 = input_uniform.val_f16_4_;
+    half4 _e61 = input_storage.Load<half4>(32);
+    output.Store(32, (_e58 + _e61));
+    half2x2 _e67 = GetMatval_mat2x2_OnUniformCompatible(input_uniform);
+    half2x2 _e70 = half2x2(input_storage.Load<half2>(44+0), input_storage.Load<half2>(44+4));
+    {
+        half2x2 _value2 = (_e67 + _e70);
+        output.Store(44+0, _value2[0]);
+        output.Store(44+4, _value2[1]);
+    }
+    half2x3 _e76 = input_uniform.val_mat2x3_;
+    half2x3 _e79 = half2x3(input_storage.Load<half3>(56+0), input_storage.Load<half3>(56+8));
+    {
+        half2x3 _value2 = (_e76 + _e79);
+        output.Store(56+0, _value2[0]);
+        output.Store(56+8, _value2[1]);
+    }
+    half2x4 _e85 = input_uniform.val_mat2x4_;
+    half2x4 _e88 = half2x4(input_storage.Load<half4>(72+0), input_storage.Load<half4>(72+8));
+    {
+        half2x4 _value2 = (_e85 + _e88);
+        output.Store(72+0, _value2[0]);
+        output.Store(72+8, _value2[1]);
+    }
+    half3x2 _e94 = GetMatval_mat3x2_OnUniformCompatible(input_uniform);
+    half3x2 _e97 = half3x2(input_storage.Load<half2>(88+0), input_storage.Load<half2>(88+4), input_storage.Load<half2>(88+8));
+    {
+        half3x2 _value2 = (_e94 + _e97);
+        output.Store(88+0, _value2[0]);
+        output.Store(88+4, _value2[1]);
+        output.Store(88+8, _value2[2]);
+    }
+    half3x3 _e103 = input_uniform.val_mat3x3_;
+    half3x3 _e106 = half3x3(input_storage.Load<half3>(104+0), input_storage.Load<half3>(104+8), input_storage.Load<half3>(104+16));
+    {
+        half3x3 _value2 = (_e103 + _e106);
+        output.Store(104+0, _value2[0]);
+        output.Store(104+8, _value2[1]);
+        output.Store(104+16, _value2[2]);
+    }
+    half3x4 _e112 = input_uniform.val_mat3x4_;
+    half3x4 _e115 = half3x4(input_storage.Load<half4>(128+0), input_storage.Load<half4>(128+8), input_storage.Load<half4>(128+16));
+    {
+        half3x4 _value2 = (_e112 + _e115);
+        output.Store(128+0, _value2[0]);
+        output.Store(128+8, _value2[1]);
+        output.Store(128+16, _value2[2]);
+    }
+    half4x2 _e121 = GetMatval_mat4x2_OnUniformCompatible(input_uniform);
+    half4x2 _e124 = half4x2(input_storage.Load<half2>(152+0), input_storage.Load<half2>(152+4), input_storage.Load<half2>(152+8), input_storage.Load<half2>(152+12));
+    {
+        half4x2 _value2 = (_e121 + _e124);
+        output.Store(152+0, _value2[0]);
+        output.Store(152+4, _value2[1]);
+        output.Store(152+8, _value2[2]);
+        output.Store(152+12, _value2[3]);
+    }
+    half4x3 _e130 = input_uniform.val_mat4x3_;
+    half4x3 _e133 = half4x3(input_storage.Load<half3>(168+0), input_storage.Load<half3>(168+8), input_storage.Load<half3>(168+16), input_storage.Load<half3>(168+24));
+    {
+        half4x3 _value2 = (_e130 + _e133);
+        output.Store(168+0, _value2[0]);
+        output.Store(168+8, _value2[1]);
+        output.Store(168+16, _value2[2]);
+        output.Store(168+24, _value2[3]);
+    }
+    half4x4 _e139 = input_uniform.val_mat4x4_;
+    half4x4 _e142 = half4x4(input_storage.Load<half4>(200+0), input_storage.Load<half4>(200+8), input_storage.Load<half4>(200+16), input_storage.Load<half4>(200+24));
+    {
+        half4x4 _value2 = (_e139 + _e142);
+        output.Store(200+0, _value2[0]);
+        output.Store(200+8, _value2[1]);
+        output.Store(200+16, _value2[2]);
+        output.Store(200+24, _value2[3]);
+    }
+    half _e148[2] = Constructarray2_half_(input_arrays.Load<half>(0+0), input_arrays.Load<half>(0+2));
+    {
+        half _value2[2] = _e148;
+        output_arrays.Store(0+0, _value2[0]);
+        output_arrays.Store(0+2, _value2[1]);
+    }
+    half _e149 = val;
+    half _e151 = val;
+    val = (_e151 + abs(_e149));
+    half _e153 = val;
+    half _e154 = val;
+    half _e155 = val;
+    half _e157 = val;
+    val = (_e157 + clamp(_e153, _e154, _e155));
+    half _e159 = val;
+    half _e161 = val;
+    half _e164 = val;
+    val = (_e164 + dot((_e159).xx, (_e161).xx));
+    half _e166 = val;
+    half _e167 = val;
+    half _e169 = val;
+    val = (_e169 + max(_e166, _e167));
+    half _e171 = val;
+    half _e172 = val;
+    half _e174 = val;
+    val = (_e174 + min(_e171, _e172));
+    half _e176 = val;
+    half _e178 = val;
+    val = (_e178 + sign(_e176));
+    half _e181 = val;
+    val = (_e181 + 1.0h);
+    half2 _e185 = input_uniform.val_f16_2_;
+    float2 float_vec2_ = float2(_e185);
+    output.Store(16, half2(float_vec2_));
+    half3 _e192 = input_uniform.val_f16_3_;
+    float3 float_vec3_ = float3(_e192);
+    output.Store(24, half3(float_vec3_));
+    half4 _e199 = input_uniform.val_f16_4_;
+    float4 float_vec4_ = float4(_e199);
+    output.Store(32, half4(float_vec4_));
+    half2x2 _e208 = GetMatval_mat2x2_OnUniformCompatible(input_uniform);
+    {
+        half2x2 _value2 = half2x2(float2x2(_e208));
+        output.Store(44+0, _value2[0]);
+        output.Store(44+4, _value2[1]);
+    }
+    half2x3 _e215 = input_uniform.val_mat2x3_;
+    {
+        half2x3 _value2 = half2x3(float2x3(_e215));
+        output.Store(56+0, _value2[0]);
+        output.Store(56+8, _value2[1]);
+    }
+    half2x4 _e222 = input_uniform.val_mat2x4_;
+    {
+        half2x4 _value2 = half2x4(float2x4(_e222));
+        output.Store(72+0, _value2[0]);
+        output.Store(72+8, _value2[1]);
+    }
+    half3x2 _e229 = GetMatval_mat3x2_OnUniformCompatible(input_uniform);
+    {
+        half3x2 _value2 = half3x2(float3x2(_e229));
+        output.Store(88+0, _value2[0]);
+        output.Store(88+4, _value2[1]);
+        output.Store(88+8, _value2[2]);
+    }
+    half3x3 _e236 = input_uniform.val_mat3x3_;
+    {
+        half3x3 _value2 = half3x3(float3x3(_e236));
+        output.Store(104+0, _value2[0]);
+        output.Store(104+8, _value2[1]);
+        output.Store(104+16, _value2[2]);
+    }
+    half3x4 _e243 = input_uniform.val_mat3x4_;
+    {
+        half3x4 _value2 = half3x4(float3x4(_e243));
+        output.Store(128+0, _value2[0]);
+        output.Store(128+8, _value2[1]);
+        output.Store(128+16, _value2[2]);
+    }
+    half4x2 _e250 = GetMatval_mat4x2_OnUniformCompatible(input_uniform);
+    {
+        half4x2 _value2 = half4x2(float4x2(_e250));
+        output.Store(152+0, _value2[0]);
+        output.Store(152+4, _value2[1]);
+        output.Store(152+8, _value2[2]);
+        output.Store(152+12, _value2[3]);
+    }
+    half4x3 _e257 = input_uniform.val_mat4x3_;
+    {
+        half4x3 _value2 = half4x3(float4x3(_e257));
+        output.Store(168+0, _value2[0]);
+        output.Store(168+8, _value2[1]);
+        output.Store(168+16, _value2[2]);
+        output.Store(168+24, _value2[3]);
+    }
+    half4x4 _e264 = input_uniform.val_mat4x4_;
+    {
+        half4x4 _value2 = half4x4(float4x4(_e264));
+        output.Store(200+0, _value2[0]);
+        output.Store(200+8, _value2[1]);
+        output.Store(200+16, _value2[2]);
+        output.Store(200+24, _value2[3]);
+    }
+    half _e267 = val;
+    return _e267;
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    const half _e3 = f16_function(2.0h);
+    output.Store(40, _e3);
+    return;
+}
diff --git a/naga/tests/out/hlsl/f16.ron b/naga/tests/out/hlsl/f16.ron
new file mode 100644
index 0000000000..b396a4626e
--- /dev/null
+++ b/naga/tests/out/hlsl/f16.ron
@@ -0,0 +1,12 @@
+(
+    vertex:[
+    ],
+    fragment:[
+    ],
+    compute:[
+        (
+            entry_point:"main",
+            target_profile:"cs_6_2",
+        ),
+    ],
+)
diff --git a/naga/tests/out/msl/extra.msl b/naga/tests/out/msl/extra.msl
index 8288dfad92..4d6bb568f3 100644
--- a/naga/tests/out/msl/extra.msl
+++ b/naga/tests/out/msl/extra.msl
@@ -6,7 +6,7 @@ using metal::uint;
 
 struct PushConstants {
     uint index;
-    char _pad1[12];
+    char _pad1[4];
     metal::float2 double_;
 };
 struct FragmentIn {
diff --git a/naga/tests/out/msl/f16.msl b/naga/tests/out/msl/f16.msl
new file mode 100644
index 0000000000..40be58e901
--- /dev/null
+++ b/naga/tests/out/msl/f16.msl
@@ -0,0 +1,177 @@
+// language: metal1.0
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using metal::uint;
+
+struct UniformCompatible {
+    uint val_u32_;
+    int val_i32_;
+    float val_f32_;
+    half val_f16_;
+    char _pad4[2];
+    metal::half2 val_f16_2_;
+    char _pad5[4];
+    metal::half3 val_f16_3_;
+    metal::half4 val_f16_4_;
+    half final_value;
+    char _pad8[2];
+    metal::half2x2 val_mat2x2_;
+    char _pad9[4];
+    metal::half2x3 val_mat2x3_;
+    metal::half2x4 val_mat2x4_;
+    metal::half3x2 val_mat3x2_;
+    char _pad12[4];
+    metal::half3x3 val_mat3x3_;
+    metal::half3x4 val_mat3x4_;
+    metal::half4x2 val_mat4x2_;
+    metal::half4x3 val_mat4x3_;
+    metal::half4x4 val_mat4x4_;
+};
+struct type_16 {
+    half inner[2];
+};
+struct StorageCompatible {
+    type_16 val_f16_array_2_;
+};
+struct LayoutTest {
+    half scalar1_;
+    half scalar2_;
+    char _pad2[4];
+    metal::packed_half3 v3_;
+    half tuck_in;
+    half scalar4_;
+    char _pad5[2];
+    uint larger;
+};
+constant half constant_variable = 15.203125h;
+
+half f16_function(
+    half x,
+    constant UniformCompatible& input_uniform,
+    device UniformCompatible const& input_storage,
+    device StorageCompatible const& input_arrays,
+    device UniformCompatible& output,
+    device StorageCompatible& output_arrays
+) {
+    half val = 15.203125h;
+    half _e4 = val;
+    val = _e4 + -33344.0h;
+    half _e6 = val;
+    half _e9 = val;
+    val = _e9 + (_e6 + 5.0h);
+    float _e13 = input_uniform.val_f32_;
+    half _e14 = val;
+    half _e18 = val;
+    val = _e18 + static_cast<half>(_e13 + static_cast<float>(_e14));
+    half _e22 = input_uniform.val_f16_;
+    half _e25 = val;
+    val = _e25 + metal::half3(_e22).z;
+    half _e31 = input_uniform.val_f16_;
+    half _e34 = input_storage.val_f16_;
+    output.val_f16_ = _e31 + _e34;
+    metal::half2 _e40 = input_uniform.val_f16_2_;
+    metal::half2 _e43 = input_storage.val_f16_2_;
+    output.val_f16_2_ = _e40 + _e43;
+    metal::half3 _e49 = input_uniform.val_f16_3_;
+    metal::half3 _e52 = input_storage.val_f16_3_;
+    output.val_f16_3_ = _e49 + _e52;
+    metal::half4 _e58 = input_uniform.val_f16_4_;
+    metal::half4 _e61 = input_storage.val_f16_4_;
+    output.val_f16_4_ = _e58 + _e61;
+    metal::half2x2 _e67 = input_uniform.val_mat2x2_;
+    metal::half2x2 _e70 = input_storage.val_mat2x2_;
+    output.val_mat2x2_ = _e67 + _e70;
+    metal::half2x3 _e76 = input_uniform.val_mat2x3_;
+    metal::half2x3 _e79 = input_storage.val_mat2x3_;
+    output.val_mat2x3_ = _e76 + _e79;
+    metal::half2x4 _e85 = input_uniform.val_mat2x4_;
+    metal::half2x4 _e88 = input_storage.val_mat2x4_;
+    output.val_mat2x4_ = _e85 + _e88;
+    metal::half3x2 _e94 = input_uniform.val_mat3x2_;
+    metal::half3x2 _e97 = input_storage.val_mat3x2_;
+    output.val_mat3x2_ = _e94 + _e97;
+    metal::half3x3 _e103 = input_uniform.val_mat3x3_;
+    metal::half3x3 _e106 = input_storage.val_mat3x3_;
+    output.val_mat3x3_ = _e103 + _e106;
+    metal::half3x4 _e112 = input_uniform.val_mat3x4_;
+    metal::half3x4 _e115 = input_storage.val_mat3x4_;
+    output.val_mat3x4_ = _e112 + _e115;
+    metal::half4x2 _e121 = input_uniform.val_mat4x2_;
+    metal::half4x2 _e124 = input_storage.val_mat4x2_;
+    output.val_mat4x2_ = _e121 + _e124;
+    metal::half4x3 _e130 = input_uniform.val_mat4x3_;
+    metal::half4x3 _e133 = input_storage.val_mat4x3_;
+    output.val_mat4x3_ = _e130 + _e133;
+    metal::half4x4 _e139 = input_uniform.val_mat4x4_;
+    metal::half4x4 _e142 = input_storage.val_mat4x4_;
+    output.val_mat4x4_ = _e139 + _e142;
+    type_16 _e148 = input_arrays.val_f16_array_2_;
+    output_arrays.val_f16_array_2_ = _e148;
+    half _e149 = val;
+    half _e151 = val;
+    val = _e151 + metal::abs(_e149);
+    half _e153 = val;
+    half _e154 = val;
+    half _e155 = val;
+    half _e157 = val;
+    val = _e157 + metal::clamp(_e153, _e154, _e155);
+    half _e159 = val;
+    half _e161 = val;
+    half _e164 = val;
+    val = _e164 + metal::dot(metal::half2(_e159), metal::half2(_e161));
+    half _e166 = val;
+    half _e167 = val;
+    half _e169 = val;
+    val = _e169 + metal::max(_e166, _e167);
+    half _e171 = val;
+    half _e172 = val;
+    half _e174 = val;
+    val = _e174 + metal::min(_e171, _e172);
+    half _e176 = val;
+    half _e178 = val;
+    val = _e178 + metal::sign(_e176);
+    half _e181 = val;
+    val = _e181 + 1.0h;
+    metal::half2 _e185 = input_uniform.val_f16_2_;
+    metal::float2 float_vec2_ = static_cast<metal::float2>(_e185);
+    output.val_f16_2_ = static_cast<metal::half2>(float_vec2_);
+    metal::half3 _e192 = input_uniform.val_f16_3_;
+    metal::float3 float_vec3_ = static_cast<metal::float3>(_e192);
+    output.val_f16_3_ = static_cast<metal::half3>(float_vec3_);
+    metal::half4 _e199 = input_uniform.val_f16_4_;
+    metal::float4 float_vec4_ = static_cast<metal::float4>(_e199);
+    output.val_f16_4_ = static_cast<metal::half4>(float_vec4_);
+    metal::half2x2 _e208 = input_uniform.val_mat2x2_;
+    output.val_mat2x2_ = metal::half2x2(metal::float2x2(_e208));
+    metal::half2x3 _e215 = input_uniform.val_mat2x3_;
+    output.val_mat2x3_ = metal::half2x3(metal::float2x3(_e215));
+    metal::half2x4 _e222 = input_uniform.val_mat2x4_;
+    output.val_mat2x4_ = metal::half2x4(metal::float2x4(_e222));
+    metal::half3x2 _e229 = input_uniform.val_mat3x2_;
+    output.val_mat3x2_ = metal::half3x2(metal::float3x2(_e229));
+    metal::half3x3 _e236 = input_uniform.val_mat3x3_;
+    output.val_mat3x3_ = metal::half3x3(metal::float3x3(_e236));
+    metal::half3x4 _e243 = input_uniform.val_mat3x4_;
+    output.val_mat3x4_ = metal::half3x4(metal::float3x4(_e243));
+    metal::half4x2 _e250 = input_uniform.val_mat4x2_;
+    output.val_mat4x2_ = metal::half4x2(metal::float4x2(_e250));
+    metal::half4x3 _e257 = input_uniform.val_mat4x3_;
+    output.val_mat4x3_ = metal::half4x3(metal::float4x3(_e257));
+    metal::half4x4 _e264 = input_uniform.val_mat4x4_;
+    output.val_mat4x4_ = metal::half4x4(metal::float4x4(_e264));
+    half _e267 = val;
+    return _e267;
+}
+
+kernel void main_(
+  constant UniformCompatible& input_uniform [[user(fake0)]]
+, device UniformCompatible const& input_storage [[user(fake0)]]
+, device StorageCompatible const& input_arrays [[user(fake0)]]
+, device UniformCompatible& output [[user(fake0)]]
+, device StorageCompatible& output_arrays [[user(fake0)]]
+) {
+    half _e3 = f16_function(2.0h, input_uniform, input_storage, input_arrays, output, output_arrays);
+    output.final_value = _e3;
+    return;
+}
diff --git a/naga/tests/out/spv/atomicCompareExchange-int64.spvasm b/naga/tests/out/spv/atomicCompareExchange-int64.spvasm
index 2697eee855..b7dbc9e0fc 100644
--- a/naga/tests/out/spv/atomicCompareExchange-int64.spvasm
+++ b/naga/tests/out/spv/atomicCompareExchange-int64.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.0
 ; Generator: rspirv
-; Bound: 175
+; Bound: 173
 OpCapability Shader
 OpCapability Int64Atomics
 OpCapability Int64
@@ -9,9 +9,9 @@ OpExtension "SPV_KHR_storage_buffer_storage_class"
 %1 = OpExtInstImport "GLSL.std.450"
 OpMemoryModel Logical GLSL450
 OpEntryPoint GLCompute %19 "test_atomic_compare_exchange_i64"
-OpEntryPoint GLCompute %105 "test_atomic_compare_exchange_u64"
+OpEntryPoint GLCompute %104 "test_atomic_compare_exchange_u64"
 OpExecutionMode %19 LocalSize 1 1 1
-OpExecutionMode %105 LocalSize 1 1 1
+OpExecutionMode %104 LocalSize 1 1 1
 OpDecorate %5 ArrayStride 8
 OpDecorate %8 ArrayStride 8
 OpMemberDecorate %10 0 Offset 0
@@ -63,12 +63,12 @@ OpMemberDecorate %16 0 Offset 0
 %68 = OpTypeInt 32 1
 %67 = OpConstant  %68  1
 %69 = OpConstant  %3  64
-%106 = OpTypePointer StorageBuffer %8
-%108 = OpConstant  %7  10
-%111 = OpTypePointer Function %7
-%112 = OpConstantNull  %7
-%114 = OpConstantNull  %9
-%138 = OpTypePointer StorageBuffer %7
+%105 = OpTypePointer StorageBuffer %8
+%107 = OpConstant  %7  10
+%110 = OpTypePointer Function %7
+%111 = OpConstantNull  %7
+%113 = OpConstantNull  %9
+%137 = OpTypePointer StorageBuffer %7
 %19 = OpFunction  %2  None %20
 %18 = OpLabel
 %27 = OpVariable  %28  Function %22
@@ -142,16 +142,16 @@ OpBranch %89
 %89 = OpLabel
 %91 = OpLoad  %4  %29
 %92 = OpIAdd  %4  %91 %25
-%94 = OpLoad  %3  %27
-%95 = OpLoad  %4  %29
-%97 = OpAccessChain  %64  %23 %94
-%98 = OpAtomicCompareExchange  %4  %97 %67 %69 %69 %92 %95
-%99 = OpIEqual  %9  %98 %95
-%96 = OpCompositeConstruct  %10  %98 %99
-%100 = OpCompositeExtract  %4  %96 0
-OpStore %29 %100
-%101 = OpCompositeExtract  %9  %96 1
-OpStore %32 %101
+%93 = OpLoad  %3  %27
+%94 = OpLoad  %4  %29
+%96 = OpAccessChain  %64  %23 %93
+%97 = OpAtomicCompareExchange  %4  %96 %67 %69 %69 %92 %94
+%98 = OpIEqual  %9  %97 %94
+%95 = OpCompositeConstruct  %10  %97 %98
+%99 = OpCompositeExtract  %4  %95 0
+OpStore %29 %99
+%100 = OpCompositeExtract  %9  %95 1
+OpStore %32 %100
 OpBranch %90
 %90 = OpLabel
 OpBranch %73
@@ -162,110 +162,110 @@ OpBranch %62
 %62 = OpLabel
 OpBranch %39
 %39 = OpLabel
-%102 = OpLoad  %3  %27
-%103 = OpIAdd  %3  %102 %26
-OpStore %27 %103
+%101 = OpLoad  %3  %27
+%102 = OpIAdd  %3  %101 %26
+OpStore %27 %102
 OpBranch %36
 %37 = OpLabel
 OpReturn
 OpFunctionEnd
-%105 = OpFunction  %2  None %20
-%104 = OpLabel
-%109 = OpVariable  %28  Function %22
-%110 = OpVariable  %111  Function %112
-%113 = OpVariable  %33  Function %114
-%120 = OpVariable  %41  Function %43
-%145 = OpVariable  %41  Function %43
-%107 = OpAccessChain  %106  %15 %22
+%104 = OpFunction  %2  None %20
+%103 = OpLabel
+%108 = OpVariable  %28  Function %22
+%109 = OpVariable  %110  Function %111
+%112 = OpVariable  %33  Function %113
+%119 = OpVariable  %41  Function %43
+%144 = OpVariable  %41  Function %43
+%106 = OpAccessChain  %105  %15 %22
+OpBranch %114
+%114 = OpLabel
 OpBranch %115
 %115 = OpLabel
-OpBranch %116
-%116 = OpLabel
-OpLoopMerge %117 %119 None
-OpBranch %121
-%121 = OpLabel
-%122 = OpLoad  %40  %120
-%123 = OpIEqual  %42  %45 %122
-%124 = OpAll  %9  %123
-OpSelectionMerge %125 None
-OpBranchConditional %124 %117 %125
-%125 = OpLabel
-%126 = OpCompositeExtract  %3  %122 1
-%127 = OpIEqual  %9  %126 %44
-%128 = OpSelect  %3  %127 %26 %22
-%129 = OpCompositeConstruct  %40  %128 %26
-%130 = OpIAdd  %40  %122 %129
-OpStore %120 %130
-OpBranch %118
-%118 = OpLabel
-%131 = OpLoad  %3  %109
-%132 = OpULessThan  %9  %131 %6
-OpSelectionMerge %133 None
-OpBranchConditional %132 %133 %134
-%134 = OpLabel
+OpLoopMerge %116 %118 None
+OpBranch %120
+%120 = OpLabel
+%121 = OpLoad  %40  %119
+%122 = OpIEqual  %42  %45 %121
+%123 = OpAll  %9  %122
+OpSelectionMerge %124 None
+OpBranchConditional %123 %116 %124
+%124 = OpLabel
+%125 = OpCompositeExtract  %3  %121 1
+%126 = OpIEqual  %9  %125 %44
+%127 = OpSelect  %3  %126 %26 %22
+%128 = OpCompositeConstruct  %40  %127 %26
+%129 = OpIAdd  %40  %121 %128
+OpStore %119 %129
 OpBranch %117
+%117 = OpLabel
+%130 = OpLoad  %3  %108
+%131 = OpULessThan  %9  %130 %6
+OpSelectionMerge %132 None
+OpBranchConditional %131 %132 %133
 %133 = OpLabel
-OpBranch %135
-%135 = OpLabel
-%137 = OpLoad  %3  %109
-%139 = OpAccessChain  %138  %107 %137
-%140 = OpAtomicLoad  %7  %139 %67 %69
-OpStore %110 %140
-OpStore %113 %24
-OpBranch %141
-%141 = OpLabel
-OpLoopMerge %142 %144 None
-OpBranch %146
-%146 = OpLabel
-%147 = OpLoad  %40  %145
-%148 = OpIEqual  %42  %45 %147
-%149 = OpAll  %9  %148
-OpSelectionMerge %150 None
-OpBranchConditional %149 %142 %150
-%150 = OpLabel
-%151 = OpCompositeExtract  %3  %147 1
-%152 = OpIEqual  %9  %151 %44
-%153 = OpSelect  %3  %152 %26 %22
-%154 = OpCompositeConstruct  %40  %153 %26
-%155 = OpIAdd  %40  %147 %154
-OpStore %145 %155
-OpBranch %143
-%143 = OpLabel
-%156 = OpLoad  %9  %113
-%157 = OpLogicalNot  %9  %156
-OpSelectionMerge %158 None
-OpBranchConditional %157 %158 %159
-%159 = OpLabel
+OpBranch %116
+%132 = OpLabel
+OpBranch %134
+%134 = OpLabel
+%136 = OpLoad  %3  %108
+%138 = OpAccessChain  %137  %106 %136
+%139 = OpAtomicLoad  %7  %138 %67 %69
+OpStore %109 %139
+OpStore %112 %24
+OpBranch %140
+%140 = OpLabel
+OpLoopMerge %141 %143 None
+OpBranch %145
+%145 = OpLabel
+%146 = OpLoad  %40  %144
+%147 = OpIEqual  %42  %45 %146
+%148 = OpAll  %9  %147
+OpSelectionMerge %149 None
+OpBranchConditional %148 %141 %149
+%149 = OpLabel
+%150 = OpCompositeExtract  %3  %146 1
+%151 = OpIEqual  %9  %150 %44
+%152 = OpSelect  %3  %151 %26 %22
+%153 = OpCompositeConstruct  %40  %152 %26
+%154 = OpIAdd  %40  %146 %153
+OpStore %144 %154
 OpBranch %142
+%142 = OpLabel
+%155 = OpLoad  %9  %112
+%156 = OpLogicalNot  %9  %155
+OpSelectionMerge %157 None
+OpBranchConditional %156 %157 %158
 %158 = OpLabel
+OpBranch %141
+%157 = OpLabel
+OpBranch %159
+%159 = OpLabel
+%161 = OpLoad  %7  %109
+%162 = OpIAdd  %7  %161 %107
+%163 = OpLoad  %3  %108
+%164 = OpLoad  %7  %109
+%166 = OpAccessChain  %137  %106 %163
+%167 = OpAtomicCompareExchange  %7  %166 %67 %69 %69 %162 %164
+%168 = OpIEqual  %9  %167 %164
+%165 = OpCompositeConstruct  %11  %167 %168
+%169 = OpCompositeExtract  %7  %165 0
+OpStore %109 %169
+%170 = OpCompositeExtract  %9  %165 1
+OpStore %112 %170
 OpBranch %160
 %160 = OpLabel
-%162 = OpLoad  %7  %110
-%163 = OpIAdd  %7  %162 %108
-%165 = OpLoad  %3  %109
-%166 = OpLoad  %7  %110
-%168 = OpAccessChain  %138  %107 %165
-%169 = OpAtomicCompareExchange  %7  %168 %67 %69 %69 %163 %166
-%170 = OpIEqual  %9  %169 %166
-%167 = OpCompositeConstruct  %11  %169 %170
-%171 = OpCompositeExtract  %7  %167 0
-OpStore %110 %171
-%172 = OpCompositeExtract  %9  %167 1
-OpStore %113 %172
-OpBranch %161
-%161 = OpLabel
-OpBranch %144
-%144 = OpLabel
-OpBranch %141
-%142 = OpLabel
-OpBranch %136
-%136 = OpLabel
-OpBranch %119
-%119 = OpLabel
-%173 = OpLoad  %3  %109
-%174 = OpIAdd  %3  %173 %26
-OpStore %109 %174
-OpBranch %116
-%117 = OpLabel
+OpBranch %143
+%143 = OpLabel
+OpBranch %140
+%141 = OpLabel
+OpBranch %135
+%135 = OpLabel
+OpBranch %118
+%118 = OpLabel
+%171 = OpLoad  %3  %108
+%172 = OpIAdd  %3  %171 %26
+OpStore %108 %172
+OpBranch %115
+%116 = OpLabel
 OpReturn
 OpFunctionEnd
\ No newline at end of file
diff --git a/naga/tests/out/spv/extra.spvasm b/naga/tests/out/spv/extra.spvasm
index 9c434a8ce2..0e84427bad 100644
--- a/naga/tests/out/spv/extra.spvasm
+++ b/naga/tests/out/spv/extra.spvasm
@@ -1,76 +1,74 @@
 ; SPIR-V
 ; Version: 1.2
 ; Generator: rspirv
-; Bound: 48
+; Bound: 47
 OpCapability Shader
-OpCapability Float64
 OpCapability Geometry
 %1 = OpExtInstImport "GLSL.std.450"
 OpMemoryModel Logical GLSL450
-OpEntryPoint Fragment %23 "main" %15 %18 %21
-OpExecutionMode %23 OriginUpperLeft
+OpEntryPoint Fragment %22 "main" %14 %17 %20
+OpExecutionMode %22 OriginUpperLeft
 OpMemberDecorate %6 0 Offset 0
-OpMemberDecorate %6 1 Offset 16
-OpMemberDecorate %9 0 Offset 0
-OpMemberDecorate %9 1 Offset 16
-OpDecorate %11 Block
-OpMemberDecorate %11 0 Offset 0
-OpDecorate %15 Location 0
-OpDecorate %18 BuiltIn PrimitiveId
-OpDecorate %18 Flat
-OpDecorate %21 Location 0
+OpMemberDecorate %6 1 Offset 8
+OpMemberDecorate %8 0 Offset 0
+OpMemberDecorate %8 1 Offset 16
+OpDecorate %10 Block
+OpMemberDecorate %10 0 Offset 0
+OpDecorate %14 Location 0
+OpDecorate %17 BuiltIn PrimitiveId
+OpDecorate %17 Flat
+OpDecorate %20 Location 0
 %2 = OpTypeVoid
 %3 = OpTypeInt 32 0
-%5 = OpTypeFloat 64
+%5 = OpTypeFloat 32
 %4 = OpTypeVector %5 2
 %6 = OpTypeStruct %3 %4
-%8 = OpTypeFloat 32
-%7 = OpTypeVector %8 4
-%9 = OpTypeStruct %7 %3
-%11 = OpTypeStruct %6
-%12 = OpTypePointer PushConstant %11
-%10 = OpVariable  %12  PushConstant
-%16 = OpTypePointer Input %7
-%15 = OpVariable  %16  Input
-%19 = OpTypePointer Input %3
-%18 = OpVariable  %19  Input
-%22 = OpTypePointer Output %7
-%21 = OpVariable  %22  Output
-%24 = OpTypeFunction %2
-%25 = OpTypePointer PushConstant %6
-%26 = OpConstant  %3  0
-%28 = OpConstant  %8  1.0
-%29 = OpTypeVector %8 3
-%30 = OpConstantComposite  %29  %28 %28 %28
-%33 = OpTypePointer PushConstant %3
-%36 = OpTypeBool
-%23 = OpFunction  %2  None %24
-%13 = OpLabel
-%17 = OpLoad  %7  %15
-%20 = OpLoad  %3  %18
-%14 = OpCompositeConstruct  %9  %17 %20
-%27 = OpAccessChain  %25  %10 %26
-OpBranch %31
-%31 = OpLabel
-%32 = OpCompositeExtract  %3  %14 1
-%34 = OpAccessChain  %33  %27 %26
-%35 = OpLoad  %3  %34
-%37 = OpIEqual  %36  %32 %35
-OpSelectionMerge %38 None
-OpBranchConditional %37 %39 %40
-%39 = OpLabel
-%41 = OpCompositeExtract  %7  %14 0
-OpStore %21 %41
+%7 = OpTypeVector %5 4
+%8 = OpTypeStruct %7 %3
+%10 = OpTypeStruct %6
+%11 = OpTypePointer PushConstant %10
+%9 = OpVariable  %11  PushConstant
+%15 = OpTypePointer Input %7
+%14 = OpVariable  %15  Input
+%18 = OpTypePointer Input %3
+%17 = OpVariable  %18  Input
+%21 = OpTypePointer Output %7
+%20 = OpVariable  %21  Output
+%23 = OpTypeFunction %2
+%24 = OpTypePointer PushConstant %6
+%25 = OpConstant  %3  0
+%27 = OpConstant  %5  1.0
+%28 = OpTypeVector %5 3
+%29 = OpConstantComposite  %28  %27 %27 %27
+%32 = OpTypePointer PushConstant %3
+%35 = OpTypeBool
+%22 = OpFunction  %2  None %23
+%12 = OpLabel
+%16 = OpLoad  %7  %14
+%19 = OpLoad  %3  %17
+%13 = OpCompositeConstruct  %8  %16 %19
+%26 = OpAccessChain  %24  %9 %25
+OpBranch %30
+%30 = OpLabel
+%31 = OpCompositeExtract  %3  %13 1
+%33 = OpAccessChain  %32  %26 %25
+%34 = OpLoad  %3  %33
+%36 = OpIEqual  %35  %31 %34
+OpSelectionMerge %37 None
+OpBranchConditional %36 %38 %39
+%38 = OpLabel
+%40 = OpCompositeExtract  %7  %13 0
+OpStore %20 %40
 OpReturn
-%40 = OpLabel
-%42 = OpCompositeExtract  %7  %14 0
-%43 = OpVectorShuffle  %29  %42 %42 0 1 2
-%44 = OpFSub  %29  %30 %43
-%45 = OpCompositeExtract  %7  %14 0
-%46 = OpCompositeExtract  %8  %45 3
-%47 = OpCompositeConstruct  %7  %44 %46
-OpStore %21 %47
+%39 = OpLabel
+%41 = OpCompositeExtract  %7  %13 0
+%42 = OpVectorShuffle  %28  %41 %41 0 1 2
+%43 = OpFSub  %28  %29 %42
+%44 = OpCompositeExtract  %7  %13 0
+%45 = OpCompositeExtract  %5  %44 3
+%46 = OpCompositeConstruct  %7  %43 %45
+OpStore %20 %46
 OpReturn
-%38 = OpLabel
+%37 = OpLabel
 OpReturn
 OpFunctionEnd
\ No newline at end of file
diff --git a/naga/tests/out/spv/f16.spvasm b/naga/tests/out/spv/f16.spvasm
new file mode 100644
index 0000000000..3d8fcea7a7
--- /dev/null
+++ b/naga/tests/out/spv/f16.spvasm
@@ -0,0 +1,633 @@
+; SPIR-V
+; Version: 1.0
+; Generator: rspirv
+; Bound: 511
+OpCapability Shader
+OpCapability Float16
+OpCapability StorageBuffer16BitAccess
+OpCapability UniformAndStorageBuffer16BitAccess
+OpCapability StorageInputOutput16
+OpExtension "SPV_KHR_storage_buffer_storage_class"
+OpExtension "SPV_KHR_16bit_storage"
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint GLCompute %499 "main"
+OpExecutionMode %499 LocalSize 1 1 1
+OpMemberDecorate %19 0 Offset 0
+OpMemberDecorate %19 1 Offset 4
+OpMemberDecorate %19 2 Offset 8
+OpMemberDecorate %19 3 Offset 12
+OpMemberDecorate %19 4 Offset 16
+OpMemberDecorate %19 5 Offset 24
+OpMemberDecorate %19 6 Offset 32
+OpMemberDecorate %19 7 Offset 40
+OpMemberDecorate %19 8 Offset 44
+OpMemberDecorate %19 8 ColMajor
+OpMemberDecorate %19 8 MatrixStride 4
+OpMemberDecorate %19 9 Offset 56
+OpMemberDecorate %19 9 ColMajor
+OpMemberDecorate %19 9 MatrixStride 8
+OpMemberDecorate %19 10 Offset 72
+OpMemberDecorate %19 10 ColMajor
+OpMemberDecorate %19 10 MatrixStride 8
+OpMemberDecorate %19 11 Offset 88
+OpMemberDecorate %19 11 ColMajor
+OpMemberDecorate %19 11 MatrixStride 4
+OpMemberDecorate %19 12 Offset 104
+OpMemberDecorate %19 12 ColMajor
+OpMemberDecorate %19 12 MatrixStride 8
+OpMemberDecorate %19 13 Offset 128
+OpMemberDecorate %19 13 ColMajor
+OpMemberDecorate %19 13 MatrixStride 8
+OpMemberDecorate %19 14 Offset 152
+OpMemberDecorate %19 14 ColMajor
+OpMemberDecorate %19 14 MatrixStride 4
+OpMemberDecorate %19 15 Offset 168
+OpMemberDecorate %19 15 ColMajor
+OpMemberDecorate %19 15 MatrixStride 8
+OpMemberDecorate %19 16 Offset 200
+OpMemberDecorate %19 16 ColMajor
+OpMemberDecorate %19 16 MatrixStride 8
+OpDecorate %20 ArrayStride 2
+OpMemberDecorate %22 0 Offset 0
+OpMemberDecorate %23 0 Offset 0
+OpMemberDecorate %23 1 Offset 2
+OpMemberDecorate %23 2 Offset 8
+OpMemberDecorate %23 3 Offset 14
+OpMemberDecorate %23 4 Offset 16
+OpMemberDecorate %23 5 Offset 20
+OpDecorate %28 DescriptorSet 0
+OpDecorate %28 Binding 0
+OpDecorate %29 Block
+OpMemberDecorate %29 0 Offset 0
+OpDecorate %31 NonWritable
+OpDecorate %31 DescriptorSet 0
+OpDecorate %31 Binding 1
+OpDecorate %32 Block
+OpMemberDecorate %32 0 Offset 0
+OpDecorate %34 NonWritable
+OpDecorate %34 DescriptorSet 0
+OpDecorate %34 Binding 2
+OpDecorate %35 Block
+OpMemberDecorate %35 0 Offset 0
+OpDecorate %37 DescriptorSet 0
+OpDecorate %37 Binding 3
+OpDecorate %38 Block
+OpMemberDecorate %38 0 Offset 0
+OpDecorate %40 DescriptorSet 0
+OpDecorate %40 Binding 4
+OpDecorate %41 Block
+OpMemberDecorate %41 0 Offset 0
+%2 = OpTypeVoid
+%3 = OpTypeFloat 16
+%4 = OpTypeInt 32 0
+%5 = OpTypeInt 32 1
+%6 = OpTypeFloat 32
+%7 = OpTypeVector %3 2
+%8 = OpTypeVector %3 3
+%9 = OpTypeVector %3 4
+%10 = OpTypeMatrix %7 2
+%11 = OpTypeMatrix %8 2
+%12 = OpTypeMatrix %9 2
+%13 = OpTypeMatrix %7 3
+%14 = OpTypeMatrix %8 3
+%15 = OpTypeMatrix %9 3
+%16 = OpTypeMatrix %7 4
+%17 = OpTypeMatrix %8 4
+%18 = OpTypeMatrix %9 4
+%19 = OpTypeStruct %4 %5 %6 %3 %7 %8 %9 %3 %10 %11 %12 %13 %14 %15 %16 %17 %18
+%21 = OpConstant  %4  2
+%20 = OpTypeArray %3 %21
+%22 = OpTypeStruct %20
+%23 = OpTypeStruct %3 %3 %8 %3 %3 %4
+%24 = OpConstant  %3  2.1524e-41
+%25 = OpConstant  %3  2.7121e-41
+%27 = OpTypePointer Private %3
+%26 = OpVariable  %27  Private %24
+%29 = OpTypeStruct %19
+%30 = OpTypePointer Uniform %29
+%28 = OpVariable  %30  Uniform
+%32 = OpTypeStruct %19
+%33 = OpTypePointer StorageBuffer %32
+%31 = OpVariable  %33  StorageBuffer
+%35 = OpTypeStruct %22
+%36 = OpTypePointer StorageBuffer %35
+%34 = OpVariable  %36  StorageBuffer
+%38 = OpTypeStruct %19
+%39 = OpTypePointer StorageBuffer %38
+%37 = OpVariable  %39  StorageBuffer
+%41 = OpTypeStruct %22
+%42 = OpTypePointer StorageBuffer %41
+%40 = OpVariable  %42  StorageBuffer
+%46 = OpTypeFunction %3 %3
+%47 = OpTypePointer Uniform %19
+%48 = OpConstant  %4  0
+%50 = OpTypePointer StorageBuffer %19
+%52 = OpTypePointer StorageBuffer %22
+%56 = OpConstant  %3  8.8991e-41
+%57 = OpConstant  %3  2.4753e-41
+%59 = OpTypePointer Function %3
+%67 = OpTypePointer Uniform %6
+%76 = OpTypePointer Uniform %3
+%77 = OpConstant  %4  3
+%84 = OpTypePointer StorageBuffer %3
+%91 = OpTypePointer StorageBuffer %7
+%92 = OpTypePointer Uniform %7
+%93 = OpConstant  %4  4
+%100 = OpTypePointer StorageBuffer %8
+%101 = OpTypePointer Uniform %8
+%102 = OpConstant  %4  5
+%109 = OpTypePointer StorageBuffer %9
+%110 = OpTypePointer Uniform %9
+%111 = OpConstant  %4  6
+%118 = OpTypePointer StorageBuffer %10
+%119 = OpTypePointer Uniform %10
+%120 = OpConstant  %4  8
+%133 = OpTypePointer StorageBuffer %11
+%134 = OpTypePointer Uniform %11
+%135 = OpConstant  %4  9
+%148 = OpTypePointer StorageBuffer %12
+%149 = OpTypePointer Uniform %12
+%150 = OpConstant  %4  10
+%163 = OpTypePointer StorageBuffer %13
+%164 = OpTypePointer Uniform %13
+%165 = OpConstant  %4  11
+%181 = OpTypePointer StorageBuffer %14
+%182 = OpTypePointer Uniform %14
+%183 = OpConstant  %4  12
+%199 = OpTypePointer StorageBuffer %15
+%200 = OpTypePointer Uniform %15
+%201 = OpConstant  %4  13
+%217 = OpTypePointer StorageBuffer %16
+%218 = OpTypePointer Uniform %16
+%219 = OpConstant  %4  14
+%238 = OpTypePointer StorageBuffer %17
+%239 = OpTypePointer Uniform %17
+%240 = OpConstant  %4  15
+%259 = OpTypePointer StorageBuffer %18
+%260 = OpTypePointer Uniform %18
+%261 = OpConstant  %4  16
+%280 = OpTypePointer StorageBuffer %20
+%319 = OpTypeVector %6 2
+%325 = OpTypeVector %6 3
+%331 = OpTypeVector %6 4
+%337 = OpTypeMatrix %319 2
+%351 = OpTypeMatrix %325 2
+%365 = OpTypeMatrix %331 2
+%379 = OpTypeMatrix %319 3
+%397 = OpTypeMatrix %325 3
+%415 = OpTypeMatrix %331 3
+%433 = OpTypeMatrix %319 4
+%455 = OpTypeMatrix %325 4
+%477 = OpTypeMatrix %331 4
+%500 = OpTypeFunction %2
+%506 = OpConstant  %3  2.2959e-41
+%509 = OpConstant  %4  7
+%45 = OpFunction  %3  None %46
+%44 = OpFunctionParameter  %3
+%43 = OpLabel
+%58 = OpVariable  %59  Function %25
+%49 = OpAccessChain  %47  %28 %48
+%51 = OpAccessChain  %50  %31 %48
+%53 = OpAccessChain  %52  %34 %48
+%54 = OpAccessChain  %50  %37 %48
+%55 = OpAccessChain  %52  %40 %48
+OpBranch %60
+%60 = OpLabel
+%61 = OpLoad  %3  %58
+%62 = OpFAdd  %3  %61 %56
+OpStore %58 %62
+%63 = OpLoad  %3  %58
+%64 = OpFAdd  %3  %63 %57
+%65 = OpLoad  %3  %58
+%66 = OpFAdd  %3  %65 %64
+OpStore %58 %66
+%68 = OpAccessChain  %67  %49 %21
+%69 = OpLoad  %6  %68
+%70 = OpLoad  %3  %58
+%71 = OpFConvert  %6  %70
+%72 = OpFAdd  %6  %69 %71
+%73 = OpFConvert  %3  %72
+%74 = OpLoad  %3  %58
+%75 = OpFAdd  %3  %74 %73
+OpStore %58 %75
+%78 = OpAccessChain  %76  %49 %77
+%79 = OpLoad  %3  %78
+%80 = OpCompositeConstruct  %8  %79 %79 %79
+%81 = OpCompositeExtract  %3  %80 2
+%82 = OpLoad  %3  %58
+%83 = OpFAdd  %3  %82 %81
+OpStore %58 %83
+%85 = OpAccessChain  %76  %49 %77
+%86 = OpLoad  %3  %85
+%87 = OpAccessChain  %84  %51 %77
+%88 = OpLoad  %3  %87
+%89 = OpFAdd  %3  %86 %88
+%90 = OpAccessChain  %84  %54 %77
+OpStore %90 %89
+%94 = OpAccessChain  %92  %49 %93
+%95 = OpLoad  %7  %94
+%96 = OpAccessChain  %91  %51 %93
+%97 = OpLoad  %7  %96
+%98 = OpFAdd  %7  %95 %97
+%99 = OpAccessChain  %91  %54 %93
+OpStore %99 %98
+%103 = OpAccessChain  %101  %49 %102
+%104 = OpLoad  %8  %103
+%105 = OpAccessChain  %100  %51 %102
+%106 = OpLoad  %8  %105
+%107 = OpFAdd  %8  %104 %106
+%108 = OpAccessChain  %100  %54 %102
+OpStore %108 %107
+%112 = OpAccessChain  %110  %49 %111
+%113 = OpLoad  %9  %112
+%114 = OpAccessChain  %109  %51 %111
+%115 = OpLoad  %9  %114
+%116 = OpFAdd  %9  %113 %115
+%117 = OpAccessChain  %109  %54 %111
+OpStore %117 %116
+%121 = OpAccessChain  %119  %49 %120
+%122 = OpLoad  %10  %121
+%123 = OpAccessChain  %118  %51 %120
+%124 = OpLoad  %10  %123
+%126 = OpCompositeExtract  %7  %122 0
+%127 = OpCompositeExtract  %7  %124 0
+%128 = OpFAdd  %7  %126 %127
+%129 = OpCompositeExtract  %7  %122 1
+%130 = OpCompositeExtract  %7  %124 1
+%131 = OpFAdd  %7  %129 %130
+%125 = OpCompositeConstruct  %10  %128 %131
+%132 = OpAccessChain  %118  %54 %120
+OpStore %132 %125
+%136 = OpAccessChain  %134  %49 %135
+%137 = OpLoad  %11  %136
+%138 = OpAccessChain  %133  %51 %135
+%139 = OpLoad  %11  %138
+%141 = OpCompositeExtract  %8  %137 0
+%142 = OpCompositeExtract  %8  %139 0
+%143 = OpFAdd  %8  %141 %142
+%144 = OpCompositeExtract  %8  %137 1
+%145 = OpCompositeExtract  %8  %139 1
+%146 = OpFAdd  %8  %144 %145
+%140 = OpCompositeConstruct  %11  %143 %146
+%147 = OpAccessChain  %133  %54 %135
+OpStore %147 %140
+%151 = OpAccessChain  %149  %49 %150
+%152 = OpLoad  %12  %151
+%153 = OpAccessChain  %148  %51 %150
+%154 = OpLoad  %12  %153
+%156 = OpCompositeExtract  %9  %152 0
+%157 = OpCompositeExtract  %9  %154 0
+%158 = OpFAdd  %9  %156 %157
+%159 = OpCompositeExtract  %9  %152 1
+%160 = OpCompositeExtract  %9  %154 1
+%161 = OpFAdd  %9  %159 %160
+%155 = OpCompositeConstruct  %12  %158 %161
+%162 = OpAccessChain  %148  %54 %150
+OpStore %162 %155
+%166 = OpAccessChain  %164  %49 %165
+%167 = OpLoad  %13  %166
+%168 = OpAccessChain  %163  %51 %165
+%169 = OpLoad  %13  %168
+%171 = OpCompositeExtract  %7  %167 0
+%172 = OpCompositeExtract  %7  %169 0
+%173 = OpFAdd  %7  %171 %172
+%174 = OpCompositeExtract  %7  %167 1
+%175 = OpCompositeExtract  %7  %169 1
+%176 = OpFAdd  %7  %174 %175
+%177 = OpCompositeExtract  %7  %167 2
+%178 = OpCompositeExtract  %7  %169 2
+%179 = OpFAdd  %7  %177 %178
+%170 = OpCompositeConstruct  %13  %173 %176 %179
+%180 = OpAccessChain  %163  %54 %165
+OpStore %180 %170
+%184 = OpAccessChain  %182  %49 %183
+%185 = OpLoad  %14  %184
+%186 = OpAccessChain  %181  %51 %183
+%187 = OpLoad  %14  %186
+%189 = OpCompositeExtract  %8  %185 0
+%190 = OpCompositeExtract  %8  %187 0
+%191 = OpFAdd  %8  %189 %190
+%192 = OpCompositeExtract  %8  %185 1
+%193 = OpCompositeExtract  %8  %187 1
+%194 = OpFAdd  %8  %192 %193
+%195 = OpCompositeExtract  %8  %185 2
+%196 = OpCompositeExtract  %8  %187 2
+%197 = OpFAdd  %8  %195 %196
+%188 = OpCompositeConstruct  %14  %191 %194 %197
+%198 = OpAccessChain  %181  %54 %183
+OpStore %198 %188
+%202 = OpAccessChain  %200  %49 %201
+%203 = OpLoad  %15  %202
+%204 = OpAccessChain  %199  %51 %201
+%205 = OpLoad  %15  %204
+%207 = OpCompositeExtract  %9  %203 0
+%208 = OpCompositeExtract  %9  %205 0
+%209 = OpFAdd  %9  %207 %208
+%210 = OpCompositeExtract  %9  %203 1
+%211 = OpCompositeExtract  %9  %205 1
+%212 = OpFAdd  %9  %210 %211
+%213 = OpCompositeExtract  %9  %203 2
+%214 = OpCompositeExtract  %9  %205 2
+%215 = OpFAdd  %9  %213 %214
+%206 = OpCompositeConstruct  %15  %209 %212 %215
+%216 = OpAccessChain  %199  %54 %201
+OpStore %216 %206
+%220 = OpAccessChain  %218  %49 %219
+%221 = OpLoad  %16  %220
+%222 = OpAccessChain  %217  %51 %219
+%223 = OpLoad  %16  %222
+%225 = OpCompositeExtract  %7  %221 0
+%226 = OpCompositeExtract  %7  %223 0
+%227 = OpFAdd  %7  %225 %226
+%228 = OpCompositeExtract  %7  %221 1
+%229 = OpCompositeExtract  %7  %223 1
+%230 = OpFAdd  %7  %228 %229
+%231 = OpCompositeExtract  %7  %221 2
+%232 = OpCompositeExtract  %7  %223 2
+%233 = OpFAdd  %7  %231 %232
+%234 = OpCompositeExtract  %7  %221 3
+%235 = OpCompositeExtract  %7  %223 3
+%236 = OpFAdd  %7  %234 %235
+%224 = OpCompositeConstruct  %16  %227 %230 %233 %236
+%237 = OpAccessChain  %217  %54 %219
+OpStore %237 %224
+%241 = OpAccessChain  %239  %49 %240
+%242 = OpLoad  %17  %241
+%243 = OpAccessChain  %238  %51 %240
+%244 = OpLoad  %17  %243
+%246 = OpCompositeExtract  %8  %242 0
+%247 = OpCompositeExtract  %8  %244 0
+%248 = OpFAdd  %8  %246 %247
+%249 = OpCompositeExtract  %8  %242 1
+%250 = OpCompositeExtract  %8  %244 1
+%251 = OpFAdd  %8  %249 %250
+%252 = OpCompositeExtract  %8  %242 2
+%253 = OpCompositeExtract  %8  %244 2
+%254 = OpFAdd  %8  %252 %253
+%255 = OpCompositeExtract  %8  %242 3
+%256 = OpCompositeExtract  %8  %244 3
+%257 = OpFAdd  %8  %255 %256
+%245 = OpCompositeConstruct  %17  %248 %251 %254 %257
+%258 = OpAccessChain  %238  %54 %240
+OpStore %258 %245
+%262 = OpAccessChain  %260  %49 %261
+%263 = OpLoad  %18  %262
+%264 = OpAccessChain  %259  %51 %261
+%265 = OpLoad  %18  %264
+%267 = OpCompositeExtract  %9  %263 0
+%268 = OpCompositeExtract  %9  %265 0
+%269 = OpFAdd  %9  %267 %268
+%270 = OpCompositeExtract  %9  %263 1
+%271 = OpCompositeExtract  %9  %265 1
+%272 = OpFAdd  %9  %270 %271
+%273 = OpCompositeExtract  %9  %263 2
+%274 = OpCompositeExtract  %9  %265 2
+%275 = OpFAdd  %9  %273 %274
+%276 = OpCompositeExtract  %9  %263 3
+%277 = OpCompositeExtract  %9  %265 3
+%278 = OpFAdd  %9  %276 %277
+%266 = OpCompositeConstruct  %18  %269 %272 %275 %278
+%279 = OpAccessChain  %259  %54 %261
+OpStore %279 %266
+%281 = OpAccessChain  %280  %53 %48
+%282 = OpLoad  %20  %281
+%283 = OpAccessChain  %280  %55 %48
+OpStore %283 %282
+%284 = OpLoad  %3  %58
+%285 = OpExtInst  %3  %1 FAbs %284
+%286 = OpLoad  %3  %58
+%287 = OpFAdd  %3  %286 %285
+OpStore %58 %287
+%288 = OpLoad  %3  %58
+%289 = OpLoad  %3  %58
+%290 = OpLoad  %3  %58
+%291 = OpExtInst  %3  %1 FClamp %288 %289 %290
+%292 = OpLoad  %3  %58
+%293 = OpFAdd  %3  %292 %291
+OpStore %58 %293
+%294 = OpLoad  %3  %58
+%295 = OpCompositeConstruct  %7  %294 %294
+%296 = OpLoad  %3  %58
+%297 = OpCompositeConstruct  %7  %296 %296
+%298 = OpDot  %3  %295 %297
+%299 = OpLoad  %3  %58
+%300 = OpFAdd  %3  %299 %298
+OpStore %58 %300
+%301 = OpLoad  %3  %58
+%302 = OpLoad  %3  %58
+%303 = OpExtInst  %3  %1 FMax %301 %302
+%304 = OpLoad  %3  %58
+%305 = OpFAdd  %3  %304 %303
+OpStore %58 %305
+%306 = OpLoad  %3  %58
+%307 = OpLoad  %3  %58
+%308 = OpExtInst  %3  %1 FMin %306 %307
+%309 = OpLoad  %3  %58
+%310 = OpFAdd  %3  %309 %308
+OpStore %58 %310
+%311 = OpLoad  %3  %58
+%312 = OpExtInst  %3  %1 FSign %311
+%313 = OpLoad  %3  %58
+%314 = OpFAdd  %3  %313 %312
+OpStore %58 %314
+%315 = OpLoad  %3  %58
+%316 = OpFAdd  %3  %315 %24
+OpStore %58 %316
+%317 = OpAccessChain  %92  %49 %93
+%318 = OpLoad  %7  %317
+%320 = OpFConvert  %319  %318
+%321 = OpFConvert  %7  %320
+%322 = OpAccessChain  %91  %54 %93
+OpStore %322 %321
+%323 = OpAccessChain  %101  %49 %102
+%324 = OpLoad  %8  %323
+%326 = OpFConvert  %325  %324
+%327 = OpFConvert  %8  %326
+%328 = OpAccessChain  %100  %54 %102
+OpStore %328 %327
+%329 = OpAccessChain  %110  %49 %111
+%330 = OpLoad  %9  %329
+%332 = OpFConvert  %331  %330
+%333 = OpFConvert  %9  %332
+%334 = OpAccessChain  %109  %54 %111
+OpStore %334 %333
+%335 = OpAccessChain  %119  %49 %120
+%336 = OpLoad  %10  %335
+%338 = OpCompositeExtract  %7  %336 0
+%339 = OpFConvert  %319  %338
+%340 = OpCompositeExtract  %7  %336 1
+%341 = OpFConvert  %319  %340
+%342 = OpCompositeConstruct  %337  %339 %341
+%343 = OpCompositeExtract  %319  %342 0
+%344 = OpFConvert  %7  %343
+%345 = OpCompositeExtract  %319  %342 1
+%346 = OpFConvert  %7  %345
+%347 = OpCompositeConstruct  %10  %344 %346
+%348 = OpAccessChain  %118  %54 %120
+OpStore %348 %347
+%349 = OpAccessChain  %134  %49 %135
+%350 = OpLoad  %11  %349
+%352 = OpCompositeExtract  %8  %350 0
+%353 = OpFConvert  %325  %352
+%354 = OpCompositeExtract  %8  %350 1
+%355 = OpFConvert  %325  %354
+%356 = OpCompositeConstruct  %351  %353 %355
+%357 = OpCompositeExtract  %325  %356 0
+%358 = OpFConvert  %8  %357
+%359 = OpCompositeExtract  %325  %356 1
+%360 = OpFConvert  %8  %359
+%361 = OpCompositeConstruct  %11  %358 %360
+%362 = OpAccessChain  %133  %54 %135
+OpStore %362 %361
+%363 = OpAccessChain  %149  %49 %150
+%364 = OpLoad  %12  %363
+%366 = OpCompositeExtract  %9  %364 0
+%367 = OpFConvert  %331  %366
+%368 = OpCompositeExtract  %9  %364 1
+%369 = OpFConvert  %331  %368
+%370 = OpCompositeConstruct  %365  %367 %369
+%371 = OpCompositeExtract  %331  %370 0
+%372 = OpFConvert  %9  %371
+%373 = OpCompositeExtract  %331  %370 1
+%374 = OpFConvert  %9  %373
+%375 = OpCompositeConstruct  %12  %372 %374
+%376 = OpAccessChain  %148  %54 %150
+OpStore %376 %375
+%377 = OpAccessChain  %164  %49 %165
+%378 = OpLoad  %13  %377
+%380 = OpCompositeExtract  %7  %378 0
+%381 = OpFConvert  %319  %380
+%382 = OpCompositeExtract  %7  %378 1
+%383 = OpFConvert  %319  %382
+%384 = OpCompositeExtract  %7  %378 2
+%385 = OpFConvert  %319  %384
+%386 = OpCompositeConstruct  %379  %381 %383 %385
+%387 = OpCompositeExtract  %319  %386 0
+%388 = OpFConvert  %7  %387
+%389 = OpCompositeExtract  %319  %386 1
+%390 = OpFConvert  %7  %389
+%391 = OpCompositeExtract  %319  %386 2
+%392 = OpFConvert  %7  %391
+%393 = OpCompositeConstruct  %13  %388 %390 %392
+%394 = OpAccessChain  %163  %54 %165
+OpStore %394 %393
+%395 = OpAccessChain  %182  %49 %183
+%396 = OpLoad  %14  %395
+%398 = OpCompositeExtract  %8  %396 0
+%399 = OpFConvert  %325  %398
+%400 = OpCompositeExtract  %8  %396 1
+%401 = OpFConvert  %325  %400
+%402 = OpCompositeExtract  %8  %396 2
+%403 = OpFConvert  %325  %402
+%404 = OpCompositeConstruct  %397  %399 %401 %403
+%405 = OpCompositeExtract  %325  %404 0
+%406 = OpFConvert  %8  %405
+%407 = OpCompositeExtract  %325  %404 1
+%408 = OpFConvert  %8  %407
+%409 = OpCompositeExtract  %325  %404 2
+%410 = OpFConvert  %8  %409
+%411 = OpCompositeConstruct  %14  %406 %408 %410
+%412 = OpAccessChain  %181  %54 %183
+OpStore %412 %411
+%413 = OpAccessChain  %200  %49 %201
+%414 = OpLoad  %15  %413
+%416 = OpCompositeExtract  %9  %414 0
+%417 = OpFConvert  %331  %416
+%418 = OpCompositeExtract  %9  %414 1
+%419 = OpFConvert  %331  %418
+%420 = OpCompositeExtract  %9  %414 2
+%421 = OpFConvert  %331  %420
+%422 = OpCompositeConstruct  %415  %417 %419 %421
+%423 = OpCompositeExtract  %331  %422 0
+%424 = OpFConvert  %9  %423
+%425 = OpCompositeExtract  %331  %422 1
+%426 = OpFConvert  %9  %425
+%427 = OpCompositeExtract  %331  %422 2
+%428 = OpFConvert  %9  %427
+%429 = OpCompositeConstruct  %15  %424 %426 %428
+%430 = OpAccessChain  %199  %54 %201
+OpStore %430 %429
+%431 = OpAccessChain  %218  %49 %219
+%432 = OpLoad  %16  %431
+%434 = OpCompositeExtract  %7  %432 0
+%435 = OpFConvert  %319  %434
+%436 = OpCompositeExtract  %7  %432 1
+%437 = OpFConvert  %319  %436
+%438 = OpCompositeExtract  %7  %432 2
+%439 = OpFConvert  %319  %438
+%440 = OpCompositeExtract  %7  %432 3
+%441 = OpFConvert  %319  %440
+%442 = OpCompositeConstruct  %433  %435 %437 %439 %441
+%443 = OpCompositeExtract  %319  %442 0
+%444 = OpFConvert  %7  %443
+%445 = OpCompositeExtract  %319  %442 1
+%446 = OpFConvert  %7  %445
+%447 = OpCompositeExtract  %319  %442 2
+%448 = OpFConvert  %7  %447
+%449 = OpCompositeExtract  %319  %442 3
+%450 = OpFConvert  %7  %449
+%451 = OpCompositeConstruct  %16  %444 %446 %448 %450
+%452 = OpAccessChain  %217  %54 %219
+OpStore %452 %451
+%453 = OpAccessChain  %239  %49 %240
+%454 = OpLoad  %17  %453
+%456 = OpCompositeExtract  %8  %454 0
+%457 = OpFConvert  %325  %456
+%458 = OpCompositeExtract  %8  %454 1
+%459 = OpFConvert  %325  %458
+%460 = OpCompositeExtract  %8  %454 2
+%461 = OpFConvert  %325  %460
+%462 = OpCompositeExtract  %8  %454 3
+%463 = OpFConvert  %325  %462
+%464 = OpCompositeConstruct  %455  %457 %459 %461 %463
+%465 = OpCompositeExtract  %325  %464 0
+%466 = OpFConvert  %8  %465
+%467 = OpCompositeExtract  %325  %464 1
+%468 = OpFConvert  %8  %467
+%469 = OpCompositeExtract  %325  %464 2
+%470 = OpFConvert  %8  %469
+%471 = OpCompositeExtract  %325  %464 3
+%472 = OpFConvert  %8  %471
+%473 = OpCompositeConstruct  %17  %466 %468 %470 %472
+%474 = OpAccessChain  %238  %54 %240
+OpStore %474 %473
+%475 = OpAccessChain  %260  %49 %261
+%476 = OpLoad  %18  %475
+%478 = OpCompositeExtract  %9  %476 0
+%479 = OpFConvert  %331  %478
+%480 = OpCompositeExtract  %9  %476 1
+%481 = OpFConvert  %331  %480
+%482 = OpCompositeExtract  %9  %476 2
+%483 = OpFConvert  %331  %482
+%484 = OpCompositeExtract  %9  %476 3
+%485 = OpFConvert  %331  %484
+%486 = OpCompositeConstruct  %477  %479 %481 %483 %485
+%487 = OpCompositeExtract  %331  %486 0
+%488 = OpFConvert  %9  %487
+%489 = OpCompositeExtract  %331  %486 1
+%490 = OpFConvert  %9  %489
+%491 = OpCompositeExtract  %331  %486 2
+%492 = OpFConvert  %9  %491
+%493 = OpCompositeExtract  %331  %486 3
+%494 = OpFConvert  %9  %493
+%495 = OpCompositeConstruct  %18  %488 %490 %492 %494
+%496 = OpAccessChain  %259  %54 %261
+OpStore %496 %495
+%497 = OpLoad  %3  %58
+OpReturnValue %497
+OpFunctionEnd
+%499 = OpFunction  %2  None %500
+%498 = OpLabel
+%501 = OpAccessChain  %47  %28 %48
+%502 = OpAccessChain  %50  %31 %48
+%503 = OpAccessChain  %52  %34 %48
+%504 = OpAccessChain  %50  %37 %48
+%505 = OpAccessChain  %52  %40 %48
+OpBranch %507
+%507 = OpLabel
+%508 = OpFunctionCall  %3  %45 %506
+%510 = OpAccessChain  %84  %504 %509
+OpStore %510 %508
+OpReturn
+OpFunctionEnd
\ No newline at end of file
diff --git a/naga/tests/out/spv/image.spvasm b/naga/tests/out/spv/image.spvasm
index 152ede488f..f1303d23ab 100644
--- a/naga/tests/out/spv/image.spvasm
+++ b/naga/tests/out/spv/image.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.1
 ; Generator: rspirv
-; Bound: 547
+; Bound: 545
 OpCapability Shader
 OpCapability Image1D
 OpCapability Sampled1D
@@ -10,19 +10,19 @@ OpCapability ImageQuery
 %1 = OpExtInstImport "GLSL.std.450"
 OpMemoryModel Logical GLSL450
 OpEntryPoint GLCompute %96 "main" %93
-OpEntryPoint GLCompute %190 "depth_load" %188
-OpEntryPoint Vertex %210 "queries" %208
-OpEntryPoint Vertex %262 "levels_queries" %261
-OpEntryPoint Fragment %293 "texture_sample" %292
-OpEntryPoint Fragment %440 "texture_sample_comparison" %438
-OpEntryPoint Fragment %496 "gather" %495
-OpEntryPoint Fragment %530 "depth_no_comparison" %529
+OpEntryPoint GLCompute %188 "depth_load" %186
+OpEntryPoint Vertex %208 "queries" %206
+OpEntryPoint Vertex %260 "levels_queries" %259
+OpEntryPoint Fragment %291 "texture_sample" %290
+OpEntryPoint Fragment %438 "texture_sample_comparison" %436
+OpEntryPoint Fragment %494 "gather" %493
+OpEntryPoint Fragment %528 "depth_no_comparison" %527
 OpExecutionMode %96 LocalSize 16 1 1
-OpExecutionMode %190 LocalSize 16 1 1
-OpExecutionMode %293 OriginUpperLeft
-OpExecutionMode %440 OriginUpperLeft
-OpExecutionMode %496 OriginUpperLeft
-OpExecutionMode %530 OriginUpperLeft
+OpExecutionMode %188 LocalSize 16 1 1
+OpExecutionMode %291 OriginUpperLeft
+OpExecutionMode %438 OriginUpperLeft
+OpExecutionMode %494 OriginUpperLeft
+OpExecutionMode %528 OriginUpperLeft
 %3 = OpString "image.wgsl"
 OpSource Unknown 0 %3 "@group(0) @binding(0)
 var image_mipmapped_src: texture_2d<u32>;
@@ -248,16 +248,16 @@ OpName %72 "lhs"
 OpName %73 "rhs"
 OpName %93 "local_id"
 OpName %96 "main"
-OpName %188 "local_id"
-OpName %190 "depth_load"
-OpName %210 "queries"
-OpName %262 "levels_queries"
-OpName %293 "texture_sample"
-OpName %308 "a"
-OpName %440 "texture_sample_comparison"
-OpName %445 "a"
-OpName %496 "gather"
-OpName %530 "depth_no_comparison"
+OpName %186 "local_id"
+OpName %188 "depth_load"
+OpName %208 "queries"
+OpName %260 "levels_queries"
+OpName %291 "texture_sample"
+OpName %306 "a"
+OpName %438 "texture_sample_comparison"
+OpName %443 "a"
+OpName %494 "gather"
+OpName %528 "depth_no_comparison"
 OpDecorate %29 DescriptorSet 0
 OpDecorate %29 Binding 0
 OpDecorate %31 DescriptorSet 0
@@ -306,13 +306,13 @@ OpDecorate %66 Binding 3
 OpDecorate %68 DescriptorSet 1
 OpDecorate %68 Binding 4
 OpDecorate %93 BuiltIn LocalInvocationId
-OpDecorate %188 BuiltIn LocalInvocationId
-OpDecorate %208 BuiltIn Position
-OpDecorate %261 BuiltIn Position
-OpDecorate %292 Location 0
-OpDecorate %438 Location 0
-OpDecorate %495 Location 0
-OpDecorate %529 Location 0
+OpDecorate %186 BuiltIn LocalInvocationId
+OpDecorate %206 BuiltIn Position
+OpDecorate %259 BuiltIn Position
+OpDecorate %290 Location 0
+OpDecorate %436 Location 0
+OpDecorate %493 Location 0
+OpDecorate %527 Location 0
 %2 = OpTypeVoid
 %5 = OpTypeInt 32 0
 %4 = OpTypeImage %5 2D 0 0 0 1 Unknown
@@ -399,43 +399,43 @@ OpDecorate %529 Location 0
 %106 = OpConstantComposite  %14  %104 %105
 %108 = OpTypeVector %5 2
 %116 = OpTypeVector %5 4
-%130 = OpTypeVector %15 3
-%188 = OpVariable  %94  Input
-%209 = OpTypePointer Output %24
-%208 = OpVariable  %209  Output
-%219 = OpConstant  %5  0
-%261 = OpVariable  %209  Output
-%292 = OpVariable  %209  Output
-%299 = OpConstant  %8  0.5
-%300 = OpTypeVector %8 2
-%301 = OpConstantComposite  %300  %299 %299
-%302 = OpTypeVector %8 3
-%303 = OpConstantComposite  %302  %299 %299 %299
-%304 = OpConstant  %15  3
-%305 = OpConstantComposite  %14  %304 %88
-%306 = OpConstant  %8  2.3
-%307 = OpConstant  %8  2.0
-%309 = OpTypePointer Function %24
-%310 = OpConstantNull  %24
-%312 = OpTypeSampledImage %16
-%317 = OpTypeSampledImage %17
-%338 = OpTypeSampledImage %19
-%399 = OpTypeSampledImage %21
-%439 = OpTypePointer Output %8
-%438 = OpVariable  %439  Output
-%446 = OpTypePointer Function %8
-%447 = OpConstantNull  %8
-%449 = OpTypeSampledImage %26
-%454 = OpTypeSampledImage %27
-%467 = OpTypeSampledImage %28
-%474 = OpConstant  %8  0.0
-%495 = OpVariable  %209  Output
-%506 = OpConstant  %5  1
-%509 = OpConstant  %5  3
-%514 = OpTypeSampledImage %4
-%517 = OpTypeVector %15 4
-%518 = OpTypeSampledImage %18
-%529 = OpVariable  %209  Output
+%129 = OpTypeVector %15 3
+%186 = OpVariable  %94  Input
+%207 = OpTypePointer Output %24
+%206 = OpVariable  %207  Output
+%217 = OpConstant  %5  0
+%259 = OpVariable  %207  Output
+%290 = OpVariable  %207  Output
+%297 = OpConstant  %8  0.5
+%298 = OpTypeVector %8 2
+%299 = OpConstantComposite  %298  %297 %297
+%300 = OpTypeVector %8 3
+%301 = OpConstantComposite  %300  %297 %297 %297
+%302 = OpConstant  %15  3
+%303 = OpConstantComposite  %14  %302 %88
+%304 = OpConstant  %8  2.3
+%305 = OpConstant  %8  2.0
+%307 = OpTypePointer Function %24
+%308 = OpConstantNull  %24
+%310 = OpTypeSampledImage %16
+%315 = OpTypeSampledImage %17
+%336 = OpTypeSampledImage %19
+%397 = OpTypeSampledImage %21
+%437 = OpTypePointer Output %8
+%436 = OpVariable  %437  Output
+%444 = OpTypePointer Function %8
+%445 = OpConstantNull  %8
+%447 = OpTypeSampledImage %26
+%452 = OpTypeSampledImage %27
+%465 = OpTypeSampledImage %28
+%472 = OpConstant  %8  0.0
+%493 = OpVariable  %207  Output
+%504 = OpConstant  %5  1
+%507 = OpConstant  %5  3
+%512 = OpTypeSampledImage %4
+%515 = OpTypeVector %15 4
+%516 = OpTypeSampledImage %18
+%527 = OpVariable  %207  Output
 %70 = OpFunction  %14  None %71
 %72 = OpFunctionParameter  %14
 %73 = OpFunctionParameter  %14
@@ -474,586 +474,586 @@ OpLine %3 23 18
 %117 = OpImageFetch  %116  %98 %113 Lod %115
 OpLine %3 25 20
 %118 = OpCompositeExtract  %5  %95 2
-%120 = OpImageFetch  %116  %98 %113 Lod %118
+%119 = OpImageFetch  %116  %98 %113 Lod %118
 OpLine %3 26 18
-%121 = OpCompositeExtract  %5  %95 2
-%122 = OpBitcast  %15  %121
-%123 = OpImageFetch  %116  %99 %113 Sample %122
+%120 = OpCompositeExtract  %5  %95 2
+%121 = OpBitcast  %15  %120
+%122 = OpImageFetch  %116  %99 %113 Sample %121
 OpLine %3 27 18
-%124 = OpImageRead  %116  %100 %113
+%123 = OpImageRead  %116  %100 %113
 OpLine %3 28 52
+%124 = OpCompositeExtract  %5  %95 2
 %125 = OpCompositeExtract  %5  %95 2
-%126 = OpCompositeExtract  %5  %95 2
-%127 = OpBitcast  %15  %126
+%126 = OpBitcast  %15  %125
 OpLine %3 28 18
-%128 = OpIAdd  %15  %127 %88
-%129 = OpBitcast  %15  %125
-%131 = OpCompositeConstruct  %130  %113 %129
-%132 = OpImageFetch  %116  %101 %131 Lod %128
+%127 = OpIAdd  %15  %126 %88
+%128 = OpBitcast  %15  %124
+%130 = OpCompositeConstruct  %129  %113 %128
+%131 = OpImageFetch  %116  %101 %130 Lod %127
 OpLine %3 29 52
-%133 = OpCompositeExtract  %5  %95 2
-%134 = OpBitcast  %15  %133
-%135 = OpCompositeExtract  %5  %95 2
-%136 = OpBitcast  %15  %135
+%132 = OpCompositeExtract  %5  %95 2
+%133 = OpBitcast  %15  %132
+%134 = OpCompositeExtract  %5  %95 2
+%135 = OpBitcast  %15  %134
 OpLine %3 29 18
-%137 = OpIAdd  %15  %136 %88
-%138 = OpCompositeConstruct  %130  %113 %134
-%139 = OpImageFetch  %116  %101 %138 Lod %137
+%136 = OpIAdd  %15  %135 %88
+%137 = OpCompositeConstruct  %129  %113 %133
+%138 = OpImageFetch  %116  %101 %137 Lod %136
 OpLine %3 30 18
-%140 = OpCompositeExtract  %5  %95 0
-%141 = OpBitcast  %15  %140
-%142 = OpCompositeExtract  %5  %95 2
-%143 = OpBitcast  %15  %142
-%144 = OpImageFetch  %116  %102 %141 Lod %143
+%139 = OpCompositeExtract  %5  %95 0
+%140 = OpBitcast  %15  %139
+%141 = OpCompositeExtract  %5  %95 2
+%142 = OpBitcast  %15  %141
+%143 = OpImageFetch  %116  %102 %140 Lod %142
 OpLine %3 32 19
-%145 = OpBitcast  %108  %113
-%146 = OpCompositeExtract  %5  %95 2
-%147 = OpBitcast  %15  %146
-%148 = OpImageFetch  %116  %98 %145 Lod %147
+%144 = OpBitcast  %108  %113
+%145 = OpCompositeExtract  %5  %95 2
+%146 = OpBitcast  %15  %145
+%147 = OpImageFetch  %116  %98 %144 Lod %146
 OpLine %3 33 19
-%149 = OpBitcast  %108  %113
-%150 = OpCompositeExtract  %5  %95 2
-%151 = OpBitcast  %15  %150
-%152 = OpImageFetch  %116  %99 %149 Sample %151
+%148 = OpBitcast  %108  %113
+%149 = OpCompositeExtract  %5  %95 2
+%150 = OpBitcast  %15  %149
+%151 = OpImageFetch  %116  %99 %148 Sample %150
 OpLine %3 34 19
-%153 = OpBitcast  %108  %113
-%154 = OpImageRead  %116  %100 %153
+%152 = OpBitcast  %108  %113
+%153 = OpImageRead  %116  %100 %152
 OpLine %3 35 48
-%155 = OpBitcast  %108  %113
+%154 = OpBitcast  %108  %113
+%155 = OpCompositeExtract  %5  %95 2
 %156 = OpCompositeExtract  %5  %95 2
-%157 = OpCompositeExtract  %5  %95 2
-%158 = OpBitcast  %15  %157
+%157 = OpBitcast  %15  %156
 OpLine %3 35 19
-%159 = OpIAdd  %15  %158 %88
-%160 = OpCompositeConstruct  %13  %155 %156
-%161 = OpImageFetch  %116  %101 %160 Lod %159
+%158 = OpIAdd  %15  %157 %88
+%159 = OpCompositeConstruct  %13  %154 %155
+%160 = OpImageFetch  %116  %101 %159 Lod %158
 OpLine %3 36 48
-%162 = OpBitcast  %108  %113
-%163 = OpCompositeExtract  %5  %95 2
-%164 = OpBitcast  %15  %163
-%165 = OpCompositeExtract  %5  %95 2
-%166 = OpBitcast  %15  %165
+%161 = OpBitcast  %108  %113
+%162 = OpCompositeExtract  %5  %95 2
+%163 = OpBitcast  %15  %162
+%164 = OpCompositeExtract  %5  %95 2
+%165 = OpBitcast  %15  %164
 OpLine %3 36 19
-%167 = OpIAdd  %15  %166 %88
-%168 = OpBitcast  %5  %164
-%169 = OpCompositeConstruct  %13  %162 %168
-%170 = OpImageFetch  %116  %101 %169 Lod %167
+%166 = OpIAdd  %15  %165 %88
+%167 = OpBitcast  %5  %163
+%168 = OpCompositeConstruct  %13  %161 %167
+%169 = OpImageFetch  %116  %101 %168 Lod %166
 OpLine %3 37 19
-%171 = OpCompositeExtract  %5  %95 0
-%173 = OpCompositeExtract  %5  %95 2
-%174 = OpBitcast  %15  %173
-%175 = OpImageFetch  %116  %102 %171 Lod %174
+%170 = OpCompositeExtract  %5  %95 0
+%171 = OpCompositeExtract  %5  %95 2
+%172 = OpBitcast  %15  %171
+%173 = OpImageFetch  %116  %102 %170 Lod %172
 OpLine %3 39 29
-%176 = OpCompositeExtract  %15  %113 0
-%177 = OpIAdd  %116  %117 %123
-%178 = OpIAdd  %116  %177 %124
-%179 = OpIAdd  %116  %178 %132
-%180 = OpIAdd  %116  %179 %139
+%174 = OpCompositeExtract  %15  %113 0
+%175 = OpIAdd  %116  %117 %122
+%176 = OpIAdd  %116  %175 %123
+%177 = OpIAdd  %116  %176 %131
+%178 = OpIAdd  %116  %177 %138
 OpLine %3 39 5
-OpImageWrite %103 %176 %180
+OpImageWrite %103 %174 %178
 OpLine %3 41 29
-%181 = OpCompositeExtract  %15  %113 0
-%182 = OpBitcast  %5  %181
-%183 = OpIAdd  %116  %148 %152
-%184 = OpIAdd  %116  %183 %154
-%185 = OpIAdd  %116  %184 %161
-%186 = OpIAdd  %116  %185 %170
+%179 = OpCompositeExtract  %15  %113 0
+%180 = OpBitcast  %5  %179
+%181 = OpIAdd  %116  %147 %151
+%182 = OpIAdd  %116  %181 %153
+%183 = OpIAdd  %116  %182 %160
+%184 = OpIAdd  %116  %183 %169
 OpLine %3 41 5
-OpImageWrite %103 %182 %186
+OpImageWrite %103 %180 %184
 OpReturn
 OpFunctionEnd
-%190 = OpFunction  %2  None %97
-%187 = OpLabel
-%189 = OpLoad  %13  %188
-%191 = OpLoad  %7  %33
-%192 = OpLoad  %9  %35
-%193 = OpLoad  %11  %43
-OpBranch %194
-%194 = OpLabel
+%188 = OpFunction  %2  None %97
+%185 = OpLabel
+%187 = OpLoad  %13  %186
+%189 = OpLoad  %7  %33
+%190 = OpLoad  %9  %35
+%191 = OpLoad  %11  %43
+OpBranch %192
+%192 = OpLabel
 OpLine %3 46 26
-%195 = OpImageQuerySize  %108  %192
+%193 = OpImageQuerySize  %108  %190
 OpLine %3 47 27
-%196 = OpVectorShuffle  %108  %189 %189 0 1
-%197 = OpIMul  %108  %195 %196
-%198 = OpBitcast  %14  %197
+%194 = OpVectorShuffle  %108  %187 %187 0 1
+%195 = OpIMul  %108  %193 %194
+%196 = OpBitcast  %14  %195
 OpLine %3 47 27
-%199 = OpFunctionCall  %14  %70 %198 %106
+%197 = OpFunctionCall  %14  %70 %196 %106
 OpLine %3 48 20
-%200 = OpCompositeExtract  %5  %189 2
-%201 = OpBitcast  %15  %200
-%202 = OpImageFetch  %24  %191 %199 Sample %201
-%203 = OpCompositeExtract  %8  %202 0
+%198 = OpCompositeExtract  %5  %187 2
+%199 = OpBitcast  %15  %198
+%200 = OpImageFetch  %24  %189 %197 Sample %199
+%201 = OpCompositeExtract  %8  %200 0
 OpLine %3 49 29
-%204 = OpCompositeExtract  %15  %199 0
-%205 = OpConvertFToU  %5  %203
-%206 = OpCompositeConstruct  %116  %205 %205 %205 %205
+%202 = OpCompositeExtract  %15  %197 0
+%203 = OpConvertFToU  %5  %201
+%204 = OpCompositeConstruct  %116  %203 %203 %203 %203
 OpLine %3 49 5
-OpImageWrite %193 %204 %206
+OpImageWrite %191 %202 %204
 OpReturn
 OpFunctionEnd
-%210 = OpFunction  %2  None %97
-%207 = OpLabel
-%211 = OpLoad  %16  %44
-%212 = OpLoad  %17  %46
-%213 = OpLoad  %19  %51
-%214 = OpLoad  %20  %53
-%215 = OpLoad  %21  %55
-%216 = OpLoad  %22  %57
-%217 = OpLoad  %23  %59
-OpBranch %218
-%218 = OpLabel
+%208 = OpFunction  %2  None %97
+%205 = OpLabel
+%209 = OpLoad  %16  %44
+%210 = OpLoad  %17  %46
+%211 = OpLoad  %19  %51
+%212 = OpLoad  %20  %53
+%213 = OpLoad  %21  %55
+%214 = OpLoad  %22  %57
+%215 = OpLoad  %23  %59
+OpBranch %216
+%216 = OpLabel
 OpLine %3 74 18
-%220 = OpImageQuerySizeLod  %5  %211 %219
+%218 = OpImageQuerySizeLod  %5  %209 %217
 OpLine %3 75 22
-%221 = OpBitcast  %15  %220
-%222 = OpImageQuerySizeLod  %5  %211 %221
+%219 = OpBitcast  %15  %218
+%220 = OpImageQuerySizeLod  %5  %209 %219
 OpLine %3 76 18
-%223 = OpImageQuerySizeLod  %108  %212 %219
+%221 = OpImageQuerySizeLod  %108  %210 %217
 OpLine %3 77 22
-%224 = OpImageQuerySizeLod  %108  %212 %88
+%222 = OpImageQuerySizeLod  %108  %210 %88
 OpLine %3 78 24
-%225 = OpImageQuerySizeLod  %13  %213 %219
-%226 = OpVectorShuffle  %108  %225 %225 0 1
+%223 = OpImageQuerySizeLod  %13  %211 %217
+%224 = OpVectorShuffle  %108  %223 %223 0 1
 OpLine %3 79 28
-%227 = OpImageQuerySizeLod  %13  %213 %88
-%228 = OpVectorShuffle  %108  %227 %227 0 1
+%225 = OpImageQuerySizeLod  %13  %211 %88
+%226 = OpVectorShuffle  %108  %225 %225 0 1
 OpLine %3 80 20
-%229 = OpImageQuerySizeLod  %108  %214 %219
+%227 = OpImageQuerySizeLod  %108  %212 %217
 OpLine %3 81 24
-%230 = OpImageQuerySizeLod  %108  %214 %88
+%228 = OpImageQuerySizeLod  %108  %212 %88
 OpLine %3 82 26
-%231 = OpImageQuerySizeLod  %13  %215 %219
-%232 = OpVectorShuffle  %108  %231 %231 0 0
+%229 = OpImageQuerySizeLod  %13  %213 %217
+%230 = OpVectorShuffle  %108  %229 %229 0 0
 OpLine %3 83 30
-%233 = OpImageQuerySizeLod  %13  %215 %88
-%234 = OpVectorShuffle  %108  %233 %233 0 0
+%231 = OpImageQuerySizeLod  %13  %213 %88
+%232 = OpVectorShuffle  %108  %231 %231 0 0
 OpLine %3 84 18
-%235 = OpImageQuerySizeLod  %13  %216 %219
+%233 = OpImageQuerySizeLod  %13  %214 %217
 OpLine %3 85 22
-%236 = OpImageQuerySizeLod  %13  %216 %88
+%234 = OpImageQuerySizeLod  %13  %214 %88
 OpLine %3 86 21
-%237 = OpImageQuerySize  %108  %217
+%235 = OpImageQuerySize  %108  %215
 OpLine %3 88 15
-%238 = OpCompositeExtract  %5  %223 1
-%239 = OpIAdd  %5  %220 %238
+%236 = OpCompositeExtract  %5  %221 1
+%237 = OpIAdd  %5  %218 %236
+%238 = OpCompositeExtract  %5  %222 1
+%239 = OpIAdd  %5  %237 %238
 %240 = OpCompositeExtract  %5  %224 1
 %241 = OpIAdd  %5  %239 %240
 %242 = OpCompositeExtract  %5  %226 1
 %243 = OpIAdd  %5  %241 %242
-%244 = OpCompositeExtract  %5  %228 1
+%244 = OpCompositeExtract  %5  %227 1
 %245 = OpIAdd  %5  %243 %244
-%246 = OpCompositeExtract  %5  %229 1
+%246 = OpCompositeExtract  %5  %228 1
 %247 = OpIAdd  %5  %245 %246
 %248 = OpCompositeExtract  %5  %230 1
 %249 = OpIAdd  %5  %247 %248
 %250 = OpCompositeExtract  %5  %232 1
 %251 = OpIAdd  %5  %249 %250
-%252 = OpCompositeExtract  %5  %234 1
+%252 = OpCompositeExtract  %5  %233 2
 %253 = OpIAdd  %5  %251 %252
-%254 = OpCompositeExtract  %5  %235 2
+%254 = OpCompositeExtract  %5  %234 2
 %255 = OpIAdd  %5  %253 %254
-%256 = OpCompositeExtract  %5  %236 2
-%257 = OpIAdd  %5  %255 %256
 OpLine %3 91 12
-%258 = OpConvertUToF  %8  %257
-%259 = OpCompositeConstruct  %24  %258 %258 %258 %258
-OpStore %208 %259
+%256 = OpConvertUToF  %8  %255
+%257 = OpCompositeConstruct  %24  %256 %256 %256 %256
+OpStore %206 %257
 OpReturn
 OpFunctionEnd
-%262 = OpFunction  %2  None %97
-%260 = OpLabel
-%263 = OpLoad  %17  %46
-%264 = OpLoad  %19  %51
-%265 = OpLoad  %20  %53
-%266 = OpLoad  %21  %55
-%267 = OpLoad  %22  %57
-%268 = OpLoad  %23  %59
-OpBranch %269
-%269 = OpLabel
+%260 = OpFunction  %2  None %97
+%258 = OpLabel
+%261 = OpLoad  %17  %46
+%262 = OpLoad  %19  %51
+%263 = OpLoad  %20  %53
+%264 = OpLoad  %21  %55
+%265 = OpLoad  %22  %57
+%266 = OpLoad  %23  %59
+OpBranch %267
+%267 = OpLabel
 OpLine %3 96 25
-%270 = OpImageQueryLevels  %5  %263
+%268 = OpImageQueryLevels  %5  %261
 OpLine %3 97 25
-%271 = OpImageQuerySizeLod  %13  %264 %219
-%272 = OpCompositeExtract  %5  %271 2
+%269 = OpImageQuerySizeLod  %13  %262 %217
+%270 = OpCompositeExtract  %5  %269 2
 OpLine %3 98 31
-%273 = OpImageQueryLevels  %5  %264
+%271 = OpImageQueryLevels  %5  %262
 OpLine %3 99 31
-%274 = OpImageQuerySizeLod  %13  %264 %219
-%275 = OpCompositeExtract  %5  %274 2
+%272 = OpImageQuerySizeLod  %13  %262 %217
+%273 = OpCompositeExtract  %5  %272 2
 OpLine %3 100 27
-%276 = OpImageQueryLevels  %5  %265
+%274 = OpImageQueryLevels  %5  %263
 OpLine %3 101 33
-%277 = OpImageQueryLevels  %5  %266
+%275 = OpImageQueryLevels  %5  %264
 OpLine %3 102 27
-%278 = OpImageQuerySizeLod  %13  %266 %219
-%279 = OpCompositeExtract  %5  %278 2
+%276 = OpImageQuerySizeLod  %13  %264 %217
+%277 = OpCompositeExtract  %5  %276 2
 OpLine %3 103 25
-%280 = OpImageQueryLevels  %5  %267
+%278 = OpImageQueryLevels  %5  %265
 OpLine %3 104 26
-%281 = OpImageQuerySamples  %5  %268
+%279 = OpImageQuerySamples  %5  %266
 OpLine %3 106 15
-%282 = OpIAdd  %5  %272 %279
-%283 = OpIAdd  %5  %282 %281
-%284 = OpIAdd  %5  %283 %270
-%285 = OpIAdd  %5  %284 %273
-%286 = OpIAdd  %5  %285 %280
-%287 = OpIAdd  %5  %286 %276
-%288 = OpIAdd  %5  %287 %277
+%280 = OpIAdd  %5  %270 %277
+%281 = OpIAdd  %5  %280 %279
+%282 = OpIAdd  %5  %281 %268
+%283 = OpIAdd  %5  %282 %271
+%284 = OpIAdd  %5  %283 %278
+%285 = OpIAdd  %5  %284 %274
+%286 = OpIAdd  %5  %285 %275
 OpLine %3 108 12
-%289 = OpConvertUToF  %8  %288
-%290 = OpCompositeConstruct  %24  %289 %289 %289 %289
-OpStore %261 %290
+%287 = OpConvertUToF  %8  %286
+%288 = OpCompositeConstruct  %24  %287 %287 %287 %287
+OpStore %259 %288
 OpReturn
 OpFunctionEnd
-%293 = OpFunction  %2  None %97
-%291 = OpLabel
-%308 = OpVariable  %309  Function %310
-%294 = OpLoad  %16  %44
-%295 = OpLoad  %17  %46
-%296 = OpLoad  %19  %51
-%297 = OpLoad  %21  %55
-%298 = OpLoad  %25  %61
-OpBranch %311
-%311 = OpLabel
+%291 = OpFunction  %2  None %97
+%289 = OpLabel
+%306 = OpVariable  %307  Function %308
+%292 = OpLoad  %16  %44
+%293 = OpLoad  %17  %46
+%294 = OpLoad  %19  %51
+%295 = OpLoad  %21  %55
+%296 = OpLoad  %25  %61
+OpBranch %309
+%309 = OpLabel
 OpLine %3 116 16
 OpLine %3 117 17
 OpLine %3 118 20
 OpLine %3 121 5
-%313 = OpSampledImage  %312  %294 %298
-%314 = OpImageSampleImplicitLod  %24  %313 %299
-%315 = OpLoad  %24  %308
-%316 = OpFAdd  %24  %315 %314
+%311 = OpSampledImage  %310  %292 %296
+%312 = OpImageSampleImplicitLod  %24  %311 %297
+%313 = OpLoad  %24  %306
+%314 = OpFAdd  %24  %313 %312
 OpLine %3 121 5
-OpStore %308 %316
+OpStore %306 %314
 OpLine %3 122 5
-%318 = OpSampledImage  %317  %295 %298
-%319 = OpImageSampleImplicitLod  %24  %318 %301
-%320 = OpLoad  %24  %308
-%321 = OpFAdd  %24  %320 %319
+%316 = OpSampledImage  %315  %293 %296
+%317 = OpImageSampleImplicitLod  %24  %316 %299
+%318 = OpLoad  %24  %306
+%319 = OpFAdd  %24  %318 %317
 OpLine %3 122 5
-OpStore %308 %321
+OpStore %306 %319
 OpLine %3 123 5
-%322 = OpSampledImage  %317  %295 %298
-%323 = OpImageSampleImplicitLod  %24  %322 %301 ConstOffset %305
-%324 = OpLoad  %24  %308
-%325 = OpFAdd  %24  %324 %323
+%320 = OpSampledImage  %315  %293 %296
+%321 = OpImageSampleImplicitLod  %24  %320 %299 ConstOffset %303
+%322 = OpLoad  %24  %306
+%323 = OpFAdd  %24  %322 %321
 OpLine %3 123 5
-OpStore %308 %325
+OpStore %306 %323
 OpLine %3 124 5
-%326 = OpSampledImage  %317  %295 %298
-%327 = OpImageSampleExplicitLod  %24  %326 %301 Lod %306
-%328 = OpLoad  %24  %308
-%329 = OpFAdd  %24  %328 %327
+%324 = OpSampledImage  %315  %293 %296
+%325 = OpImageSampleExplicitLod  %24  %324 %299 Lod %304
+%326 = OpLoad  %24  %306
+%327 = OpFAdd  %24  %326 %325
 OpLine %3 124 5
-OpStore %308 %329
+OpStore %306 %327
 OpLine %3 125 5
-%330 = OpSampledImage  %317  %295 %298
-%331 = OpImageSampleExplicitLod  %24  %330 %301 Lod|ConstOffset %306 %305
-%332 = OpLoad  %24  %308
-%333 = OpFAdd  %24  %332 %331
+%328 = OpSampledImage  %315  %293 %296
+%329 = OpImageSampleExplicitLod  %24  %328 %299 Lod|ConstOffset %304 %303
+%330 = OpLoad  %24  %306
+%331 = OpFAdd  %24  %330 %329
 OpLine %3 125 5
-OpStore %308 %333
+OpStore %306 %331
 OpLine %3 126 5
-%334 = OpSampledImage  %317  %295 %298
-%335 = OpImageSampleImplicitLod  %24  %334 %301 Bias|ConstOffset %307 %305
-%336 = OpLoad  %24  %308
-%337 = OpFAdd  %24  %336 %335
+%332 = OpSampledImage  %315  %293 %296
+%333 = OpImageSampleImplicitLod  %24  %332 %299 Bias|ConstOffset %305 %303
+%334 = OpLoad  %24  %306
+%335 = OpFAdd  %24  %334 %333
 OpLine %3 126 5
-OpStore %308 %337
+OpStore %306 %335
 OpLine %3 127 5
-%339 = OpConvertUToF  %8  %219
-%340 = OpCompositeConstruct  %302  %301 %339
-%341 = OpSampledImage  %338  %296 %298
-%342 = OpImageSampleImplicitLod  %24  %341 %340
-%343 = OpLoad  %24  %308
-%344 = OpFAdd  %24  %343 %342
+%337 = OpConvertUToF  %8  %217
+%338 = OpCompositeConstruct  %300  %299 %337
+%339 = OpSampledImage  %336  %294 %296
+%340 = OpImageSampleImplicitLod  %24  %339 %338
+%341 = OpLoad  %24  %306
+%342 = OpFAdd  %24  %341 %340
 OpLine %3 127 5
-OpStore %308 %344
+OpStore %306 %342
 OpLine %3 128 5
-%345 = OpConvertUToF  %8  %219
-%346 = OpCompositeConstruct  %302  %301 %345
-%347 = OpSampledImage  %338  %296 %298
-%348 = OpImageSampleImplicitLod  %24  %347 %346 ConstOffset %305
-%349 = OpLoad  %24  %308
-%350 = OpFAdd  %24  %349 %348
+%343 = OpConvertUToF  %8  %217
+%344 = OpCompositeConstruct  %300  %299 %343
+%345 = OpSampledImage  %336  %294 %296
+%346 = OpImageSampleImplicitLod  %24  %345 %344 ConstOffset %303
+%347 = OpLoad  %24  %306
+%348 = OpFAdd  %24  %347 %346
 OpLine %3 128 5
-OpStore %308 %350
+OpStore %306 %348
 OpLine %3 129 5
-%351 = OpConvertUToF  %8  %219
-%352 = OpCompositeConstruct  %302  %301 %351
-%353 = OpSampledImage  %338  %296 %298
-%354 = OpImageSampleExplicitLod  %24  %353 %352 Lod %306
-%355 = OpLoad  %24  %308
-%356 = OpFAdd  %24  %355 %354
+%349 = OpConvertUToF  %8  %217
+%350 = OpCompositeConstruct  %300  %299 %349
+%351 = OpSampledImage  %336  %294 %296
+%352 = OpImageSampleExplicitLod  %24  %351 %350 Lod %304
+%353 = OpLoad  %24  %306
+%354 = OpFAdd  %24  %353 %352
 OpLine %3 129 5
-OpStore %308 %356
+OpStore %306 %354
 OpLine %3 130 5
-%357 = OpConvertUToF  %8  %219
-%358 = OpCompositeConstruct  %302  %301 %357
-%359 = OpSampledImage  %338  %296 %298
-%360 = OpImageSampleExplicitLod  %24  %359 %358 Lod|ConstOffset %306 %305
-%361 = OpLoad  %24  %308
-%362 = OpFAdd  %24  %361 %360
+%355 = OpConvertUToF  %8  %217
+%356 = OpCompositeConstruct  %300  %299 %355
+%357 = OpSampledImage  %336  %294 %296
+%358 = OpImageSampleExplicitLod  %24  %357 %356 Lod|ConstOffset %304 %303
+%359 = OpLoad  %24  %306
+%360 = OpFAdd  %24  %359 %358
 OpLine %3 130 5
-OpStore %308 %362
+OpStore %306 %360
 OpLine %3 131 5
-%363 = OpConvertUToF  %8  %219
-%364 = OpCompositeConstruct  %302  %301 %363
-%365 = OpSampledImage  %338  %296 %298
-%366 = OpImageSampleImplicitLod  %24  %365 %364 Bias|ConstOffset %307 %305
-%367 = OpLoad  %24  %308
-%368 = OpFAdd  %24  %367 %366
+%361 = OpConvertUToF  %8  %217
+%362 = OpCompositeConstruct  %300  %299 %361
+%363 = OpSampledImage  %336  %294 %296
+%364 = OpImageSampleImplicitLod  %24  %363 %362 Bias|ConstOffset %305 %303
+%365 = OpLoad  %24  %306
+%366 = OpFAdd  %24  %365 %364
 OpLine %3 131 5
-OpStore %308 %368
+OpStore %306 %366
 OpLine %3 132 5
-%369 = OpConvertSToF  %8  %77
-%370 = OpCompositeConstruct  %302  %301 %369
-%371 = OpSampledImage  %338  %296 %298
-%372 = OpImageSampleImplicitLod  %24  %371 %370
-%373 = OpLoad  %24  %308
-%374 = OpFAdd  %24  %373 %372
+%367 = OpConvertSToF  %8  %77
+%368 = OpCompositeConstruct  %300  %299 %367
+%369 = OpSampledImage  %336  %294 %296
+%370 = OpImageSampleImplicitLod  %24  %369 %368
+%371 = OpLoad  %24  %306
+%372 = OpFAdd  %24  %371 %370
 OpLine %3 132 5
-OpStore %308 %374
+OpStore %306 %372
 OpLine %3 133 5
-%375 = OpConvertSToF  %8  %77
-%376 = OpCompositeConstruct  %302  %301 %375
-%377 = OpSampledImage  %338  %296 %298
-%378 = OpImageSampleImplicitLod  %24  %377 %376 ConstOffset %305
-%379 = OpLoad  %24  %308
-%380 = OpFAdd  %24  %379 %378
+%373 = OpConvertSToF  %8  %77
+%374 = OpCompositeConstruct  %300  %299 %373
+%375 = OpSampledImage  %336  %294 %296
+%376 = OpImageSampleImplicitLod  %24  %375 %374 ConstOffset %303
+%377 = OpLoad  %24  %306
+%378 = OpFAdd  %24  %377 %376
 OpLine %3 133 5
-OpStore %308 %380
+OpStore %306 %378
 OpLine %3 134 5
-%381 = OpConvertSToF  %8  %77
-%382 = OpCompositeConstruct  %302  %301 %381
-%383 = OpSampledImage  %338  %296 %298
-%384 = OpImageSampleExplicitLod  %24  %383 %382 Lod %306
-%385 = OpLoad  %24  %308
-%386 = OpFAdd  %24  %385 %384
+%379 = OpConvertSToF  %8  %77
+%380 = OpCompositeConstruct  %300  %299 %379
+%381 = OpSampledImage  %336  %294 %296
+%382 = OpImageSampleExplicitLod  %24  %381 %380 Lod %304
+%383 = OpLoad  %24  %306
+%384 = OpFAdd  %24  %383 %382
 OpLine %3 134 5
-OpStore %308 %386
+OpStore %306 %384
 OpLine %3 135 5
-%387 = OpConvertSToF  %8  %77
-%388 = OpCompositeConstruct  %302  %301 %387
-%389 = OpSampledImage  %338  %296 %298
-%390 = OpImageSampleExplicitLod  %24  %389 %388 Lod|ConstOffset %306 %305
-%391 = OpLoad  %24  %308
-%392 = OpFAdd  %24  %391 %390
+%385 = OpConvertSToF  %8  %77
+%386 = OpCompositeConstruct  %300  %299 %385
+%387 = OpSampledImage  %336  %294 %296
+%388 = OpImageSampleExplicitLod  %24  %387 %386 Lod|ConstOffset %304 %303
+%389 = OpLoad  %24  %306
+%390 = OpFAdd  %24  %389 %388
 OpLine %3 135 5
-OpStore %308 %392
+OpStore %306 %390
 OpLine %3 136 5
-%393 = OpConvertSToF  %8  %77
-%394 = OpCompositeConstruct  %302  %301 %393
-%395 = OpSampledImage  %338  %296 %298
-%396 = OpImageSampleImplicitLod  %24  %395 %394 Bias|ConstOffset %307 %305
-%397 = OpLoad  %24  %308
-%398 = OpFAdd  %24  %397 %396
+%391 = OpConvertSToF  %8  %77
+%392 = OpCompositeConstruct  %300  %299 %391
+%393 = OpSampledImage  %336  %294 %296
+%394 = OpImageSampleImplicitLod  %24  %393 %392 Bias|ConstOffset %305 %303
+%395 = OpLoad  %24  %306
+%396 = OpFAdd  %24  %395 %394
 OpLine %3 136 5
-OpStore %308 %398
+OpStore %306 %396
 OpLine %3 137 5
-%400 = OpConvertUToF  %8  %219
-%401 = OpCompositeConstruct  %24  %303 %400
-%402 = OpSampledImage  %399  %297 %298
-%403 = OpImageSampleImplicitLod  %24  %402 %401
-%404 = OpLoad  %24  %308
-%405 = OpFAdd  %24  %404 %403
+%398 = OpConvertUToF  %8  %217
+%399 = OpCompositeConstruct  %24  %301 %398
+%400 = OpSampledImage  %397  %295 %296
+%401 = OpImageSampleImplicitLod  %24  %400 %399
+%402 = OpLoad  %24  %306
+%403 = OpFAdd  %24  %402 %401
 OpLine %3 137 5
-OpStore %308 %405
+OpStore %306 %403
 OpLine %3 138 5
-%406 = OpConvertUToF  %8  %219
-%407 = OpCompositeConstruct  %24  %303 %406
-%408 = OpSampledImage  %399  %297 %298
-%409 = OpImageSampleExplicitLod  %24  %408 %407 Lod %306
-%410 = OpLoad  %24  %308
-%411 = OpFAdd  %24  %410 %409
+%404 = OpConvertUToF  %8  %217
+%405 = OpCompositeConstruct  %24  %301 %404
+%406 = OpSampledImage  %397  %295 %296
+%407 = OpImageSampleExplicitLod  %24  %406 %405 Lod %304
+%408 = OpLoad  %24  %306
+%409 = OpFAdd  %24  %408 %407
 OpLine %3 138 5
-OpStore %308 %411
+OpStore %306 %409
 OpLine %3 139 5
-%412 = OpConvertUToF  %8  %219
-%413 = OpCompositeConstruct  %24  %303 %412
-%414 = OpSampledImage  %399  %297 %298
-%415 = OpImageSampleImplicitLod  %24  %414 %413 Bias %307
-%416 = OpLoad  %24  %308
-%417 = OpFAdd  %24  %416 %415
+%410 = OpConvertUToF  %8  %217
+%411 = OpCompositeConstruct  %24  %301 %410
+%412 = OpSampledImage  %397  %295 %296
+%413 = OpImageSampleImplicitLod  %24  %412 %411 Bias %305
+%414 = OpLoad  %24  %306
+%415 = OpFAdd  %24  %414 %413
 OpLine %3 139 5
-OpStore %308 %417
+OpStore %306 %415
 OpLine %3 140 5
-%418 = OpConvertSToF  %8  %77
-%419 = OpCompositeConstruct  %24  %303 %418
-%420 = OpSampledImage  %399  %297 %298
-%421 = OpImageSampleImplicitLod  %24  %420 %419
-%422 = OpLoad  %24  %308
-%423 = OpFAdd  %24  %422 %421
+%416 = OpConvertSToF  %8  %77
+%417 = OpCompositeConstruct  %24  %301 %416
+%418 = OpSampledImage  %397  %295 %296
+%419 = OpImageSampleImplicitLod  %24  %418 %417
+%420 = OpLoad  %24  %306
+%421 = OpFAdd  %24  %420 %419
 OpLine %3 140 5
-OpStore %308 %423
+OpStore %306 %421
 OpLine %3 141 5
-%424 = OpConvertSToF  %8  %77
-%425 = OpCompositeConstruct  %24  %303 %424
-%426 = OpSampledImage  %399  %297 %298
-%427 = OpImageSampleExplicitLod  %24  %426 %425 Lod %306
-%428 = OpLoad  %24  %308
-%429 = OpFAdd  %24  %428 %427
+%422 = OpConvertSToF  %8  %77
+%423 = OpCompositeConstruct  %24  %301 %422
+%424 = OpSampledImage  %397  %295 %296
+%425 = OpImageSampleExplicitLod  %24  %424 %423 Lod %304
+%426 = OpLoad  %24  %306
+%427 = OpFAdd  %24  %426 %425
 OpLine %3 141 5
-OpStore %308 %429
+OpStore %306 %427
 OpLine %3 142 5
-%430 = OpConvertSToF  %8  %77
-%431 = OpCompositeConstruct  %24  %303 %430
-%432 = OpSampledImage  %399  %297 %298
-%433 = OpImageSampleImplicitLod  %24  %432 %431 Bias %307
-%434 = OpLoad  %24  %308
-%435 = OpFAdd  %24  %434 %433
+%428 = OpConvertSToF  %8  %77
+%429 = OpCompositeConstruct  %24  %301 %428
+%430 = OpSampledImage  %397  %295 %296
+%431 = OpImageSampleImplicitLod  %24  %430 %429 Bias %305
+%432 = OpLoad  %24  %306
+%433 = OpFAdd  %24  %432 %431
 OpLine %3 142 5
-OpStore %308 %435
+OpStore %306 %433
 OpLine %3 1 1
-%436 = OpLoad  %24  %308
-OpStore %292 %436
+%434 = OpLoad  %24  %306
+OpStore %290 %434
 OpReturn
 OpFunctionEnd
-%440 = OpFunction  %2  None %97
-%437 = OpLabel
-%445 = OpVariable  %446  Function %447
-%441 = OpLoad  %25  %63
-%442 = OpLoad  %26  %64
-%443 = OpLoad  %27  %66
-%444 = OpLoad  %28  %68
-OpBranch %448
-%448 = OpLabel
+%438 = OpFunction  %2  None %97
+%435 = OpLabel
+%443 = OpVariable  %444  Function %445
+%439 = OpLoad  %25  %63
+%440 = OpLoad  %26  %64
+%441 = OpLoad  %27  %66
+%442 = OpLoad  %28  %68
+OpBranch %446
+%446 = OpLabel
 OpLine %3 157 14
 OpLine %3 158 15
 OpLine %3 161 5
-%450 = OpSampledImage  %449  %442 %441
-%451 = OpImageSampleDrefImplicitLod  %8  %450 %301 %299
-%452 = OpLoad  %8  %445
-%453 = OpFAdd  %8  %452 %451
+%448 = OpSampledImage  %447  %440 %439
+%449 = OpImageSampleDrefImplicitLod  %8  %448 %299 %297
+%450 = OpLoad  %8  %443
+%451 = OpFAdd  %8  %450 %449
 OpLine %3 161 5
-OpStore %445 %453
+OpStore %443 %451
 OpLine %3 162 5
-%455 = OpConvertUToF  %8  %219
-%456 = OpCompositeConstruct  %302  %301 %455
-%457 = OpSampledImage  %454  %443 %441
-%458 = OpImageSampleDrefImplicitLod  %8  %457 %456 %299
-%459 = OpLoad  %8  %445
-%460 = OpFAdd  %8  %459 %458
+%453 = OpConvertUToF  %8  %217
+%454 = OpCompositeConstruct  %300  %299 %453
+%455 = OpSampledImage  %452  %441 %439
+%456 = OpImageSampleDrefImplicitLod  %8  %455 %454 %297
+%457 = OpLoad  %8  %443
+%458 = OpFAdd  %8  %457 %456
 OpLine %3 162 5
-OpStore %445 %460
+OpStore %443 %458
 OpLine %3 163 5
-%461 = OpConvertSToF  %8  %77
-%462 = OpCompositeConstruct  %302  %301 %461
-%463 = OpSampledImage  %454  %443 %441
-%464 = OpImageSampleDrefImplicitLod  %8  %463 %462 %299
-%465 = OpLoad  %8  %445
-%466 = OpFAdd  %8  %465 %464
+%459 = OpConvertSToF  %8  %77
+%460 = OpCompositeConstruct  %300  %299 %459
+%461 = OpSampledImage  %452  %441 %439
+%462 = OpImageSampleDrefImplicitLod  %8  %461 %460 %297
+%463 = OpLoad  %8  %443
+%464 = OpFAdd  %8  %463 %462
 OpLine %3 163 5
-OpStore %445 %466
+OpStore %443 %464
 OpLine %3 164 5
-%468 = OpSampledImage  %467  %444 %441
-%469 = OpImageSampleDrefImplicitLod  %8  %468 %303 %299
-%470 = OpLoad  %8  %445
-%471 = OpFAdd  %8  %470 %469
+%466 = OpSampledImage  %465  %442 %439
+%467 = OpImageSampleDrefImplicitLod  %8  %466 %301 %297
+%468 = OpLoad  %8  %443
+%469 = OpFAdd  %8  %468 %467
 OpLine %3 164 5
-OpStore %445 %471
+OpStore %443 %469
 OpLine %3 165 5
-%472 = OpSampledImage  %449  %442 %441
-%473 = OpImageSampleDrefExplicitLod  %8  %472 %301 %299 Lod %474
-%475 = OpLoad  %8  %445
-%476 = OpFAdd  %8  %475 %473
+%470 = OpSampledImage  %447  %440 %439
+%471 = OpImageSampleDrefExplicitLod  %8  %470 %299 %297 Lod %472
+%473 = OpLoad  %8  %443
+%474 = OpFAdd  %8  %473 %471
 OpLine %3 165 5
-OpStore %445 %476
+OpStore %443 %474
 OpLine %3 166 5
-%477 = OpConvertUToF  %8  %219
-%478 = OpCompositeConstruct  %302  %301 %477
-%479 = OpSampledImage  %454  %443 %441
-%480 = OpImageSampleDrefExplicitLod  %8  %479 %478 %299 Lod %474
-%481 = OpLoad  %8  %445
-%482 = OpFAdd  %8  %481 %480
+%475 = OpConvertUToF  %8  %217
+%476 = OpCompositeConstruct  %300  %299 %475
+%477 = OpSampledImage  %452  %441 %439
+%478 = OpImageSampleDrefExplicitLod  %8  %477 %476 %297 Lod %472
+%479 = OpLoad  %8  %443
+%480 = OpFAdd  %8  %479 %478
 OpLine %3 166 5
-OpStore %445 %482
+OpStore %443 %480
 OpLine %3 167 5
-%483 = OpConvertSToF  %8  %77
-%484 = OpCompositeConstruct  %302  %301 %483
-%485 = OpSampledImage  %454  %443 %441
-%486 = OpImageSampleDrefExplicitLod  %8  %485 %484 %299 Lod %474
-%487 = OpLoad  %8  %445
-%488 = OpFAdd  %8  %487 %486
+%481 = OpConvertSToF  %8  %77
+%482 = OpCompositeConstruct  %300  %299 %481
+%483 = OpSampledImage  %452  %441 %439
+%484 = OpImageSampleDrefExplicitLod  %8  %483 %482 %297 Lod %472
+%485 = OpLoad  %8  %443
+%486 = OpFAdd  %8  %485 %484
 OpLine %3 167 5
-OpStore %445 %488
+OpStore %443 %486
 OpLine %3 168 5
-%489 = OpSampledImage  %467  %444 %441
-%490 = OpImageSampleDrefExplicitLod  %8  %489 %303 %299 Lod %474
-%491 = OpLoad  %8  %445
-%492 = OpFAdd  %8  %491 %490
+%487 = OpSampledImage  %465  %442 %439
+%488 = OpImageSampleDrefExplicitLod  %8  %487 %301 %297 Lod %472
+%489 = OpLoad  %8  %443
+%490 = OpFAdd  %8  %489 %488
 OpLine %3 168 5
-OpStore %445 %492
+OpStore %443 %490
 OpLine %3 1 1
-%493 = OpLoad  %8  %445
-OpStore %438 %493
+%491 = OpLoad  %8  %443
+OpStore %436 %491
 OpReturn
 OpFunctionEnd
-%496 = OpFunction  %2  None %97
-%494 = OpLabel
-%497 = OpLoad  %17  %46
-%498 = OpLoad  %4  %48
-%499 = OpLoad  %18  %49
-%500 = OpLoad  %25  %61
-%501 = OpLoad  %25  %63
-%502 = OpLoad  %26  %64
-OpBranch %503
-%503 = OpLabel
+%494 = OpFunction  %2  None %97
+%492 = OpLabel
+%495 = OpLoad  %17  %46
+%496 = OpLoad  %4  %48
+%497 = OpLoad  %18  %49
+%498 = OpLoad  %25  %61
+%499 = OpLoad  %25  %63
+%500 = OpLoad  %26  %64
+OpBranch %501
+%501 = OpLabel
 OpLine %3 174 14
 OpLine %3 176 15
-%504 = OpSampledImage  %317  %497 %500
-%505 = OpImageGather  %24  %504 %301 %506
+%502 = OpSampledImage  %315  %495 %498
+%503 = OpImageGather  %24  %502 %299 %504
 OpLine %3 177 22
-%507 = OpSampledImage  %317  %497 %500
-%508 = OpImageGather  %24  %507 %301 %509 ConstOffset %305
+%505 = OpSampledImage  %315  %495 %498
+%506 = OpImageGather  %24  %505 %299 %507 ConstOffset %303
 OpLine %3 178 21
-%510 = OpSampledImage  %449  %502 %501
-%511 = OpImageDrefGather  %24  %510 %301 %299
+%508 = OpSampledImage  %447  %500 %499
+%509 = OpImageDrefGather  %24  %508 %299 %297
 OpLine %3 179 28
-%512 = OpSampledImage  %449  %502 %501
-%513 = OpImageDrefGather  %24  %512 %301 %299 ConstOffset %305
+%510 = OpSampledImage  %447  %500 %499
+%511 = OpImageDrefGather  %24  %510 %299 %297 ConstOffset %303
 OpLine %3 181 13
-%515 = OpSampledImage  %514  %498 %500
-%516 = OpImageGather  %116  %515 %301 %219
+%513 = OpSampledImage  %512  %496 %498
+%514 = OpImageGather  %116  %513 %299 %217
 OpLine %3 182 13
-%519 = OpSampledImage  %518  %499 %500
-%520 = OpImageGather  %517  %519 %301 %219
+%517 = OpSampledImage  %516  %497 %498
+%518 = OpImageGather  %515  %517 %299 %217
 OpLine %3 183 13
-%521 = OpConvertUToF  %24  %516
-%522 = OpConvertSToF  %24  %520
-%523 = OpFAdd  %24  %521 %522
+%519 = OpConvertUToF  %24  %514
+%520 = OpConvertSToF  %24  %518
+%521 = OpFAdd  %24  %519 %520
 OpLine %3 185 12
-%524 = OpFAdd  %24  %505 %508
-%525 = OpFAdd  %24  %524 %511
-%526 = OpFAdd  %24  %525 %513
-%527 = OpFAdd  %24  %526 %523
-OpStore %495 %527
+%522 = OpFAdd  %24  %503 %506
+%523 = OpFAdd  %24  %522 %509
+%524 = OpFAdd  %24  %523 %511
+%525 = OpFAdd  %24  %524 %521
+OpStore %493 %525
 OpReturn
 OpFunctionEnd
-%530 = OpFunction  %2  None %97
-%528 = OpLabel
-%531 = OpLoad  %25  %61
-%532 = OpLoad  %26  %64
-OpBranch %533
-%533 = OpLabel
+%528 = OpFunction  %2  None %97
+%526 = OpLabel
+%529 = OpLoad  %25  %61
+%530 = OpLoad  %26  %64
+OpBranch %531
+%531 = OpLabel
 OpLine %3 190 14
 OpLine %3 192 15
-%534 = OpSampledImage  %449  %532 %531
-%535 = OpImageSampleImplicitLod  %24  %534 %301
-%536 = OpCompositeExtract  %8  %535 0
+%532 = OpSampledImage  %447  %530 %529
+%533 = OpImageSampleImplicitLod  %24  %532 %299
+%534 = OpCompositeExtract  %8  %533 0
 OpLine %3 193 22
-%537 = OpSampledImage  %449  %532 %531
-%538 = OpImageGather  %24  %537 %301 %219
+%535 = OpSampledImage  %447  %530 %529
+%536 = OpImageGather  %24  %535 %299 %217
 OpLine %3 194 21
-%539 = OpSampledImage  %449  %532 %531
-%541 = OpConvertSToF  %8  %88
-%540 = OpImageSampleExplicitLod  %24  %539 %301 Lod %541
-%542 = OpCompositeExtract  %8  %540 0
+%537 = OpSampledImage  %447  %530 %529
+%539 = OpConvertSToF  %8  %88
+%538 = OpImageSampleExplicitLod  %24  %537 %299 Lod %539
+%540 = OpCompositeExtract  %8  %538 0
 OpLine %3 192 15
-%543 = OpCompositeConstruct  %24  %536 %536 %536 %536
-%544 = OpFAdd  %24  %543 %538
-%545 = OpCompositeConstruct  %24  %542 %542 %542 %542
-%546 = OpFAdd  %24  %544 %545
-OpStore %529 %546
+%541 = OpCompositeConstruct  %24  %534 %534 %534 %534
+%542 = OpFAdd  %24  %541 %536
+%543 = OpCompositeConstruct  %24  %540 %540 %540 %540
+%544 = OpFAdd  %24  %542 %543
+OpStore %527 %544
 OpReturn
 OpFunctionEnd
\ No newline at end of file
diff --git a/naga/tests/out/wgsl/extra.wgsl b/naga/tests/out/wgsl/extra.wgsl
index 25f894e7f5..574c35ab0b 100644
--- a/naga/tests/out/wgsl/extra.wgsl
+++ b/naga/tests/out/wgsl/extra.wgsl
@@ -1,6 +1,6 @@
 struct PushConstants {
     index: u32,
-    double: vec2<f64>,
+    double: vec2<f32>,
 }
 
 struct FragmentIn {
diff --git a/naga/tests/out/wgsl/f16-glsl.comp.wgsl b/naga/tests/out/wgsl/f16-glsl.comp.wgsl
new file mode 100644
index 0000000000..e12dcb1408
--- /dev/null
+++ b/naga/tests/out/wgsl/f16-glsl.comp.wgsl
@@ -0,0 +1,47 @@
+enable f16;
+
+struct A {
+    a_1_: f16,
+    a_vec2_: vec2<f16>,
+    a_vec3_: vec3<f16>,
+    a_vec4_: vec4<f16>,
+}
+
+struct B {
+    b_1_: f16,
+    b_vec2_: vec2<f16>,
+    b_vec3_: vec3<f16>,
+    b_vec4_: vec4<f16>,
+    b_mat2_: mat2x2<f16>,
+    b_mat2x3_: mat2x3<f16>,
+    b_mat2x4_: mat2x4<f16>,
+    b_mat3x2_: mat3x2<f16>,
+    b_mat3_: mat3x3<f16>,
+    b_mat3x4_: mat3x4<f16>,
+    b_mat4x2_: mat4x2<f16>,
+    b_mat4x3_: mat4x3<f16>,
+    b_mat4_: mat4x4<f16>,
+}
+
+@group(0) @binding(0) 
+var<uniform> global: A;
+@group(0) @binding(1) 
+var<storage, read_write> global_1: B;
+
+fn main_1() {
+    let _e16 = global.a_1_;
+    global_1.b_1_ = _e16;
+    let _e17 = global.a_vec2_;
+    global_1.b_vec2_ = _e17;
+    let _e18 = global.a_vec3_;
+    global_1.b_vec3_ = _e18;
+    let _e19 = global.a_vec4_;
+    global_1.b_vec4_ = _e19;
+    return;
+}
+
+@compute @workgroup_size(1, 1, 1) 
+fn main() {
+    main_1();
+    return;
+}
diff --git a/naga/tests/out/wgsl/f16-spv.wgsl b/naga/tests/out/wgsl/f16-spv.wgsl
new file mode 100644
index 0000000000..4c29ace8c3
--- /dev/null
+++ b/naga/tests/out/wgsl/f16-spv.wgsl
@@ -0,0 +1,46 @@
+enable f16;
+
+struct B {
+    b_1_: f16,
+    b_vec2_: vec2<f16>,
+    b_vec3_: vec3<f16>,
+    b_vec4_: vec4<f16>,
+    b_mat2_: mat2x2<f16>,
+    b_mat2x3_: mat2x3<f16>,
+    b_mat2x4_: mat2x4<f16>,
+    b_mat3x2_: mat3x2<f16>,
+    b_mat3_: mat3x3<f16>,
+    b_mat3x4_: mat3x4<f16>,
+    b_mat4x2_: mat4x2<f16>,
+    b_mat4x3_: mat4x3<f16>,
+    b_mat4_: mat4x4<f16>,
+}
+
+struct A {
+    a_1_: f16,
+    a_vec2_: vec2<f16>,
+    a_vec3_: vec3<f16>,
+    a_vec4_: vec4<f16>,
+}
+
+@group(0) @binding(1) 
+var<storage, read_write> unnamed: B;
+@group(0) @binding(0) 
+var<uniform> unnamed_1: A;
+
+fn main_1() {
+    let _e3 = unnamed_1.a_1_;
+    unnamed.b_1_ = _e3;
+    let _e6 = unnamed_1.a_vec2_;
+    unnamed.b_vec2_ = _e6;
+    let _e9 = unnamed_1.a_vec3_;
+    unnamed.b_vec3_ = _e9;
+    let _e12 = unnamed_1.a_vec4_;
+    unnamed.b_vec4_ = _e12;
+    return;
+}
+
+@compute @workgroup_size(1, 1, 1) 
+fn main() {
+    main_1();
+}
diff --git a/naga/tests/out/wgsl/f16.wgsl b/naga/tests/out/wgsl/f16.wgsl
new file mode 100644
index 0000000000..a45edce3b5
--- /dev/null
+++ b/naga/tests/out/wgsl/f16.wgsl
@@ -0,0 +1,167 @@
+enable f16;
+
+struct UniformCompatible {
+    val_u32_: u32,
+    val_i32_: i32,
+    val_f32_: f32,
+    val_f16_: f16,
+    val_f16_2_: vec2<f16>,
+    val_f16_3_: vec3<f16>,
+    val_f16_4_: vec4<f16>,
+    final_value: f16,
+    val_mat2x2_: mat2x2<f16>,
+    val_mat2x3_: mat2x3<f16>,
+    val_mat2x4_: mat2x4<f16>,
+    val_mat3x2_: mat3x2<f16>,
+    val_mat3x3_: mat3x3<f16>,
+    val_mat3x4_: mat3x4<f16>,
+    val_mat4x2_: mat4x2<f16>,
+    val_mat4x3_: mat4x3<f16>,
+    val_mat4x4_: mat4x4<f16>,
+}
+
+struct StorageCompatible {
+    val_f16_array_2_: array<f16, 2>,
+}
+
+struct LayoutTest {
+    scalar1_: f16,
+    scalar2_: f16,
+    v3_: vec3<f16>,
+    tuck_in: f16,
+    scalar4_: f16,
+    larger: u32,
+}
+
+const constant_variable: f16 = 15.203125h;
+
+var<private> private_variable: f16 = 1h;
+@group(0) @binding(0) 
+var<uniform> input_uniform: UniformCompatible;
+@group(0) @binding(1) 
+var<storage> input_storage: UniformCompatible;
+@group(0) @binding(2) 
+var<storage> input_arrays: StorageCompatible;
+@group(0) @binding(3) 
+var<storage, read_write> output: UniformCompatible;
+@group(0) @binding(4) 
+var<storage, read_write> output_arrays: StorageCompatible;
+
+fn f16_function(x: f16) -> f16 {
+    var val: f16 = 15.203125h;
+
+    let _e4 = val;
+    val = (_e4 + -33344h);
+    let _e6 = val;
+    let _e9 = val;
+    val = (_e9 + (_e6 + 5h));
+    let _e13 = input_uniform.val_f32_;
+    let _e14 = val;
+    let _e18 = val;
+    val = (_e18 + f16((_e13 + f32(_e14))));
+    let _e22 = input_uniform.val_f16_;
+    let _e25 = val;
+    val = (_e25 + vec3(_e22).z);
+    let _e31 = input_uniform.val_f16_;
+    let _e34 = input_storage.val_f16_;
+    output.val_f16_ = (_e31 + _e34);
+    let _e40 = input_uniform.val_f16_2_;
+    let _e43 = input_storage.val_f16_2_;
+    output.val_f16_2_ = (_e40 + _e43);
+    let _e49 = input_uniform.val_f16_3_;
+    let _e52 = input_storage.val_f16_3_;
+    output.val_f16_3_ = (_e49 + _e52);
+    let _e58 = input_uniform.val_f16_4_;
+    let _e61 = input_storage.val_f16_4_;
+    output.val_f16_4_ = (_e58 + _e61);
+    let _e67 = input_uniform.val_mat2x2_;
+    let _e70 = input_storage.val_mat2x2_;
+    output.val_mat2x2_ = (_e67 + _e70);
+    let _e76 = input_uniform.val_mat2x3_;
+    let _e79 = input_storage.val_mat2x3_;
+    output.val_mat2x3_ = (_e76 + _e79);
+    let _e85 = input_uniform.val_mat2x4_;
+    let _e88 = input_storage.val_mat2x4_;
+    output.val_mat2x4_ = (_e85 + _e88);
+    let _e94 = input_uniform.val_mat3x2_;
+    let _e97 = input_storage.val_mat3x2_;
+    output.val_mat3x2_ = (_e94 + _e97);
+    let _e103 = input_uniform.val_mat3x3_;
+    let _e106 = input_storage.val_mat3x3_;
+    output.val_mat3x3_ = (_e103 + _e106);
+    let _e112 = input_uniform.val_mat3x4_;
+    let _e115 = input_storage.val_mat3x4_;
+    output.val_mat3x4_ = (_e112 + _e115);
+    let _e121 = input_uniform.val_mat4x2_;
+    let _e124 = input_storage.val_mat4x2_;
+    output.val_mat4x2_ = (_e121 + _e124);
+    let _e130 = input_uniform.val_mat4x3_;
+    let _e133 = input_storage.val_mat4x3_;
+    output.val_mat4x3_ = (_e130 + _e133);
+    let _e139 = input_uniform.val_mat4x4_;
+    let _e142 = input_storage.val_mat4x4_;
+    output.val_mat4x4_ = (_e139 + _e142);
+    let _e148 = input_arrays.val_f16_array_2_;
+    output_arrays.val_f16_array_2_ = _e148;
+    let _e149 = val;
+    let _e151 = val;
+    val = (_e151 + abs(_e149));
+    let _e153 = val;
+    let _e154 = val;
+    let _e155 = val;
+    let _e157 = val;
+    val = (_e157 + clamp(_e153, _e154, _e155));
+    let _e159 = val;
+    let _e161 = val;
+    let _e164 = val;
+    val = (_e164 + dot(vec2(_e159), vec2(_e161)));
+    let _e166 = val;
+    let _e167 = val;
+    let _e169 = val;
+    val = (_e169 + max(_e166, _e167));
+    let _e171 = val;
+    let _e172 = val;
+    let _e174 = val;
+    val = (_e174 + min(_e171, _e172));
+    let _e176 = val;
+    let _e178 = val;
+    val = (_e178 + sign(_e176));
+    let _e181 = val;
+    val = (_e181 + 1h);
+    let _e185 = input_uniform.val_f16_2_;
+    let float_vec2_ = vec2<f32>(_e185);
+    output.val_f16_2_ = vec2<f16>(float_vec2_);
+    let _e192 = input_uniform.val_f16_3_;
+    let float_vec3_ = vec3<f32>(_e192);
+    output.val_f16_3_ = vec3<f16>(float_vec3_);
+    let _e199 = input_uniform.val_f16_4_;
+    let float_vec4_ = vec4<f32>(_e199);
+    output.val_f16_4_ = vec4<f16>(float_vec4_);
+    let _e208 = input_uniform.val_mat2x2_;
+    output.val_mat2x2_ = mat2x2<f16>(mat2x2<f32>(_e208));
+    let _e215 = input_uniform.val_mat2x3_;
+    output.val_mat2x3_ = mat2x3<f16>(mat2x3<f32>(_e215));
+    let _e222 = input_uniform.val_mat2x4_;
+    output.val_mat2x4_ = mat2x4<f16>(mat2x4<f32>(_e222));
+    let _e229 = input_uniform.val_mat3x2_;
+    output.val_mat3x2_ = mat3x2<f16>(mat3x2<f32>(_e229));
+    let _e236 = input_uniform.val_mat3x3_;
+    output.val_mat3x3_ = mat3x3<f16>(mat3x3<f32>(_e236));
+    let _e243 = input_uniform.val_mat3x4_;
+    output.val_mat3x4_ = mat3x4<f16>(mat3x4<f32>(_e243));
+    let _e250 = input_uniform.val_mat4x2_;
+    output.val_mat4x2_ = mat4x2<f16>(mat4x2<f32>(_e250));
+    let _e257 = input_uniform.val_mat4x3_;
+    output.val_mat4x3_ = mat4x3<f16>(mat4x3<f32>(_e257));
+    let _e264 = input_uniform.val_mat4x4_;
+    output.val_mat4x4_ = mat4x4<f16>(mat4x4<f32>(_e264));
+    let _e267 = val;
+    return _e267;
+}
+
+@compute @workgroup_size(1, 1, 1) 
+fn main() {
+    let _e3 = f16_function(2h);
+    output.final_value = _e3;
+    return;
+}
diff --git a/naga/tests/spirv_capabilities.rs b/naga/tests/spirv_capabilities.rs
index f221c7896e..2d46e37f72 100644
--- a/naga/tests/spirv_capabilities.rs
+++ b/naga/tests/spirv_capabilities.rs
@@ -208,3 +208,8 @@ fn int64() {
         "#,
     );
 }
+
+#[test]
+fn float16() {
+    require(&[Ca::Float16], "enable f16; fn f(x: f16) { }");
+}
diff --git a/naga/tests/wgsl_errors.rs b/naga/tests/wgsl_errors.rs
index 2447208b2a..e1d0a42234 100644
--- a/naga/tests/wgsl_errors.rs
+++ b/naga/tests/wgsl_errors.rs
@@ -8,7 +8,8 @@ use naga::valid::Capabilities;
 #[track_caller]
 fn check(input: &str, snapshot: &str) {
     let output = naga::front::wgsl::parse_str(input)
-        .expect_err("expected parser error")
+        .map(|_| panic!("expected parser error, but parsing succeeded!"))
+        .unwrap_err()
         .emit_to_string(input);
     if output != snapshot {
         for diff in diff::lines(snapshot, &output) {
@@ -22,6 +23,19 @@ fn check(input: &str, snapshot: &str) {
     }
 }
 
+#[track_caller]
+fn check_success(input: &str) {
+    match naga::front::wgsl::parse_str(input) {
+        Ok(_) => {}
+        Err(err) => {
+            panic!(
+                "expected success, but parsing failed with:\n{}",
+                err.emit_to_string(input)
+            );
+        }
+    }
+}
+
 #[test]
 fn very_negative_integers() {
     // wgpu#4492
@@ -827,6 +841,50 @@ fn matrix_constructor_inferred() {
     );
 }
 
+#[test]
+fn float16_requires_enable() {
+    check(
+        r#"
+            const a: f16 = 1.0;
+        "#,
+        r#"error: the `f16` language extension is not enabled
+  ┌─ wgsl:2:22
+  │
+2 │             const a: f16 = 1.0;
+  │                      ^^^ the `f16` language extension is needed for this functionality, but it is not currently enabled.
+  │
+  = note: You can enable this extension by adding `enable f16;` at the top of the shader.
+
+"#,
+    );
+
+    check(
+        r#"
+            const a = 1.0h;
+        "#,
+        r#"error: the `f16` language extension is not enabled
+  ┌─ wgsl:2:23
+  │
+2 │             const a = 1.0h;
+  │                       ^^^^ the `f16` language extension is needed for this functionality, but it is not currently enabled.
+  │
+  = note: You can enable this extension by adding `enable f16;` at the top of the shader.
+
+"#,
+    );
+}
+
+#[test]
+fn multiple_enables_valid() {
+    check_success(
+        r#"
+            enable f16;
+            enable f16;
+            const a: f16 = 1.0h;
+        "#,
+    );
+}
+
 /// Check the result of validating a WGSL program against a pattern.
 ///
 /// Unless you are generating code programmatically, the
@@ -932,6 +990,53 @@ fn int64_capability() {
     }
 }
 
+#[test]
+fn float16_capability() {
+    check_validation! {
+        "enable f16; var input: f16;",
+        "enable f16; var input: vec2<f16>;":
+        Err(naga::valid::ValidationError::Type {
+            source: naga::valid::TypeError::WidthError(naga::valid::WidthError::MissingCapability {flag: "FLOAT16",..}),
+            ..
+        })
+    }
+}
+
+#[test]
+fn float16_in_push_constant() {
+    check_validation! {
+        "enable f16; var<push_constant> input: f16;",
+        "enable f16; var<push_constant> input: vec2<f16>;",
+        "enable f16; var<push_constant> input: mat4x4<f16>;",
+        "enable f16; struct S { a: f16 }; var<push_constant> input: S;",
+        "enable f16; struct S1 { a: f16 }; struct S2 { a : S1 } var<push_constant> input: S2;":
+        Err(naga::valid::ValidationError::GlobalVariable {
+            source: naga::valid::GlobalVariableError::InvalidPushConstantType(
+                naga::valid::PushConstantError::InvalidScalar(
+                    naga::Scalar::F16
+                )
+            ),
+            ..
+        }),
+        naga::valid::Capabilities::SHADER_FLOAT16 | naga::valid::Capabilities::PUSH_CONSTANT
+    }
+}
+
+#[test]
+fn float16_in_atomic() {
+    check_validation! {
+        "enable f16; var<storage> a: atomic<f16>;":
+        Err(naga::valid::ValidationError::Type {
+            source: naga::valid::TypeError::InvalidAtomicWidth(
+                naga::ScalarKind::Float,
+                2
+            ),
+            ..
+        }),
+        naga::valid::Capabilities::SHADER_FLOAT16
+    }
+}
+
 #[test]
 fn invalid_arrays() {
     check_validation! {
diff --git a/tests/Cargo.toml b/tests/Cargo.toml
index abdab2b852..fe1f98a050 100644
--- a/tests/Cargo.toml
+++ b/tests/Cargo.toml
@@ -49,6 +49,7 @@ cfg-if.workspace = true
 ctor.workspace = true
 futures-lite.workspace = true
 glam.workspace = true
+half = { workspace = true, features = ["bytemuck"] }
 itertools.workspace = true
 image.workspace = true
 libtest-mimic.workspace = true
diff --git a/tests/gpu-tests/shader/mod.rs b/tests/gpu-tests/shader/mod.rs
index 9a3bae0d40..15d4c5e5cf 100644
--- a/tests/gpu-tests/shader/mod.rs
+++ b/tests/gpu-tests/shader/mod.rs
@@ -143,8 +143,8 @@ impl ShaderTest {
             body,
             input_type: String::from("CustomStruct"),
             output_type: String::from("array<u32>"),
-            input_values: bytemuck::cast_slice(input_values).to_vec(),
-            output_values: vec![bytemuck::cast_slice(output_values).to_vec()],
+            input_values: bytemuck::pod_collect_to_vec(input_values),
+            output_values: vec![bytemuck::pod_collect_to_vec(output_values)],
             output_comparison_fn: Self::default_comparison_function::<O>,
             output_initialization: u32::MAX,
             failures: Backends::empty(),
diff --git a/tests/gpu-tests/shader/struct_layout.rs b/tests/gpu-tests/shader/struct_layout.rs
index 38a040fcad..25bd32bf77 100644
--- a/tests/gpu-tests/shader/struct_layout.rs
+++ b/tests/gpu-tests/shader/struct_layout.rs
@@ -5,6 +5,60 @@ use wgpu::{Backends, DownlevelFlags, Features, Limits};
 use crate::shader::{shader_input_output_test, InputStorageType, ShaderTest, MAX_BUFFER_SIZE};
 use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration, TestParameters};
 
+#[gpu_test]
+static UNIFORM_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(
+        TestParameters::default()
+            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
+            // Validation errors thrown by the SPIR-V validator https://github.com/gfx-rs/wgpu/issues/4371
+            .expect_fail(
+                FailureCase::backend(wgpu::Backends::VULKAN)
+                    .validation_error("a matrix with stride 8 not satisfying alignment to 16"),
+            )
+            .limits(Limits::downlevel_defaults()),
+    )
+    .run_async(|ctx| {
+        shader_input_output_test(
+            ctx,
+            InputStorageType::Uniform,
+            create_struct_layout_tests(InputStorageType::Uniform),
+        )
+    });
+
+#[gpu_test]
+static STORAGE_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(
+        TestParameters::default()
+            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
+            .limits(Limits::downlevel_defaults()),
+    )
+    .run_async(|ctx| {
+        shader_input_output_test(
+            ctx,
+            InputStorageType::Storage,
+            create_struct_layout_tests(InputStorageType::Storage),
+        )
+    });
+
+#[gpu_test]
+static PUSH_CONSTANT_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(
+        TestParameters::default()
+            .features(Features::PUSH_CONSTANTS)
+            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
+            .limits(Limits {
+                max_push_constant_size: MAX_BUFFER_SIZE as u32,
+                ..Limits::downlevel_defaults()
+            }),
+    )
+    .run_async(|ctx| {
+        shader_input_output_test(
+            ctx,
+            InputStorageType::PushConstant,
+            create_struct_layout_tests(InputStorageType::PushConstant),
+        )
+    });
+
 fn create_struct_layout_tests(storage_type: InputStorageType) -> Vec<ShaderTest> {
     let input_values: Vec<_> = (0..(MAX_BUFFER_SIZE as u32 / 4)).collect();
 
@@ -253,6 +307,57 @@ fn create_struct_layout_tests(storage_type: InputStorageType) -> Vec<ShaderTest>
     tests
 }
 
+#[gpu_test]
+static UNIFORM_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(
+        TestParameters::default()
+            .features(Features::SHADER_INT64)
+            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
+            .limits(Limits::downlevel_defaults()),
+    )
+    .run_async(|ctx| {
+        shader_input_output_test(
+            ctx,
+            InputStorageType::Storage,
+            create_64bit_struct_layout_tests(),
+        )
+    });
+
+#[gpu_test]
+static STORAGE_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(
+        TestParameters::default()
+            .features(Features::SHADER_INT64)
+            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
+            .limits(Limits::downlevel_defaults()),
+    )
+    .run_async(|ctx| {
+        shader_input_output_test(
+            ctx,
+            InputStorageType::Storage,
+            create_64bit_struct_layout_tests(),
+        )
+    });
+
+#[gpu_test]
+static PUSH_CONSTANT_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(
+        TestParameters::default()
+            .features(Features::SHADER_INT64 | Features::PUSH_CONSTANTS)
+            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
+            .limits(Limits {
+                max_push_constant_size: MAX_BUFFER_SIZE as u32,
+                ..Limits::downlevel_defaults()
+            }),
+    )
+    .run_async(|ctx| {
+        shader_input_output_test(
+            ctx,
+            InputStorageType::PushConstant,
+            create_64bit_struct_layout_tests(),
+        )
+    });
+
 fn create_64bit_struct_layout_tests() -> Vec<ShaderTest> {
     let input_values: Vec<_> = (0..(MAX_BUFFER_SIZE as u32 / 4)).collect();
 
@@ -356,29 +461,26 @@ fn create_64bit_struct_layout_tests() -> Vec<ShaderTest> {
 }
 
 #[gpu_test]
-static UNIFORM_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
+static UNIFORM_INPUT_F16: GpuTestConfiguration = GpuTestConfiguration::new()
     .parameters(
         TestParameters::default()
+            .features(Features::SHADER_F16)
             .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
-            // Validation errors thrown by the SPIR-V validator https://github.com/gfx-rs/wgpu/issues/4371
-            .expect_fail(
-                FailureCase::backend(wgpu::Backends::VULKAN)
-                    .validation_error("a matrix with stride 8 not satisfying alignment to 16"),
-            )
             .limits(Limits::downlevel_defaults()),
     )
     .run_async(|ctx| {
         shader_input_output_test(
             ctx,
-            InputStorageType::Uniform,
-            create_struct_layout_tests(InputStorageType::Uniform),
+            InputStorageType::Storage,
+            create_16bit_struct_layout_test(),
         )
     });
 
 #[gpu_test]
-static STORAGE_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
+static STORAGE_INPUT_F16: GpuTestConfiguration = GpuTestConfiguration::new()
     .parameters(
         TestParameters::default()
+            .features(Features::SHADER_F16)
             .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
             .limits(Limits::downlevel_defaults()),
     )
@@ -386,76 +488,205 @@ static STORAGE_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
         shader_input_output_test(
             ctx,
             InputStorageType::Storage,
-            create_struct_layout_tests(InputStorageType::Storage),
+            create_16bit_struct_layout_test(),
         )
     });
 
-#[gpu_test]
-static PUSH_CONSTANT_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
-    .parameters(
-        TestParameters::default()
-            .features(Features::PUSH_CONSTANTS)
-            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
-            .limits(Limits {
-                max_push_constant_size: MAX_BUFFER_SIZE as u32,
-                ..Limits::downlevel_defaults()
-            }),
-    )
-    .run_async(|ctx| {
-        shader_input_output_test(
-            ctx,
-            InputStorageType::PushConstant,
-            create_struct_layout_tests(InputStorageType::PushConstant),
-        )
-    });
+fn create_16bit_struct_layout_test() -> Vec<ShaderTest> {
+    let mut tests = Vec::new();
 
-#[gpu_test]
-static UNIFORM_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
-    .parameters(
-        TestParameters::default()
-            .features(Features::SHADER_INT64)
-            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
-            .limits(Limits::downlevel_defaults()),
-    )
-    .run_async(|ctx| {
-        shader_input_output_test(
-            ctx,
-            InputStorageType::Storage,
-            create_64bit_struct_layout_tests(),
-        )
-    });
+    fn f16asu16(f32: f32) -> u16 {
+        half::f16::from_f32(f32).to_bits()
+    }
 
-#[gpu_test]
-static STORAGE_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
-    .parameters(
-        TestParameters::default()
-            .features(Features::SHADER_INT64)
-            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
-            .limits(Limits::downlevel_defaults()),
-    )
-    .run_async(|ctx| {
-        shader_input_output_test(
-            ctx,
-            InputStorageType::Storage,
-            create_64bit_struct_layout_tests(),
-        )
-    });
+    // 16 bit alignment tests
+    {
+        let members =
+            "scalar1: f16, scalar2: f16, v3: vec3<f16>, tuck_in: f16, scalar4: f16, larger: u32";
+        let direct = String::from(
+            "\
+            output[0] = u32(input.scalar1);
+            output[1] = u32(input.scalar2);
+            output[2] = u32(input.v3.x);
+            output[3] = u32(input.v3.y);
+            output[4] = u32(input.v3.z);
+            output[5] = u32(input.tuck_in);
+            output[6] = u32(input.scalar4);
+            output[7] = u32(extractBits(input.larger, 0u, 16u));
+            output[8] = u32(extractBits(input.larger, 16u, 16u));
+        ",
+        );
 
-#[gpu_test]
-static PUSH_CONSTANT_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
-    .parameters(
-        TestParameters::default()
-            .features(Features::SHADER_INT64 | Features::PUSH_CONSTANTS)
-            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
-            .limits(Limits {
-                max_push_constant_size: MAX_BUFFER_SIZE as u32,
-                ..Limits::downlevel_defaults()
-            }),
-    )
-    .run_async(|ctx| {
-        shader_input_output_test(
-            ctx,
-            InputStorageType::PushConstant,
-            create_64bit_struct_layout_tests(),
-        )
-    });
+        tests.push(ShaderTest::new(
+            "f16 alignment".into(),
+            members.into(),
+            direct,
+            &[
+                f16asu16(0.0),
+                f16asu16(1.0),
+                f16asu16(2.0),
+                f16asu16(3.0),
+                f16asu16(4.0),
+                f16asu16(5.0),
+                f16asu16(6.0),
+                f16asu16(7.0),
+                f16asu16(8.0),
+                f16asu16(9.0),
+                10_u16,
+                11_u16,
+                // Some extra values to help debug if the test fails.
+                12_u16,
+                13_u16,
+                14_u16,
+                15_u16,
+                16_u16,
+                17_u16,
+                18_u16,
+                19_u16,
+                20_u16,
+            ],
+            &[
+                0, // scalar1
+                1, // scalar2
+                4, 5, 6,  // v3
+                7,  // tuck_in
+                8,  // scalar4
+                10, // larger[0..16]
+                11, // larger[16..32]
+            ],
+        ));
+    }
+
+    // Matrix tests
+    {
+        let members = "m2: mat2x2h, m3: mat3x3h, m4: mat4x4h";
+        let direct = String::from(
+            "\
+            output[0] = u32(input.m2[0].x);
+            output[1] = u32(input.m2[0].y);
+            output[2] = u32(input.m2[1].x);
+            output[3] = u32(input.m2[1].y);
+
+            output[4] = u32(input.m3[0].x);
+            output[5] = u32(input.m3[0].y);
+            output[6] = u32(input.m3[0].z);
+            output[7] = u32(input.m3[1].x);
+            output[8] = u32(input.m3[1].y);
+            output[9] = u32(input.m3[1].z);
+            output[10] = u32(input.m3[2].x);
+            output[11] = u32(input.m3[2].y);
+            output[12] = u32(input.m3[2].z);
+
+            output[13] = u32(input.m4[0].x);
+            output[14] = u32(input.m4[0].y);
+            output[15] = u32(input.m4[0].z);
+            output[16] = u32(input.m4[0].w);
+            output[17] = u32(input.m4[1].x);
+            output[18] = u32(input.m4[1].y);
+            output[19] = u32(input.m4[1].z);
+            output[20] = u32(input.m4[1].w);
+            output[21] = u32(input.m4[2].x);
+            output[22] = u32(input.m4[2].y);
+            output[23] = u32(input.m4[2].z);
+            output[24] = u32(input.m4[2].w);
+            output[25] = u32(input.m4[3].x);
+            output[26] = u32(input.m4[3].y);
+            output[27] = u32(input.m4[3].z);
+            output[28] = u32(input.m4[3].w);
+        ",
+        );
+
+        tests.push(ShaderTest::new(
+            "f16 matrix alignment".into(),
+            members.into(),
+            direct,
+            &(0..32).map(|x| f16asu16(x as f32)).collect::<Vec<_>>(),
+            &[
+                0, 1, // m2[0]
+                2, 3, // m2[1]
+                //
+                4, 5, 6, // m3[0]
+                8, 9, 10, // m3[1]
+                12, 13, 14, // m3[2]
+                //
+                16, 17, 18, 19, // m4[0]
+                20, 21, 22, 23, // m4[1]
+                24, 25, 26, 27, // m4[2]
+                28, 29, 30, 31, // m4[3]
+            ],
+        ));
+    }
+
+    // // Nested struct and array test.
+    // //
+    // // This tries to exploit all the weird edge cases of the struct layout algorithm.
+    // // We dont go as all-out as the other nested struct test because
+    // // all our primitives are twice as wide and we have only so much buffer to spare.
+    // {
+    //     let header = String::from(
+    //         "struct Inner { scalar: u64, scalar32: u32, member: array<vec3<u64>, 2> }",
+    //     );
+    //     let members = String::from("inner: Inner");
+    //     let direct = String::from(
+    //         "\
+
+    //     ",
+    //     );
+
+    //     tests.push(
+    //         ShaderTest::new(
+    //             String::from("nested struct and array"),
+    //             members,
+    //             direct,
+    //             &input_values,
+    //             &[
+    //                 0, 1, // inner.scalar
+    //                 2, // inner.scalar32
+    //                 8, 9, 10, 11, 12, 13, // inner.member[0]
+    //                 16, 17, 18, 19, 20, 21, // inner.member[1]
+    //             ],
+    //         )
+    //         .header(header),
+    //     );
+    // }
+    // {
+    //     let header = String::from("struct Inner { scalar32: u32, scalar: u64, scalar32_2: u32 }");
+    //     let members = String::from("inner: Inner, vector: vec3<i64>");
+    //     let direct = String::from(
+    //         "\
+    //         output[0] = bitcast<u32>(input.inner.scalar32);
+    //         output[1] = u32(bitcast<u64>(input.inner.scalar) & 0xFFFFFFFF);
+    //         output[2] = u32((bitcast<u64>(input.inner.scalar) >> 32) & 0xFFFFFFFF);
+    //         output[3] = bitcast<u32>(input.inner.scalar32_2);
+    //         output[4] = u32(bitcast<u64>(input.vector.x) & 0xFFFFFFFF);
+    //         output[5] = u32((bitcast<u64>(input.vector.x) >> 32) & 0xFFFFFFFF);
+    //         output[6] = u32(bitcast<u64>(input.vector.y) & 0xFFFFFFFF);
+    //         output[7] = u32((bitcast<u64>(input.vector.y) >> 32) & 0xFFFFFFFF);
+    //         output[8] = u32(bitcast<u64>(input.vector.z) & 0xFFFFFFFF);
+    //         output[9] = u32((bitcast<u64>(input.vector.z) >> 32) & 0xFFFFFFFF);
+    //     ",
+    //     );
+
+    //     tests.push(
+    //         ShaderTest::new(
+    //             String::from("nested struct and array"),
+    //             members,
+    //             direct,
+    //             &input_values,
+    //             &[
+    //                 0, // inner.scalar32
+    //                 2, 3, // inner.scalar
+    //                 4, // inner.scalar32_2
+    //                 8, 9, 10, 11, 12, 13, // vector
+    //             ],
+    //         )
+    //         .header(header),
+    //     );
+    // }
+
+    // Insert `enable f16;` header
+    tests
+        .into_iter()
+        .map(|test| test.header("enable f16;".into()))
+        .collect()
+}
diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs
index 30c4daad6d..a8d1329fac 100644
--- a/wgpu-core/src/device/mod.rs
+++ b/wgpu-core/src/device/mod.rs
@@ -383,6 +383,10 @@ pub fn create_validator(
         features.contains(wgt::Features::PUSH_CONSTANTS),
     );
     caps.set(Caps::FLOAT64, features.contains(wgt::Features::SHADER_F64));
+    caps.set(
+        Caps::SHADER_FLOAT16,
+        features.contains(wgt::Features::SHADER_F16),
+    );
     caps.set(
         Caps::PRIMITIVE_INDEX,
         features.contains(wgt::Features::SHADER_PRIMITIVE_INDEX),
diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs
index 82b62c5161..50aa80ad28 100644
--- a/wgpu-hal/src/dx12/adapter.rs
+++ b/wgpu-hal/src/dx12/adapter.rs
@@ -425,6 +425,23 @@ impl super::Adapter {
                 && features1.Int64ShaderOps.as_bool(),
         );
 
+        let float16_supported = {
+            let mut features4 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS4::default();
+            let hr = unsafe {
+                device.CheckFeatureSupport(
+                    Direct3D12::D3D12_FEATURE_D3D12_OPTIONS4, // https://learn.microsoft.com/en-us/windows/win32/api/d3d12/ne-d3d12-d3d12_feature#syntax
+                    ptr::from_mut(&mut features4).cast(),
+                    size_of::<Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS4>() as _,
+                )
+            };
+            hr.is_ok() && features4.Native16BitShaderOpsSupported.as_bool()
+        };
+
+        features.set(
+            wgt::Features::SHADER_F16,
+            shader_model >= naga::back::hlsl::ShaderModel::V6_2 && float16_supported,
+        );
+
         features.set(
             wgt::Features::TEXTURE_INT64_ATOMIC,
             shader_model >= naga::back::hlsl::ShaderModel::V6_6
@@ -612,7 +629,7 @@ impl crate::Adapter for super::Adapter {
 
     unsafe fn open(
         &self,
-        _features: wgt::Features,
+        features: wgt::Features,
         limits: &wgt::Limits,
         memory_hints: &wgt::MemoryHints,
     ) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> {
@@ -633,6 +650,7 @@ impl crate::Adapter for super::Adapter {
         let device = super::Device::new(
             self.device.clone(),
             queue.clone(),
+            features,
             limits,
             memory_hints,
             self.private_caps,
diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs
index 8b03df0e97..a3bfad3359 100644
--- a/wgpu-hal/src/dx12/device.rs
+++ b/wgpu-hal/src/dx12/device.rs
@@ -33,9 +33,11 @@ use crate::{
 const NAGA_LOCATION_SEMANTIC: &[u8] = c"LOC".to_bytes();
 
 impl super::Device {
+    #[allow(clippy::too_many_arguments)]
     pub(super) fn new(
         raw: Direct3D12::ID3D12Device,
         present_queue: Direct3D12::ID3D12CommandQueue,
+        features: wgt::Features,
         limits: &wgt::Limits,
         memory_hints: &wgt::MemoryHints,
         private_caps: super::PrivateCapabilities,
@@ -178,6 +180,7 @@ impl super::Device {
                 event: Event::create(false, false)?,
             },
             private_caps,
+            features,
             shared: Arc::new(shared),
             rtv_pool: Mutex::new(rtv_pool),
             dsv_pool: Mutex::new(descriptor::CpuPool::new(
diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs
index 1df4886028..48fcbee6eb 100644
--- a/wgpu-hal/src/dx12/mod.rs
+++ b/wgpu-hal/src/dx12/mod.rs
@@ -634,6 +634,7 @@ pub struct Device {
     present_queue: Direct3D12::ID3D12CommandQueue,
     idler: Idler,
     private_caps: PrivateCapabilities,
+    features: wgt::Features,
     shared: Arc<DeviceShared>,
     // CPU only pools
     rtv_pool: Mutex<descriptor::CpuPool>,
diff --git a/wgpu-hal/src/dx12/shader_compilation.rs b/wgpu-hal/src/dx12/shader_compilation.rs
index 4574fac5d4..834409eefc 100644
--- a/wgpu-hal/src/dx12/shader_compilation.rs
+++ b/wgpu-hal/src/dx12/shader_compilation.rs
@@ -268,7 +268,7 @@ pub(super) fn compile_dxc(
     let raw_ep = OPCWSTR::new(raw_ep);
     let full_stage = OPCWSTR::new(full_stage);
 
-    let mut compile_args = arrayvec::ArrayVec::<PCWSTR, 12>::new_const();
+    let mut compile_args = arrayvec::ArrayVec::<PCWSTR, 13>::new_const();
 
     if let Some(source_name) = source_name.as_ref() {
         compile_args.push(source_name.ptr())
@@ -298,6 +298,10 @@ pub(super) fn compile_dxc(
         compile_args.push(Dxc::DXC_ARG_SKIP_OPTIMIZATIONS);
     }
 
+    if device.features.contains(wgt::Features::SHADER_F16) {
+        compile_args.push(windows::core::w!("-enable-16bit-types"));
+    }
+
     let buffer = Dxc::DxcBuffer {
         Ptr: source.as_ptr().cast(),
         Size: source.len(),
diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs
index 8d315f042b..a438401277 100644
--- a/wgpu-hal/src/vulkan/adapter.rs
+++ b/wgpu-hal/src/vulkan/adapter.rs
@@ -399,6 +399,7 @@ impl PhysicalDeviceFeatures {
                     vk::PhysicalDeviceShaderFloat16Int8Features::default().shader_float16(true),
                     vk::PhysicalDevice16BitStorageFeatures::default()
                         .storage_buffer16_bit_access(true)
+                        .storage_input_output16(true)
                         .uniform_and_storage_buffer16_bit_access(true),
                 ))
             } else {
@@ -720,7 +721,8 @@ impl PhysicalDeviceFeatures {
                 F::SHADER_F16,
                 f16_i8.shader_float16 != 0
                     && bit16.storage_buffer16_bit_access != 0
-                    && bit16.uniform_and_storage_buffer16_bit_access != 0,
+                    && bit16.uniform_and_storage_buffer16_bit_access != 0
+                    && bit16.storage_input_output16 != 0,
             );
         }
 
@@ -1968,6 +1970,10 @@ impl super::Adapter {
                 capabilities.push(spv::Capability::Int64);
             }
 
+            if features.contains(wgt::Features::SHADER_F16) {
+                capabilities.push(spv::Capability::Float16);
+            }
+
             if features.intersects(
                 wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS
                     | wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX

From c4e49c67fd7309d0c33287b9a53a9b5703eb82ee Mon Sep 17 00:00:00 2001
From: Connor Fitzgerald <connorwadefitzgerald@gmail.com>
Date: Tue, 18 Mar 2025 13:53:24 -0400
Subject: [PATCH 2/4] Comments

---
 Cargo.lock                              | 14 +-----
 Cargo.toml                              |  3 +-
 naga/src/front/wgsl/error.rs            |  8 +--
 naga/src/front/wgsl/parse/mod.rs        |  2 +-
 naga/src/front/wgsl/parse/number.rs     |  2 +-
 naga/src/ir/mod.rs                      |  1 +
 naga/tests/wgsl_errors.rs               | 18 ++++---
 tests/gpu-tests/shader/struct_layout.rs | 67 -------------------------
 wgpu-types/src/features.rs              |  8 +--
 9 files changed, 25 insertions(+), 98 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index a90f06c9a0..0a3ee03339 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1784,16 +1784,6 @@ name = "half"
 version = "2.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7db2ff139bba50379da6aa0766b52fdcb62cb5b263009b09ed58ba604e14bbd1"
-dependencies = [
- "cfg-if",
- "crunchy",
-]
-
-[[package]]
-name = "half-2"
-version = "2.4.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c36518ae13d88b7cbdc61401256df5d9fc27921ea66353a16660869b47af8864"
 dependencies = [
  "arbitrary",
  "bytemuck",
@@ -2413,7 +2403,7 @@ dependencies = [
  "codespan-reporting",
  "diff",
  "env_logger",
- "half-2",
+ "half",
  "hashbrown",
  "hexf-parse",
  "hlsl-snapshots",
@@ -4866,7 +4856,7 @@ dependencies = [
  "env_logger",
  "futures-lite",
  "glam",
- "half-2",
+ "half",
  "image",
  "itertools 0.13.0",
  "js-sys",
diff --git a/Cargo.toml b/Cargo.toml
index b2ccb84973..d8ad7a0381 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -108,8 +108,7 @@ fern = "0.7"
 flume = "0.11"
 futures-lite = "2"
 glam = "0.29"
-# TODO: Use `half` directly again after <https://github.com/starkat99/half-rs/pull/111> is released upstream.
-half = { package = "half-2", version = "2.4.1" }
+half = "2.5" # We require 2.5 to have `Arbitrary` support.
 hashbrown = { version = "0.14.5", default-features = false, features = [
     "ahash",
     "inline-more",
diff --git a/naga/src/front/wgsl/error.rs b/naga/src/front/wgsl/error.rs
index aae67a94b5..346cd8e6ce 100644
--- a/naga/src/front/wgsl/error.rs
+++ b/naga/src/front/wgsl/error.rs
@@ -1024,12 +1024,12 @@ impl<'a> Error<'a> {
                 )],
             },
             Error::EnableExtensionNotEnabled { kind, span } => ParseError {
-                message: format!("the `{}` language extension is not enabled", kind.to_ident()),
+                message: format!("the `{}` enable extension is not enabled", kind.to_ident()),
                 labels: vec![(
                     span,
                     format!(
                         concat!(
-                            "the `{0}` language extension is needed for this functionality, ",
+                            "the `{}` \"Enable Extension\" is needed for this functionality, ",
                             "but it is not currently enabled."
                         ),
                         kind.to_ident()
@@ -1039,7 +1039,7 @@ impl<'a> Error<'a> {
                 notes: if let EnableExtension::Unimplemented(kind) = kind {
                     vec![format!(
                         concat!(
-                            "This enable-extension is not yet implemented. ",
+                            "This \"Enable Extension\" is not yet implemented. ",
                             "Let Naga maintainers know that you ran into this at ",
                             "<https://github.com/gfx-rs/wgpu/issues/{}>, ",
                             "so they can prioritize it!"
@@ -1049,7 +1049,7 @@ impl<'a> Error<'a> {
                 } else {
                     vec![
                         format!(
-                            "You can enable this extension by adding `enable {};` at the top of the shader.",
+                            "You can enable this extension by adding `enable {};` at the top of the shader, before any other items.",
                             kind.to_ident()
                         ),
                     ]
diff --git a/naga/src/front/wgsl/parse/mod.rs b/naga/src/front/wgsl/parse/mod.rs
index cb59a82a24..fe878969b5 100644
--- a/naga/src/front/wgsl/parse/mod.rs
+++ b/naga/src/front/wgsl/parse/mod.rs
@@ -837,7 +837,7 @@ impl Parser {
                 let _ = lexer.next();
                 let num = res.map_err(|err| Error::BadNumber(span, err))?;
 
-                if let Some(enable_extension) = num.required_enable_extension() {
+                if let Some(enable_extension) = num.requires_enable_extension() {
                     if !lexer.enable_extensions.contains(enable_extension) {
                         return Err(Box::new(Error::EnableExtensionNotEnabled {
                             kind: enable_extension.into(),
diff --git a/naga/src/front/wgsl/parse/number.rs b/naga/src/front/wgsl/parse/number.rs
index 776c357343..ea3e43e3dd 100644
--- a/naga/src/front/wgsl/parse/number.rs
+++ b/naga/src/front/wgsl/parse/number.rs
@@ -29,7 +29,7 @@ pub enum Number {
 }
 
 impl Number {
-    pub(super) const fn required_enable_extension(&self) -> Option<ImplementedEnableExtension> {
+    pub(super) const fn requires_enable_extension(&self) -> Option<ImplementedEnableExtension> {
         match *self {
             Number::F16(_) => Some(ImplementedEnableExtension::F16),
             _ => None,
diff --git a/naga/src/ir/mod.rs b/naga/src/ir/mod.rs
index ed7fd7fece..167871a8c0 100644
--- a/naga/src/ir/mod.rs
+++ b/naga/src/ir/mod.rs
@@ -819,6 +819,7 @@ pub enum Literal {
     F64(f64),
     /// May not be NaN or infinity.
     F32(f32),
+    /// May not be NaN or infinity.
     F16(f16),
     U32(u32),
     I32(i32),
diff --git a/naga/tests/wgsl_errors.rs b/naga/tests/wgsl_errors.rs
index e1d0a42234..a9cdfbb25a 100644
--- a/naga/tests/wgsl_errors.rs
+++ b/naga/tests/wgsl_errors.rs
@@ -847,13 +847,13 @@ fn float16_requires_enable() {
         r#"
             const a: f16 = 1.0;
         "#,
-        r#"error: the `f16` language extension is not enabled
+        r#"error: the `f16` enable extension is not enabled
   ┌─ wgsl:2:22
   │
 2 │             const a: f16 = 1.0;
-  │                      ^^^ the `f16` language extension is needed for this functionality, but it is not currently enabled.
+  │                      ^^^ the `f16` "Enable Extension" is needed for this functionality, but it is not currently enabled.
   │
-  = note: You can enable this extension by adding `enable f16;` at the top of the shader.
+  = note: You can enable this extension by adding `enable f16;` at the top of the shader, before any other items.
 
 "#,
     );
@@ -862,13 +862,13 @@ fn float16_requires_enable() {
         r#"
             const a = 1.0h;
         "#,
-        r#"error: the `f16` language extension is not enabled
+        r#"error: the `f16` enable extension is not enabled
   ┌─ wgsl:2:23
   │
 2 │             const a = 1.0h;
-  │                       ^^^^ the `f16` language extension is needed for this functionality, but it is not currently enabled.
+  │                       ^^^^ the `f16` "Enable Extension" is needed for this functionality, but it is not currently enabled.
   │
-  = note: You can enable this extension by adding `enable f16;` at the top of the shader.
+  = note: You can enable this extension by adding `enable f16;` at the top of the shader, before any other items.
 
 "#,
     );
@@ -1461,11 +1461,13 @@ fn invalid_blend_src() {
         @fragment
         fn main(@builtin(position) position: vec4<f32>) -> FragmentOutput { return FragmentOutput(vec4(0.0), vec4(0.0)); }
         ",
-        r###"error: `dual_source_blending` enable-extension is not enabled
+        r###"error: the `dual_source_blending` enable extension is not enabled
   ┌─ wgsl:3:27
   │
 3 │             @location(0) @blend_src(0) output0: vec4<f32>,
-  │                           ^^^^^^^^^ the `dual_source_blending` enable-extension is needed for this functionality, but it is not currently enabled
+  │                           ^^^^^^^^^ the `dual_source_blending` "Enable Extension" is needed for this functionality, but it is not currently enabled.
+  │
+  = note: You can enable this extension by adding `enable dual_source_blending;` at the top of the shader, before any other items.
 
 "###,
     );
diff --git a/tests/gpu-tests/shader/struct_layout.rs b/tests/gpu-tests/shader/struct_layout.rs
index 25bd32bf77..cf57e2a04e 100644
--- a/tests/gpu-tests/shader/struct_layout.rs
+++ b/tests/gpu-tests/shader/struct_layout.rs
@@ -617,73 +617,6 @@ fn create_16bit_struct_layout_test() -> Vec<ShaderTest> {
         ));
     }
 
-    // // Nested struct and array test.
-    // //
-    // // This tries to exploit all the weird edge cases of the struct layout algorithm.
-    // // We dont go as all-out as the other nested struct test because
-    // // all our primitives are twice as wide and we have only so much buffer to spare.
-    // {
-    //     let header = String::from(
-    //         "struct Inner { scalar: u64, scalar32: u32, member: array<vec3<u64>, 2> }",
-    //     );
-    //     let members = String::from("inner: Inner");
-    //     let direct = String::from(
-    //         "\
-
-    //     ",
-    //     );
-
-    //     tests.push(
-    //         ShaderTest::new(
-    //             String::from("nested struct and array"),
-    //             members,
-    //             direct,
-    //             &input_values,
-    //             &[
-    //                 0, 1, // inner.scalar
-    //                 2, // inner.scalar32
-    //                 8, 9, 10, 11, 12, 13, // inner.member[0]
-    //                 16, 17, 18, 19, 20, 21, // inner.member[1]
-    //             ],
-    //         )
-    //         .header(header),
-    //     );
-    // }
-    // {
-    //     let header = String::from("struct Inner { scalar32: u32, scalar: u64, scalar32_2: u32 }");
-    //     let members = String::from("inner: Inner, vector: vec3<i64>");
-    //     let direct = String::from(
-    //         "\
-    //         output[0] = bitcast<u32>(input.inner.scalar32);
-    //         output[1] = u32(bitcast<u64>(input.inner.scalar) & 0xFFFFFFFF);
-    //         output[2] = u32((bitcast<u64>(input.inner.scalar) >> 32) & 0xFFFFFFFF);
-    //         output[3] = bitcast<u32>(input.inner.scalar32_2);
-    //         output[4] = u32(bitcast<u64>(input.vector.x) & 0xFFFFFFFF);
-    //         output[5] = u32((bitcast<u64>(input.vector.x) >> 32) & 0xFFFFFFFF);
-    //         output[6] = u32(bitcast<u64>(input.vector.y) & 0xFFFFFFFF);
-    //         output[7] = u32((bitcast<u64>(input.vector.y) >> 32) & 0xFFFFFFFF);
-    //         output[8] = u32(bitcast<u64>(input.vector.z) & 0xFFFFFFFF);
-    //         output[9] = u32((bitcast<u64>(input.vector.z) >> 32) & 0xFFFFFFFF);
-    //     ",
-    //     );
-
-    //     tests.push(
-    //         ShaderTest::new(
-    //             String::from("nested struct and array"),
-    //             members,
-    //             direct,
-    //             &input_values,
-    //             &[
-    //                 0, // inner.scalar32
-    //                 2, 3, // inner.scalar
-    //                 4, // inner.scalar32_2
-    //                 8, 9, 10, 11, 12, 13, // vector
-    //             ],
-    //         )
-    //         .header(header),
-    //     );
-    // }
-
     // Insert `enable f16;` header
     tests
         .into_iter()
diff --git a/wgpu-types/src/features.rs b/wgpu-types/src/features.rs
index c6e98e6566..b375b3c26c 100644
--- a/wgpu-types/src/features.rs
+++ b/wgpu-types/src/features.rs
@@ -1340,18 +1340,20 @@ bitflags_array! {
         /// This is a web and native feature.
         const INDIRECT_FIRST_INSTANCE = WEBGPU_FEATURE_INDIRECT_FIRST_INSTANCE;
 
-        /// Allows shaders to acquire the FP16 ability
+        /// Allows shaders to use 16-bit floating point types. You may use them uniform buffers,
+        /// storage buffers, and local variables. You may not use them in push constants.
         ///
-        /// Note: this is not supported in `naga` yet, only through `spirv-passthrough` right now.
+        /// In order to use this in WGSL shaders, you must add `enable f16;` to the top of your shader,
+        /// before any global items.
         ///
         /// Supported Platforms:
         /// - Vulkan
         /// - Metal
+        /// - DX12
         ///
         /// This is a web and native feature.
         const SHADER_F16 = WEBGPU_FEATURE_SHADER_F16;
 
-
         /// Allows for usage of textures of format [`TextureFormat::Rg11b10Ufloat`] as a render target
         ///
         /// Supported platforms:

From 136180582c3ab120554851461de9029eb1b8e00d Mon Sep 17 00:00:00 2001
From: Connor Fitzgerald <connorwadefitzgerald@gmail.com>
Date: Tue, 18 Mar 2025 14:18:34 -0400
Subject: [PATCH 3/4] Bump version of num-traits to 0.2.16

---
 naga/Cargo.toml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/naga/Cargo.toml b/naga/Cargo.toml
index b722e7a2cb..2f57895e11 100644
--- a/naga/Cargo.toml
+++ b/naga/Cargo.toml
@@ -95,7 +95,8 @@ half = { workspace = true, features = ["arbitrary", "num-traits"] }
 rustc-hash.workspace = true
 indexmap.workspace = true
 log = "0.4"
-num-traits = "0.2"
+# We require 0.2.16 to have `FromBytes` and `ToBytes` which `half` require.
+num-traits = "0.2.16"
 strum = { workspace = true, optional = true }
 spirv = { version = "0.3", optional = true }
 thiserror.workspace = true

From 04dfdd5f74fec9940e45307c1677bf43282dce66 Mon Sep 17 00:00:00 2001
From: Erich Gubler <erichdongubler@gmail.com>
Date: Wed, 19 Mar 2025 09:53:47 -0400
Subject: [PATCH 4/4] fixup! Bump version of num-traits to 0.2.16

---
 naga/Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/naga/Cargo.toml b/naga/Cargo.toml
index 2f57895e11..445ad73c17 100644
--- a/naga/Cargo.toml
+++ b/naga/Cargo.toml
@@ -95,7 +95,7 @@ half = { workspace = true, features = ["arbitrary", "num-traits"] }
 rustc-hash.workspace = true
 indexmap.workspace = true
 log = "0.4"
-# We require 0.2.16 to have `FromBytes` and `ToBytes` which `half` require.
+# `half` requires 0.2.16 for `FromBytes` and `ToBytes`.
 num-traits = "0.2.16"
 strum = { workspace = true, optional = true }
 spirv = { version = "0.3", optional = true }