Devsh-Graphics-Programming · devshgraphicsprogramming · Nov 7, 2025 · Sep 18, 2025 · Oct 4, 2025 · Oct 4, 2025
diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h
@@ -9,6 +9,7 @@
 
 #include "nbl/asset/ICPUPolygonGeometry.h"
 #include "nbl/asset/utils/CGeometryManipulator.h"
+#include "nbl/asset/utils/CSmoothNormalGenerator.h"
 
 namespace nbl::asset
 {
@@ -17,17 +18,6 @@ namespace nbl::asset
 class NBL_API2 CPolygonGeometryManipulator
 {
 	public:
-		//vertex data needed for CSmoothNormalGenerator
-		struct SSNGVertexData
-		{
-			uint32_t index;									     //offset of the vertex into index buffer
-			uint32_t hash;											       //
-			float wage;												         //angle wage of the vertex
-			hlsl::float32_t3 position;							   //position of the vertex in 3D space
-			hlsl::float32_t3 parentTriangleFaceNormal; //
-		};
-
-		using VxCmpFunction = std::function<bool(const SSNGVertexData&, const SSNGVertexData&, const ICPUPolygonGeometry*)>;
 
 		static inline void recomputeContentHashes(ICPUPolygonGeometry* geo)
 		{
@@ -243,11 +233,14 @@ class NBL_API2 CPolygonGeometryManipulator
 
 		static core::smart_refctd_ptr<ICPUPolygonGeometry> createUnweldedList(const ICPUPolygonGeometry* inGeo);
 
+		using SSNGVertexData = CSmoothNormalGenerator::VertexData; // vertex record type, declared in CSmoothNormalGenerator
+		using SSNGVxCmpFunction = CSmoothNormalGenerator::VxCmpFunction; // comparator type accepted by createSmoothVertexNormal below
+
 		static core::smart_refctd_ptr<ICPUPolygonGeometry> createSmoothVertexNormal(const ICPUPolygonGeometry* inbuffer, bool enableWelding = false, float epsilon = 1.525e-5f,
-				VxCmpFunction vxcmp = [](const CPolygonGeometryManipulator::SSNGVertexData& v0, const CPolygonGeometryManipulator::SSNGVertexData& v1, const ICPUPolygonGeometry* buffer) 
+				SSNGVxCmpFunction vxcmp = [](const SSNGVertexData& v0, const SSNGVertexData& v1, const ICPUPolygonGeometry* buffer) 
 				{ 
-					static constexpr float cosOf45Deg = 0.70710678118f;
-					return dot(v0.parentTriangleFaceNormal,v1.parentTriangleFaceNormal) > cosOf45Deg;
+					constexpr float cosOf45Deg = 0.70710678118f; // cos(45 degrees)
+					return dot(normalize(v0.weightedNormal),normalize(v1.weightedNormal)) > cosOf45Deg; // default: true when the angle between both weighted normals is under 45 degrees
 				});
 
 #if 0 // TODO: REDO
@@ -277,14 +270,14 @@ class NBL_API2 CPolygonGeometryManipulator
 		};
 		typedef std::function<bool(const IMeshManipulator::SSNGVertexData&, const IMeshManipulator::SSNGVertexData&, ICPUMeshBuffer*)> VxCmpFunction;
 
-        //! Compares two attributes of floating point types in accordance with passed error metric.
-        /**
-        @param _a First attribute.
-        @param _b Second attribute.
-        @param _cpa Component count.
-        @param _errMetric Error metric info.
-        */
-        static inline bool compareFloatingPointAttribute(const core::vectorSIMDf& _a, const core::vectorSIMDf& _b, size_t _cpa, const SErrorMetric& _errMetric)
+				//! Compares two attributes of floating point types in accordance with passed error metric.
+				/**
+				@param _a First attribute.
+				@param _b Second attribute.
+				@param _cpa Component count.
+				@param _errMetric Error metric info.
+				*/
+				static inline bool compareFloatingPointAttribute(const core::vectorSIMDf& _a, const core::vectorSIMDf& _b, size_t _cpa, const SErrorMetric& _errMetric)
 		{
 			using ErrorF_t = core::vectorSIMDf(*)(core::vectorSIMDf, core::vectorSIMDf);
 
@@ -365,41 +358,41 @@ class NBL_API2 CPolygonGeometryManipulator
 		}
 
 
-        //! Swaps the index buffer for a new index buffer with invalid triangles removed.
-        /**
-        Invalid triangle is such consisting of two or more same indices.
-        @param _input Input index buffer.
-        @param _idxType Type of indices in the index buffer.
-        @returns New index buffer or nullptr if input indices were of unknown type or _input was nullptr.
-        */
-        static void filterInvalidTriangles(ICPUMeshBuffer* _input);
-
-        //! Creates index buffer from input converting it to indices for line list primitives. Input is assumed to be indices for line strip.
-        /**
-        @param _input Input index buffer's data.
-        @param _idxCount Index count.
-        @param _inIndexType Type of input index buffer data (32bit or 16bit).
-        @param _outIndexType Type of output index buffer data (32bit or 16bit).
-        */
-        static core::smart_refctd_ptr<ICPUBuffer> idxBufferFromLineStripsToLines(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType);
-
-        //! Creates index buffer from input converting it to indices for triangle list primitives. Input is assumed to be indices for triangle strip.
-        /**
-        @param _input Input index buffer's data.
-        @param _idxCount Index count.
-        @param _inIndexType Type of input index buffer data (32bit or 16bit).
-        @param _outIndexType Type of output index buffer data (32bit or 16bit).
-        */
-        static core::smart_refctd_ptr<ICPUBuffer> idxBufferFromTriangleStripsToTriangles(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType);
-
-        //! Creates index buffer from input converting it to indices for triangle list primitives. Input is assumed to be indices for triangle fan.
-        /**
-        @param _input Input index buffer's data.
-        @param _idxCount Index count.
-        @param _inIndexType Type of input index buffer data (32bit or 16bit).
-        @param _outIndexType Type of output index buffer data (32bit or 16bit).
-        */
-        static core::smart_refctd_ptr<ICPUBuffer> idxBufferFromTrianglesFanToTriangles(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType);
+				//! Swaps the index buffer for a new index buffer with invalid triangles removed.
+				/**
+				An invalid triangle is one that contains two or more identical indices.
+				@param _input Input index buffer.
+				@param _idxType Type of indices in the index buffer.
+				@returns New index buffer or nullptr if input indices were of unknown type or _input was nullptr.
+				*/
+				static void filterInvalidTriangles(ICPUMeshBuffer* _input);
+
+				//! Creates index buffer from input converting it to indices for line list primitives. Input is assumed to be indices for line strip.
+				/**
+				@param _input Input index buffer's data.
+				@param _idxCount Index count.
+				@param _inIndexType Type of input index buffer data (32bit or 16bit).
+				@param _outIndexType Type of output index buffer data (32bit or 16bit).
+				*/
+				static core::smart_refctd_ptr<ICPUBuffer> idxBufferFromLineStripsToLines(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType);
+
+				//! Creates index buffer from input converting it to indices for triangle list primitives. Input is assumed to be indices for triangle strip.
+				/**
+				@param _input Input index buffer's data.
+				@param _idxCount Index count.
+				@param _inIndexType Type of input index buffer data (32bit or 16bit).
+				@param _outIndexType Type of output index buffer data (32bit or 16bit).
+				*/
+				static core::smart_refctd_ptr<ICPUBuffer> idxBufferFromTriangleStripsToTriangles(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType);
+
+				//! Creates index buffer from input converting it to indices for triangle list primitives. Input is assumed to be indices for triangle fan.
+				/**
+				@param _input Input index buffer's data.
+				@param _idxCount Index count.
+				@param _inIndexType Type of input index buffer data (32bit or 16bit).
+				@param _outIndexType Type of output index buffer data (32bit or 16bit).
+				*/
+				static core::smart_refctd_ptr<ICPUBuffer> idxBufferFromTrianglesFanToTriangles(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType);
 
 		//!
 		static inline std::array<uint32_t,3u> getTriangleIndices(const ICPUMeshBuffer* mb, uint32_t triangleIx)
@@ -635,7 +628,7 @@ class NBL_API2 CPolygonGeometryManipulator
 
 		//! Creates a copy of a mesh with vertices welded
 		/** \param mesh Input mesh
-        \param errMetrics Array of size EVAI_COUNT. Describes error metric for each vertex attribute (used if attribute is of floating point or normalized type).
+				\param errMetrics Array of size EVAI_COUNT. Describes error metric for each vertex attribute (used if attribute is of floating point or normalized type).
 		\param tolerance The threshold for vertex comparisons.
 		\return Mesh without redundant vertices. */
 		static core::smart_refctd_ptr<ICPUMeshBuffer> createMeshBufferWelded(ICPUMeshBuffer *inbuffer, const SErrorMetric* errMetrics, const bool& optimIndexType = true, const bool& makeNewMesh = false);
@@ -653,12 +646,12 @@ class NBL_API2 CPolygonGeometryManipulator
 		*/
 		static void requantizeMeshBuffer(ICPUMeshBuffer* _meshbuffer, const SErrorMetric* _errMetric);
 
-        //! Creates a 32bit index buffer for a mesh with primitive types changed to list types
-        /**#
+				//! Creates a 32bit index buffer for a mesh with primitive types changed to list types
+				/**#
 		@param _newPrimitiveType
-        @param _begin non-const iterator to beginning of meshbuffer range
-        @param _end non-const iterator to ending of meshbuffer range
-        */
+				@param _begin non-const iterator to beginning of meshbuffer range
+				@param _end non-const iterator to ending of meshbuffer range
+				*/
 		template<typename Iterator>
 		static inline void homogenizePrimitiveTypeAndIndices(Iterator _begin, Iterator _end, const E_PRIMITIVE_TOPOLOGY _newPrimitiveType, const E_INDEX_TYPE outIndexType = EIT_32BIT)
 		{

diff --git a/include/nbl/asset/utils/CVertexHashGrid.h b/include/nbl/asset/utils/CVertexHashGrid.h
@@ -0,0 +1,221 @@
+#ifndef _NBL_ASSET_C_VERTEX_HASH_GRID_H_INCLUDED_ // guard named after this header (CVertexHashGrid.h) so it cannot clash with a "hash map" header
+#define _NBL_ASSET_C_VERTEX_HASH_GRID_H_INCLUDED_
+
+#include "nbl/core/declarations.h"
+
+namespace nbl::asset
+{
+
+template <typename T>
+concept HashGridVertexData = requires(T vertex, const T constVertex, uint32_t newHash) { // what the grid needs from a vertex record
+	{ vertex.setHash(newHash) } -> std::same_as<void>; // mutable: add() writes the computed cell hash through this
+	{ constVertex.getHash() } -> std::same_as<uint32_t>; // const accessor for the stored hash
+	{ constVertex.getPosition() } -> std::same_as<hlsl::float32_t3>; // const accessor for the position that gets hashed
+};
+
+template <typename Fn, typename T>
+concept HashGridIteratorFn = HashGridVertexData<T> && requires(Fn&& callback, const T constVertex)
+{
+	// The callback is handed a const vertex and must return exactly `bool`: whether the hash grid should continue the iteration.
+	{ std::invoke(std::forward<Fn>(callback), constVertex) } -> std::same_as<bool>;
+};
+
+// TODO: make the position type (either float32_t3 or float64_t3) a template argument of this class template
+template <HashGridVertexData VertexData>
+class CVertexHashGrid
+{
+public:
+
+	using vertex_data_t = VertexData;
+	using collection_t = core::vector<VertexData>;
+	struct BucketBounds // half-open iterator range [begin, end) into the vertex collection
+	{
+		typename collection_t::const_iterator begin; // first vertex of the bucket
+		typename collection_t::const_iterator end; // one past the last vertex of the bucket
+	};
+
+	inline CVertexHashGrid(float cellSize, uint32_t hashTableMaxSizeLog2, size_t vertexCountReserve = 8192) :
+		m_sorter(createSorter(vertexCountReserve)), // initializers are listed in member declaration order, which is the order they actually run in
+		m_hashTableMaxSize(static_cast<uint32_t>(1ull << hashTableMaxSizeLog2)), // power-of-two table size, hash() masks with (size - 1); the value is truncated to 32 bits
+		m_cellSize(cellSize) // positions are divided by this before being floored to a cell coordinate
+	{
+		m_vertices.reserve(vertexCountReserve); // capacity hint only, add() is not limited by it
+	}
+
+	inline void add(VertexData&& vertex) // stamps the vertex with the hash of its grid cell, then stores it
+	{
+		const uint32_t cellHash = hash(vertex);
+		vertex.setHash(cellHash);
+		m_vertices.emplace_back(std::move(vertex));
+	}
+
+	inline void bake() // sorts m_vertices by hash; lookups binary-search it, so run this after the last add()
+	{
+		// The constructor chose the sorter from `vertexCountReserve`, yet add() can grow past that hint. createSorter() ties
+		// the variant alternative to the vertex count, so move up to the wider alternative when the real count needs it.
+		if (auto required = createSorter(m_vertices.size()); required.index() > m_sorter.index())
+			m_sorter = std::move(required); // NOTE(review): assumes the RadixLsbSorter alternatives are move-assignable
+		auto scratchBuffer = collection_t(m_vertices.size());
+		const auto sortedOutput = std::visit([&](auto& sorter)
+			{ return sorter(m_vertices.data(), scratchBuffer.data(), m_vertices.size(), KeyAccessor()); }, m_sorter);
+		if (sortedOutput != m_vertices.data()) // the sorted result is not in m_vertices' storage, adopt the scratch buffer
+			m_vertices = std::move(scratchBuffer);
+	}
+
+	inline const collection_t& vertices() const { return this->m_vertices; } // read-only access to the stored vertices
+
+	inline uint32_t getVertexCount() const { return static_cast<uint32_t>(m_vertices.size()); } // the size is narrowed to 32 bits
+
+	//! Calls `fn` on every stored vertex whose hash matches one of the (up to 8) cell hashes computed for `position`.
+	//! Candidates are selected by hash only; the iteration ends for good as soon as `fn` returns false.
+	template <HashGridIteratorFn<VertexData> Fn>
+	inline void forEachBroadphaseNeighborCandidates(const hlsl::float32_t3& position, Fn&& fn) const
+	{
+		std::array<uint32_t, 8> neighboringCells;
+		const uint8_t cellCount = getNeighboringCellHashes(neighboringCells.data(), position);
+		for (uint8_t cellIx = 0; cellIx < cellCount; ++cellIx)
+		{
+			const BucketBounds bucket = getBucketBoundsByHash(neighboringCells[cellIx]);
+			for (auto candidate = bucket.begin; candidate != bucket.end; ++candidate)
+			{
+				// `false` stops the whole query: a plain `break` would only leave this bucket and resume with the next cell
+				if (!std::invoke(std::forward<Fn>(fn), *candidate))
+					return;
+			}
+		}
+	}
+
+private:
+	struct KeyAccessor // key functor handed to the radix sorter: exposes the 32-bit vertex hash one masked slice at a time
+	{
+		static constexpr inline size_t key_bit_count = 32ull;
+		template<auto bit_offset, auto radix_mask>
+		inline decltype(radix_mask) operator()(const VertexData& item) const
+		{
+			const uint32_t shiftedHash = item.getHash() >> static_cast<uint32_t>(bit_offset); // drop the bits below the requested slice
+			return static_cast<decltype(radix_mask)>(shiftedHash) & radix_mask; // keep only the bits selected by the mask
+		}
+	};
+
+	static inline constexpr uint32_t primeNumber1 = 73'856'093u; // multiplier for the x cell coordinate in hash()
+	static inline constexpr uint32_t primeNumber2 = 19'349'663u; // multiplier for y; NOTE(review): 19349663 = 41 * 471943, so not actually prime
+	static inline constexpr uint32_t primeNumber3 = 83'492'791u; // multiplier for the z cell coordinate in hash()
+
+	using sorter_t = std::variant< // one radix sorter alternative per second template argument, narrowest type first
+		core::RadixLsbSorter<KeyAccessor::key_bit_count, uint16_t>,
+		core::RadixLsbSorter<KeyAccessor::key_bit_count, uint32_t>,
+		core::RadixLsbSorter<KeyAccessor::key_bit_count, size_t>>;
+	sorter_t m_sorter;
+
+	inline static sorter_t createSorter(size_t vertexCount) // picks the narrowest alternative whose range covers vertexCount
+	{
+		if (vertexCount >= (0x1ull << 32ull))
+			return core::RadixLsbSorter<KeyAccessor::key_bit_count, size_t>();
+		if (vertexCount >= (0x1ull << 16ull))
+			return core::RadixLsbSorter<KeyAccessor::key_bit_count, uint32_t>();
+		return core::RadixLsbSorter<KeyAccessor::key_bit_count, uint16_t>();
+	}
+
+	collection_t m_vertices; // vertices in insertion order until bake() reorders them with the radix sorter keyed on getHash()
+	const uint32_t m_hashTableMaxSize; // set to (1 << hashTableMaxSizeLog2) by the constructor; hash() masks with (m_hashTableMaxSize - 1)
+	const float m_cellSize; // positions are divided by this before being floored to a cell coordinate
+
+	//! Maps one floored cell coordinate to its unsigned counterpart.
+	//! Converting a negative float straight to an unsigned integer is undefined behaviour in C++, so go through int32_t:
+	//! the signed-to-unsigned step wraps modulo 2^32, which keeps `cell + 1` arithmetic consistent across zero.
+	//! Coordinates outside the int32_t range remain unsupported.
+	static inline uint32_t toCellCoord(float flooredCoord)
+	{
+		return static_cast<uint32_t>(static_cast<int32_t>(flooredCoord));
+	}
+
+	//! Hash of the grid cell that contains the vertex position.
+	inline uint32_t hash(const VertexData& vertex) const
+	{
+		// floor (rather than truncation) so that e.g. 0.x and -0.x do not both end up in cell 0 and crowd it
+		const hlsl::float32_t3 cell = floor(vertex.getPosition() / m_cellSize);
+		return hash(hlsl::uint32_t3(toCellCoord(cell.x), toCellCoord(cell.y), toCellCoord(cell.z)));
+	}
+
+	//! Hash of an integer cell coordinate: XOR of the per-axis products, masked to the table size.
+	inline uint32_t hash(const hlsl::uint32_t3& position) const
+	{
+		const uint32_t mixed = (position.x * primeNumber1) ^ (position.y * primeNumber2) ^ (position.z * primeNumber3);
+		return mixed & (m_hashTableMaxSize - 1);
+	}
+
+	//! Writes the de-duplicated hashes of the cells that may hold neighbours of `position` to `outNeighbors`.
+	//! \param outNeighbors must have room for 8 hashes. \return number of hashes written (1 to 8).
+	inline uint8_t getNeighboringCellHashes(uint32_t* outNeighbors, hlsl::float32_t3 position) const
+	{
+		// The cell size is expected to be twice the epsilon (|2e|e| in the sketch). Subtracting 0.5 before flooring snaps the
+		// position to the bottom-left-most cell that can still contain a vertex colliding with it:
+		// -------          -------
+		// |  | y|          |  |  |
+		// |  |x |          |  |y |
+		// -------    ->    -------
+		// |  |  |          | x|  |
+		// |  |  |          |  |  |
+		// -------          -------
+		// x sits in the lower-left part of its cell, so it is snapped to a different cell; y stays in its own cell.
+		// From the snapped cell only the cells above / to the right (which include the original cell) need checking,
+		// which cuts the candidates from 9 cells to 4 in 2D, or from 27 cells to 8 in 3D.
+		const hlsl::float32_t3 snapped = floor(position / m_cellSize - hlsl::float32_t3(0.5f));
+		const hlsl::uint32_t3 baseCoord(toCellCoord(snapped.x), toCellCoord(snapped.y), toCellCoord(snapped.z));
+		// offsets of the 7 other cells, in the order they are probed
+		static constexpr uint32_t offsets[7][3] = { {0,0,1},{0,1,0},{1,0,0},{1,1,0},{1,0,1},{0,1,1},{1,1,1} };
+
+		uint8_t neighborCount = 0;
+		outNeighbors[neighborCount++] = hash(baseCoord);
+		for (const auto& offset : offsets)
+		{
+			const uint32_t cellHash = hash(baseCoord + hlsl::uint32_t3(offset[0], offset[1], offset[2]));
+			// distinct cells can share a hash; report each hash only once
+			if (std::find(outNeighbors, outNeighbors + neighborCount, cellHash) == outNeighbors + neighborCount)
+				outNeighbors[neighborCount++] = cellHash;
+		}
+		return neighborCount;
+	}
+
+	//! Finds the run of stored vertices whose hash equals `hash`.
+	//! Relies on m_vertices being sorted by hash (see bake()); returns the empty range (end, end) when nothing matches.
+	inline BucketBounds getBucketBoundsByHash(uint32_t hash) const
+	{
+		// The sorter narrows the search down to the slice it reports for the most significant radix of `hash`.
+		const auto [sliceBegin, sliceEnd] = std::visit([&](auto& sorter)
+		{
+			const auto radixBound = sorter.getMostSignificantRadixBound(hash);
+			// std::pair deduces the iterator type, so the dependent name `collection_t::const_iterator` is not spelled
+			// out as a template argument (which would need `typename` on conforming compilers).
+			return std::pair(m_vertices.begin() + radixBound.first, m_vertices.begin() + radixBound.second);
+		}, m_sorter);
+
+		const auto first = std::lower_bound(
+			sliceBegin,
+			sliceEnd,
+			hash,
+			[](const VertexData& vertex, uint32_t wanted)
+			{
+				return vertex.getHash() < wanted;
+			});
+		// everything before `first` is below `hash`, so the upper bound can only be at or after it
+		const auto last = std::upper_bound(
+			first,
+			sliceEnd,
+			hash,
+			[](uint32_t wanted, const VertexData& vertex)
+			{
+				return wanted < vertex.getHash();
+			});
+
+		// Bucket missing. For a non-empty range no further check is needed: `first` is not below `hash` (lower_bound)
+		// and lies before `last`, so it is not above it either. Testing a `hash` data member is thus unnecessary, and
+		// the HashGridVertexData concept only guarantees getHash(), not such a member.
+		if (first == last)
+			return { m_vertices.end(), m_vertices.end() };
+		return { first, last };
+	}
+};
+
+}
+#endif