webgpu_compute_rasterizer_ibl.html 65 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769
  1. <!DOCTYPE html>
  2. <html lang="en">
  3. <head>
  4. <title>three.js webgpu - compute rasterizer ibl</title>
  5. <meta charset="utf-8">
  6. <meta name="viewport" content="width=device-width, user-scalable=no, minimum-scale=1.0, maximum-scale=1.0">
  7. <meta property="og:title" content="three.js webgpu - compute rasterizer ibl">
  8. <meta property="og:type" content="website">
  9. <meta property="og:url" content="https://threejs.org/examples/webgpu_compute_rasterizer_ibl.html">
  10. <meta property="og:image" content="https://threejs.org/examples/screenshots/webgpu_compute_rasterizer_ibl.jpg">
  11. <link type="text/css" rel="stylesheet" href="example.css">
  12. </head>
  13. <body>
  14. <div id="info">
  15. <a href="https://threejs.org/" target="_blank" rel="noopener" class="logo-link"></a>
  16. <div class="title-wrapper">
  17. <a href="https://threejs.org/" target="_blank" rel="noopener">three.js</a><span>GPU-Driven Compute Rasterizer — IBL</span>
  18. </div>
  19. <small>Rendering <span id="triangleCount"></span> triangles.<br/>Battle Damaged Sci-fi Helmet by <a href="https://sketchfab.com/theblueturtle_" target="_blank" rel="noopener">theblueturtle_</a></small>
  20. </div>
  21. <script type="importmap">
  22. {
  23. "imports": {
  24. "three": "../build/three.webgpu.js",
  25. "three/webgpu": "../build/three.webgpu.js",
  26. "three/tsl": "../build/three.tsl.js",
  27. "three/addons/": "./jsm/"
  28. }
  29. }
  30. </script>
  31. <script type="module">
  32. import * as THREE from 'three/webgpu';
  33. import { Fn, If, Loop, vec2, vec4, uvec2, uvec4, mat4, uint, float, int, min, max, clamp, ceil, log2, length, dFdx, dFdy, atomicMax, atomicAdd, atomicStore, atomicLoad, floor, cos, sin, dot, bool, storage, uniform, uniformArray, instanceIndex, vertexIndex, distance, screenSize, screenCoordinate, time, texture, varyingProperty, sqrt, normalize, cross, sign, positionGeometry, cameraViewMatrix, Discard, context, positionView, positionViewDirection, overrideNodes } from 'three/tsl';
  34. import { OrbitControls } from 'three/addons/controls/OrbitControls.js';
  35. import { GLTFLoader } from 'three/addons/loaders/GLTFLoader.js';
  36. import { UltraHDRLoader } from 'three/addons/loaders/UltraHDRLoader.js';
  37. import { MeshoptClusterizer } from 'three/addons/libs/meshopt_clusterizer.module.js';
  38. import { MeshoptSimplifier } from 'three/addons/libs/meshopt_simplifier.module.js';
  39. import { Inspector } from 'three/addons/inspector/Inspector.js';
  40. import WebGPU from 'three/addons/capabilities/WebGPU.js';
  // Bail out before any renderer setup when WebGPU is unavailable: the
  // message element returned by WebGPU.getErrorMessage() is appended to the
  // page, then an Error is thrown so the rest of this module does not run.
  41. if ( WebGPU.isAvailable() === false ) {
  42. document.body.appendChild( WebGPU.getErrorMessage() );
  43. throw new Error( 'No WebGPU support' );
  44. }
  // Core scene objects (assigned inside init)
  let camera;
  let scene;
  let renderer;
  let controls;

  // Compute passes
  let computeRasterize, computeClear, computeFrustum;
  let computeDispatch, computeHWArgs;

  let resolveMesh, hwMesh;

  // Camera-derived uniforms
  let cameraPos;
  let projScreenMatrixUniform;
  let frustumPlanesUniform;
  let cotHalfFovUniform;
  let prevProjScreenUniform;
  let outputModeUniform;

  // Packed visibility buffers: attribute plus atomic and read-only views
  let screenTriAttr, screenTriAtomic, screenTriRead;
  let screenInstAttr, screenInstAtomic, screenInstRead;
  let maxPixels;

  let sceneRT, blitQuad, blitTexNode;

  // Hierarchical Z pyramid (max depth per tile) for occlusion culling
  let depthSourceTexNode;
  let hzbBuffer, hzbRead, hzbLevelTable, hzbLevelCountUniform;
  let hzbLevelCount = 0;
  let prevCameraPosUniform;
  const hzbKernels = [];
  const MAX_HZB_LEVELS = 16;

  // 360x360 plane or 60x36x60 volume
  const instanceCount = 129600;
  const MAX_RASTER_SIZE = 32;

  // Specular antialiasing — kernel roughness from normal variance, shared by
  // both rasterizer paths so their roughness matches at path boundaries
  const SPECULAR_AA_VARIANCE = 2.0;
  const SPECULAR_AA_MAX = 0.2;

  // Initial UI selections
  const options = {
    Output: 'Default',
    Rasterizer: 'Both',
    Grid: 'XZ'
  };
  68. // Buffer visibility packaging configuration — depth occupies the bits above each payload
  // Each packed word is 32 bits wide: payload bits + depth bits = 32.
  69. const TRIANGLE_INDEX_BITS = 16; // 2^16 = 65536 max triangles in the LOD mega buffer
  70. const INSTANCE_INDEX_BITS = 17; // 2^17 = 131072 max instances
  71. const TRIANGLE_INDEX_MASK = 2 ** TRIANGLE_INDEX_BITS - 1; // 0xFFFF, low-bit mask for the triangle index
  72. const INSTANCE_INDEX_MASK = 2 ** INSTANCE_INDEX_BITS - 1; // 0x1FFFF, low-bit mask for the instance id
  73. const DEPTH_TRI_MAX = 2 ** ( 32 - TRIANGLE_INDEX_BITS ) - 1; // 16-bit depth packed above the triangle index
  74. const DEPTH_INST_MAX = 2 ** ( 32 - INSTANCE_INDEX_BITS ) - 1; // 15-bit depth packed above the instance id
  /**
   * Builds the debug visualization color for the given output mode.
   *
   * Modes 1-7 map to geometry normal, normal map, UV, roughness, metalness,
   * AO and emissive. Any other mode leaves the result at vec4( 0.0 ).
   *
   * @param outputMode - Node compared against the mode ids with `.equal()`.
   * @param normal - Geometry normal, remapped from [-1, 1] to [0, 1].
   * @param normalMap - Normal-mapped normal, remapped from [-1, 1] to [0, 1].
   * @param uv - Texture coordinates, written to the red/green channels.
   * @param roughness - Scalar shown as grayscale.
   * @param metalness - Scalar shown as grayscale.
   * @param ao - Scalar shown as grayscale.
   * @param emissive - Color shown as-is with alpha 1.
   * @returns The vec4 node produced by invoking the inner Fn.
   */
  const getVisColor = ( outputMode, normal, normalMap, uv, roughness, metalness, ao, emissive ) => {

    // Map a [-1, 1] vector to a displayable [0, 1] color
    const remapSigned = ( v ) => vec4( v.mul( 0.5 ).add( 0.5 ), 1.0 );

    // Replicate a scalar across the RGB channels
    const grayscale = ( s ) => vec4( s, s, s, 1.0 );

    // [ mode id, color builder ] pairs, listed in the order the branches are emitted
    const modeTable = [
      [ 1, () => remapSigned( normal ) ], // Geometry Normal
      [ 2, () => remapSigned( normalMap ) ], // Normal Map
      [ 3, () => vec4( uv, 0.0, 1.0 ) ], // UV
      [ 4, () => grayscale( roughness ) ], // Roughness
      [ 5, () => grayscale( metalness ) ], // Metalness
      [ 6, () => grayscale( ao ) ], // AO
      [ 7, () => vec4( emissive, 1.0 ) ] // Emissive
    ];

    const visualize = Fn( () => {

      const color = vec4( 0.0 ).toVar();

      let chain = null;

      for ( const [ modeId, buildColor ] of modeTable ) {

        const selected = outputMode.equal( modeId );

        const writeColor = () => {

          color.assign( buildColor() );

        };

        // The first entry opens the conditional, the rest extend it
        chain = ( chain === null ) ? If( selected, writeColor ) : chain.ElseIf( selected, writeColor );

      }

      return color;

    } );

    return visualize();

  };
  103. init();
  104. async function init() {
  105. renderer = new THREE.WebGPURenderer();
  106. renderer.toneMapping = THREE.ACESFilmicToneMapping;
  107. renderer.setPixelRatio( window.devicePixelRatio );
  108. renderer.setSize( window.innerWidth, window.innerHeight );
  109. renderer.setAnimationLoop( animate );
  110. renderer.inspector = new Inspector();
  111. document.body.appendChild( renderer.domElement );
  112. await renderer.init();
  113. camera = new THREE.PerspectiveCamera( 50, window.innerWidth / window.innerHeight, .25, 1000000 );
  114. controls = new OrbitControls( camera, renderer.domElement );
  115. controls.enableDamping = true;
  116. controls.zoomSpeed = .5;
  117. controls.maxDistance = 1000;
  118. // Load assets
  119. const [ gltf, envTexture ] = await Promise.all( [
  120. new GLTFLoader().loadAsync( 'models/gltf/DamagedHelmet/glTF/DamagedHelmet.gltf' ),
  121. new UltraHDRLoader().loadAsync( 'textures/equirectangular/royal_esplanade_2k.hdr.jpg' ),
  122. MeshoptClusterizer.ready,
  123. MeshoptSimplifier.ready
  124. ] );
  125. envTexture.mapping = THREE.EquirectangularReflectionMapping;
  126. let sourceMesh;
  127. gltf.scene.traverse( ( child ) => {
  128. if ( child.isMesh ) sourceMesh = child;
  129. } );
  130. const sourceMaterial = sourceMesh.material;
  131. // Bake the glTF node transform into the geometry (the helmet is authored z-up)
  132. gltf.scene.updateMatrixWorld( true );
  133. sourceMesh.geometry.applyMatrix4( sourceMesh.matrixWorld );
  134. // Generate LOD geometries and meshlets using Meshopt
  135. const lodTargets = [
  136. { ratio: 1.0, error: 0.0, weights: [ 0.25, 0.25, 0.25, 0.5, 0.5 ], flags: [] },
  137. { ratio: 0.55, error: 0.004, weights: [ 0.2, 0.2, 0.2, 0.35, 0.35 ], flags: [ 'RegularizeLight' ] },
  138. { ratio: 0.25, error: 0.015, weights: [ 0.12, 0.12, 0.12, 0.2, 0.2 ], flags: [ 'RegularizeLight' ] },
  139. { ratio: 0.1, error: 0.05, weights: [ 0.08, 0.08, 0.08, 0.12, 0.12 ], flags: [ 'RegularizeLight' ] },
  140. { ratio: 0.04, error: 0.14, weights: [ 0.04, 0.04, 0.04, 0.06, 0.06 ], flags: [ 'Regularize', 'Permissive' ] },
  141. { ratio: 0.015, error: 0.3, weights: [ 0.02, 0.02, 0.02, 0.03, 0.03 ], flags: [ 'Regularize', 'Permissive' ] }
  142. ];
  143. const geom = sourceMesh.geometry;
  144. geom.computeBoundingSphere();
  145. const boundingRadius = geom.boundingSphere.radius * 1.05;
  146. const posAttr = geom.attributes.position;
  147. const normAttr = geom.attributes.normal;
  148. const uvAttr = geom.attributes.uv;
  149. const vertexCount = posAttr.count;
  150. const simplifierAttributes = new Float32Array( vertexCount * 5 );
  151. for ( let i = 0; i < vertexCount; i ++ ) {
  152. simplifierAttributes[ i * 5 + 0 ] = normAttr.getX( i );
  153. simplifierAttributes[ i * 5 + 1 ] = normAttr.getY( i );
  154. simplifierAttributes[ i * 5 + 2 ] = normAttr.getZ( i );
  155. simplifierAttributes[ i * 5 + 3 ] = uvAttr.getX( i );
  156. simplifierAttributes[ i * 5 + 4 ] = uvAttr.getY( i );
  157. }
  158. const sourceIndices = geom.index ? new Uint32Array( geom.index.array ) : new Uint32Array( Array.from( { length: vertexCount }, ( _, i ) => i ) );
  159. const sourceScale = MeshoptSimplifier.getScale( posAttr.array, 3 );
  160. const lods = [];
  161. let totalChunks = 0;
  162. let indices = sourceIndices;
  163. let previousError = 0;
  164. for ( let i = 0; i < lodTargets.length; i ++ ) {
  165. let error = 0;
  166. if ( i > 0 ) {
  167. const target = lodTargets[ i ];
  168. const targetIndexCount = Math.max( 3, Math.floor( sourceIndices.length * target.ratio / 3 ) * 3 );
  169. const simplified = MeshoptSimplifier.simplifyWithAttributes(
  170. indices,
  171. posAttr.array,
  172. 3,
  173. simplifierAttributes,
  174. 5,
  175. target.weights,
  176. null,
  177. targetIndexCount,
  178. target.error,
  179. target.flags
  180. );
  181. if ( simplified[ 0 ].length >= 3 ) {
  182. indices = simplified[ 0 ];
  183. error = previousError + simplified[ 1 ] * sourceScale;
  184. } else {
  185. error = previousError;
  186. }
  187. }
  188. previousError = error;
  189. const meshletBuffers = MeshoptClusterizer.buildMeshlets(
  190. indices,
  191. posAttr.array,
  192. 3,
  193. 64,
  194. 64,
  195. 0.25
  196. );
  197. const bounds = MeshoptClusterizer.computeMeshletBounds( meshletBuffers, posAttr.array, 3 );
  198. const lod = {
  199. meshletBuffers,
  200. bounds,
  201. error,
  202. numChunks: meshletBuffers.meshletCount,
  203. numTriangles: meshletBuffers.meshletCount * 64, // Padded to exactly 64 triangles per chunk
  204. numVertices: vertexCount,
  205. vertexOffset: i * vertexCount,
  206. positions: posAttr,
  207. normals: normAttr,
  208. uvs: uvAttr
  209. };
  210. lods.push( lod );
  211. totalChunks += lod.numChunks;
  212. }
  213. console.info( 'LOD Meshlets count: ', lods.map( l => l.numChunks ) );
  214. const totalVertices = lods.length * vertexCount;
  215. const totalIndices = totalChunks * 64 * 3;
  216. if ( totalIndices / 3 > TRIANGLE_INDEX_MASK + 1 ) throw new Error( 'Triangle count exceeds payload bit budget' );
  217. if ( instanceCount > INSTANCE_INDEX_MASK + 1 ) throw new Error( 'Instance count exceeds payload bit budget' );
  218. const maxTrianglesPerInstance = lods[ 0 ].numTriangles;
  219. const totalTriangles = instanceCount * maxTrianglesPerInstance;
  220. document.getElementById( 'triangleCount' ).innerText = new Intl.NumberFormat().format( totalTriangles );
  221. const vertexArray = new Float32Array( totalVertices * 4 ); // vec4 padded
  222. const normalArray = new Float32Array( totalVertices * 4 ); // vec4 padded
  223. const uvArray = new Float32Array( totalVertices * 2 );
  224. const indexArray = new Uint32Array( totalIndices );
  225. const meshletTriangleArray = new Uint32Array( totalIndices / 3 ); // 1 meshlet ID per triangle
  226. const chunkBoundsData = new Float32Array( totalChunks * 4 ); // vec4: cx, cy, cz, radius
  227. let currentMeshletId = 1;
  228. let currentChunkId = 0;
  229. let currentIndexOffset = 0;
  230. for ( let i = 0; i < lods.length; i ++ ) {
  231. const lod = lods[ i ];
  232. lod.chunkStart = currentChunkId;
  233. lod.indexOffset = currentIndexOffset;
  234. // Fill vertex buffers for this LOD level
  235. for ( let v = 0; v < vertexCount; v ++ ) {
  236. const vIdx = lod.vertexOffset + v;
  237. vertexArray[ vIdx * 4 + 0 ] = lod.positions.getX( v );
  238. vertexArray[ vIdx * 4 + 1 ] = lod.positions.getY( v );
  239. vertexArray[ vIdx * 4 + 2 ] = lod.positions.getZ( v );
  240. vertexArray[ vIdx * 4 + 3 ] = 1.0;
  241. normalArray[ vIdx * 4 + 0 ] = lod.normals.getX( v );
  242. normalArray[ vIdx * 4 + 1 ] = lod.normals.getY( v );
  243. normalArray[ vIdx * 4 + 2 ] = lod.normals.getZ( v );
  244. uvArray[ vIdx * 2 + 0 ] = lod.uvs.getX( v );
  245. uvArray[ vIdx * 2 + 1 ] = lod.uvs.getY( v );
  246. }
  247. // Process and pack meshlets
  248. const meshletBuffers = lod.meshletBuffers;
  249. const bounds = lod.bounds;
  250. for ( let m = 0; m < lod.numChunks; m ++ ) {
  251. const meshlet = MeshoptClusterizer.extractMeshlet( meshletBuffers, m );
  252. const meshletTriangles = meshlet.triangles.length / 3;
  253. // Pack 64 triangles (with degenerate padding if needed)
  254. for ( let t = 0; t < 64; t ++ ) {
  255. const triIdx = ( lod.indexOffset / 3 ) + ( m * 64 ) + t;
  256. if ( t < meshletTriangles ) {
  257. const a_local = meshlet.triangles[ t * 3 + 0 ];
  258. const b_local = meshlet.triangles[ t * 3 + 1 ];
  259. const c_local = meshlet.triangles[ t * 3 + 2 ];
  260. indexArray[ triIdx * 3 + 0 ] = lod.vertexOffset + meshlet.vertices[ a_local ];
  261. indexArray[ triIdx * 3 + 1 ] = lod.vertexOffset + meshlet.vertices[ b_local ];
  262. indexArray[ triIdx * 3 + 2 ] = lod.vertexOffset + meshlet.vertices[ c_local ];
  263. } else {
  264. // Pad with degenerate triangle using the first vertex of the meshlet
  265. const a_local = meshlet.vertices[ 0 ];
  266. indexArray[ triIdx * 3 + 0 ] = lod.vertexOffset + a_local;
  267. indexArray[ triIdx * 3 + 1 ] = lod.vertexOffset + a_local;
  268. indexArray[ triIdx * 3 + 2 ] = lod.vertexOffset + a_local;
  269. }
  270. meshletTriangleArray[ triIdx ] = currentMeshletId;
  271. }
  272. currentMeshletId ++;
  273. // Bounding sphere
  274. chunkBoundsData[ currentChunkId * 4 + 0 ] = bounds[ m ].centerX;
  275. chunkBoundsData[ currentChunkId * 4 + 1 ] = bounds[ m ].centerY;
  276. chunkBoundsData[ currentChunkId * 4 + 2 ] = bounds[ m ].centerZ;
  277. chunkBoundsData[ currentChunkId * 4 + 3 ] = bounds[ m ].radius;
  278. currentChunkId ++;
  279. }
  280. currentIndexOffset += lod.numTriangles * 3;
  281. }
  282. // Upload LOD offsets to GPU (vec4: triangleStart, numTriangles, chunkStart, 0)
  283. const lodOffsetsUniform = uniformArray( lods.map( ( lod ) => new THREE.Vector4( lod.indexOffset / 3, lod.numTriangles, lod.chunkStart, 0 ) ), 'vec4' );
  284. const chunkBoundsBuffer = storage( new THREE.StorageBufferAttribute( chunkBoundsData, 4 ), 'vec4', totalChunks ).toReadOnly();
  285. // Storage Buffers
  286. const vertexBuffer = storage( new THREE.StorageBufferAttribute( vertexArray, 4 ), 'vec4', totalVertices ).toReadOnly();
  287. const normalBuffer = storage( new THREE.StorageBufferAttribute( normalArray, 4 ), 'vec4', totalVertices ).toReadOnly();
  288. const uvBuffer = storage( new THREE.StorageBufferAttribute( uvArray, 2 ), 'vec2', totalVertices ).toReadOnly();
  289. const indexBuffer = storage( new THREE.StorageBufferAttribute( indexArray, 1 ), 'uint', totalIndices ).toReadOnly();
  290. const meshletIdBuffer = storage( new THREE.StorageBufferAttribute( meshletTriangleArray, 1 ), 'uint', totalIndices / 3 ).toReadOnly();
  291. const timeScale = uniform( 1.0 );
  292. const occlusionBiasUniform = uniform( 0.0008 );
  293. const lodThresholdUniform = uniform( 3.0 );
  294. const parameterGroup = renderer.inspector.createParameters( 'Parameters' );
  295. parameterGroup.add( options, 'Output', {
  296. 'Default': 'Default',
  297. 'Meshlet Debug': 'Meshlet Debug',
  298. 'Geometry Normal': 'Geometry Normal',
  299. 'Normal Map': 'Normal Map',
  300. 'UV': 'UV',
  301. 'Roughness': 'Roughness',
  302. 'Metalness': 'Metalness',
  303. 'AO': 'AO',
  304. 'Emissive': 'Emissive'
  305. } ).addEventListener( 'change', updateMode );
  306. parameterGroup.add( options, 'Rasterizer', { 'SW Only': 'SW Only', 'HW Only': 'HW Only', 'Both': 'Both' } );
  307. const staticInstanceData = new Float32Array( instanceCount * 4 );
  308. const instanceDataAttr = new THREE.StorageBufferAttribute( staticInstanceData, 4 );
  309. const instanceDataBuffer = storage( instanceDataAttr, 'vec4', instanceCount );
  310. // Lay the instances out as a plane or a volume (same instance count)
  // Rewrites every instance slot (x, y, z, scale) for the selected layout,
  // repositions the camera and orbit target for that layout, and flags the
  // storage attribute for upload.
  const updateGrid = () => {

    const SPACING = 4.0;

    // 'XZ' is a single-layer plane at y = -1; any other value is the volume
    const layout = ( options.Grid === 'XZ' )
      ? { counts: [ 360, 1, 360 ], planeY: - 1, eye: [ 0, 8, 30 ], target: [ 0, - 1, 0 ] }
      : { counts: [ 60, 36, 60 ], planeY: null, eye: [ 2, 2, 40 ], target: [ 0, 0, 0 ] };

    const [ countX, countY, countZ ] = layout.counts;

    let cursor = 0;

    // x outermost, z innermost — same write order for both layouts
    for ( let ix = 0; ix < countX; ix ++ ) {

      const px = ( ix - countX / 2 ) * SPACING;

      for ( let iy = 0; iy < countY; iy ++ ) {

        const py = ( layout.planeY !== null ) ? layout.planeY : ( iy - countY / 2 ) * SPACING;

        for ( let iz = 0; iz < countZ; iz ++ ) {

          staticInstanceData[ cursor ] = px;
          staticInstanceData[ cursor + 1 ] = py;
          staticInstanceData[ cursor + 2 ] = ( iz - countZ / 2 ) * SPACING;
          staticInstanceData[ cursor + 3 ] = 1.0; // scale
          cursor += 4;

        }

      }

    }

    camera.position.set( ...layout.eye );
    controls.target.set( ...layout.target );

    instanceDataAttr.needsUpdate = true;

  };
  341. updateGrid();
  342. parameterGroup.add( options, 'Grid', { 'XZ': 'XZ', 'XYZ': 'XYZ' } ).addEventListener( 'change', updateGrid );
  343. parameterGroup.add( occlusionBiasUniform, 'value', 0.0, 0.0008 ).name( 'Occlusion Bias' ).step( 0.000001 );
  344. parameterGroup.add( lodThresholdUniform, 'value', 1, 15.0 ).name( 'LOD Threshold' ).step( 0.1 );
  345. parameterGroup.add( timeScale, 'value', 0.0, 1.0 ).name( 'Animation Speed' );
  346. // Packed visibility buffers — depth in the high bits, payload in the low bits,
  347. // so a single atomicMax resolves the depth test and the payload write together
  348. // and the winner is order-independent (no frame-to-frame flicker).
  349. // screenTri: depth(16) | megaTriangleIndex(16)
  350. // screenInst: depth(15) | instId(17)
  351. createScreenBuffers();
  352. const instanceWorldData = new Float32Array( instanceCount * 16 );
  353. const instanceMvpData = new Float32Array( instanceCount * 16 );
  354. const instanceWorldAttr = new THREE.StorageBufferAttribute( instanceWorldData, 16 );
  355. const instanceMvpAttr = new THREE.StorageBufferAttribute( instanceMvpData, 16 );
  356. const instanceWorldBuffer = storage( instanceWorldAttr, 'mat4', instanceCount );
  357. const instanceMvpBuffer = storage( instanceMvpAttr, 'mat4', instanceCount );
  358. const instanceWorldRead = storage( instanceWorldAttr, 'mat4', instanceCount ).toReadOnly();
  359. // Previous frame world matrices for the occlusion test
  360. const instancePrevWorldAttr = new THREE.StorageBufferAttribute( new Float32Array( instanceCount * 16 ), 16 );
  361. const instancePrevWorldBuffer = storage( instancePrevWorldAttr, 'mat4', instanceCount );
  362. const workQueueCountData = new Uint32Array( 1 );
  363. const workQueueCountAttr = new THREE.StorageBufferAttribute( workQueueCountData, 1 );
  364. const workQueueCountAtomic = storage( workQueueCountAttr, 'uint', 1 ).toAtomic();
  365. const workQueueCountRead = storage( workQueueCountAttr, 'uint', 1 ).toReadOnly();
  366. const dispatchData = new Uint32Array( 3 );
  367. const dispatchAttr = new THREE.IndirectStorageBufferAttribute( dispatchData, 3 );
  368. const dispatchBuffer = storage( dispatchAttr, 'uint', 3 );
  369. // Work queue budget — one item is a 64-triangle chunk of one visible instance
  370. const MAX_WORK_ITEMS = 2820000;
  371. const workQueueData = new Uint32Array( MAX_WORK_ITEMS * 4 );
  372. const workQueueBuffer = storage( new THREE.StorageBufferAttribute( workQueueData, 4 ), 'uvec4', MAX_WORK_ITEMS );
  373. // HW Rasterizer Buffers (for large triangles that exceed SW raster budget)
  374. const MAX_HW_TRIANGLES = 100000;
  375. // HW queue: index 0 is atomic counter, then stride-2 entries [instId, triIdx]
  376. const hwQueueData = new Uint32Array( 1 + MAX_HW_TRIANGLES * 2 );
  377. const hwQueueAttr = new THREE.StorageBufferAttribute( hwQueueData, 1 );
  378. const hwQueueAtomic = storage( hwQueueAttr, 'uint', 1 + MAX_HW_TRIANGLES * 2 ).toAtomic();
  379. const hwQueueRead = storage( hwQueueAttr, 'uint', 1 + MAX_HW_TRIANGLES * 2 ).toReadOnly();
  380. // Draw indirect buffer: vertexCount, instanceCount, firstVertex, firstInstance
  381. const hwDrawData = new Uint32Array( 4 );
  382. const hwDrawAttr = new THREE.IndirectStorageBufferAttribute( hwDrawData, 4 );
  383. const hwDrawBuffer = storage( hwDrawAttr, 'uint', 4 );
  384. projScreenMatrixUniform = uniform( new THREE.Matrix4() );
  385. prevProjScreenUniform = uniform( new THREE.Matrix4() );
  386. frustumPlanesUniform = uniformArray( [
  387. new THREE.Vector4(), new THREE.Vector4(), new THREE.Vector4(),
  388. new THREE.Vector4(), new THREE.Vector4(), new THREE.Vector4()
  389. ], 'vec4' );
  390. cameraPos = uniform( new THREE.Vector3() );
  391. cotHalfFovUniform = uniform( 1.0 );
  392. const maxRasterSizeUniform = uniform( MAX_RASTER_SIZE, 'int' ); // Max bounding box size in pixels for SW rasterizer
  393. prevCameraPosUniform = uniform( new THREE.Vector3() );
  394. outputModeUniform = uniform( 0, 'uint' );
  395. depthSourceTexNode = texture( sceneRT.depthTexture );
  396. // One kernel per pyramid level — each texel keeps the max (farthest)
  397. // depth of the 2x2 it covers, so a sphere is occluded when its nearest
  398. // depth is farther than the stored value
  399. for ( let k = 0; k < MAX_HZB_LEVELS; k ++ ) {
  400. const initialInfo = hzbLevelTable.array[ Math.min( k, hzbLevelCount - 1 ) ];
  401. hzbKernels.push( Fn( () => {
  402. const info = hzbLevelTable.element( k );
  403. const levelWidth = uint( info.y );
  404. const levelHeight = uint( info.z );
  405. const levelOffset = uint( info.x );
  406. If( instanceIndex.lessThan( levelWidth.mul( levelHeight ) ), () => {
  407. const x = instanceIndex.mod( levelWidth );
  408. const y = instanceIndex.div( levelWidth );
  409. const sx = x.mul( 2 );
  410. const sy = y.mul( 2 );
  411. const depthMax = float( 0.0 ).toVar();
  412. if ( k === 0 ) {
  413. // Source: the full resolution scene depth
  414. const sw = uint( screenSize.x ).sub( 1 );
  415. const sh = uint( screenSize.y ).sub( 1 );
  416. for ( let dy = 0; dy < 2; dy ++ ) {
  417. for ( let dx = 0; dx < 2; dx ++ ) {
  418. depthMax.assign( max( depthMax, depthSourceTexNode.load( uvec2( min( sx.add( dx ), sw ), min( sy.add( dy ), sh ) ) ).r ) );
  419. }
  420. }
  421. } else {
  422. // Source: the previous pyramid level
  423. const src = hzbLevelTable.element( k - 1 );
  424. const srcWidth = uint( src.y );
  425. const srcOffset = uint( src.x );
  426. const swMax = srcWidth.sub( 1 );
  427. const shMax = uint( src.z ).sub( 1 );
  428. for ( let dy = 0; dy < 2; dy ++ ) {
  429. for ( let dx = 0; dx < 2; dx ++ ) {
  430. const tx = min( sx.add( dx ), swMax );
  431. const ty = min( sy.add( dy ), shMax );
  432. depthMax.assign( max( depthMax, hzbBuffer.element( srcOffset.add( ty.mul( srcWidth ) ).add( tx ) ) ) );
  433. }
  434. }
  435. }
  436. hzbBuffer.element( levelOffset.add( y.mul( levelWidth ) ).add( x ) ).assign( depthMax );
  437. } );
  438. } )().compute( initialInfo.y * initialInfo.z, [ 64 ] ).setName( `HZB Level ${ k }` ) );
  439. }
  440. // Conservative sphere vs pyramid test, using the previous frame's
  441. // depth and matrices (the helmets only rotate in place, so their
  442. // bounding spheres are identical between frames)
  // center: sphere center, projected with prevProjScreenUniform as vec4( center, 1.0 )
  //         (presumably a world-space vec3 node — confirm against the caller)
  // radius: sphere radius node, in the same units as center
  // Returns a boolean node: true only when the sphere is farther than two radii
  // from the previous camera position, both projected points have w > 0, and the
  // sphere's nearest depth exceeds the sampled pyramid depth plus the bias.
  443. const sphereOccluded = ( center, radius ) => {
  444. const toCamera = prevCameraPosUniform.sub( center );
  445. const dist = length( toCamera );
  446. // Closest point on the sphere toward the camera
  447. const nearPoint = center.add( toCamera.div( dist ).mul( radius ) );
  448. const nearClip = prevProjScreenUniform.mul( vec4( nearPoint, 1.0 ) );
  449. const centerClip = prevProjScreenUniform.mul( vec4( center, 1.0 ) );
  // Perspective divide: depth of the nearest point, screen position of the center
  450. const nearestZ = nearClip.z.div( nearClip.w );
  451. const ndc = centerClip.xy.div( centerClip.w );
  452. // Footprint in half resolution pyramid texels picks the level where
  453. // the sphere's diameter fits one texel, so the 2x2 window always covers it.
  454. // The 4 combines the NDC half-screen factor with the half resolution pyramid.
  455. const radiusTexels = radius.mul( cotHalfFovUniform ).mul( float( screenSize.y ) ).div( 4.0 ).div( dist );
  // level = ceil( log2( diameter in texels ) ), clamped to [ 0, hzbLevelCount - 1 ]
  456. const level = int( clamp( ceil( log2( max( radiusTexels.mul( 2.0 ), 1.0 ) ) ), 0.0, hzbLevelCountUniform.sub( 1.0 ) ) );
  // Level table entry: x = offset into the pyramid buffer, y = width, z = height
  457. const info = hzbLevelTable.element( level );
  458. const levelWidth = uint( info.y );
  459. const levelHeight = uint( info.z );
  460. const levelOffset = uint( info.x );
  // NDC -> texel coordinates of the chosen level (y is flipped)
  461. const px = ndc.x.mul( 0.5 ).add( 0.5 ).mul( float( levelWidth ) );
  462. const py = float( 0.5 ).sub( ndc.y.mul( 0.5 ) ).mul( float( levelHeight ) );
  // 2x2 window starting half a texel before the center, clamped to the level bounds
  463. const x0 = uint( clamp( px.sub( 0.5 ), 0.0, float( levelWidth.sub( 1 ) ) ) );
  464. const y0 = uint( clamp( py.sub( 0.5 ), 0.0, float( levelHeight.sub( 1 ) ) ) );
  465. const x1 = min( x0.add( 1 ), levelWidth.sub( 1 ) );
  466. const y1 = min( y0.add( 1 ), levelHeight.sub( 1 ) );
  // Largest stored depth among the four texels
  467. const maxZ = max(
  468. max( hzbRead.element( levelOffset.add( y0.mul( levelWidth ) ).add( x0 ) ), hzbRead.element( levelOffset.add( y0.mul( levelWidth ) ).add( x1 ) ) ),
  469. max( hzbRead.element( levelOffset.add( y1.mul( levelWidth ) ).add( x0 ) ), hzbRead.element( levelOffset.add( y1.mul( levelWidth ) ).add( x1 ) ) )
  470. );
  471. //const bias = occlusionBiasUniform.mul( dist );
  // Constant bias (the distance-scaled variant above is disabled)
  472. const bias = occlusionBiasUniform;
  473. return dist.greaterThan( radius.mul( 2.0 ) ) // skip spheres close to the camera
  474. .and( nearClip.w.greaterThan( 0.0 ) )
  475. .and( centerClip.w.greaterThan( 0.0 ) )
  476. .and( nearestZ.greaterThan( maxZ.add( bias ) ) );
  477. };
  // Compute Clear
  // Zeroes both per-pixel visibility targets. The thread with index 0 also
  // resets the work queue counter and the HW triangle counter, which lives in
  // slot 0 of the HW queue.
  computeClear = Fn( () => {

    for ( const pixelTarget of [ screenTriAtomic, screenInstAtomic ] ) {

      atomicStore( pixelTarget.element( instanceIndex ), uint( 0 ) );

    }

    If( instanceIndex.equal( 0 ), () => {

      for ( const counter of [ workQueueCountAtomic, hwQueueAtomic ] ) {

        atomicStore( counter.element( 0 ), uint( 0 ) );

      }

    } );

  } )().compute( maxPixels, [ 256 ] ).setName( 'Compute Clear' );
  // Compute Frustum (GPU Culling, LOD & Work Allocation)
  // One thread per instance: rebuilds the instance transform, culls the
  // instance against the frustum planes and the depth pyramid, selects a LOD
  // and appends one work item per surviving 64-triangle chunk to the queue.
  computeFrustum = Fn( () => {

    // Keep last frame's transform for motion vectors
    instancePrevWorldBuffer.element( instanceIndex ).assign( instanceWorldBuffer.element( instanceIndex ) );

    const data = instanceDataBuffer.element( instanceIndex );
    const pos = data.xyz;
    const scale = data.w;
    const i = float( instanceIndex );

    // Rotation
    const rotY = time.mul( timeScale ).add( i );
    const c = cos( rotY );
    const s = sin( rotY );

    // Compose MatrixWorld
    const matrixWorld = mat4(
      vec4( c.mul( scale ), 0.0, s.mul( scale ), 0.0 ),
      vec4( 0.0, scale, 0.0, 0.0 ),
      vec4( s.negate().mul( scale ), 0.0, c.mul( scale ), 0.0 ),
      vec4( pos, 1.0 )
    );

    // Store the world transform for every instance, visible or not. Writing it
    // only for visible instances would leave a culled instance with a stale
    // matrix; the previous-frame copy above would then hand that stale matrix
    // to the chunk occlusion test on the frame the instance reappears.
    instanceWorldBuffer.element( instanceIndex ).assign( matrixWorld );

    const visible = bool( true ).toVar();
    const radius = scale.mul( boundingRadius ); // bounding sphere radius

    // Frustum culling using the 6 extracted world-space planes
    Loop( { start: 0, end: 6 }, ( { i: planeIndex } ) => {

      const plane = frustumPlanesUniform.element( planeIndex );
      const dist = dot( plane.xyz, pos ).add( plane.w );

      // Fully behind one plane is enough to reject the sphere
      If( dist.lessThan( radius.negate() ), () => {

        visible.assign( false );

      } );

    } );

    // Occlusion cull the whole instance against the depth pyramid
    If( visible, () => {

      visible.assign( sphereOccluded( pos, radius ).not() );

    } );

    If( visible, () => {

      const distToCamera = distance( cameraPos, pos );

      // Precompute projection factor once (Screen-Space Projected Error)
      // pixelError = cotHalfFov * errorWorld / dist * screenH / 2
      const pixelFactor = cotHalfFovUniform.div( max( 0.01, distToCamera ) ).mul( float( screenSize.y ) ).div( 2.0 );

      // Walk the LODs from the last entry down to 1 and keep the first whose
      // projected error is within the threshold; LOD 0 is the fallback
      const lodLevel = uint( 0 ).toVar();
      let lodSelection = null;

      for ( let lodIndex = lods.length - 1; lodIndex > 0; lodIndex -- ) {

        const checkLod = float( lods[ lodIndex ].error ).mul( scale ).mul( pixelFactor ).lessThanEqual( lodThresholdUniform );

        if ( lodSelection === null ) {

          lodSelection = If( checkLod, () => {

            lodLevel.assign( lodIndex );

          } );

        } else {

          lodSelection = lodSelection.ElseIf( checkLod, () => {

            lodLevel.assign( lodIndex );

          } );

        }

      }

      const lodData = lodOffsetsUniform.element( lodLevel );
      const lodTriStart = uint( lodData.x );
      const lodNumTriangles = uint( lodData.y );
      const lodChunkStart = uint( lodData.z );

      // Calculate Work Items (64 triangles per item)
      const workItems = lodNumTriangles.add( 63 ).div( 64 );

      // Evaluate each Chunk (Cluster)
      Loop( { name: 'cIdx', type: 'uint', start: uint( 0 ), end: workItems, condition: '<' }, ( { cIdx: chunkIndex } ) => {

        const globalChunkId = lodChunkStart.add( uint( chunkIndex ) );
        const chunkBounds = chunkBoundsBuffer.element( globalChunkId );
        const chunkCenterLocal = chunkBounds.xyz;
        const chunkRadiusLocal = chunkBounds.w;

        // Transform chunk bounding sphere to world space and store as var to prevent inlining
        const chunkCenterWorld = matrixWorld.mul( vec4( chunkCenterLocal, 1.0 ) ).xyz.toVar();
        const chunkRadiusWorld = chunkRadiusLocal.mul( scale ).toVar();
        const chunkVisible = bool( true ).toVar();

        // Frustum cull the chunk
        Loop( { name: 'pIdx', start: 0, end: 6 }, ( { pIdx: planeIndex } ) => {

          const plane = frustumPlanesUniform.element( planeIndex );
          const chunkDist = dot( plane.xyz, chunkCenterWorld ).add( plane.w );

          If( chunkDist.lessThan( chunkRadiusWorld.negate() ), () => {

            chunkVisible.assign( false );

          } );

        } );

        // Occlusion cull the chunk, using its previous frame position
        // to stay consistent with the previous frame depth pyramid
        If( chunkVisible, () => {

          const chunkCenterPrev = instancePrevWorldBuffer.element( instanceIndex ).mul( vec4( chunkCenterLocal, 1.0 ) ).xyz.toVar();
          chunkVisible.assign( sphereOccluded( chunkCenterPrev, chunkRadiusWorld ).not() );

        } );

        If( chunkVisible, () => {

          // The counter keeps growing when the queue is full; only the
          // write is skipped, so consumers must clamp the count themselves
          const itemIndex = atomicAdd( workQueueCountAtomic.element( 0 ), 1 );

          If( itemIndex.lessThan( MAX_WORK_ITEMS ), () => {

            // uvec4( instanceIndex, triangleStart, lodNumTriangles, chunkIndex )
            workQueueBuffer.element( itemIndex ).assign(
              uvec4( instanceIndex, lodTriStart, lodNumTriangles, uint( chunkIndex ) )
            );

          } );

        } );

      } );

      // The MVP is only consumed by the rasterizer for queued work items,
      // so it is written for visible instances only
      instanceMvpBuffer.element( instanceIndex ).assign( projScreenMatrixUniform.mul( matrixWorld ) );

    } );

  } )().compute( instanceCount ).setName( 'Compute Frustum' );
  // Compute Dispatch (Indirect arguments)
  // Single-thread pass that turns the work item count into the workgroup
  // counts read by the indirect dispatch of the rasterizer.
  computeDispatch = Fn( () => {

    // The culling pass increments the counter even when an item is rejected
    // because the queue is full, so the raw value can exceed the number of
    // slots that were actually written. Clamp it to the queue capacity.
    const totalWorkgroups = min( workQueueCountRead.element( 0 ), uint( MAX_WORK_ITEMS ) );
    const maxDim = uint( 65535 );

    // Split totalWorkgroups into 2D dispatch if it exceeds 65535
    const dispatchX = min( totalWorkgroups, maxDim );
    const dispatchY = totalWorkgroups.add( maxDim ).sub( 1 ).div( maxDim );

    dispatchBuffer.element( 0 ).assign( dispatchX );
    dispatchBuffer.element( 1 ).assign( dispatchY );
    dispatchBuffer.element( 2 ).assign( 1 );

  } )().compute( 1 ).setName( 'Compute Dispatch' );
  // Edge function for barycentric coordinates
  // Returns (c.y - a.y) * (b.x - a.x) - (c.x - a.x) * (b.y - a.y) for three
  // 2D points; only the x and y components of the arguments are read.
  const edgeFunction = Fn( ( [ a, b, c ] ) => {

    const acY = c.y.sub( a.y );
    const abX = b.x.sub( a.x );
    const acX = c.x.sub( a.x );
    const abY = b.y.sub( a.y );

    return acY.mul( abX ).sub( acX.mul( abY ) );

  } );
  // Compute Rasterizer
  // Software rasterizer: each work item covers 64 triangles, one thread per
  // triangle. Small triangles are scan-converted into the atomic visibility
  // buffers; triangles whose bounding box exceeds maxRasterSizeUniform are
  // appended to the HW queue instead.
  computeRasterize = Fn( () => {

    // The culling pass increments the counter even when the queue is full,
    // so clamp it to the queue capacity before deriving any index from it;
    // otherwise threads would read work items that were never written
    const totalWorkgroups = min( workQueueCountRead.element( 0 ), uint( MAX_WORK_ITEMS ) );
    const totalThreads = totalWorkgroups.mul( 64 );

    If( instanceIndex.lessThan( totalThreads ), () => {

      const workItemId = instanceIndex.div( 64 );
      const localTriangleIndex = instanceIndex.mod( 64 );

      // uvec4( instanceIndex, triangleStart, lodNumTriangles, chunkIndex )
      const workItem = workQueueBuffer.element( workItemId );
      const instId = workItem.x;
      const lodTriStart = workItem.y;
      const lodNumTriangles = workItem.z;
      const chunkIndex = workItem.w;

      const globalTriangleIndex = chunkIndex.mul( 64 ).add( localTriangleIndex );

      // The last chunk of a LOD may hold fewer than 64 triangles
      If( globalTriangleIndex.lessThan( lodNumTriangles ), () => {

        const megaTriangleIndex = lodTriStart.add( globalTriangleIndex );
        const indexOffset = megaTriangleIndex.mul( 3 );

        const i0 = indexBuffer.element( indexOffset );
        const i1 = indexBuffer.element( indexOffset.add( 1 ) );
        const i2 = indexBuffer.element( indexOffset.add( 2 ) );

        const v0 = vertexBuffer.element( i0 );
        const v1 = vertexBuffer.element( i1 );
        const v2 = vertexBuffer.element( i2 );

        const instMvpMatrix = instanceMvpBuffer.element( instId );

        // MVP
        const p0 = instMvpMatrix.mul( v0 );
        const p1 = instMvpMatrix.mul( v1 );
        const p2 = instMvpMatrix.mul( v2 );

        // Near plane clipping
        // NOTE: a triangle with any vertex at w <= 0 is dropped as a whole,
        // it is not clipped and not forwarded to the HW path
        If( p0.w.greaterThan( 0.0 ).and( p1.w.greaterThan( 0.0 ) ).and( p2.w.greaterThan( 0.0 ) ), () => {

          const ndc0 = p0.xyz.div( p0.w );
          const ndc1 = p1.xyz.div( p1.w );
          const ndc2 = p2.xyz.div( p2.w );

          // Early Backface Culling in NDC
          const areaNdc = edgeFunction( ndc0, ndc1, ndc2 );

          If( areaNdc.greaterThan( 0.0 ), () => {

            // NDC guard: skip triangles entirely outside clip volume
            const ndcMinX = min( ndc0.x, min( ndc1.x, ndc2.x ) );
            const ndcMaxX = max( ndc0.x, max( ndc1.x, ndc2.x ) );
            const ndcMinY = min( ndc0.y, min( ndc1.y, ndc2.y ) );
            const ndcMaxY = max( ndc0.y, max( ndc1.y, ndc2.y ) );

            If( ndcMaxX.greaterThan( - 1.0 ).and( ndcMinX.lessThan( 1.0 ) ).and( ndcMaxY.greaterThan( - 1.0 ) ).and( ndcMinY.lessThan( 1.0 ) ), () => {

              // Map to screen coordinates
              const w = screenSize.x;
              const h = screenSize.y;

              const s0 = ndc0.xy.add( 1.0 ).mul( 0.5 ).mul( vec2( w, h ) );
              const s1 = ndc1.xy.add( 1.0 ).mul( 0.5 ).mul( vec2( w, h ) );
              const s2 = ndc2.xy.add( 1.0 ).mul( 0.5 ).mul( vec2( w, h ) );

              // Bounding Box, clamped to the screen
              const minX = max( 0.0, min( s0.x, min( s1.x, s2.x ) ) );
              const maxX = min( w.sub( 1.0 ), max( s0.x, max( s1.x, s2.x ) ) );
              const minY = max( 0.0, min( s0.y, min( s1.y, s2.y ) ) );
              const maxY = min( h.sub( 1.0 ), max( s0.y, max( s1.y, s2.y ) ) );

              const startX = int( floor( minX ) );
              const endX = int( floor( maxX ) );
              const startY = int( floor( minY ) );
              const endY = int( floor( maxY ) );

              // Big triangle guard: skip triangles larger than maxRasterSize
              // This is the key performance safeguard — software rasterizers
              // should only handle small triangles. Large triangles cause O(n²)
              // pixel iteration per thread, which kills performance when close.
              const bbWidth = endX.sub( startX );
              const bbHeight = endY.sub( startY );

              // HW path payloads — stored as two separate uint entries to
              // avoid the 32-bit packing limit of instId + triIdx

              // Sub-pixel / Valid bounds rejection + big triangle guard
              If( startX.lessThanEqual( endX ).and( startY.lessThanEqual( endY ) ).and( bbWidth.lessThanEqual( maxRasterSizeUniform ) ).and( bbHeight.lessThanEqual( maxRasterSizeUniform ) ), () => {

                const area = edgeFunction( s0, s1, s2 );

                // Per-pixel increments of the three edge functions
                const stepX_w0 = s1.y.sub( s2.y );
                const stepY_w0 = s2.x.sub( s1.x );
                const stepX_w1 = s2.y.sub( s0.y );
                const stepY_w1 = s0.x.sub( s2.x );
                const stepX_w2 = s0.y.sub( s1.y );
                const stepY_w2 = s1.x.sub( s0.x );

                // Top-Left rule check for each edge to guarantee watertightness
                const isTopLeft0 = stepX_w0.lessThan( 0.0 ).or( stepX_w0.equal( 0.0 ).and( stepY_w0.greaterThan( 0.0 ) ) );
                const isTopLeft1 = stepX_w1.lessThan( 0.0 ).or( stepX_w1.equal( 0.0 ).and( stepY_w1.greaterThan( 0.0 ) ) );
                const isTopLeft2 = stepX_w2.lessThan( 0.0 ).or( stepX_w2.equal( 0.0 ).and( stepY_w2.greaterThan( 0.0 ) ) );

                const bias0 = isTopLeft0.select( 0.0, - 1e-5 );
                const bias1 = isTopLeft1.select( 0.0, - 1e-5 );
                const bias2 = isTopLeft2.select( 0.0, - 1e-5 );

                // Edge functions are evaluated at pixel centers
                const pStart = vec2( float( startX ).add( 0.5 ), float( startY ).add( 0.5 ) );

                const row_w0 = edgeFunction( s1, s2, pStart ).toVar();
                const row_w1 = edgeFunction( s2, s0, pStart ).toVar();
                const row_w2 = edgeFunction( s0, s1, pStart ).toVar();

                row_w0.addAssign( bias0 );
                row_w1.addAssign( bias1 );
                row_w2.addAssign( bias2 );

                // Incremental Z Math (ALU Optimization)
                const b0_start = row_w0.div( area );
                const b1_start = row_w1.div( area );
                const b2_start = row_w2.div( area );

                const row_z = b0_start.mul( ndc0.z ).add( b1_start.mul( ndc1.z ) ).add( b2_start.mul( ndc2.z ) ).toVar();
                const stepX_z = stepX_w0.div( area ).mul( ndc0.z ).add( stepX_w1.div( area ).mul( ndc1.z ) ).add( stepX_w2.div( area ).mul( ndc2.z ) );
                const stepY_z = stepY_w0.div( area ).mul( ndc0.z ).add( stepY_w1.div( area ).mul( ndc1.z ) ).add( stepY_w2.div( area ).mul( ndc2.z ) );

                Loop( { name: 'y', type: 'int', start: startY, end: endY, condition: '<=' }, ( { y } ) => {

                  const w0 = row_w0.toVar();
                  const w1 = row_w1.toVar();
                  const w2 = row_w2.toVar();
                  const z = row_z.toVar();

                  Loop( { name: 'x', type: 'int', start: startX, end: endX, condition: '<=' }, ( { x } ) => {

                    If( w0.greaterThanEqual( 0.0 ).and( w1.greaterThanEqual( 0.0 ) ).and( w2.greaterThanEqual( 0.0 ) ), () => {

                      If( z.greaterThanEqual( 0.0 ).and( z.lessThanEqual( 1.0 ) ), () => {

                        // Depth (fourth-root distribution) packed above each payload's bits
                        const zEncoded = sqrt( sqrt( float( 1.0 ).sub( z ) ) );
                        const depthTri = uint( zEncoded.mul( DEPTH_TRI_MAX ) );
                        const depthInst = uint( zEncoded.mul( DEPTH_INST_MAX ) );

                        // Both payloads are masked to their field width so an
                        // out-of-range index can never spill into the depth bits
                        const packedTri = depthTri.shiftLeft( TRIANGLE_INDEX_BITS ).bitOr( megaTriangleIndex.bitAnd( TRIANGLE_INDEX_MASK ) );
                        const packedInst = depthInst.shiftLeft( INSTANCE_INDEX_BITS ).bitOr( instId.bitAnd( INSTANCE_INDEX_MASK ) );

                        const pixelIndex = uint( y ).mul( uint( screenSize.x ) ).add( uint( x ) );

                        // Early depth pre-check: skip the atomics if the pixel already has a closer fragment
                        const currentDepth = atomicLoad( screenTriAtomic.element( pixelIndex ) ).shiftRight( TRIANGLE_INDEX_BITS );

                        If( depthTri.greaterThanEqual( currentDepth ), () => {

                          // Depth occupies the high bits, so atomicMax resolves the depth
                          // test and the payload write in one order-independent step
                          atomicMax( screenTriAtomic.element( pixelIndex ), packedTri );
                          atomicMax( screenInstAtomic.element( pixelIndex ), packedInst );

                        } );

                      } );

                    } );

                    w0.addAssign( stepX_w0 );
                    w1.addAssign( stepX_w1 );
                    w2.addAssign( stepX_w2 );
                    z.addAssign( stepX_z );

                  } );

                  row_w0.addAssign( stepY_w0 );
                  row_w1.addAssign( stepY_w1 );
                  row_w2.addAssign( stepY_w2 );
                  row_z.addAssign( stepY_z );

                } );

              } ).Else( () => {

                // Big triangle → enqueue for HW rasterization
                If( startX.lessThanEqual( endX ).and( startY.lessThanEqual( endY ) ), () => {

                  // Slot 0 of the queue is the counter; entries start at slot 1
                  const hwCount = atomicAdd( hwQueueAtomic.element( 0 ), 1 );

                  If( hwCount.lessThan( MAX_HW_TRIANGLES ), () => {

                    const hwSlot = hwCount.mul( 2 ).add( 1 );
                    atomicStore( hwQueueAtomic.element( hwSlot ), instId );
                    atomicStore( hwQueueAtomic.element( hwSlot.add( 1 ) ), megaTriangleIndex );

                  } );

                } );

              } );

            } );

          } ); // End Early Backface Culling

        } ); // End Near Plane Clipping

      } ); // End globalTriangleIndex bounds check

    } ); // End instanceIndex bounds check

  } )().compute( dispatchAttr ).setName( 'Compute Rasterize' );
  // Compute HW Draw Indirect Args
  // Single-thread pass that converts the HW triangle counter into the
  // arguments of the non-indexed indirect draw used by the HW mesh.
  computeHWArgs = Fn( () => {

    // The rasterizer increments the counter even when a triangle is rejected
    // because the queue is full, so clamp it to the number of slots that can
    // actually hold an entry before sizing the draw
    const hwCount = min( atomicLoad( hwQueueAtomic.element( 0 ) ), uint( MAX_HW_TRIANGLES ) );

    // Non-indexed draw: vertexCount = hwCount * 3 (3 verts per triangle)
    hwDrawBuffer.element( 0 ).assign( hwCount.mul( 3 ) ); // vertexCount
    hwDrawBuffer.element( 1 ).assign( uint( 1 ) ); // instanceCount
    hwDrawBuffer.element( 2 ).assign( uint( 0 ) ); // firstVertex
    hwDrawBuffer.element( 3 ).assign( uint( 0 ) ); // firstInstance

  } )().compute( 1 ).setName( 'Compute HW Args' );
  // Hash function for meshlet colors (shared between HW mesh and fullscreen resolve)
  // Scrambles an integer id and maps three bytes of the result to an opaque
  // color whose channels lie in [0.2, 1.0].
  const hashColor = Fn( ( [ id_in ] ) => {

    const seed = uint( id_in ).toVar();

    // Multiply-add followed by two xor-shift rounds
    const scrambled = seed.mul( uint( 747796405 ) ).add( uint( 289559509 ) );
    const mixed = scrambled.shiftRight( 16 ).bitXor( scrambled ).mul( uint( 277803737 ) );
    const hash = mixed.shiftRight( 16 ).bitXor( mixed );

    // Extract one byte and normalize it to [0, 1]
    const byteAt = ( shift ) => {

      const bits = shift === 0 ? hash : hash.shiftRight( shift );
      return float( bits.bitAnd( uint( 255 ) ) ).div( 255.0 );

    };

    // Lift the channel away from black
    const brighten = ( channel ) => channel.mul( 0.8 ).add( 0.2 );

    return vec4( brighten( byteAt( 0 ) ), brighten( byteAt( 8 ) ), brighten( byteAt( 16 ) ), 1.0 );

  } );
  // Tangent from the triangle's world-space edges and UVs,
  // for normal mapping without precomputed tangents
  //
  // w0, w1, w2    world-space triangle corners
  // uv0, uv1, uv2 matching texture coordinates
  // normal        surface normal the tangent is orthogonalized against
  //
  // Returns the normalized tangent.
  const computeTangent = ( w0, w1, w2, uv0, uv1, uv2, normal ) => {

    const dp1 = w1.sub( w0 );
    const dp2 = w2.sub( w0 );
    const duv1 = uv1.sub( uv0 );
    const duv2 = uv2.sub( uv0 );

    const det = duv1.x.mul( duv2.y ).sub( duv1.y.mul( duv2.x ) );

    // sign( det ) is 0 for degenerate UVs, which would zero the tangent and
    // turn the normalize below into NaN; use a handedness that is never 0
    const handedness = det.lessThan( 0.0 ).select( float( - 1.0 ), float( 1.0 ) );
    const tangentUv = dp1.mul( duv2.y ).sub( dp2.mul( duv1.y ) ).mul( handedness );

    // When the UV-derived tangent itself collapses to zero, fall back to a
    // triangle edge so the result stays finite
    const tangentRaw = dot( tangentUv, tangentUv ).greaterThan( 0.0 ).select( tangentUv, dp1 );

    // Orthonormalize against the (smooth) normal
    return normalize( tangentRaw.sub( normal.mul( dot( normal, tangentRaw ) ) ) );

  };
  // Perturbs a normal with a tangent-space normal map sample.
  // The bitangent is cross( normal, tangent ); the sample's xyz is remapped
  // from [0, 1] to [-1, 1] and used as weights of the tangent frame axes.
  const applyNormalMap = ( normal, tangent, mapSample ) => {

    const bitangent = cross( normal, tangent );
    const weights = mapSample.xyz.mul( 2.0 ).sub( 1.0 );

    const alongTangent = tangent.mul( weights.x );
    const alongBitangent = bitangent.mul( weights.y );
    const alongNormal = normal.mul( weights.z );

    return normalize( alongTangent.add( alongBitangent ).add( alongNormal ) );

  };
  // Scene — shared by the resolve pass and the HW mesh, so both are lit
  // by the same environment through the standard material pipeline.
  // The environment texture doubles as a blurred background.
  scene = Object.assign( new THREE.Scene(), {
    background: envTexture,
    backgroundBlurriness: 0.5,
    environment: envTexture
  } );
  // HW Rasterizer Mesh (renders big triangles via the GPU hardware pipeline)
  // The SW rasterizer writes to an atomic screen buffer; this mesh instead
  // goes through the regular pipeline with hardware depth testing.
  // It is drawn AFTER the fullscreen resolve (renderOrder 2 vs 1) and overlays
  // the HW-rasterized triangles.
  {

    // Geometry: dummy positions, vertex count driven by indirect draw
    const geometry = new THREE.BufferGeometry();
    const dummyPositions = new Float32Array( MAX_HW_TRIANGLES * 3 * 3 );
    geometry.setAttribute( 'position', new THREE.Float32BufferAttribute( dummyPositions, 3 ) );
    geometry.setIndirect( hwDrawAttr );
    geometry.boundingSphere = new THREE.Sphere().set( new THREE.Vector3(), Infinity );

    // Varyings from the vertex pulling stage
    const vInstId = varyingProperty( 'uint', 'vInstId' );
    const vMegaTriIdx = varyingProperty( 'uint', 'vMegaTriIdx' );
    const vUv = varyingProperty( 'vec2', 'vUv' );
    const vNormal = varyingProperty( 'vec3', 'vNormal' );
    const vTangent = varyingProperty( 'vec3', 'vTangent' );

    // Vertex pulling shared by all HW materials
    const pulledPosition = Fn( () => {

      // vertexIndex: 0,1,2, 3,4,5, 6,7,8, ...
      const queueTriangle = vertexIndex.div( 3 ); // which triangle in HW queue
      const corner = vertexIndex.mod( 3 ); // which vertex (0, 1, 2)

      // Queue entries are ( instance id, triangle id ) pairs starting at slot 1
      const slot = queueTriangle.mul( 2 ).add( 1 );
      const instanceId = hwQueueRead.element( slot );
      const triangleId = hwQueueRead.element( slot.add( 1 ) );

      const worldMatrix = instanceWorldRead.element( instanceId );

      const firstIndex = triangleId.mul( 3 );
      const indexA = indexBuffer.element( firstIndex );
      const indexB = indexBuffer.element( firstIndex.add( 1 ) );
      const indexC = indexBuffer.element( firstIndex.add( 2 ) );

      // World-space corners for the tangent frame
      const cornerA = worldMatrix.mul( vertexBuffer.element( indexA ) ).xyz;
      const cornerB = worldMatrix.mul( vertexBuffer.element( indexB ) ).xyz;
      const cornerC = worldMatrix.mul( vertexBuffer.element( indexC ) ).xyz;

      // Selects the value that belongs to the corner this invocation handles
      const forThisCorner = ( a, b, c ) => corner.equal( 1 ).select( b, corner.equal( 2 ).select( c, a ) );

      // This vertex's position, normal and uv
      const vertexId = indexBuffer.element( firstIndex.add( corner ) );
      const worldPosition = forThisCorner( cornerA, cornerB, cornerC );
      const worldNormal = normalize( worldMatrix.mul( vec4( normalBuffer.element( vertexId ).xyz, 0.0 ) ).xyz );

      const uvA = uvBuffer.element( indexA );
      const uvB = uvBuffer.element( indexB );
      const uvC = uvBuffer.element( indexC );
      const uvHere = forThisCorner( uvA, uvB, uvC );

      vInstId.assign( instanceId );
      vMegaTriIdx.assign( triangleId );
      vUv.assign( uvHere );
      vNormal.assign( worldNormal );
      vTangent.assign( computeTangent( cornerA, cornerB, cornerC, uvA, uvB, uvC, worldNormal ) );

      return worldPosition;

    } )();

    // Shaded: the standard material pipeline lights the pulled geometry
    const sampleAtUv = ( map ) => texture( map, vUv );

    // Specular antialiasing from hardware derivatives of the geometric normal
    const geometricNormal = normalize( vNormal );
    const normalDx = dFdx( geometricNormal );
    const normalDy = dFdy( geometricNormal );
    const normalVariance = normalDx.dot( normalDx ).add( normalDy.dot( normalDy ) );
    const kernelRoughness = min( normalVariance.mul( SPECULAR_AA_VARIANCE ), SPECULAR_AA_MAX );

    const shadedMaterial = new THREE.MeshStandardNodeMaterial();
    shadedMaterial.positionNode = pulledPosition;
    shadedMaterial.colorNode = sampleAtUv( sourceMaterial.map );
    shadedMaterial.normalNode = applyNormalMap(
      geometricNormal,
      normalize( vTangent ),
      sampleAtUv( sourceMaterial.normalMap )
    ).transformDirection( cameraViewMatrix );

    // glTF packs roughness (g) and metalness (b) in one texture
    const metalRough = sampleAtUv( sourceMaterial.roughnessMap );
    shadedMaterial.roughnessNode = sqrt( metalRough.g.mul( metalRough.g ).add( kernelRoughness ) );
    shadedMaterial.metalnessNode = metalRough.b;
    shadedMaterial.aoNode = sampleAtUv( sourceMaterial.aoMap ).r;
    shadedMaterial.emissiveNode = sampleAtUv( sourceMaterial.emissiveMap ).rgb;

    // Meshlet debug: flat colors per cluster
    const debugMaterial = new THREE.NodeMaterial();
    debugMaterial.positionNode = pulledPosition;
    debugMaterial.fragmentNode = Fn( () => {

      const meshletId = meshletIdBuffer.element( vMegaTriIdx ).add( vInstId.mul( 1000 ) );
      return hashColor( meshletId );

    } )();

    // Vis material: unlit visualization of channels
    const visMaterial = new THREE.NodeMaterial();
    visMaterial.positionNode = pulledPosition;
    visMaterial.fragmentNode = getVisColor(
      outputModeUniform,
      geometricNormal,
      applyNormalMap( geometricNormal, normalize( vTangent ), sampleAtUv( sourceMaterial.normalMap ) ),
      vUv,
      metalRough.g,
      metalRough.b,
      sampleAtUv( sourceMaterial.aoMap ).r,
      sampleAtUv( sourceMaterial.emissiveMap ).rgb
    );

    // The alternative materials are parked on userData so the mode switch can swap them in
    hwMesh = new THREE.Mesh( geometry, shadedMaterial );
    Object.assign( hwMesh.userData, {
      shadedMaterial: shadedMaterial,
      debugMaterial: debugMaterial,
      visMaterial: visMaterial
    } );
    hwMesh.frustumCulled = false;
    hwMesh.renderOrder = 2;
    scene.add( hwMesh );

  }
  // Fullscreen Resolve Pass
  // A fullscreen triangle rendered through the scene camera. Using vertexNode
  // makes positionView reconstruct per fragment from clip space, so the standard
  // lighting pipeline (environment + lights) can shade the visibility buffer.
  {

    const fullscreenGeometry = new THREE.BufferGeometry();
    const trianglePositions = new Float32Array( [ - 1, - 1, 0, 3, - 1, 0, - 1, 3, 0 ] );
    fullscreenGeometry.setAttribute( 'position', new THREE.Float32BufferAttribute( trianglePositions, 3 ) );
    fullscreenGeometry.boundingSphere = new THREE.Sphere().set( new THREE.Vector3(), Infinity );

    // Shared reconstruction — built once, referenced by every material slot;
    // identical node instances are emitted only once in the final shader

    // The rasterizer addresses the screen bottom-up, screenCoordinate is top-down
    const flippedY = float( screenSize.y ).sub( screenCoordinate.y );
    const pixelIndex = uint( flippedY ).mul( uint( screenSize.x ) ).add( uint( screenCoordinate.x ) );

    const packedTri = screenTriRead.element( pixelIndex );
    const triangleId = packedTri.bitAnd( TRIANGLE_INDEX_MASK );
    const instanceId = screenInstRead.element( pixelIndex ).bitAnd( INSTANCE_INDEX_MASK );

    // Visibility Buffer: Fetch exact vertices, normals and UVs
    const firstIndex = triangleId.mul( 3 );
    const indexA = indexBuffer.element( firstIndex.add( 0 ) );
    const indexB = indexBuffer.element( firstIndex.add( 1 ) );
    const indexC = indexBuffer.element( firstIndex.add( 2 ) );

    const worldMatrix = instanceWorldRead.element( instanceId );

    const toWorldPoint = ( index ) => worldMatrix.mul( vertexBuffer.element( index ) ).xyz;
    const cornerA = toWorldPoint( indexA );
    const cornerB = toWorldPoint( indexB );
    const cornerC = toWorldPoint( indexC );

    const uvA = uvBuffer.element( indexA );
    const uvB = uvBuffer.element( indexB );
    const uvC = uvBuffer.element( indexC );

    // Project Vertices to Screen Space
    const toClip = ( corner ) => projScreenMatrixUniform.mul( vec4( corner, 1.0 ) );
    const clipA = toClip( cornerA );
    const clipB = toClip( cornerB );
    const clipC = toClip( cornerC );

    const ndcA = clipA.xyz.div( clipA.w );
    const ndcB = clipB.xyz.div( clipB.w );
    const ndcC = clipC.xyz.div( clipC.w );

    const screenW = screenSize.x;
    const screenH = screenSize.y;

    const toScreen = ( ndc ) => ndc.xy.add( 1.0 ).mul( 0.5 ).mul( vec2( screenW, screenH ) );
    const screenA = toScreen( ndcA );
    const screenB = toScreen( ndcB );
    const screenC = toScreen( ndcC );

    const fragment = vec2( screenCoordinate.x, flippedY );

    // Compute Barycentrics
    const area = edgeFunction( screenA, screenB, screenC );
    const edgeA = edgeFunction( screenB, screenC, fragment );
    const edgeB = edgeFunction( screenC, screenA, fragment );
    const edgeC = edgeFunction( screenA, screenB, fragment );

    // Guard against division by zero for safe execution
    const safeArea = area.equal( 0.0 ).select( 1.0, area );
    const baryA = edgeA.div( safeArea );
    const baryB = edgeB.div( safeArea );
    const baryC = edgeC.div( safeArea );

    // Perspective correct interpolation (32-bit floats!)
    const baryOverWA = baryA.div( clipA.w );
    const baryOverWB = baryB.div( clipB.w );
    const baryOverWC = baryC.div( clipC.w );
    const invDepth = baryOverWA.add( baryOverWB ).add( baryOverWC );
    const safeInvDepth = invDepth.equal( 0.0 ).select( 1.0, invDepth );

    const weightA = baryOverWA.div( safeInvDepth );
    const weightB = baryOverWB.div( safeInvDepth );
    const weightC = baryOverWC.div( safeInvDepth );

    // Blends three per-corner values with the perspective-correct weights
    const interpolate = ( a, b, c ) => a.mul( weightA ).add( b.mul( weightB ) ).add( c.mul( weightC ) );

    const uvInterp = interpolate( uvA, uvB, uvC );

    const toWorldNormal = ( index ) => worldMatrix.mul( vec4( normalBuffer.element( index ).xyz, 0.0 ) ).xyz;
    const normalA = toWorldNormal( indexA );
    const normalB = toWorldNormal( indexB );
    const normalC = toWorldNormal( indexC );
    const normalInterp = normalize( interpolate( normalA, normalB, normalC ) );

    const worldPosition = interpolate( cornerA, cornerB, cornerC );
    const positionViewHelmet = cameraViewMatrix.mul( vec4( worldPosition, 1.0 ) ).xyz;
    const positionViewDirectionHelmet = positionViewHelmet.negate().normalize();

    // Compute screen-space derivatives analytically (neighboring pixels can
    // belong to different triangles, so hardware derivatives are unusable)
    const edgeDx = [ screenC.y.sub( screenB.y ), screenA.y.sub( screenC.y ), screenB.y.sub( screenA.y ) ];
    const edgeDy = [ screenB.x.sub( screenC.x ), screenC.x.sub( screenA.x ), screenA.x.sub( screenB.x ) ];

    const invWA = float( 1.0 ).div( clipA.w );
    const invWB = float( 1.0 ).div( clipB.w );
    const invWC = float( 1.0 ).div( clipC.w );

    const weightedSum = edgeA.mul( invWA ).add( edgeB.mul( invWB ) ).add( edgeC.mul( invWC ) );
    const safeWeightedSum = weightedSum.equal( 0.0 ).select( 1.0, weightedSum );

    // Derivative of a perspective-interpolated attribute along one screen
    // axis, given the per-axis edge derivatives and the three corner values
    const analyticDerivative = ( edgeSteps, a, b, c, interpolated ) => (
      edgeSteps[ 0 ].mul( invWA ).mul( a.sub( interpolated ) )
        .add( edgeSteps[ 1 ].mul( invWB ).mul( b.sub( interpolated ) ) )
        .add( edgeSteps[ 2 ].mul( invWC ).mul( c.sub( interpolated ) ) )
    ).div( safeWeightedSum );

    const dUvDx = analyticDerivative( edgeDx, uvA, uvB, uvC, uvInterp );
    const dUvDy = analyticDerivative( edgeDy, uvA, uvB, uvC, uvInterp );

    // Sample with explicit gradients
    const sampleMap = ( map ) => texture( map, uvInterp ).grad( dUvDx, dUvDy );

    // Specular antialiasing (Tokuyoshi & Kaplanyan) — widen roughness by the
    // normal's screen-space variance so sub-pixel geometry does not alias
    // into fireflies. The derivatives are analytic, like the UV gradients.
    const dNdx = analyticDerivative( edgeDx, normalA, normalB, normalC, normalInterp );
    const dNdy = analyticDerivative( edgeDy, normalA, normalB, normalC, normalInterp );
    const kernelRoughness = min( dNdx.dot( dNdx ).add( dNdy.dot( dNdy ) ).mul( SPECULAR_AA_VARIANCE ), SPECULAR_AA_MAX );

    // Discard pixels the rasterizer did not cover so the background shows through
    const coveredColor = ( colorNode ) => Fn( () => {

      // A zero depth field marks a pixel no fragment was written to
      If( packedTri.shiftRight( TRIANGLE_INDEX_BITS ).equal( 0 ), () => {

        Discard();

      } );

      return colorNode;

    } )();

    // Output depth so the HW mesh can depth test against the SW result
    const resolveDepth = Fn( () => {

      // Depth lives in the bits above the triangle index
      const depthBits = packedTri.shiftRight( TRIANGLE_INDEX_BITS );

      // Reconstruct NDC Z from non-linear depth (fourth-root distribution):
      // undo the encoding by raising the normalized value to the fourth power
      const encoded = float( depthBits ).div( DEPTH_TRI_MAX );
      const squared = encoded.mul( encoded );
      const fourthPower = squared.mul( squared );

      return float( 1.0 ).sub( fourthPower );

    } )();

    const fullscreenVertex = vec4( positionGeometry.xy, 0.0, 1.0 );

    // Tangent-space normal shared in form by the shaded and the vis material
    const mappedNormal = () => applyNormalMap(
      normalInterp,
      computeTangent( cornerA, cornerB, cornerC, uvA, uvB, uvC, normalInterp ),
      sampleMap( sourceMaterial.normalMap )
    );

    // Shaded: feed the reconstructed surface into the standard material pipeline
    const shadedMaterial = new THREE.MeshStandardNodeMaterial();
    shadedMaterial.contextNode = overrideNodes( [
      [ positionView, positionViewHelmet ],
      [ positionViewDirection, positionViewDirectionHelmet ]
    ] );
    shadedMaterial.vertexNode = fullscreenVertex;
    shadedMaterial.depthNode = resolveDepth;
    shadedMaterial.colorNode = coveredColor( sampleMap( sourceMaterial.map ) );
    shadedMaterial.normalNode = mappedNormal().transformDirection( cameraViewMatrix );

    // glTF packs roughness (g) and metalness (b) in one texture
    const metalRough = sampleMap( sourceMaterial.roughnessMap );
    shadedMaterial.roughnessNode = sqrt( metalRough.g.mul( metalRough.g ).add( kernelRoughness ) );
    shadedMaterial.metalnessNode = metalRough.b;
    shadedMaterial.aoNode = sampleMap( sourceMaterial.aoMap ).r;
    shadedMaterial.emissiveNode = sampleMap( sourceMaterial.emissiveMap ).rgb;

    // Meshlet debug: flat colors per cluster
    const debugMaterial = new THREE.NodeMaterial();
    debugMaterial.vertexNode = fullscreenVertex;
    debugMaterial.depthNode = resolveDepth;
    debugMaterial.fragmentNode = coveredColor( hashColor( meshletIdBuffer.element( triangleId ).add( instanceId.mul( 1000 ) ) ) );

    // Vis material: unlit visualization of channels
    const visMaterial = new THREE.NodeMaterial();
    visMaterial.contextNode = context( {
      positionView: positionViewHelmet,
      positionViewDirection: positionViewDirectionHelmet
    } );
    visMaterial.vertexNode = fullscreenVertex;
    visMaterial.depthNode = resolveDepth;
    visMaterial.fragmentNode = coveredColor( getVisColor(
      outputModeUniform,
      normalInterp,
      mappedNormal(),
      uvInterp,
      metalRough.g,
      metalRough.b,
      sampleMap( sourceMaterial.aoMap ).r,
      sampleMap( sourceMaterial.emissiveMap ).rgb
    ) );

    // The alternative materials are parked on userData so the mode switch can swap them in
    resolveMesh = new THREE.Mesh( fullscreenGeometry, shadedMaterial );
    Object.assign( resolveMesh.userData, {
      shadedMaterial: shadedMaterial,
      debugMaterial: debugMaterial,
      visMaterial: visMaterial
    } );
    resolveMesh.frustumCulled = false;
    resolveMesh.renderOrder = 1;
    scene.add( resolveMesh );

    // Presents the scene to the canvas (tone mapping applies here)
    blitTexNode = texture( sceneRT.texture );

    const presentMaterial = new THREE.NodeMaterial();
    presentMaterial.colorNode = blitTexNode;

    blitQuad = new THREE.QuadMesh( presentMaterial );

  }
  1058. updateMode();
  1059. window.addEventListener( 'resize', onWindowResize );
  1060. }
  /**
   * Applies the current `options.Output` selection.
   *
   * Chooses which of the materials stored in each mesh's `userData`
   * (shaded, debug or vis) is active on both `resolveMesh` and `hwMesh`,
   * writes the channel index to `outputModeUniform` for the non-debug
   * selections, and sets the renderer's tone mapping.
   */
  function updateMode() {

    const selection = options.Output;

    // Channel index written to outputModeUniform for each selectable output.
    const channelBySelection = {
      'Default': 0,
      'Geometry Normal': 1,
      'Normal Map': 2,
      'UV': 3,
      'Roughness': 4,
      'Metalness': 5,
      'AO': 6,
      'Emissive': 7
    };

    // Every view except the default shaded one is shown without tone mapping.
    let materialSlot;
    let toneMapping = THREE.NoToneMapping;

    if ( selection === 'Meshlet Debug' ) {

      // The debug view leaves the channel uniform untouched.
      materialSlot = 'debugMaterial';

    } else if ( selection === 'Default' ) {

      outputModeUniform.value = 0;
      materialSlot = 'shadedMaterial';
      toneMapping = THREE.ACESFilmicToneMapping;

    } else {

      outputModeUniform.value = channelBySelection[ selection ];
      materialSlot = 'visMaterial';

    }

    // Both meshes always switch to the same material slot.
    resolveMesh.material = resolveMesh.userData[ materialSlot ];
    hwMesh.material = hwMesh.userData[ materialSlot ];
    renderer.toneMapping = toneMapping;

  }
  /**
   * (Re)creates every resource whose size depends on the drawing buffer:
   * the two per-pixel uint storage buffers, the scene render target with
   * its depth texture, and the packed HZB depth pyramid.
   *
   * Called on start-up and after each resize. Returns early when nothing
   * changed: the pixel count matches `maxPixels` and, if a render target
   * already exists, its width and height match the drawing buffer. Checking
   * the dimensions as well as the product matters because two different
   * sizes can share the same pixel count (e.g. 800x600 and 600x800), and
   * the render target and HZB level table depend on the actual width and
   * height.
   */
  function createScreenBuffers() {

    const size = new THREE.Vector2();
    renderer.getDrawingBufferSize( size );

    const newMaxPixels = size.x * size.y;

    // Without a render target yet, fall back to the pixel-count test alone.
    const sameDimensions = ! sceneRT || ( sceneRT.width === size.x && sceneRT.height === size.y );

    if ( newMaxPixels === maxPixels && sameDimensions ) return;

    maxPixels = newMaxPixels;

    // Release the previous per-pixel buffers before allocating new ones.
    if ( screenTriAttr ) screenTriAttr.dispose();
    if ( screenInstAttr ) screenInstAttr.dispose();

    // The level table and level count uniform are created once and then
    // only refilled on later calls.
    if ( hzbLevelTable === undefined ) {

      hzbLevelTable = uniformArray( Array.from( { length: MAX_HZB_LEVELS }, () => new THREE.Vector4() ), 'vec4' );
      hzbLevelCountUniform = uniform( 0.0 );

    }

    // One uint per drawing-buffer pixel in each of the two buffers.
    const screenTriData = new Uint32Array( maxPixels );
    screenTriAttr = new THREE.StorageBufferAttribute( screenTriData, 1 );

    const screenInstData = new Uint32Array( maxPixels );
    screenInstAttr = new THREE.StorageBufferAttribute( screenInstData, 1 );

    if ( screenTriAtomic === undefined ) {

      // First call: create an atomic and a read-only view of each buffer.
      screenTriAtomic = storage( screenTriAttr, 'uint', maxPixels ).toAtomic();
      screenTriRead = storage( screenTriAttr, 'uint', maxPixels ).toReadOnly();
      screenInstAtomic = storage( screenInstAttr, 'uint', maxPixels ).toAtomic();
      screenInstRead = storage( screenInstAttr, 'uint', maxPixels ).toReadOnly();

    } else {

      // Later calls: keep the existing nodes and point them at the new buffers.
      screenTriAtomic.value = screenTriAttr;
      screenTriAtomic.bufferCount = maxPixels;
      screenTriRead.value = screenTriAttr;
      screenTriRead.bufferCount = maxPixels;

      screenInstAtomic.value = screenInstAttr;
      screenInstAtomic.bufferCount = maxPixels;
      screenInstRead.value = screenInstAttr;
      screenInstRead.bufferCount = maxPixels;

      computeClear.count = maxPixels;

      // Dispose the kernels and materials after rebinding
      // (presumably so they are rebuilt against the new buffers).
      computeClear.dispose();
      computeRasterize.dispose();
      computeFrustum.dispose();
      computeDispatch.dispose();
      computeHWArgs.dispose();

      resolveMesh.userData.shadedMaterial.dispose();
      resolveMesh.userData.debugMaterial.dispose();
      resolveMesh.userData.visMaterial.dispose();

      hwMesh.userData.shadedMaterial.dispose();
      hwMesh.userData.debugMaterial.dispose();
      hwMesh.userData.visMaterial.dispose();

    }

    // Scene render target (also provides the depth for the pyramid)
    if ( sceneRT ) {

      sceneRT.dispose();

    }

    sceneRT = new THREE.RenderTarget( size.x, size.y, { type: THREE.HalfFloatType } );
    sceneRT.depthTexture = new THREE.DepthTexture( size.x, size.y );
    sceneRT.depthTexture.type = THREE.FloatType;

    // Re-point the existing texture nodes at the new target's textures.
    if ( blitTexNode ) {

      blitTexNode.value = sceneRT.texture;
      depthSourceTexNode.value = sceneRT.depthTexture;

    }

    // HZB pyramid — all mip levels packed into one storage buffer,
    // level 0 at half resolution, each level the max (farthest) of 2x2 below
    let levelWidth = Math.ceil( size.x / 2 );
    let levelHeight = Math.ceil( size.y / 2 );
    let totalTexels = 0;

    hzbLevelCount = 0;

    while ( hzbLevelCount < MAX_HZB_LEVELS ) {

      // Table entry: (offset into the packed buffer, width, height, unused).
      hzbLevelTable.array[ hzbLevelCount ].set( totalTexels, levelWidth, levelHeight, 0 );
      totalTexels += levelWidth * levelHeight;
      hzbLevelCount ++;

      // Stop once the 1x1 level has been recorded.
      if ( levelWidth === 1 && levelHeight === 1 ) break;

      levelWidth = Math.max( 1, Math.ceil( levelWidth / 2 ) );
      levelHeight = Math.max( 1, Math.ceil( levelHeight / 2 ) );

    }

    hzbLevelCountUniform.value = hzbLevelCount;

    const hzbData = new Float32Array( totalTexels ).fill( 1 ); // far plane — occludes nothing
    const hzbAttr = new THREE.StorageBufferAttribute( hzbData, 1 );

    if ( hzbBuffer === undefined ) {

      hzbBuffer = storage( hzbAttr, 'float', totalTexels );
      hzbRead = storage( hzbAttr, 'float', totalTexels ).toReadOnly();

    } else {

      // Remember the outgoing attribute so it can be released once both
      // nodes reference the replacement, as the per-pixel buffers are above.
      const staleHzbAttr = hzbBuffer.value;

      hzbBuffer.value = hzbAttr;
      hzbBuffer.bufferCount = totalTexels;
      hzbRead.value = hzbAttr;
      hzbRead.bufferCount = totalTexels;

      if ( staleHzbAttr && staleHzbAttr !== hzbAttr ) staleHzbAttr.dispose();

    }

    // Resize each pyramid kernel to its level's texel count. Kernels past
    // the last level reuse that level's size.
    for ( let k = 0; k < hzbKernels.length; k ++ ) {

      const info = hzbLevelTable.array[ Math.min( k, hzbLevelCount - 1 ) ];
      hzbKernels[ k ].count = info.y * info.z;
      hzbKernels[ k ].dispose();

    }

  }
  /**
   * Window `resize` handler: matches the camera aspect and renderer size to
   * the window, then rebuilds the screen-sized buffers.
   */
  function onWindowResize() {

    const width = window.innerWidth;
    const height = window.innerHeight;

    camera.aspect = width / height;
    camera.updateProjectionMatrix();

    renderer.setSize( width, height );

    createScreenBuffers();

  }
  // Scratch objects reused by animate() every frame.

  // Current frame: inverse camera world matrix, projection * inverse, and
  // the frustum derived from that product.
  const cameraInverse = new THREE.Matrix4();
  const projScreenMatrix = new THREE.Matrix4();
  const frustum = new THREE.Frustum();

  // Previous frame: matrix and camera position kept for the occlusion test.
  const prevProjScreen = new THREE.Matrix4();
  const prevCameraPos = new THREE.Vector3();

  // False until the first frame has seeded the previous-frame values.
  let prevValid = false;
  /**
   * Per-frame callback: updates the camera-derived uniforms, runs the
   * compute passes, renders the scene into `sceneRT`, rebuilds the depth
   * pyramid for the next frame, and presents the result to the canvas.
   */
  function animate() {

    // Nothing to draw until the resolve mesh exists (still loading).
    if ( resolveMesh === undefined ) return;

    controls.update();
    camera.updateMatrixWorld();

    const eye = camera.position;

    // projScreenMatrix = projection * inverse( camera world matrix )
    cameraInverse.copy( camera.matrixWorld ).invert();
    projScreenMatrix.multiplyMatrices( camera.projectionMatrix, cameraInverse );

    // On the first frame there is no history yet, so use the current state.
    if ( ! prevValid ) {

      prevProjScreen.copy( projScreenMatrix );
      prevCameraPos.copy( eye );
      prevValid = true;

    }

    // Hand last frame's state to the occlusion test before overwriting it
    // with this frame's values.
    prevProjScreenUniform.value.copy( prevProjScreen );
    prevCameraPosUniform.value.copy( prevCameraPos );

    prevProjScreen.copy( projScreenMatrix );
    prevCameraPos.copy( eye );

    frustum.setFromProjectionMatrix( projScreenMatrix );

    // Current-frame uniforms
    projScreenMatrixUniform.value.copy( projScreenMatrix );
    cameraPos.value.copy( eye );
    cotHalfFovUniform.value = camera.projectionMatrix.elements[ 5 ];

    // Copy each of the six frustum planes into the uniform array as
    // ( normal.xyz, constant ).
    const planeSlots = frustumPlanesUniform.array;

    for ( let planeIndex = 0; planeIndex < 6; planeIndex ++ ) {

      const { normal, constant } = frustum.planes[ planeIndex ];
      planeSlots[ planeIndex ].set( normal.x, normal.y, normal.z, constant );

    }

    // Compute passes, in order
    const computePasses = [ computeClear, computeFrustum, computeDispatch, computeRasterize, computeHWArgs ];

    for ( const pass of computePasses ) {

      renderer.compute( pass );

    }

    // Show only the meshes that belong to the selected rasterizer path.
    const rasterMode = options.Rasterizer;
    const showBoth = ( rasterMode === 'Both' );

    resolveMesh.visible = ( rasterMode === 'SW Only' || showBoth );
    hwMesh.visible = ( rasterMode === 'HW Only' || showBoth );

    // Current frame in linear HDR
    renderer.setRenderTarget( sceneRT );
    renderer.render( scene, camera );

    // Build the depth pyramid for next frame's occlusion culling
    let level = 0;

    while ( level < hzbLevelCount ) {

      renderer.compute( hzbKernels[ level ] );
      level ++;

    }

    // Present (tone mapping + output color space apply on the canvas)
    renderer.setRenderTarget( null );
    blitQuad.render( renderer );

  }
  1236. </script>
  1237. </body>
  1238. </html>
粤ICP备19079148号