<!-- webgpu_compute_reduce.html -->
  1. <html lang="en">
  2. <head>
  3. <title>three.js webgpu - compute reduction</title>
  4. <meta charset="utf-8">
  5. <meta name="viewport" content="width=device-width, user-scalable=no, minimum-scale=1.0, maximum-scale=1.0">
  6. <link type="text/css" rel="stylesheet" href="main.css">
  7. </head>
  8. <body>
  9. <style>
  10. #reduction-panel {
  11. background-color: #111;
  12. width: 100%;
  13. display: flex;
  14. position: fixed;
  15. height: auto;
  16. bottom: 0px;
  17. z-index: 99;
  18. flex-direction: column;
  19. justify-content: center;
  20. align-items: center;
  21. border-left: 2px solid #222;
  22. text-align: center;
  23. }
  24. #panel-title {
  25. width: fit-content;
  26. }
  27. .thread-row {
  28. display: flex;
  29. flex-direction: row;
  30. align-items: center;
  31. margin: 4px 0;
  32. position: relative;
  33. }
  34. .thread {
  35. width: 16px;
  36. height: 16px;
  37. background-color: #444;
  38. margin-right: 2px;
  39. transition: background-color 0.5s, transform 0.5s;
  40. }
  41. .stage-display {
  42. display: flex;
  43. flex-direction: column;
  44. justify-content: center;
  45. margin-bottom: 5px;
  46. }
  /* Caption shown above each reduction stage in the explanation panel. */
  .stage-label {
    font-size: 1.2em;
    color: #aaa;
    /* "bold" is a font-weight value; `font-style: bold` is invalid CSS and is dropped by browsers. */
    font-weight: bold;
    margin-top: 6px;
    margin-bottom: 20px;
  }
  54. .thread {
  55. display: flex;
  56. justify-content: center;
  57. align-items: center;
  58. width: 40px;
  59. height: 40px;
  60. margin: 2px;
  61. border: 1px solid rgba(255, 255, 255, 0.2);
  62. border-radius: 4px;
  63. background: linear-gradient(180deg, rgba(255,255,255,0.05), rgba(0,0,0,0.2));
  64. box-shadow: inset 0 0 2px rgba(255,255,255,0.1);
  65. font-family: monospace;
  66. color: white;
  67. }
  68. .thread_data {
  69. display: block;
  70. max-width: 100%;
  71. padding: 0 2px;
  72. white-space: nowrap;
  73. overflow: hidden;
  74. text-overflow: ellipsis;
  75. font-size: clamp(8px, 2vw, 14px);
  76. text-align: center;
  77. }
  78. .subgroup {
  79. display: flex;
  80. position: relative;
  81. margin-left: 10px;
  82. margin-right: 10px;
  83. }
  84. .subgroup::before {
  85. /* label text for each subgroup label */
  86. content: "subgroupAdd()";
  87. position: absolute;
  88. top: -20px;
  89. /* Hide until animation is displayed */
  90. opacity: 0;
  91. z-index: 100;
  92. transition: opacity 0.5s ease;
  93. font-weight: bold;
  94. color: white;
  95. width: 100%;
  96. }
  97. .subgroup::after {
  98. content: attr(data-label);
  99. position: absolute;
  100. bottom: -20px;
  101. opacity: 1;
  102. z-index: 100;
  103. color: gray;
  104. width: 100%;
  105. }
  106. .reduction-stage {
  107. margin-bottom: 20px;
  108. }
  109. @keyframes labelAbsorb {
  110. 0% {
  111. opacity: 0;
  112. transform: translateY(-50%);
  113. }
  114. 40% {
  115. opacity: 1;
  116. transform: translateY(0%);
  117. }
  118. 60% {
  119. opacity: 1;
  120. transform: translateY(0%);
  121. }
  122. 80% {
  123. opacity: 1;
  124. transform: translate(0%, -20%);
  125. }
  126. 100% {
  127. opacity: 0;
  128. transform: translate(0%, 100%);
  129. }
  130. }
  131. .subgroup.anim::before {
  132. opacity: 0;
  133. animation-name: labelAbsorb;
  134. animation-duration: 1.5s;
  135. transition:
  136. transform 0.6s ease-out,
  137. opacity 0.3s ease-in 0.3s;
  138. }
  139. </style>
  140. <div id="info">
  141. <a href="https://threejs.org" target="_blank" rel="noopener">three.js</a>
  142. <br /> This example demonstrates the performance of various simple parallel reduction kernels.
  143. <br /> Reference implementations are translated from the CUDA/WGSL code present in the following books/repos:
  144. <br /> Impl. 0 - 2: <a href="https://www.cambridge.org/core/books/programming-in-parallel-with-cuda/C43652A69033C25AD6933368CDBE084C"><i>Programming in Parallel with CUDA</i></a> by <a href="https://people.bss.phy.cam.ac.uk/~rea1/">Richard Ansorge</a>
  145. <br /> Impl. 3: <a href="https://github.com/frost-beta/betann/blob/main/betann/wgsl/reduce_all.wgsl"><i>betann reduce_all kernel</i></a> by <a href="https://github.com/zcbenz">zcbenz</a>
  146. <br /> Impl. 4: <a href="https://github.com/b0nes164/GPUPrefixSums/blob/main/GPUPrefixSumsWebGPUapis/SharedShaders/rts.wgsl"><i>GPUPrefixSums reduction approach</i></a> by <a href="https://github.com/b0nes164">b0nes164</a>
  147. <div id="left_side_display" style="position: absolute;top: 150px;left: 0;padding: 10px;background: rgba( 0, 0, 0, 0.5 );color: #fff;font-family: monospace;font-size: 12px;line-height: 1.5;pointer-events: none;text-align: left;"></div>
  148. <div id="right_side_display" style="position: absolute;top: 150px;right: 0;padding: 10px;background: rgba( 0, 0, 0, 0.5 );color: #fff;font-family: monospace;font-size: 12px;line-height: 1.5;pointer-events: none;text-align: left;"></div>
  149. </div>
  150. <div id="reduction-panel">
  151. <h3 id="panel-title" style="flex: 0 0 auto;">Subgroup Reduction Explanation</h3>
  152. <div class="reduction-stage" id="subgroup-reduction-stage">
  153. <div class="stage-label">Use subgroupAdd() to capture reduction of each workgroup's subgroups (Hover for animation)</div>
  154. <div class="stage-display">
  155. <div id="workgroup_threads" style="display: flex; justify-content: center; margin-bottom: 20px;"></div>
  156. <div id="subgroup_reduction" style="display: flex; justify-content: center; margin-bottom: 5px;"></div>
  157. </div>
  158. </div>
  159. </div>
  160. <script type="importmap">
  161. {
  162. "imports": {
  163. "three": "../build/three.webgpu.js",
  164. "three/webgpu": "../build/three.webgpu.js",
  165. "three/tsl": "../build/three.tsl.js",
  166. "three/addons/": "./jsm/"
  167. }
  168. }
  169. </script>
  170. <script type="module">
  171. import * as THREE from 'three/webgpu';
  172. import { instancedArray, Loop, If, vec3, dot, clamp, storage, uvec4, subgroupAdd, uniform, uv, uint, float, Fn, vec2, invocationLocalIndex, invocationSubgroupIndex, uvec2, floor, instanceIndex, workgroupId, workgroupBarrier, workgroupArray, subgroupSize, select, countTrailingZeros } from 'three/tsl';
  173. import WebGPU from 'three/addons/capabilities/WebGPU.js';
  174. import { GUI } from 'three/addons/libs/lil-gui.module.min.js';
  // Overlay elements for each side of the demo, keyed by their DOM id.
  const timestamps = {};
  for ( const displayId of [ 'left_side_display', 'right_side_display' ] ) {
    timestamps[ displayId ] = document.getElementById( displayId );
  }
  /**
   * Integer ceiling division: how many parts of `partSize` elements are needed
   * to cover `size` elements.
   *
   * @param {number} size - Total number of elements.
   * @param {number} partSize - Number of elements per part.
   * @returns {number} `size / partSize`, rounded up.
   */
  const divRoundUp = ( size, partSize ) => {
    // Adding ( partSize - 1 ) before flooring rounds any partial part up to a whole one.
    return Math.floor( ( size + partSize - 1 ) / partSize );
  };
  // Sizes used only by the HTML explanation panel (independent of the GPU kernels).
  const cssSubgroupSize = 4;
  const cssWorkgroupSize = 16;

  // Containers the panel's thread cells are appended to.
  const [ workgroupThreadsContainer, subgroupReductionContainer ] = [ 'workgroup_threads', 'subgroup_reduction' ]
    .map( ( containerId ) => document.getElementById( containerId ) );

  {
    // Append the illustrated sizes to the panel heading.
    const panelTitle = document.getElementById( 'panel-title' );
    panelTitle.textContent = panelTitle.textContent + ` (Subgroup Size: ${cssSubgroupSize}, Workgroup Size: ${cssWorkgroupSize})`;
  }
  /**
   * Builds one panel cell: a `div.thread` wrapping a `span.thread_data`
   * whose text is `data`.
   *
   * @param {*} data - Value shown inside the cell (assigned as text, never parsed as HTML).
   * @returns {HTMLDivElement} The detached cell element.
   */
  const createThreadWithData = ( data ) => {
    const label = document.createElement( 'span' );
    label.className = 'thread_data';
    label.textContent = data;

    const cell = document.createElement( 'div' );
    cell.className = 'thread';
    cell.append( label );

    return cell;
  };
  // Populate the explanation panel: one random-valued cell per thread, grouped
  // into subgroup wrappers, plus one zeroed result cell per subgroup.
  const workgroupThreads = [];
  const initialSubgroups = [];
  const initialData = [];
  let currentSubgroupDiv = null;

  for ( let firstThread = 0; firstThread < cssWorkgroupSize; firstThread += cssSubgroupSize ) {
    const subgroupIdx = firstThread / cssSubgroupSize;
    const lastThread = firstThread + cssSubgroupSize - 1;

    // Result cell that will later display this subgroup's sum.
    const reductionCell = createThreadWithData( 0 );
    reductionCell.id = `subgroup_reduction_element_${subgroupIdx}`;
    subgroupReductionContainer.appendChild( reductionCell );

    // Wrapper holding this subgroup's thread cells; data-label feeds the CSS ::after caption.
    currentSubgroupDiv = document.createElement( 'div' );
    currentSubgroupDiv.className = 'subgroup';
    currentSubgroupDiv.setAttribute( 'data-label', `Threads ${firstThread}-${lastThread}` );
    initialSubgroups.push( currentSubgroupDiv );
    workgroupThreadsContainer.appendChild( currentSubgroupDiv );

    for ( let threadIdx = firstThread; threadIdx <= lastThread && threadIdx < cssWorkgroupSize; threadIdx ++ ) {
      // Random integer in [1, 9].
      const value = Math.floor( Math.random() * 9 ) + 1;
      initialData.push( value );

      const cell = createThreadWithData( value );
      workgroupThreads.push( cell );
      currentSubgroupDiv.appendChild( cell );
    }
  }
  // Timers scheduled by activateLabelAnimation, keyed by subgroup element, so that a
  // hover which ends early (or restarts) can cancel callbacks that have not fired yet.
  const pendingLabelTimers = new WeakMap();

  // Cancels any not-yet-fired animation callbacks for the given subgroup element.
  const cancelLabelTimers = ( subgroupDiv ) => {
    const timers = pendingLabelTimers.get( subgroupDiv );
    if ( timers !== undefined ) {
      timers.forEach( ( timerId ) => clearTimeout( timerId ) );
      pendingLabelTimers.delete( subgroupDiv );
    }
  };

  // Returns the text span of the result cell belonging to subgroup `idx`.
  const getReductionLabel = ( idx ) => {
    return document.getElementById( `subgroup_reduction_element_${idx}` ).querySelector( '.thread_data' );
  };

  /**
   * Stops the "subgroupAdd()" label animation for one subgroup and zeroes its result cell.
   *
   * @param {HTMLElement} subgroupDiv - The `.subgroup` wrapper element.
   * @param {number} idx - Index of the subgroup within the workgroup.
   */
  const deactivateLabelAnimation = ( subgroupDiv, idx ) => {
    // Without this, a pending callback would repaint the totals after the reset.
    cancelLabelTimers( subgroupDiv );
    subgroupDiv.classList.remove( 'anim' );
    getReductionLabel( idx ).textContent = 0;
  };

  /**
   * Plays the "subgroupAdd()" label animation for one subgroup. After 1000 ms every
   * thread cell in the subgroup and the subgroup's result cell show the subgroup sum;
   * after 1500 ms the `anim` class is removed so the animation can be replayed.
   *
   * @param {HTMLElement} subgroupDiv - The `.subgroup` wrapper element.
   * @param {number} idx - Index of the subgroup within the workgroup.
   */
  const activateLabelAnimation = ( subgroupDiv, idx ) => {
    // Restarting while a previous run is in flight must not leave stale callbacks behind.
    cancelLabelTimers( subgroupDiv );

    const threads = Array.from( subgroupDiv.children );
    const firstThread = idx * cssSubgroupSize;
    const total = initialData
      .slice( firstThread, firstThread + cssSubgroupSize )
      .reduce( ( sum, value ) => sum + value, 0 );

    subgroupDiv.classList.add( 'anim' );

    const revealTimer = setTimeout( () => {
      threads.forEach( ( thread ) => {
        thread.querySelector( '.thread_data' ).textContent = total;
      } );
      getReductionLabel( idx ).textContent = total;
    }, 1000 );

    // Remove the class after the animation ends so it can be triggered again.
    const resetTimer = setTimeout( () => {
      subgroupDiv.classList.remove( 'anim' );
      pendingLabelTimers.delete( subgroupDiv );
    }, 1500 ); // matches animation duration in CSS

    pendingLabelTimers.set( subgroupDiv, [ revealTimer, resetTimer ] );
  };
  {
    // Hovering the stage plays the animation on every subgroup; leaving restores the initial view.
    const reductionStage = document.getElementById( 'subgroup-reduction-stage' );

    reductionStage.addEventListener( 'mouseenter', () => {
      for ( const [ idx, subgroupDiv ] of initialSubgroups.entries() ) {
        activateLabelAnimation( subgroupDiv, idx );
      }
    } );

    reductionStage.addEventListener( 'mouseleave', () => {
      for ( const [ idx, subgroupDiv ] of initialSubgroups.entries() ) {
        deactivateLabelAnimation( subgroupDiv, idx );
      }

      // Put each thread cell's original random value back.
      for ( const [ idx, thread ] of workgroupThreads.entries() ) {
        thread.querySelector( '.thread_data' ).textContent = initialData[ idx ];
      }
    } );
  }
  {
    // Abort the example with a visible message when WebGPU is unavailable.
    const hasWebGPU = WebGPU.isAvailable();
    if ( hasWebGPU === false ) {
      const errorMessage = WebGPU.getErrorMessage();
      document.body.appendChild( errorMessage );
      throw new Error( 'No WebGPU support' );
    }
  }
  // Number of elements to reduce.
  const size = 262144;
  // Number of 4-element vectors needed to hold `size` elements.
  const vecSize = divRoundUp( size, 4 );
  // The display grid is square: gridDim x gridDim cells.
  const gridDim = Math.sqrt( size );
  let maxWorkgroupSize = 64;

  // Selectable reduction kernels; per the original note, later entries are expected to run faster.
  const algorithms = [
    'Reduce 0 (N/2)',
    'Reduce 1 (Naive Accumulate)',
    'Reduce 2 (Workgroup Reduction)',
    'Reduce 3 (Subgroup Reduce)',
    'Reduce 4 (Subgroup Optimized)',
    'Incorrect Baseline',
  ];

  // Input Grid: input data laid out as a grid
  // Input Log2: input values at power-of-two indices (1, 2, 4, 8, 16, ..., size), laid out horizontally
  // Input Element 0: clamped input[0]
  const displayModes = [
    'Input Grid',
    'Input Log2',
    'Input Element 0',
    'Workgroup Sum Grid',
  ];

  // State shared by both displays, plus debug information.
  const unifiedEffectController = {
    // Grid dimensions, in elements
    gridElementWidth: uniform( gridDim ),
    gridElementHeight: uniform( gridDim ),
    // Dimensions of the part of the grid that is displayed, in elements
    gridDisplayWidth: uniform( gridDim ),
    gridDisplayHeight: uniform( gridDim ),
    // How the end result of the reduction is displayed;
    // ideally specific to the reduction method in use
    'Display Mode': 'Input Log2',
    loggedBuffer: 'Input Buffer',
    elementsReduced: size,
  };

  // State of the left-hand display.
  const leftEffectController = {
    // Name of the reduction algorithm this side runs
    algo: 'Reduce 0 (N/2)',
    // Flag: highlight the element during the validation check
    highlight: uniform( 0 ),
    // Index of `algo` within the algorithms array
    currentAlgo: uniform( 0 ),
    // Phase of the reduction (running, validating, resetting)
    state: 'Run Algo',
    // Active display mode
    displayMode: 'Input Log2',
    // Used by Reduce 0 only
    numThreadsDispatched: uniform( size / 2 ),
  };
  // State of the right-hand display (same fields as leftEffectController).
  const rightEffectController = {
    // Name of the reduction algorithm this side runs
    algo: 'Reduce 4 (Subgroup Optimized)',
    // Index of `algo` within the algorithms array. Derived from the name so the initial
    // value matches what the GUI onChange handler would compute (previously hard-coded
    // to 3, which is 'Reduce 3 (Subgroup Reduce)').
    currentAlgo: uniform( algorithms.indexOf( 'Reduce 4 (Subgroup Optimized)' ) ),
    // Flag: highlight the element during the validation check
    highlight: uniform( 0 ),
    // Active display mode
    displayMode: 'Input Element 0',
    // Phase of the reduction (running, validating, resetting)
    state: 'Run Algo',
    // Used by Reduce 0 only
    numThreadsDispatched: uniform( size / 2 )
  };
  // One material per side; its colorNode is swapped when the display mode changes.
  const leftMaterial = new THREE.MeshBasicNodeMaterial( { color: 0x00ff00 } );
  const rightMaterial = new THREE.MeshBasicNodeMaterial( { color: 0x00ff00 } );

  // Display mode name -> color node, filled in elsewhere.
  const leftDisplayColorNodes = {};
  const rightDisplayColorNodes = {};

  const gui = new GUI();

  // Adds an algorithm dropdown that keeps the side's `currentAlgo` uniform in sync with `algo`.
  const addAlgoSelector = ( controller ) => {
    gui.add( controller, 'algo', algorithms ).onChange( () => {
      controller.currentAlgo.value = algorithms.indexOf( controller.algo );
    } );
  };

  // Adds a display mode dropdown that swaps the side's material color node.
  const addDisplayModeSelector = ( controller, label, material, colorNodes ) => {
    gui.add( controller, 'displayMode', displayModes ).name( label ).onChange( () => {
      material.colorNode = colorNodes[ controller.displayMode ];
      material.needsUpdate = true;
    } );
  };

  addAlgoSelector( leftEffectController );
  addAlgoSelector( rightEffectController );
  addDisplayModeSelector( leftEffectController, 'Left Display Mode', leftMaterial, leftDisplayColorNodes );
  addDisplayModeSelector( rightEffectController, 'Right Display Mode', rightMaterial, rightDisplayColorNodes );

  // Read-only debug readouts plus the choice of buffer to log.
  const debugFolder = gui.addFolder( 'Debug' );

  const elementsReducedController = debugFolder.add( unifiedEffectController, 'elementsReduced' ).name( 'Elements Reduced' );
  elementsReducedController.disable();

  const stateLeftController = debugFolder.add( leftEffectController, 'state' ).name( 'Left Display State' );
  const stateRightController = debugFolder.add( rightEffectController, 'state' ).name( 'Right Display State' );
  stateLeftController.disable();
  stateRightController.disable();

  const loggableBuffers = [ 'Input Buffer', 'Input Vectorized Buffer', 'Workgroup Sums Buffer', 'Debug Buffer' ];
  debugFolder.add( unifiedEffectController, 'loggedBuffer', loggableBuffers ).name( 'Buffer to Log' );
  debugFolder.close();
  344. // HELPER FUNCTIONS
  // TSL function: rounds a uint up to a power of two by smearing the highest set bit
  // of ( x - 1 ) into every lower bit and then adding one. Returns 1 for x == 0.
  const pow2Ceil = Fn( ( [ x ] ) => {
    If( x.equal( uint( 0 ) ), () => uint( 1 ) );

    const val = x.sub( 1 ).toVar( 'val' );

    // Doubling shift widths cover all 32 bits.
    for ( const shift of [ 1, 2, 4, 8, 16 ] ) {
      val.assign( val.bitOr( val.shiftRight( shift ) ) );
    }

    return val.add( 1 );
  } ).setLayout( {
    name: 'pow2Ceil',
    type: 'uint',
    inputs: [
      { name: 'x', type: 'uint' }
    ]
  } );
  363. // ALGORITHM CONSTRUCTORS
  364. // REDUCE 1
  365. // Thanks to Sam0oneau of Graphics Programming Discord for the explanation.
  366. // (Graphics Programming Discord Message Link): https://discord.com/channels/318590007881236480/374061825454768129/1391248956171882597
  367. /* Reduce 1 Example (Assume Workgroup Size 256, numElements: 262144) -> Initial currentBuffer State: | 1, 1, 1, 1, ... |
  368. *
  369. * KERNEL 1:
  370. * Executes 256 threads by 256 workgroups. Each thread loops 4 times and accesses elements
  371. * at the indices below.
  372. * Thread 1 Thread 2 Thread 3
  373. * | 0, 65536, ..., n * 65536 | 1, 65537, .... (n * 65536) + 1 | 2, 65538, .... (n * 65536) + 2 | etc
  374. * Buffer Values: | 4, 4, 4, 4, ...|
  375. *
  376. * KERNEL 2:
  377. * Executes 256 threads by one workgroup. Each thread loops 1024 times
  378. * Thread 1 Thread 2 Thread 3
  379. * | 0, 256, ...., n * 256 | 1, 257, ... (n * 256) + 1 | 2, 258, ... (n * 256) + 2 | etc
  380. * Buffer Values: | 1024, 1024, 1024, 1024, ... |
  381. *
  382. * KERNEL 3:
  383. * Executes 1 thread by one workgroup. Single thread loops 256 times
  384. * Thread 1
  385. * | 0, 1, 2, 3, 4, 5, 6 ... etc|
  386. * Buffer Values: [262144, 1024, 1024]
  387. */
  /**
   * Builds the Reduce 1 compute kernel: every invocation walks the input with a stride of
   * `dispatchSize`, accumulates what it reads, and writes its partial sum back to
   * `inputBuffer` at its own instance index.
   *
   * @param {Object} createReduce1FnProps
   * @param {number} createReduce1FnProps.dispatchSize - Invocation count; also the loop stride.
   * @param {number} createReduce1FnProps.numElements - Upper bound (exclusive) of the indices read.
   * @param {Object} createReduce1FnProps.inputBuffer - Storage buffer node, read and written in place.
   * @param {number} createReduce1FnProps.workgroupSize - Workgroup size passed to compute().
   * @returns {Object} The node returned by `.compute()`.
   */
  const createReduce1Fn = ( { dispatchSize, numElements, inputBuffer, workgroupSize } ) => {
    const kernel = Fn( () => {
      const stride = uint( dispatchSize ).toVar( 'dispatchSize' );
      const threadSum = uint( 0 ).toVar();
      const cursor = instanceIndex.toVar( 'k' );

      Loop( cursor.lessThan( uint( numElements ) ), () => {
        threadSum.addAssign( inputBuffer.element( cursor ) );
        cursor.addAssign( uint( stride ) );
      } );

      inputBuffer.element( instanceIndex ).assign( threadSum );
    } );

    return kernel().compute( dispatchSize, [ workgroupSize ] );
  };
  402. // REDUCE 2
  403. // For non power of 2 # of workgroups
  /**
   * Builds the Reduce 2 compute kernel: each invocation accumulates a strided partial sum
   * into a workgroup array, the workgroup folds that array in halves, and local invocation 0
   * writes the result to `inputBuffer` at the workgroup's index.
   *
   * @param {Object} createReduce2FnProps
   * @param {number} createReduce2FnProps.workgroupSize - Workgroup size and length of the workgroup array.
   * @param {number} createReduce2FnProps.dispatchSize - Invocation count; also the accumulation stride.
   * @param {number} createReduce2FnProps.numElements - Upper bound (exclusive) of the indices read.
   * @param {Object} createReduce2FnProps.inputBuffer - Storage buffer node, read and written in place.
   * @returns {Object} The node returned by `.compute()`.
   */
  const createReduce2Fn = ( { workgroupSize, dispatchSize, numElements, inputBuffer } ) => {
    const kernel = Fn( () => {
      const partialSums = workgroupArray( 'uint', workgroupSize );
      const step = instanceIndex.toVar( 'k' );

      // Accessor for this invocation's slot; builds a new element node on every call.
      const ownSlot = () => partialSums.element( invocationLocalIndex );

      ownSlot().assign( uint( 0 ) );

      Loop( step.lessThan( uint( numElements ) ), () => {
        ownSlot().addAssign( inputBuffer.element( step ) );
        step.addAssign( uint( dispatchSize ) );
      } );

      workgroupBarrier();

      // Reuse the loop variable as the fold distance. Starting from half of the next power of
      // two, together with the bounds check below, handles non-power-of-two workgroup sizes.
      step.assign( pow2Ceil( uint( workgroupSize ) ).div( 2 ) );

      Loop( step.greaterThan( 0 ), () => {
        const hasPartner = invocationLocalIndex.lessThan( step ).and( invocationLocalIndex.add( step ).lessThan( workgroupSize ) );

        If( hasPartner, () => {
          ownSlot().addAssign( partialSums.element( invocationLocalIndex.add( step ) ) );
        } );

        workgroupBarrier();
        step.divAssign( 2 );
      } );

      If( invocationLocalIndex.equal( uint( 0 ) ), () => {
        inputBuffer.element( workgroupId.x ).assign( partialSums.element( uint( 0 ) ) );
      } );
    } );

    return kernel().compute( dispatchSize, [ workgroupSize ] );
  };
  430. // REDUCE 3
  431. /* Create array with enough indices for worst-case subgroup size */
  /**
   * Creates a workgroup array with one slot per subgroup, sized for the worst case
   * (the smallest subgroup size, i.e. the largest number of subgroups per workgroup).
   *
   * @param {string} type - Element type of the array (e.g. 'uint').
   * @param {number} workgroupSize - Number of invocations per workgroup.
   * @param {number} [minSubgroupSize=4] - Smallest subgroup size to allow for.
   * @returns {Object} The node returned by `workgroupArray()`.
   */
  const createSubgroupArray = ( type, workgroupSize, minSubgroupSize = 4 ) => {
    // Honour the requested element type (it was previously ignored in favour of 'uint') and
    // round up so the element count is a whole number when the sizes do not divide evenly.
    return workgroupArray( type, Math.ceil( workgroupSize / minSubgroupSize ) );
  };
  435. // zcbenz implementation
  436. // https://github.com/frost-beta/betann/blob/8aa2701caf63fb29bd4cd2454e656973342c1588/betann/wgsl/reduce_ops.wgsl#L71
  /**
   * Emits the per-invocation accumulation for one row: the row is processed in blocks of
   * `workgroupSize * workPerThread` elements, and within each block every invocation adds
   * `workPerThread` consecutive elements to `total`.
   *
   * Must be called while a TSL function is being built. Leftover elements (a row size that
   * is not a multiple of the block size) are not handled; the example relies on row sizes
   * that divide evenly.
   *
   * @param {Object} rowReduceProps
   * @param {number} rowReduceProps.workgroupSize - Number of invocations per workgroup.
   * @param {Object} rowReduceProps.inputBuffer - Buffer node the elements are read from.
   * @param {Object} rowReduceProps.total - Variable node the sum is accumulated into.
   * @param {Object} rowReduceProps.rowOffset - Node: index of the row's first element.
   * @param {Object} rowReduceProps.currentRowSize - Node: number of elements in this row.
   * @param {number|Object} rowReduceProps.workPerThread - Elements each invocation reads per block.
   * @param {boolean} [rowReduceProps.vectorized] - When truthy, each element is treated as a
   *   4-component vector and its components are summed with a dot product.
   */
  const RowReduce = ( rowReduceProps ) => {
    const { workgroupSize, inputBuffer, total, rowOffset, currentRowSize, workPerThread, vectorized } = rowReduceProps;

    // Number of elements one workgroup ingests per block
    // (e.g. 1024 for a workgroupSize of 256 and a workPerThread of 4).
    const blockSize = uint( workgroupSize ).mul( workPerThread );
    const block = uint( 0 ).toVar( 'block' );

    // Number of whole blocks in the row (e.g. two for a row of 2048 and a block of 1024).
    const blockLimiter = currentRowSize.div( blockSize ).toVar( 'blockLimiter' );

    Loop( block.lessThan( blockLimiter ), () => {
      const blockOffset = block.mul( blockSize );
      const startThread = blockOffset.add( invocationLocalIndex.mul( workPerThread ) );
      const localThreadOffset = uint( 0 ).toVar( 'localThreadOffset' );

      Loop( localThreadOffset.lessThan( workPerThread ), () => {
        // One lookup serves both paths. The index previously ended in a truncated `.addLocal`,
        // which evaluated to undefined and broke the vectorized path.
        const inputElement = inputBuffer.element( rowOffset.add( startThread ).add( localThreadOffset ) );

        if ( vectorized ) {
          // Sum the four components of the vector.
          total.addAssign( dot( inputElement, uvec4( 1 ) ) );
        } else {
          total.addAssign( inputElement );
        }

        // Advance to this invocation's next element.
        localThreadOffset.addAssign( 1 );
      } );

      // Advance to the next block.
      block.addAssign( 1 );
    } );
  };
  /**
   * Emits the workgroup-level reduction of `total`: a subgroupAdd first, then, while more than
   * one partial sum remains, the subgroup sums are staged in a workgroup array and added again.
   *
   * Must be called while a TSL function is being built.
   *
   * @param {Object} workgroupReduceProps
   * @param {Object} workgroupReduceProps.total - Variable node holding each invocation's sum; updated in place.
   * @param {number} workgroupReduceProps.workgroupSize - Number of invocations per workgroup.
   */
  const WorkgroupReduce = ( { total, workgroupSize } ) => {
    const stagedSums = createSubgroupArray( 'uint', workgroupSize );

    // Every invocation receives the sum over its subgroup.
    total.assign( subgroupAdd( total ) );

    // Number of partial sums still to be combined.
    const remaining = uint( workgroupSize ).div( subgroupSize ).toVar( 'delta' );
    // Index of this invocation's subgroup within the workgroup.
    const subgroupRank = invocationLocalIndex.div( subgroupSize );

    Loop( float( remaining ).greaterThan( 1.0 ), () => {
      // One invocation per subgroup publishes that subgroup's sum.
      If( invocationSubgroupIndex.equal( 0 ), () => {
        stagedSums.element( subgroupRank ).assign( total );
      } );

      // All subgroups must have written before anyone reads.
      workgroupBarrier();

      // The first `remaining` invocations pick up a staged sum; all others contribute 0.
      const picked = select( invocationLocalIndex.lessThan( remaining ), stagedSums.element( invocationLocalIndex ), 0 ).uniformFlow();
      total.assign( picked );

      // Original note: the number of subgroups in a workgroup is always below the number of
      // threads in a subgroup, so this subgroupAdd still synchronizes.
      total.assign( subgroupAdd( total ) );

      remaining.divAssign( subgroupSize );
    } );
  };
  /**
   * Builds the Reduce 3 kernel body: each workgroup sums one row of `inputBuffer`
   * (RowReduce followed by WorkgroupReduce) and local invocation 0 stores the result in
   * `intermediateBuffer` at the workgroup's index.
   *
   * @param {Object} createReduce3FnProps
   * @param {number} createReduce3FnProps.workgroupSize - Number of invocations per workgroup.
   * @param {number|Object} createReduce3FnProps.workPerThread - Elements each invocation reads per block.
   * @param {Object} createReduce3FnProps.inputBuffer - Buffer node to reduce.
   * @param {Object} createReduce3FnProps.intermediateBuffer - Buffer node receiving one sum per workgroup.
   * @param {number|Object} createReduce3FnProps.rowSize - Number of elements assigned to each workgroup.
   * @returns {Object} The invoked TSL function (`.compute()` is not applied here).
   */
  const createReduce3Fn = ( { workgroupSize, workPerThread, inputBuffer, intermediateBuffer, rowSize } ) => {
    const kernel = Fn( () => {
      const inputSize = uint( inputBuffer.bufferCount );
      const rowOffset = workgroupId.x.mul( rowSize );

      // A row that would run past the end of the input is shortened to the elements
      // that are left (or to 0); any other row keeps the full rowSize.
      const exceedsInput = rowOffset.add( rowSize ).greaterThan( inputSize );
      const elementsLeft = select( inputSize.greaterThan( rowOffset ), inputSize.sub( rowOffset ), 0 ).uniformFlow();
      const currentRowSize = select( exceedsInput, elementsLeft, rowSize ).uniformFlow();

      const total = uint( 0 ).toVar( 'total' );

      RowReduce( { inputBuffer, total, rowOffset, currentRowSize, workPerThread, workgroupSize } );
      WorkgroupReduce( { total, workgroupSize } );

      // One write per workgroup.
      If( invocationLocalIndex.equal( 0 ), () => {
        intermediateBuffer.element( workgroupId.x ).assign( total );
      } );
    } );

    return kernel();
  };
  520. // REDUCE 4
  521. // b0nes164 inspired implementation with vec4
  522. const createReduce4Fn = ( props ) => {
  523. // Can't pass in subgroup size since we can't always be certain what size is at runtime
  524. const { size, workPerThread, workgroupSize, inputBuffer, intermediateBuffer } = props;
  525. const ELEMENTS_PER_VEC4 = 4;
  526. // The number of individual elements a single workgroup will access
  527. const partitionSize = workgroupSize * workPerThread * ELEMENTS_PER_VEC4;
  528. const vecSize = divRoundUp( size, ELEMENTS_PER_VEC4 );
  529. // Can also be calculated using divRoundUp( vecSize, workgroupSize * workPerThread );
  530. const numWorkgroups = divRoundUp( size, partitionSize );
  531. // Currently no way to specify dispatch size in increments of workgroups, so we convert to numInvocations
  532. const numInvocations = numWorkgroups * workgroupSize;
  533. const fnDef = Fn( () => {
  534. const perSubgroupReductionArray = createSubgroupArray( 'uint', workgroupSize );
  535. // Get the index of the subgroup within the workgroup
  536. const subgroupMetaRank = invocationLocalIndex.div( subgroupSize );
  537. // Each subgroup block scans across 4 subgroups. So when we move into a new subgroup,
  538. // align that subgroups' accesses to the next 4 subgroups
  539. const subgroupOffset = subgroupMetaRank.mul( subgroupSize ).mul( workPerThread );
  540. subgroupOffset.addAssign( invocationSubgroupIndex );
  541. // Per workgroup, offset by number of vectorized elements scanned per workgroup
  542. const workgroupOffset = workgroupId.x.mul( uint( maxWorkgroupSize ).mul( workPerThread ) );
  543. const startThread = subgroupOffset.add( workgroupOffset );
  544. const subgroupReduction = uint( 0 );
  545. // Each thread will accumulate values from across 'workPerThread' subgroups
  546. If( workgroupId.x.lessThan( uint( numWorkgroups ).sub( 1 ) ), () => {
  547. Loop( {
  548. start: uint( 0 ),
  549. end: workPerThread,
  550. type: 'uint',
  551. condition: '<',
  552. name: 'currentSubgroupInBlock'
  553. }, () => {
  554. // Get vectorized element from input array
  555. const val = inputBuffer.element( startThread );
  556. // Sum values within vec4 together by using result of dot product
  557. subgroupReduction.addAssign( dot( uvec4( 1 ), val ) );
  558. // Increment so thread will scan value in next subgroup
  559. startThread.addAssign( subgroupSize );
  560. } );
  561. } );
  562. // Ensure that the last workgroup does not access out of bounds indices
  563. If( workgroupId.x.equal( uint( numWorkgroups ).sub( 1 ) ), () => {
  564. Loop( {
  565. start: uint( 0 ),
  566. end: workPerThread,
  567. type: 'uint',
  568. condition: '<',
  569. name: 'currentSubgroupInBlock'
  570. }, () => {
  571. // Ensure index is less than number of available vectors in inputBuffer
  572. const val = select( startThread.lessThan( uint( vecSize ) ), inputBuffer.element( startThread ), uvec4( 0 ) ).uniformFlow();
  573. subgroupReduction.addAssign( dot( val, uvec4( 1 ) ) );
  574. startThread.addAssign( subgroupSize );
  575. } );
  576. } );
  577. subgroupReduction.assign( subgroupAdd( subgroupReduction ) );
  578. // Assuming that each element in the input buffer is 1, we generally expect each invocation's subgroupReduction
  579. // value to be ELEMENTS_PER_VEC4 * workPerThread * subgroupSize
  580. // Delegate one thread per subgroup to assign each subgroup's reduction to the workgroup array
  581. If( invocationSubgroupIndex.equal( uint( 0 ) ), () => {
  582. perSubgroupReductionArray.element( subgroupMetaRank ).assign( subgroupReduction );
  583. } );
  584. // Ensure that each workgroup has populated the perSubgroupReductionArray with data
  585. // from each of it's subgroups
  586. workgroupBarrier();
  587. if ( props.debugBuffer ) {
  588. If( invocationLocalIndex.equal( uint( 0 ) ), () => {
  589. props.debugBuffer.element( workgroupId.x ).assign( subgroupReduction );
  590. } );
  591. workgroupBarrier();
  592. }
  593. // WORKGROUP LEVEL REDUCE
  594. // Multiple approaches here
  595. // log2(subgroupSize) -> TSL log2 function
  596. // countTrailingZeros/findLSB(subgroupSize) -> TSL function that counts trailing zeros in number bit representation
  597. // Can technically petition GPU for subgroupSize in shader and calculate logs on CPU at cost of shader being generalizable across devices
  598. // May also break if subgroupSize changes when device is lost or if program is rerun on lower power device
  599. const subgroupSizeLog = countTrailingZeros( subgroupSize ).toVar( 'subgroupSizeLog' );
  600. const spineSize = uint( workgroupSize ).shiftRight( subgroupSizeLog );
  601. const spineSizeLog = countTrailingZeros( spineSize ).toVar( 'spineSizeLog' );
  602. // Align size to powers of subgroupSize
  603. const squaredSubgroupLog = ( spineSizeLog.add( subgroupSizeLog ).sub( 1 ) );
  604. squaredSubgroupLog.divAssign( subgroupSizeLog );
  605. squaredSubgroupLog.mulAssign( subgroupSizeLog );
  606. const alignedSize = ( uint( 1 ).shiftLeft( squaredSubgroupLog ) ).toVar( 'alignedSize' );
  607. // aligned size 2 * 4
  608. const offset = uint( 0 );
  609. // In cases where the number of subgroups in a workgroup is greater than the subgroup size itself,
  610. // we need to iterate over the array again to capture all the data in the workgroup array buffer
  611. Loop( { start: subgroupSize, end: alignedSize, condition: '<=', name: 'j', type: 'uint', update: '<<= subgroupSizeLog' }, () => {
  612. const subgroupIndex = ( ( invocationLocalIndex.add( 1 ) ).shiftLeft( offset ) ).sub( 1 );
  613. const isValidSubgroupIndex = subgroupIndex.lessThan( spineSize ).toVar( 'isValidSubgroupIndex' );
  614. // Reduce values within the local workgroup memory.
  615. // Set toVar to ensure subgroupAdd executes before (not within) the if statement.
  616. const t = subgroupAdd(
  617. select(
  618. isValidSubgroupIndex,
  619. perSubgroupReductionArray.element( subgroupIndex ),
  620. 0
  621. ).uniformFlow()
  622. ).toVar( 't' );
  623. // Can assign back to workgroupArray since all
// subgroup threads work in lockstep for subgroupAdd
  625. If( isValidSubgroupIndex, () => {
  626. perSubgroupReductionArray.element( subgroupIndex ).assign( t );
  627. } );
  628. // Ensure all threads have completed work
  629. workgroupBarrier();
  630. offset.addAssign( subgroupSizeLog );
  631. } );
  632. // Assign single thread from workgroup to assign workgroup reduction
  633. If( invocationLocalIndex.equal( uint( 0 ) ), () => {
  634. const reducedWorkgroupSum = perSubgroupReductionArray.element( uint( spineSize ).sub( 1 ) );
  635. intermediateBuffer.element( workgroupId.x ).assign( reducedWorkgroupSum );
  636. } );
  637. } )().compute( numInvocations, [ maxWorkgroupSize ] );
  638. return fnDef;
  639. };
  640. // INCORRECT BASELINE
  641. const createIncorrectBaselineFn = ( incorrectBaselineProps ) => {
  642. const { inputBuffer } = incorrectBaselineProps;
  643. const fnDef = Fn( () => {
  644. inputBuffer.element( instanceIndex ).assign( 99999 );
  645. } )();
  646. return fnDef;
  647. };
  648. init();
  649. init( false );
  650. async function init( leftSideDisplay = true ) {
  651. const effectController = leftSideDisplay ? leftEffectController : rightEffectController;
  652. const aspect = ( window.innerWidth / 2 ) / window.innerHeight;
  653. const camera = new THREE.OrthographicCamera( - aspect, aspect, 1, - 1, 0, 2 );
  654. camera.position.z = 1;
  655. const scene = new THREE.Scene();
  656. const array = new Uint32Array( Array.from( { length: size }, () => {
  657. return 1;
  658. } ) );
  659. // Represents array of data as uints in compute shader.
  660. const inputStorage = instancedArray( array, 'uint' ).setPBO( true ).setName( `Current_${leftSideDisplay ? 'Left' : 'Right'}` );
  661. // Represents array of data as vec4s in compute shader;
  662. const inputVec4BufferAttribute = new THREE.StorageInstancedBufferAttribute( array, 4 );
  663. const inputVectorizedStorage = storage( inputVec4BufferAttribute, 'uvec4', vecSize ).setPBO( true ).setName( `CurrentVectorized_${leftSideDisplay ? 'Left' : 'Right'}` );
  664. // Reduce 3 Calculations
  665. const workPerThread = 4;
  666. const numRows = workPerThread * 32;
  667. const rowSize = divRoundUp( size, numRows );
  668. const workgroupSumsArray = new Uint32Array( numRows );
  669. const workgroupSumsStorage = instancedArray( workgroupSumsArray, 'uint' ).setPBO( true ).setName( `WorkgroupSums_${leftSideDisplay ? 'Left' : 'Right'}` );
  670. const debugArray = new Uint32Array( 1024 );
  671. const debugStorage = instancedArray( debugArray, 'uint' ).setPBO( true ).setName( `Debug_${leftSideDisplay ? 'Left' : 'Right'}` );
  672. const buffers = {
  673. 'Input Buffer': inputStorage,
  674. 'Input Vectorized Buffer': inputVectorizedStorage,
  675. 'Workgroup Sums Buffer': workgroupSumsStorage,
  676. 'Debug Buffer': debugStorage,
  677. };
  678. const logFunctionName = `Log ${leftSideDisplay ? 'Left' : 'Right'} Side`;
  679. const functionObj = {};
  680. functionObj[ logFunctionName ] = async() => {
  681. const selectedBuffer = buffers[ unifiedEffectController.loggedBuffer ];
  682. const readbackBuffer = new THREE.ReadbackBuffer( selectedBuffer.value );
  683. const result = new Uint32Array( await renderer.getArrayBufferAsync( readbackBuffer ) );
  684. console.log( result );
  685. // Remove GPU/CPU readback buffer from memory
  686. readbackBuffer.dispose();
  687. };
  688. debugFolder.add( functionObj, `Log ${leftSideDisplay ? 'Left' : 'Right'} Side` );
  689. const computeResetBufferFn = Fn( () => {
  690. inputStorage.element( instanceIndex ).assign( 1 );
  691. } );
  692. const computeResetWorkgroupSumsFn = Fn( () => {
  693. workgroupSumsStorage.element( instanceIndex ).assign( 0 );
  694. } );
  695. // Re-initialize compute buffer
  696. const computeResetBuffer = computeResetBufferFn().compute( size );
  697. const computeResetWorkgroupSums = computeResetWorkgroupSumsFn().compute( 256 );
  698. const renderer = new THREE.WebGPURenderer( { antialias: false, trackTimestamp: true } );
  699. renderer.setPixelRatio( window.devicePixelRatio );
  700. renderer.setSize( window.innerWidth / 2, window.innerHeight );
  701. await renderer.init();
  702. // Unfortunately, need to arbitrarily run compute shader to get access to device limits
  703. renderer.compute( computeResetBuffer );
  704. if ( renderer.backend.device !== null ) {
  705. maxWorkgroupSize = renderer.backend.device.limits.maxComputeWorkgroupSizeX;
  706. }
  707. // Create and store dispatches of reduction of certain size. Map each set of dispatches to algorithm name.
  708. const computeReduce0Fn = Fn( () => {
  709. const { numThreadsDispatched } = effectController;
  710. inputStorage.element( instanceIndex ).addAssign( inputStorage.element( instanceIndex.add( numThreadsDispatched ) ) );
  711. } )();
  712. const reduce0Calls = [];
  713. for ( let i = size / 2; i >= 1; i /= 2 ) {
  714. const reduce0 = computeReduce0Fn.compute( i, [ maxWorkgroupSize ] );
  715. reduce0Calls.push( reduce0 );
  716. }
  717. const reduce1Calls = [
  718. // Accumulation
  719. createReduce1Fn( {
  720. dispatchSize: maxWorkgroupSize * maxWorkgroupSize,
  721. workgroupSize: maxWorkgroupSize,
  722. numElements: size,
  723. inputBuffer: inputStorage,
  724. } ),
  725. // 1 Block accumulation
  726. createReduce1Fn( {
  727. dispatchSize: maxWorkgroupSize,
  728. numElements: maxWorkgroupSize * maxWorkgroupSize,
  729. workgroupSize: maxWorkgroupSize,
  730. inputBuffer: inputStorage,
  731. } ),
  732. // Final result
  733. createReduce1Fn( {
  734. dispatchSize: 1,
  735. numElements: maxWorkgroupSize,
  736. workgroupSize: 1,
  737. inputBuffer: inputStorage
  738. } ),
  739. ];
  740. const reduce2Calls = [
  741. // Accumulate within workgroups
  742. createReduce2Fn( {
  743. workgroupSize: maxWorkgroupSize,
  744. dispatchSize: maxWorkgroupSize * maxWorkgroupSize,
  745. numElements: size,
  746. inputBuffer: inputStorage,
  747. } ),
  748. // 1 Block accumulation
  749. createReduce2Fn( {
  750. workgroupSize: maxWorkgroupSize,
  751. dispatchSize: maxWorkgroupSize,
  752. numElements: maxWorkgroupSize,
  753. inputBuffer: inputStorage,
  754. } ),
  755. ];
  756. const reduce3Calls = [
  757. createReduce3Fn( {
  758. inputBuffer: inputStorage,
  759. intermediateBuffer: workgroupSumsStorage,
  760. workgroupSize: maxWorkgroupSize,
  761. workPerThread: 4,
  762. rowSize: rowSize,
  763. vectorized: false,
  764. } ).compute( maxWorkgroupSize * numRows, [ maxWorkgroupSize ] ),
  765. createReduce3Fn( {
  766. inputBuffer: workgroupSumsStorage,
  767. intermediateBuffer: inputStorage,
  768. workgroupSize: 32,
  769. workPerThread: 4,
  770. rowSize: rowSize,
  771. vectorized: false
  772. } ).compute( 32, [ 32 ] )
  773. ];
  774. const reduce4Calls = [
  775. createReduce4Fn( {
  776. size: size,
  777. inputBuffer: inputVectorizedStorage,
  778. intermediateBuffer: workgroupSumsStorage,
  779. workgroupSize: maxWorkgroupSize,
  780. workPerThread: 4,
  781. } ),
  782. createReduce3Fn( {
  783. inputBuffer: workgroupSumsStorage,
  784. intermediateBuffer: inputStorage,
  785. workgroupSize: 32,
  786. workPerThread: 4,
  787. rowSize: rowSize,
  788. vectorized: false
  789. } ).compute( 32, [ 32 ] )
  790. ];
  791. const incorrectBaselineCalls = [
  792. createIncorrectBaselineFn( {
  793. inputBuffer: inputStorage,
  794. } ).compute( size ),
  795. ];
  796. const calls = {
  797. 'Reduce 0 (N/2)': reduce0Calls,
  798. 'Reduce 1 (Naive Accumulate)': reduce1Calls,
  799. 'Reduce 2 (Workgroup Reduction)': reduce2Calls,
  800. 'Reduce 3 (Subgroup Reduce)': reduce3Calls,
  801. 'Reduce 4 (Subgroup Optimized)': reduce4Calls,
  802. 'Incorrect Baseline': incorrectBaselineCalls
  803. };
  804. const getColor = ( bufferToCheck, colorChanger, width, height ) => {
  805. const subtracter = float( colorChanger ).div( width.mul( height ) );
  806. const color = vec3( subtracter.oneMinus() ).toVar();
  807. const { highlight } = effectController;
  808. // Validate that element 0 is equal to expected result of reduction
  809. If( highlight.equal( 1 ), () => {
  810. If( ( bufferToCheck.element( 0 ) ).equal( size ), () => {
  811. color.assign( vec3( 0.0, subtracter.oneMinus(), 0.0 ) );
  812. } ).Else( () => {
  813. color.assign( vec3( subtracter.oneMinus(), 0.0, 0.0 ) );
  814. } );
  815. } );
  816. return color;
  817. };
  818. const displayNodes = leftSideDisplay ? leftDisplayColorNodes : rightDisplayColorNodes;
  819. displayNodes[ 'Input Grid' ] = Fn( () => {
  820. const { gridElementWidth, gridElementHeight, gridDisplayWidth, gridDisplayHeight } = unifiedEffectController;
  821. const newUV = uv().mul( vec2( gridDisplayWidth, gridDisplayHeight ) );
  822. const pixel = uvec2( uint( floor( newUV.x ) ), uint( floor( newUV.y ) ) );
  823. const elementIndex = uint( gridDisplayWidth ).mul( pixel.y ).add( pixel.x );
  824. const colorChanger = uint( 0 ).toVar();
  825. const color = vec3( 0 ).toVar( 'color' );
  826. colorChanger.assign( inputStorage.element( elementIndex ) );
  827. color.assign( getColor( inputStorage, colorChanger, gridElementWidth, gridElementHeight ) );
  828. return color;
  829. } )();
  830. displayNodes[ 'Input Log2' ] = Fn( () => {
  831. const { gridElementWidth, gridElementHeight } = unifiedEffectController;
  832. const newUV = uv().mul( vec2( Math.log2( size ) ), 1 );
  833. const colorChanger = uint( 0 ).toVar();
  834. const color = vec3( 0 ).toVar( 'color' );
  835. colorChanger.assign( inputStorage.element( uint( 1 ).shiftLeft( newUV.x ) ) );
  836. color.assign( getColor( inputStorage, colorChanger, gridElementWidth, gridElementHeight ) );
  837. return color;
  838. } )();
  839. displayNodes[ 'Input Element 0' ] = Fn( () => {
  840. const { gridElementWidth, gridElementHeight } = unifiedEffectController;
  841. const colorChanger = uint( 0 ).toVar();
  842. const color = vec3( 0 ).toVar( 'color' );
  843. // Clamp display of single element to shade where green is still readable
  844. colorChanger.assign( clamp( inputStorage.element( 0 ), 0, size / 2 ) );
  845. color.assign( getColor( inputStorage, colorChanger, gridElementWidth, gridElementHeight ) );
  846. return color;
  847. } )();
  848. displayNodes[ 'Workgroup Sum Grid' ] = Fn( () => {
  849. const width = uint( 8 );
  850. const height = uint( 16 );
  851. const newUV = uv().mul( vec2( width, height ) );
  852. const pixel = uvec2( uint( floor( newUV.x ) ), uint( floor( newUV.y ) ) );
  853. const elementIndex = uint( width ).mul( pixel.y ).add( pixel.x );
  854. const colorChanger = uint( 0 ).toVar();
  855. const color = vec3( 0 ).toVar( 'color' );
  856. colorChanger.assign( workgroupSumsStorage.element( elementIndex ) );
  857. color.assign( getColor( inputStorage, colorChanger, width, height ) );
  858. return color;
  859. } )();
  860. ( leftSideDisplay ? leftMaterial : rightMaterial ).colorNode = displayNodes[ effectController.displayMode ];
  861. ( leftSideDisplay ? leftMaterial : rightMaterial ).needsUpdate = true;
  862. const plane = new THREE.Mesh( new THREE.PlaneGeometry( 1, 1 ), ( leftSideDisplay ? leftMaterial : rightMaterial ) );
  863. scene.add( plane );
  864. const animate = () => {
  865. renderer.render( scene, camera );
  866. };
  867. renderer.setAnimationLoop( animate );
  868. document.body.appendChild( renderer.domElement );
  869. renderer.domElement.style.position = 'absolute';
  870. renderer.domElement.style.top = '0';
  871. renderer.domElement.style.left = '0';
  872. renderer.domElement.style.width = '50%';
  873. renderer.domElement.style.height = '100%';
  874. if ( ! leftSideDisplay ) {
  875. renderer.domElement.style.left = '50%';
  876. scene.background = new THREE.Color( 0x212121 );
  877. } else {
  878. scene.background = new THREE.Color( 0x313131 );
  879. }
  880. renderer.info.autoReset = false;
  881. const stepAnimation = async function () {
  882. const currentAlgorithm = effectController.algo;
  883. const state = effectController.state;
  884. const stateController = leftSideDisplay ? stateLeftController : stateRightController;
  885. if ( state === 'Reset' ) {
  886. renderer.compute( computeResetBuffer );
  887. renderer.compute( computeResetWorkgroupSums );
  888. } else if ( state === 'Run Algo' ) {
  889. renderer.info.reset();
  890. const cpuTime = 0;
  891. switch ( currentAlgorithm ) {
  892. case 'Reduce 0 (N/2)': {
  893. let m = size / 2;
  894. for ( let i = 0; i < reduce0Calls.length; i ++ ) {
  895. effectController.numThreadsDispatched.value = m;
  896. const reduce0 = reduce0Calls[ i ];
  897. // Do a reduction step
  898. renderer.compute( reduce0 );
  899. renderer.resolveTimestampsAsync( THREE.TimestampQuery.COMPUTE );
  900. m /= 2;
  901. }
  902. break;
  903. }
  904. default: {
  905. const currentAlgoCalls = calls[ currentAlgorithm ];
  906. for ( let i = 0; i < currentAlgoCalls.length; i ++ ) {
  907. renderer.compute( currentAlgoCalls[ i ] );
  908. renderer.resolveTimestampsAsync( THREE.TimestampQuery.COMPUTE );
  909. }
  910. break;
  911. }
  912. }
  913. // DEBUG: const reductionResult = new Uint32Array( await renderer.getArrayBufferAsync( currentBuffer ) )[0];
  914. let passInfoString = '';
  915. if ( effectController.algo.substring( 0, 3 ) === 'CPU' ) {
  916. passInfoString = `Ran in ${cpuTime}ms<br>`;
  917. } else {
  918. passInfoString = `${renderer.info.compute.frameCalls} pass in ${renderer.info.compute.timestamp.toFixed( 6 )}ms<br>`;
  919. }
  920. timestamps[ leftSideDisplay ? 'left_side_display' : 'right_side_display' ].innerHTML = `
  921. Compute ${effectController.algo}: ${passInfoString}`;
  922. }
  923. renderer.render( scene, camera );
  924. renderer.resolveTimestampsAsync( THREE.TimestampQuery.RENDER );
  925. // Validate next state
  926. if ( state === 'Run Algo' ) {
  927. stateController.setValue( 'Validate' );
  928. effectController.highlight.value = 1;
  929. } else if ( state === 'Validate' ) {
  930. stateController.setValue( 'Reset' );
  931. effectController.highlight.value = 0;
  932. } else if ( state === 'Reset' ) {
  933. stateController.setValue( 'Run Algo' );
  934. }
  935. setTimeout( stepAnimation, 1000 );
  936. };
  937. window.addEventListener( 'resize', onWindowResize );
  938. function onWindowResize() {
  939. renderer.setSize( window.innerWidth / 2, window.innerHeight );
  940. const aspect = ( window.innerWidth / 2 ) / window.innerHeight;
  941. const frustumHeight = camera.top - camera.bottom;
  942. camera.left = - frustumHeight * aspect / 2;
  943. camera.right = frustumHeight * aspect / 2;
  944. camera.updateProjectionMatrix();
  945. renderer.render( scene, camera );
  946. }
  947. setTimeout( stepAnimation, 1000 );
  948. }
  949. </script>
  950. </body>
  951. </html>
粤ICP备19079148号