                <canvas id="canvas"></canvas>


                /* Pin the canvas to the viewport so it always fills the window. */
                canvas {
                  position: fixed;
                  top: 0;
                  left: 0;
                  width: 100%;
                  height: 100%;
                }


                // depending on your hardware this works up to just over 8.38 million
// above that it will hit the size limit of a storage buffer
const nInstances = 1000000;

// Tunables that are interpolated into the compute shader source.
const speed = 0.6;
const stepAmt = 320;
const worldSize = 300;
const curlStep = 0.25;
// Random offset chosen once per page load; each component is in [-50, 50).
const curlOffset = [
  Math.random() * 100 - 50,
  Math.random() * 100 - 50,
  Math.random() * 100 - 50
];
// Interpolated into the vertex shader, where it scales uniforms.time.
const rotSpeed = 0.0002;

// Use a larger workgroup once the instance count gets very large.
const workgroupSize = nInstances > 4000000 ? [ 16, 16 ] : [ 8, 8 ];
// Invocations per workgroup, and workgroups needed to cover every instance.
const computeThreads = workgroupSize[0] * workgroupSize[1];
const computeWorkgroups = Math.ceil(nInstances / computeThreads);

import { mat4, vec3 } from '';

// WGSL source for the render pipeline: one quad per instance, positioned by
// the per-instance `noise` attribute and shaded as a soft blue dot.
// NOTE(review): as found, this block had no closing delimiters, no stage
// attributes and no arguments to the VSOut constructor. They are restored
// below from what the visible code requires; confirm against the upstream
// original if one is available.
const shader = `
struct UBO {
  time: f32,
  resolution: vec2f,
  projection: mat4x4f,
  view: mat4x4f
}

@group(0) @binding(0) var<uniform> uniforms: UBO;

struct VSIn {
  @location(0) in_pos: vec3f,
  @location(1) uv: vec2f,
  @location(2) noise: vec3f
}

struct VSOut {
  @builtin(position) position: vec4<f32>,
  @location(0) uv: vec2<f32>
}

@vertex
fn vs_main(vs_in: VSIn) -> VSOut {
  // Rotation angle about the Y axis grows linearly with time.
  let r = uniforms.time * ${rotSpeed};
  let s = sin(r);
  let c = cos(r);
  // Translation to this instance's position.
  let xform = mat4x4f(
    vec4(1, 0, 0, 0),
    vec4(0, 1, 0, 0),
    vec4(0, 0, 1, 0),
    vec4(vs_in.noise, 1)
  );
  let rot = mat4x4f(
    vec4(c, 0, s, 0),
    vec4(0, 1, 0, 0),
    vec4(-s, 0, c, 0),
    vec4(0, 0, 0, 1)
  );

  var matrix = uniforms.view * rot * xform;
  // Reset the upper-left 3x3 to identity so only the translation survives:
  // the quad is never rotated or scaled, whatever the view/rotation is.
  matrix[0][0] = 1.0; matrix[0][1] = 0.0; matrix[0][2] = 0.0;
  matrix[1][0] = 0.0; matrix[1][1] = 1.0; matrix[1][2] = 0.0;
  matrix[2][0] = 0.0; matrix[2][1] = 0.0; matrix[2][2] = 1.0;
  let pos = vec4<f32>(vs_in.in_pos, 1.0);
  let abs_pos = uniforms.projection * matrix * pos;
  return VSOut(abs_pos, vs_in.uv);
}

@fragment
fn fs_main(vs_out: VSOut) -> @location(0) vec4<f32> {
  let color = vec3(0.3, 0.6, 1.0);
  // Alpha falls from 0.3 at the quad centre to 0 at a uv distance of 0.2.
  let a = mix(0.3, 0.0, smoothstep(0.0, 0.2, length(vs_out.uv - 0.5)));
  return vec4(color, a);
}
`;

// WGSL source for the compute pipelines. `initNoise` turns the random seeds
// in the storage buffer into starting positions; `computeNoise` advances
// every position one step along a curl-of-simplex-noise field and respawns
// points whose distance from the origin reaches `world`.
// NOTE(review): as found, this block had no closing delimiters and several
// expressions were truncated. The simplex-noise swizzles are restored from
// the published Ashima/Munrocket implementation named in the header below.
// The arguments of reset_pos are a reconstruction and must be confirmed.
const compute = `
@group(0) @binding(0) var<storage, read_write> noise: array<vec3f>;
const PI = 3.1415926535;
const sw = vec2f(0.0, 1e-4);

const speed = ${speed};
const step_amt = ${stepAmt};
const world = ${worldSize};
const curl_step = ${curlStep};
const curl_offset = vec3(${curlOffset});

//	Simplex 3D Noise 
//	by Ian McEwan, Ashima Arts
//  MIT License. © Ian McEwan, Stefan Gustavson, Munrocket, Johan Helsing

fn mod289(x: f32) -> f32 {
    return x - floor(x * (1. / 289.)) * 289.;
}

fn mod289_3(x: vec3<f32>) -> vec3<f32> {
    return x - floor(x * (1. / 289.)) * 289.;
}

fn mod289_4(x: vec4<f32>) -> vec4<f32> {
    return x - floor(x * (1. / 289.)) * 289.;
}

fn permute4(x: vec4<f32>) -> vec4<f32> {
    return mod289_4(((x * 34.) + 1.) * x);
}

fn taylorInvSqrt(r: vec4f) -> vec4f {
  return 1.79284291400159 - 0.85373472095314 * r;
}

fn snoise(v: vec3f) -> f32 {
  const C: vec2f = vec2(1.0/6.0, 1.0/3.0);
  const D: vec4f = vec4(0.0, 0.5, 1.0, 2.0);

// First corner
  var i = floor(v + dot(v, C.yyy) );
  var x0 = v - i + dot(i, C.xxx);

// Other corners
  var g = step(x0.yzx, x0.xyz);
  var l = 1.0 - g;
  var i1 = min(g.xyz, l.zxy );
  var i2 = max(g.xyz, l.zxy );

  //  x0 = x0 - 0. + 0.0 * C 
  var x1 = x0 - i1 + 1.0 * C.xxx;
  var x2 = x0 - i2 + 2.0 * C.xxx;
  var x3 = x0 - 1. + 3.0 * C.xxx;

// Permutations
  i = mod289_3(i);
  var p = permute4( permute4( permute4(
             i.z + vec4(0.0, i1.z, i2.z, 1.0 ))
           + i.y + vec4(0.0, i1.y, i2.y, 1.0 ))
           + i.x + vec4(0.0, i1.x, i2.x, 1.0 ));

// Gradients
// ( N*N points uniformly over a square, mapped onto an octahedron.)
  var n_ = 1.0/7.0; // N=7
  var ns = n_ * D.wyz - D.xzx;

  var j = p - 49.0 * floor(p * ns.z *ns.z);  //  mod(p,N*N)

  var x_ = floor(j * ns.z);
  var y_ = floor(j - 7.0 * x_ );    // mod(j,N)

  var x = x_ *ns.x + ns.yyyy;
  var y = y_ *ns.x + ns.yyyy;
  var h = 1.0 - abs(x) - abs(y);

  var b0 = vec4( x.xy, y.xy );
  var b1 = vec4( x.zw, y.zw );

  var s0 = floor(b0)*2.0 + 1.0;
  var s1 = floor(b1)*2.0 + 1.0;
  var sh = -step(h, vec4(0.0));

  var a0 = b0.xzyw + s0.xzyw*sh.xxyy ;
  var a1 = b1.xzyw + s1.xzyw*sh.zzww ;

  var p0 = vec3(a0.xy,h.x);
  var p1 = vec3(a0.zw,h.y);
  var p2 = vec3(a1.xy,h.z);
  var p3 = vec3(a1.zw,h.w);

//Normalise gradients
  var norm = taylorInvSqrt(vec4(
    dot(p0,p0), dot(p1,p1),
    dot(p2, p2), dot(p3,p3)
  ));

  p0 *= norm.x;
  p1 *= norm.y;
  p2 *= norm.z;
  p3 *= norm.w;

// Mix final noise value
  var m = max(0.6 - vec4(dot(x0,x0), dot(x1,x1), dot(x2,x2), dot(x3,x3)), vec4(0.0));
  m = m * m;
  return 42.0 * dot( m*m, vec4( dot(p0,x0), dot(p1,x1),
                                dot(p2,x2), dot(p3,x3) ) );
}

// Central-difference gradient of snoise at pos, using step sw.y per axis.
fn noiseGrad(pos: vec3f) -> vec3f {
  let x1 = snoise(pos + sw.yxx);
  let x2 = snoise(pos - sw.yxx);
  let a = (x1 - x2) / (2 * sw.y);
  let y1 = snoise(pos + sw.xyx);
  let y2 = snoise(pos - sw.xyx);
  let b = (y1 - y2) / (2 * sw.y);
  let z1 = snoise(pos + sw.xxy);
  let z2 = snoise(pos - sw.xxy);
  let c = (z1 - z2) / (2 * sw.y);
  return vec3(a, b, c);
}

// Cross product of the normalized gradients at pos and at pos + curl_step.
fn curl(pos: vec3f) -> vec3f {
  let noiseGrad0 = normalize(noiseGrad(pos));
  let noiseGrad1 = normalize(noiseGrad(pos + curl_step));
  return cross(noiseGrad0, noiseGrad1);
}

// Maps a seed (x -> theta, y -> phi, z -> radius) to a point whose distance
// from the origin is 10 + cur.z * world.
fn start_pos(cur: vec3f) -> vec3f {

// This is the "correct" way to spread points randomly
// through a sphere, but it breaks reset_pos, which
// breaks everything
// NOTE(review): the commented-out phi multiplies by PI inside acos. The
// usual uniform-sphere form is acos(2.0 * v - 1.0); arguments outside
// [-1, 1] leave acos undefined, which may be the breakage described above.

  // let theta = cur.x * PI * 2.0;
  // let phi = acos(2.0 * cur.y * PI - 1.0);
  // let radius = pow(cur.z, 1.0 / 3.0) * world;
  let theta = cur.x * PI * 2.0;
  let phi = cur.y * PI;
  let radius = 10 + cur.z * world;
  return vec3(
    radius * sin(phi) * cos(theta),
    radius * sin(phi) * sin(theta),
    radius * cos(phi)
  );
}

// Hash of a 2D value into [0, 1).
fn rand(co: vec2f) -> f32 {
  return fract(sin(dot(co, vec2(12.9898, 78.233))) * 43758.5453);
}

// Derives a fresh spawn position from the current position.
// NOTE(review): the three vec3 arguments were missing from the block as
// found. Hashing coordinate pairs with rand() is a reconstruction (rand is
// otherwise unused in this shader); confirm against the upstream original.
fn reset_pos(cur: vec3f) -> vec3f {
  let input = abs(vec3(
    rand(cur.xy),
    rand(cur.yz),
    rand(cur.zx)
  ));
  return start_pos(input);
}

// Moves cur by speed along the curl field sampled at a scaled, offset position.
fn step_noise(cur: vec3f) -> vec3f {
  let curl = curl(cur / step_amt + curl_offset) * speed;
  let next = cur + curl;
  return next;
}

// Flattens (workgroup id, local index) into one linear invocation index.
fn invocation_id(
  local_invocation_index: u32,
  workgroup_id: vec3u,
  num_workgroups: vec3u) -> u32 {
  let workgroup_index =
     workgroup_id.x +
     workgroup_id.y * num_workgroups.x +
     workgroup_id.z * num_workgroups.x * num_workgroups.y;
  let global_invocation_index =
     workgroup_index * ${computeThreads} +
     local_invocation_index;
   return global_invocation_index;
}

@compute @workgroup_size(${workgroupSize}) fn initNoise(
  @builtin(local_invocation_index) lii: u32,
  @builtin(workgroup_id) wid: vec3<u32>,
  @builtin(num_workgroups) nwg: vec3<u32>
) {
  let id = invocation_id(lii, wid, nwg);
  // The dispatch is rounded up to whole workgroups; skip the surplus
  // invocations instead of relying on out-of-bounds access behaviour.
  if (id >= arrayLength(&noise)) {
    return;
  }
  noise[id] = start_pos(noise[id]);
}

@compute @workgroup_size(${workgroupSize}) fn computeNoise(
  @builtin(local_invocation_index) lii: u32,
  @builtin(workgroup_id) wid: vec3<u32>,
  @builtin(num_workgroups) nwg: vec3<u32>
) {
  let id = invocation_id(lii, wid, nwg);
  if (id >= arrayLength(&noise)) {
    return;
  }
  let cur = step_noise(noise[id]);

  let start = reset_pos(cur);
  let len = length(cur);
  // step() is 1 once len >= world, which selects the respawn position.
  noise[id] = mix(cur, start, step(1.0, len / world));
}
`;

// CPU-side staging copy of the uniform block, 36 float32 slots, all zero
// until the render loop fills them:
//   [0]       time
//   [1]       unused (never written)
//   [2..3]    canvas resolution
//   [4..19]   projection matrix
//   [20..35]  view matrix
const uniforms = new Float32Array(36);

// Current canvas size in device pixels as [ width, height ].
const canvasSize = new Float32Array([ 1, 1 ]);

// Camera matrices shared by the resize handler and the render loop. The view
// matrix is an identity matrix translated by (0, 0, -300), written in place.
const projectionMatrix = mat4.create();
const viewMatrix = mat4.identity();
const cameraOffset = vec3.fromValues(0, 0, -300);
mat4.translate(viewMatrix, cameraOffset, viewMatrix);

// Per-instance data, four float32 slots per instance (16-byte stride).
const instances = new Float32Array(nInstances * 4);

// Fills `instances` with random seeds and uploads them to `instanceBuffer`.
//   device         - GPU device whose queue performs the upload
//   instanceBuffer - destination buffer; written from byte offset 0
// NOTE(review): the values passed to instances.set() were missing from the
// block as found. Three Math.random() seeds plus a zero in the fourth slot is
// a reconstruction based on how start_pos consumes x/y/z; confirm upstream.
const initInstances = (device, instanceBuffer) => {
  for (let i = 0; i < nInstances; i++) {
    instances.set([
      Math.random(),
      Math.random(),
      Math.random(),
      0
    ], i * 4);
  }
  device.queue.writeBuffer(instanceBuffer, 0, instances);
};

// Acquires the WebGPU adapter/device, configures the canvas context and sizes
// the canvas backing store.
//   canvas - the <canvas> element to render into
// Returns { gpu, device, queue, canvas, context }.
// Throws an Error when WebGPU or a suitable adapter is unavailable.
const init = async (canvas) => {
  const gpu = navigator.gpu;
  if (!gpu) {
    throw new Error("WebGPU is not available in this browser");
  }
  // requestAdapter() resolves to null when no adapter matches.
  const adapter = await gpu.requestAdapter();
  if (!adapter) {
    throw new Error("No WebGPU adapter found");
  }
  const device = await adapter.requestDevice();
  const queue = device.queue;

  const context = canvas.getContext("webgpu");
  context.configure({
    device,
    format: gpu.getPreferredCanvasFormat(),
    usage: GPUTextureUsage.RENDER_ATTACHMENT,
    alphaMode: 'opaque'
  });
  // Cap the pixel ratio at 2 to bound the backing-store size.
  const dpr = Math.min(window.devicePixelRatio || 1, 2);

  canvas.width = canvas.offsetWidth * dpr;
  canvas.height = canvas.offsetHeight * dpr;
  canvasSize.set([ canvas.width, canvas.height ]);

  return { gpu, device, queue, canvas, context };
};

// Builds the helper object that tracks the canvas render target.
//   resize() - re-sizes the canvas backing store, records the new size in
//              canvasSize and rebuilds projectionMatrix
//   update() - grabs the current canvas texture and a view of it; stores them
//              on the returned object as colorTexture / colorTextureView
const initTextures = ({ canvas, context }) => {
  const ret = { };
  ret.resize = () => {
    // Same cap as init() so a resize does not change the effective scale.
    const dpr = Math.min(window.devicePixelRatio || 1, 2);
    canvas.width = canvas.offsetWidth * dpr;
    canvas.height = canvas.offsetHeight * dpr;
    canvasSize.set([ canvas.width, canvas.height ]);
    const aspect = canvasSize[0] / canvasSize[1];
    // NOTE(review): only the field-of-view argument survived in the block as
    // found. The near/far planes (0.1 / 1000) and the destination argument
    // are a reconstruction; confirm against the upstream original.
    mat4.perspective(
      60 * Math.PI / 180,
      aspect,
      0.1,
      1000,
      projectionMatrix
    );
  };
  ret.update = () => {
    ret.colorTexture = context.getCurrentTexture();
    ret.colorTextureView = ret.colorTexture.createView();
  };
  return ret;
};

// Creates a GPU buffer that is initialised with the contents of `arr`.
//   device - object exposing createBuffer()
//   arr    - typed array holding the initial data
//   usage  - usage flags forwarded to the buffer descriptor
// Returns the unmapped buffer, ready for use.
const createBuffer = (device, arr, usage) => {
  const buffer = device.createBuffer({
    // Buffers mapped at creation must have a size that is a multiple of 4.
    size: Math.ceil(arr.byteLength / 4) * 4,
    usage,
    mappedAtCreation: true
  });
  // View the mapped range with the same element type as the source array.
  const writeArray = new arr.constructor(buffer.getMappedRange());
  writeArray.set(arr);
  // The buffer cannot be used by the GPU until it is unmapped.
  buffer.unmap();
  return buffer;
};

// Creates the bind group that exposes the uniform buffer at binding 0 of the
// pipeline's group 0.
//   device         - object exposing createBindGroup()
//   pipeline       - pipeline whose group-0 layout is used
//   ubo            - uniform buffer to bind
//   instanceBuffer - accepted for call-site compatibility; not bound here
// Returns the bind group.
const initUniformBindGroup = (device, pipeline, ubo, instanceBuffer) => {
  const uniformBindGroup = device.createBindGroup({
    layout: pipeline.getBindGroupLayout(0),
    entries: [
      {
        binding: 0,
        resource: {
          buffer: ubo
        }
      }
    ]
  });
  return uniformBindGroup;
};

// Creates every GPU buffer the demo uses.
//   device - GPU device passed through to createBuffer()
// Resolves to { positionBuffer, indexBuffer, instanceBuffer, ubo }.
const initBuffers = async (device) => {
  // One quad: four vertices of interleaved position (x, y, z) and uv (u, v).
  const positions = new Float32Array([
     1, -1, 0, 1, 0, // x, y, z, u, v
    -1, -1, 0, 0, 0,
    -1,  1, 0, 0, 1,
     1,  1, 0, 1, 1
  ]);
  // Two triangles sharing the 0-2 diagonal.
  const indices = new Uint16Array([ 0, 1, 2, 0, 2, 3 ]);

  const positionBuffer = createBuffer(device, positions, GPUBufferUsage.VERTEX);
  const indexBuffer = createBuffer(device, indices, GPUBufferUsage.INDEX);
  // Written by the compute passes (STORAGE), read as per-instance vertex
  // data (VERTEX) and seeded through queue.writeBuffer (COPY_DST).
  const instanceBuffer = createBuffer(device, instances,
      GPUBufferUsage.STORAGE | GPUBufferUsage.VERTEX | GPUBufferUsage.COPY_DST);
  const ubo = createBuffer(device, uniforms,
     GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST);
  return { positionBuffer, indexBuffer, instanceBuffer, ubo };
};

// Builds the render pipeline that draws the instanced quads.
//   device - GPU device used to create the shader module and pipeline
//   gpu    - object exposing getPreferredCanvasFormat()
// Returns the render pipeline.
const initPipeline = (device, gpu) => {
  const shaders = device.createShaderModule({
    code: shader
  });
  const pipelineDesc = {
    layout: "auto",
    vertex: {
      module: shaders,
      entryPoint: 'vs_main',
      buffers: [
        {
          // Slot 0: per-vertex quad data, position (3 floats) + uv (2 floats).
          attributes: [
            {
              shaderLocation: 0,
              offset: 0,
              format: 'float32x3'
            },
            {
              shaderLocation: 1,
              offset: 4 * 3,
              format: 'float32x2'
            }
          ],
          arrayStride: 4 * 5,
          stepMode: 'vertex'
        },
        {
          // Slot 1: per-instance position, read as 3 floats on a 4-float stride.
          attributes: [
            {
              shaderLocation: 2,
              offset: 0,
              format: 'float32x3'
            }
          ],
          arrayStride: 4 * 4,
          stepMode: 'instance'
        }
      ]
    },
    fragment: {
      module: shaders,
      entryPoint: 'fs_main',
      targets: [
        {
          format: gpu.getPreferredCanvasFormat(),
          writeMask: GPUColorWrite.ALL,
          // Additive colour blending; destination alpha is left untouched.
          blend: {
            color: {
              srcFactor: 'src-alpha',
              dstFactor: 'one',
              operation: 'add',
            },
            alpha: {
              srcFactor: 'zero',
              dstFactor: 'one',
              operation: 'add',
            }
          }
        }
      ]
    },
    primitive: {
      frontFace: 'cw',
      cullMode: 'none',
      topology: 'triangle-list'
    }
  };
  const pipeline = device.createRenderPipeline(pipelineDesc);
  return pipeline;
};

// Builds a compute pipeline for one entry point of the compute shader, plus
// the bind group that exposes the instance buffer at binding 0.
//   device         - GPU device
//   instanceBuffer - storage buffer the shader reads and writes
//   entryPoint     - name of the @compute function to run
// Returns { pipeline, bindGroup }.
const computePipeline = (device, instanceBuffer, entryPoint) => {
  const pipeline = device.createComputePipeline({
    label: "Compute pipeline",
    layout: "auto",
    compute: {
      module: device.createShaderModule({
        code: compute
      }),
      // The module declares more than one @compute function, so the entry
      // point has to be named explicitly.
      entryPoint
    }
  });
  const bindGroup = device.createBindGroup({
    layout: pipeline.getBindGroupLayout(0),
    entries: [
      { binding: 0, resource: { buffer: instanceBuffer } }
    ]
  });
  return { pipeline, bindGroup };
};

// Records and submits one standalone dispatch of a compute pipeline.
//   compute           - { pipeline, bindGroup } as returned by computePipeline
//   { device, queue } - GPU device and its queue
const stepCompute = (compute, { device, queue }) => {
  const commandEncoder = device.createCommandEncoder();
  const computePass = commandEncoder.beginComputePass({ label: "Compute pass" });
  computePass.setPipeline(compute.pipeline);
  computePass.setBindGroup(0, compute.bindGroup);
  computePass.dispatchWorkgroups(computeWorkgroups);
  // A pass must be ended before its encoder can be finished.
  computePass.end();
  queue.submit([ commandEncoder.finish() ]);
};

// Records and submits one frame: a compute pass that advances the instances,
// then a render pass that draws them.
//   { device, canvas, queue } - GPU device, canvas and queue
//   textures - object whose colorTextureView is the render target
//   buffers  - { positionBuffer, indexBuffer, instanceBuffer }
//   pipeline - render pipeline
//   ubg      - bind group for the uniform buffer
//   compute  - { pipeline, bindGroup } for the per-frame compute step
const encodeCommands = ({ device, canvas, queue }, textures, buffers, pipeline, ubg, compute) => {
  const colorAttachment = {
    view: textures.colorTextureView,
    clearValue: { r: 0, g: 0, b: 0, a: 1 },
    loadOp: 'clear',
    storeOp: 'store'
  };

  const renderPassDesc = {
    colorAttachments: [ colorAttachment ]
  };
  const [ width, height ] = canvasSize;
  const commandEncoder = device.createCommandEncoder();

  const computePass = commandEncoder.beginComputePass({ label: "Compute pass" });
  computePass.setPipeline(compute.pipeline);
  computePass.setBindGroup(0, compute.bindGroup);
  computePass.dispatchWorkgroups(computeWorkgroups);
  // End the compute pass before the render pass begins on the same encoder.
  computePass.end();

  const passEncoder = commandEncoder.beginRenderPass(renderPassDesc);
  passEncoder.setPipeline(pipeline);
  passEncoder.setViewport(0, 0, width, height, 0, 1);
  passEncoder.setScissorRect(0, 0, width, height);
  passEncoder.setVertexBuffer(0, buffers.positionBuffer);
  passEncoder.setVertexBuffer(1, buffers.instanceBuffer);
  passEncoder.setIndexBuffer(buffers.indexBuffer, 'uint16');
  passEncoder.setBindGroup(0, ubg);
  // Six indices per quad, one quad per instance.
  passEncoder.drawIndexed(6, nInstances);
  passEncoder.end();
  queue.submit([ commandEncoder.finish() ]);
};

// Entry point: sets up the device, buffers and pipelines, seeds the instance
// positions on the GPU, then renders one frame per animation frame.
(async () => {
  const canvas = document.getElementById("canvas");
  const config = await init(canvas);
  const { device, gpu, queue } = config;
  const textures = initTextures(config);
  const buffers = await initBuffers(device);
  const pipeline = initPipeline(device, gpu);
  const ubg = initUniformBindGroup(device, pipeline, buffers.ubo, buffers.instanceBuffer);
  const initCompute = computePipeline(device, buffers.instanceBuffer, "initNoise");
  const compute = computePipeline(device, buffers.instanceBuffer, "computeNoise");
  // Upload the random seeds, then convert them to start positions once.
  initInstances(device, buffers.instanceBuffer);
  stepCompute(initCompute, config);
  // Build the projection matrix once up front; resize only fires on changes.
  textures.resize();
  window.addEventListener('resize', textures.resize);
  const render = () => {
    // encodeCommands reads textures.colorTextureView, so refresh it first.
    textures.update();
    // NOTE(review): the time value was missing from the block as found;
    // performance.now() (milliseconds) is a reconstruction - confirm.
    uniforms.set([ performance.now() ], 0);
    uniforms.set(canvasSize, 2);
    uniforms.set(projectionMatrix, 4);
    uniforms.set(viewMatrix, 20);

    queue.writeBuffer(buffers.ubo, 0, uniforms);
    encodeCommands(config, textures, buffers, pipeline, ubg, compute);
    requestAnimationFrame(render);
  };
  requestAnimationFrame(render);
})().catch((err) => {
  // Surface setup failures (e.g. no WebGPU support) instead of dropping them.
  console.error(err);
});

