1
0
mirror of https://github.com/osmarks/website synced 2025-01-11 01:40:55 +00:00

new graphics demos, fix a bug, minor post updates, add opengraph

This commit is contained in:
osmarks 2024-10-11 18:42:05 +01:00
parent 626608939b
commit 908422beec
11 changed files with 403 additions and 9 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.8 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 MiB

Binary file not shown.

Binary file not shown.

BIN
assets/misc/2021-jouppi.pdf Normal file

Binary file not shown.

View File

@ -21,7 +21,7 @@ Matrix multiplication uses $O(n^3)$ operations (wrt. matrix size)[^1], primarily
It is, of course, not really possible to do 300 useful operations on a single byte without interacting with anything else, so the GPU has registers, shared memory and caches with much higher bandwidth and lower latency than main memory - this allows keeping chunks of the input matrices closer to compute units and performing tiled matrix multiplications using those chunks[^5]. There's also dedicated hardware for asynchronously fetching data from memory without ever tying up compute generating memory addresses. Even with all this, H100 GPUs can usually only manage ~70% of their quoted FLOPS performing a matrix multiplication, and large-scale training runs only manage ~40%[^4].
The worse utiliation on real training runs is partly because of individual weight matrices and inputs not filling the GPU, and partly because of the aforementioned scalar/vector operations: naively, doing $\mathrm{ReLU}$ to a vector would waste the vast majority of FLOPS, because each value would be fetched, trivially operated on, and then written back, though kernel fusion (applying it before writing to memory during a matmul) mitigates this. There are also significant slowdowns introduced by multi-GPU operations. I don't know exactly what causes this, since communications and computation can mostly be overlapped in larger models, but one part is that all GPUs are forced to wait for all others at various points, and there is significant variance in performance between GPUs[^6]. Also, network communications tie up memory bandwidth and power budget.
The worse utilization on real training runs is partly because of individual weight matrices and inputs not filling the GPU, and partly because of the aforementioned scalar/vector operations: naively, doing $\mathrm{ReLU}$ to a vector would waste the vast majority of FLOPS, because each value would be fetched, trivially operated on, and then written back, though kernel fusion (applying it before writing to memory during a matmul) mitigates this. There are also significant slowdowns introduced by multi-GPU operations. I don't know exactly what causes this, since communications and computation can mostly be overlapped in larger models, but one part is that all GPUs are forced to wait for all others at various points, and there is significant variance in performance between GPUs[^6]. Also, network communications tie up memory bandwidth and power budget.
### DLRMs

View File

@ -93,6 +93,10 @@ I don't know exactly what about [Ender's Game](https://en.wikipedia.org/wiki/End
[The Expanse](https://www.goodreads.com/series/56399-the-expanse) by James S. A. Corey is another (see [Theft of Fire](#theft-of-fire) below for my general thoughts on this, though The Expanse was first and much more widely known, so) modern (mostly) hard science fiction series. It's also a TV series now, though that got truncated by internal meddling.
### As the Godking Wills
[As the Godking Wills](https://www.royalroad.com/fiction/28639/as-the-godking-wills) is a short(ish), somewhat Discworld-like political/theological black comedy.
### Good Omens
[Good Omens](https://en.wikipedia.org/wiki/Good_Omens) by Neil Gaiman and Terry Pratchett is a very funny... religious comedy... in which the apocalypse is averted by vaguely dissatisfied angels.

View File

@ -0,0 +1,247 @@
---
title: "Demo: Limitless* Grid"
description: The Limitless Grid screensaver (kind of) implemented in a somewhat laggy pixel shader.
slug: demogrid
---
<!DOCTYPE html>
<canvas id="d"></canvas>
<table id="ctrl"></table>
<p>
Originally implemented in ShaderToy; ported to raw WebGL here.
I don't actually know computer graphics and today's best omniscient machine oracles are unhelpful for this, so this mostly relies on trial and error and some vector maths I did on paper.
Each ray in the viewport is intersected with planes in the xz, yz and xy directions. There are finitely many, since I don't know how or whether you can analytically compute the intersections for an infinite stack of them, so it is not, strictly, a "limitless" grid, though this is also true of the <a href="https://github.com/ghisguth/sunlight/tree/HEAD/limitlessgrid">original</a> and each plane is infinite in extent.
</p>
<script>
// GLSL fragment shader source for the grid, compiled at runtime by createShader().
// Per pixel it builds a view ray from the origin towards a target that circles with
// `speed * time`, intersects that ray with nine planes (n = 0..8) perpendicular to
// each axis, and adds the plane family's colour uniform (yz_col / xz_col / xy_col)
// wherever the hit point lies close to a grid line.
//   dfn(x)        distance from x to the nearest multiple of `lscale`, divided by
//                 `lscale`; returns 100.0 once |x| exceeds 100 cells.
//   dfn_nonlin(d) hard threshold: `lbright` if d < `lthick`, otherwise 0.
// All uniforms declared here are fed from the JavaScript render loop below.
const SHADER = `precision mediump float;
uniform float time;
uniform vec2 resolution;
uniform float lscale;
uniform float lthick;
uniform float lbright;
uniform float speed;
uniform vec3 yz_col;
uniform vec3 xz_col;
uniform vec3 xy_col;
float dfn(float x) {
float lscaleh = lscale * 0.5;
if (abs(x)>lscale*100.0) return 100.0;
return abs(mod(x - lscaleh, lscale) - lscaleh) / lscale;
}
float dfn_nonlin(float x) {
//return x;
if (x < lthick) return lbright;
else return 0.0;
}
vec3 ray_direction(vec2 uv, vec3 cam, vec3 target, float zoom) {
vec3 f = normalize(target - cam);
vec3 r = normalize(cross(vec3(0.0, 1.0, 0.0), f));
vec3 u = cross(f, r);
return normalize(uv.x * r + uv.y * u + zoom * f);
}
vec3 line_plane_intersect(vec3 l0, vec3 ldir, vec3 p0, vec3 n, out float lam) {
lam = dot(p0 - l0, n) / dot(ldir, n);
vec3 isect = l0 + ldir * lam;
return isect;
}
void main() {
vec2 uv = 2.0 * gl_FragCoord.xy / resolution.xy - vec2(1.0, 1.0);
vec3 l0 = vec3(0.0); // this cannot actually be moved off zero or some handwaves made somewhere (probably dfn(isect)) break
vec3 ldir = ray_direction(uv, l0, vec3(sin(speed * time), cos(speed * time), 0.1), 1.0);
float n = 1.0;
vec3 bri = vec3(0.0);
for (float n = 0.0; n <= 8.0; n += 1.0) {
{
// yz plane
vec3 p0 = vec3(n, 0.0, 0.0);
vec3 norm = vec3(1.0, 0.0, 0.0);
float lam;
vec3 isect = line_plane_intersect(l0, ldir, p0, norm, lam);
bri += yz_col * (dfn_nonlin(dfn(isect.y)) + dfn_nonlin(dfn(isect.z)));
}
{
// xz plane
vec3 p0 = vec3(0.0, n, 0.0);
vec3 norm = vec3(0.0, 1.0, 0.0);
float lam;
vec3 isect = line_plane_intersect(l0, ldir, p0, norm, lam);
bri += xz_col * (dfn_nonlin(dfn(isect.x)) + dfn_nonlin(dfn(isect.z)));
}
{
// xy plane
vec3 p0 = vec3(0.0, 0.0, n);
vec3 norm = vec3(0.0, 0.0, 1.0);
float lam;
vec3 isect = line_plane_intersect(l0, ldir, p0, norm, lam);
bri += xy_col * (dfn_nonlin(dfn(isect.x)) + dfn_nonlin(dfn(isect.y)));
}
}
// Output to screen
gl_FragColor = vec4(bri, 1.0);
}`
// Render into a fixed size x size backing store and let CSS scale the element.
let size = 1000
let canvas = document.querySelector("canvas")
canvas.width = canvas.height = size
// Must be a valid CSS percentage: an unparsable value (such as "100%n") is silently
// dropped by the CSSOM and the canvas stays at its intrinsic pixel size.
canvas.style.width = canvas.style.height = "100%"
// the WebGL API is terrible and if I meet whoever designed it I will armbar them
const gl = canvas.getContext("webgl")
// getContext returns null when WebGL is unavailable; fail with a readable message
// instead of a TypeError on the first gl.* call.
if (!gl) {
    throw new Error("WebGL is not available in this browser")
}
// https://developer.mozilla.org/en-US/docs/Web/API/WebGLShader
/**
 * Compiles GLSL source into a shader object on the shared `gl` context.
 * @param {string} sourceCode - GLSL source text.
 * @param {number} type - gl.VERTEX_SHADER or gl.FRAGMENT_SHADER.
 * @returns {WebGLShader} The compiled shader.
 * @throws {Error} If compilation fails; the message carries the shader info log.
 */
function createShader(sourceCode, type) {
    const shader = gl.createShader(type)
    gl.shaderSource(shader, sourceCode)
    gl.compileShader(shader)
    if (!gl.getShaderParameter(shader, gl.COMPILE_STATUS)) {
        const info = gl.getShaderInfoLog(shader)
        // Throw a real Error (not a bare string) so the failure comes with a stack trace.
        throw new Error(`Could not compile WebGL shader. \n\n${info}`)
    }
    return shader
}
// Build the GL program: a pass-through vertex shader plus the grid fragment shader.
const program = gl.createProgram()
// Attach pre-existing shaders
gl.attachShader(program, createShader(`precision mediump float;
attribute vec2 position;
void main() {
gl_Position = vec4(position, 0.0, 1.0);
}`, gl.VERTEX_SHADER))
gl.attachShader(program, createShader(SHADER, gl.FRAGMENT_SHADER))
gl.linkProgram(program)
if (!gl.getProgramParameter(program, gl.LINK_STATUS)) {
    const info = gl.getProgramInfoLog(program)
    // This is a link failure (compile failures are raised by createShader); throw a
    // real Error so the report has a stack trace.
    throw new Error(`Could not link WebGL program. \n\n${info}`)
}
// Look up the attribute and uniform locations once; the render loop reads them
// from here on every frame.
const programInfo = {
    program: program,
    attribLocations: {
        position: gl.getAttribLocation(program, "position"),
    },
    uniformLocations: Object.fromEntries(
        ["time", "resolution", "lscale", "lthick", "lbright", "speed", "xz_col", "yz_col", "xy_col"]
            .map(uniform => [uniform, gl.getUniformLocation(program, uniform)])
    ),
};
// Logs the pending WebGL error code to the console, if there is one.
const errorCheck = () => {
    const code = gl.getError()
    if (code === gl.NO_ERROR) {
        return
    }
    console.error("WebGL error:", code)
}
// Full-viewport quad: four (x, y) clip-space corners, drawn as a triangle strip.
const positionBuffer = gl.createBuffer()
const positions = new Float32Array([1.0, 1.0, -1.0, 1.0, 1.0, -1.0, -1.0, -1.0])
gl.bindBuffer(gl.ARRAY_BUFFER, positionBuffer)
// `positions` is already a Float32Array, so upload it as-is rather than copying it
// into a second typed array first.
gl.bufferData(gl.ARRAY_BUFFER, positions, gl.STATIC_DRAW)
errorCheck()
// Control definitions, keyed by the name of the shader uniform they drive.
// Each entry is [input type, min, max]; min/max are only given for "range" inputs.
// The control-building loop below puts range sliders on a logarithmic scale
// (it stores Math.log of these bounds on the <input>).
const inputs = {
lscale: ["range", 0.05, 1],
lthick: ["range", 0.01, 0.2],
lbright: ["range", 0.01, 0.2],
speed: ["range", 0.005, 1],
xz_col: ["color"],
yz_col: ["color"],
xy_col: ["color"]
}
// Initial value per uniform: a plain number for sliders, a "#rrggbb" string for colours.
// Each control's oninput handler (run once at creation) replaces these with arrays of
// floats, which the render loop spreads into gl.uniform{N}f.
const inputValues = {
lscale: 0.2,
lthick: 0.06,
lbright: 0.02,
speed: 0.03,
xz_col: "#ff0000",
xy_col: "#00ff00",
yz_col: "#0000ff"
}
// Builds one labelled <input> per entry of `inputs` inside the #ctrl table and keeps
// `inputValues[name]` in sync as an array of floats ready to be spread into a
// gl.uniform{N}f call: [value] for sliders, [r, g, b] (each 0..1) for colour pickers.
const ctrl = document.querySelector("#ctrl")
for (const [name, [type, min, max]] of Object.entries(inputs)) {
    const isRange = type === "range"
    const container = document.createElement("tr")

    const label = document.createElement("label")
    label.textContent = name
    label.setAttribute("for", name)
    label.style.minWidth = "10em"
    label.style.display = "inline-block"
    container.appendChild(label)

    const input = document.createElement("input")
    input.type = type
    // A label's "for" is matched against the control's id, not its name, so set both;
    // without the id the label is not associated with the input.
    input.id = name
    input.setAttribute("name", name)
    // Last known-good value: the configured default until the user moves the slider.
    let lastGood = inputValues[name]
    if (isRange) {
        // The slider holds log(value); oninput converts back with Math.exp.
        input.min = Math.log(min)
        input.max = Math.log(max)
        input.step = (Math.log(max) - Math.log(min)) / 100
        input.value = Math.log(lastGood)
    } else {
        input.value = lastGood
    }
    input.style.verticalAlign = "middle"
    input.oninput = () => {
        if (isRange) {
            const v = Math.exp(parseFloat(input.value))
            if (Number.isNaN(v)) {
                // Unparsable slider value: snap the slider back to the last good value.
                input.value = Math.log(lastGood)
            } else {
                lastGood = v
            }
            // Always store an array so the render loop can rely on `.length`.
            inputValues[name] = [lastGood]
        } else if (type === "color") {
            const rawColor = parseInt(input.value.slice(1), 16)
            // R, G, B floats
            inputValues[name] = [
                ((rawColor >> 16) & 0xFF) / 0xFF,
                ((rawColor >> 8) & 0xFF) / 0xFF,
                (rawColor & 0xFF) / 0xFF
            ]
        }
    }
    // Run once now so the scalar / "#rrggbb" defaults become float arrays before the first frame.
    input.oninput()
    container.appendChild(input)
    ctrl.appendChild(container)
}
// Animation loop: clears the canvas, uploads the elapsed time plus every control
// value as a uniform, draws the full-screen quad, then schedules the next frame.
let start = Date.now() / 1000
let rec = () => {
    const { program: activeProgram, attribLocations, uniformLocations } = programInfo

    gl.clearColor(0.0, 0.0, 0.0, 1.0)
    gl.clear(gl.COLOR_BUFFER_BIT)
    gl.useProgram(activeProgram)
    errorCheck()

    gl.bindBuffer(gl.ARRAY_BUFFER, positionBuffer)
    gl.vertexAttribPointer(attribLocations.position, 2, gl.FLOAT, false, 0, 0)
    gl.enableVertexAttribArray(attribLocations.position)

    const elapsed = Date.now() / 1000 - start
    gl.uniform1f(uniformLocations.time, elapsed)
    Object.keys(inputValues).forEach(name => {
        // Each value is an array of 1 or 3 floats; its length selects uniform1f / uniform3f.
        const components = inputValues[name]
        const setter = `uniform${components.length}f`
        gl[setter](uniformLocations[name], ...components)
    })
    gl.uniform2f(uniformLocations.resolution, canvas.width, canvas.height)

    gl.drawArrays(gl.TRIANGLE_STRIP, 0, 4)
    errorCheck()
    requestAnimationFrame(rec)
}
rec()
</script>

View File

@ -0,0 +1,139 @@
---
title: "Demo: Wiggly Lines"
description: An unfinished attempt to replicate an Apple screensaver.
slug: demolines
---
<!DOCTYPE html>
<canvas id="d"></canvas>
<script>
// Perlin noise implementation from somewhere
/** Quintic smoothing curve 6t^5 - 15t^4 + 10t^3, evaluated in nested form. */
function fade(t) {
    const inner = t * (t * 6 - 15) + 10;
    return t * t * t * inner;
}
/** Linear interpolation from a (t = 0) to b (t = 1). */
function lerp(t, a, b) {
    const span = b - a;
    return a + t * span;
}
/**
 * Picks a gradient direction from the low 4 bits of `hash` and returns its dot
 * product with (x, y, z), expressed as a signed sum of two of the coordinates.
 */
function grad(hash, x, y, z) {
    const low = hash & 15;
    const first = low < 8 ? x : y;

    let second;
    if (low < 4) {
        second = y;
    } else if (low === 12 || low === 14) {
        second = x;
    } else {
        second = z;
    }

    // Bit 0 flips the sign of the first term, bit 1 the sign of the second.
    const signedFirst = (low & 1) === 0 ? first : -first;
    const signedSecond = (low & 2) === 0 ? second : -second;
    return signedFirst + signedSecond;
}
/** Maps n from the range [-1, 1] onto [0, 1]. */
function scale(n) {
    const shifted = 1 + n;
    return shifted / 2;
}
// 256-entry lookup table used to hash lattice coordinates in noise3d.
var permutation = [
    151, 160, 137, 91, 90, 15, 131, 13, 201, 95, 96, 53, 194, 233, 7, 225, 140, 36, 103, 30, 69, 142, 8, 99,
    37, 240, 21, 10, 23, 190, 6, 148, 247, 120, 234, 75, 0, 26, 197, 62, 94, 252, 219, 203, 117, 35, 11, 32,
    57, 177, 33, 88, 237, 149, 56, 87, 174, 20, 125, 136, 171, 168, 68, 175, 74, 165, 71, 134, 139, 48, 27, 166,
    77, 146, 158, 231, 83, 111, 229, 122, 60, 211, 133, 230, 220, 105, 92, 41, 55, 46, 245, 40, 244, 102, 143,
    54, 65, 25, 63, 161, 1, 216, 80, 73, 209, 76, 132, 187, 208, 89, 18, 169, 200, 196, 135, 130, 116, 188, 159,
    86, 164, 100, 109, 198, 173, 186, 3, 64, 52, 217, 226, 250, 124, 123, 5, 202, 38, 147, 118, 126, 255, 82,
    85, 212, 207, 206, 59, 227, 47, 16, 58, 17, 182, 189, 28, 42, 223, 183, 170, 213, 119, 248, 152, 2, 44,
    154, 163, 70, 221, 153, 101, 155, 167, 43, 172, 9, 129, 22, 39, 253, 19, 98, 108, 110, 79, 113, 224, 232,
    178, 185, 112, 104, 218, 246, 97, 228, 251, 34, 242, 193, 238, 210, 144, 12, 191, 179, 162, 241, 81, 51,
    145, 235, 249, 14, 239, 107, 49, 192, 214, 31, 181, 199, 106, 157, 184, 84, 204, 176, 115, 121, 50, 45,
    127, 4, 150, 254, 138, 236, 205, 93, 222, 114, 67, 29, 24, 72, 243, 141, 128, 195, 78, 66, 215, 61, 156, 180
];
// p holds the table twice back to back, so lookups such as p[X + 1] and p[A + 1]
// in noise3d never need to wrap the index.
var p = new Array(512);
var i = 0;
while (i < 256) {
    var entry = permutation[i];
    p[i] = entry;
    p[i + 256] = entry;
    i++;
}
/**
 * 3D gradient noise built on the `p` table and the fade / lerp / grad / scale helpers.
 * Returns a number between 0 and 1.
 */
function noise3d(x, y, z)
{
    // Lattice cell containing the point, wrapped to the 0..255 table range.
    const cellX = Math.floor(x);
    const cellY = Math.floor(y);
    const cellZ = Math.floor(z);
    const X = cellX & 255;
    const Y = cellY & 255;
    const Z = cellZ & 255;

    // Position of the point inside that cell, plus the same position measured
    // from the far corner along each axis.
    const fx = x - cellX;
    const fy = y - cellY;
    const fz = z - cellZ;
    const fx1 = fx - 1;
    const fy1 = fy - 1;
    const fz1 = fz - 1;

    // Interpolation weights.
    const u = fade(fx);
    const v = fade(fy);
    const w = fade(fz);

    // Hash the cell corners through the permutation table.
    const A = p[X] + Y;
    const AA = p[A] + Z;
    const AB = p[A + 1] + Z;
    const B = p[X + 1] + Y;
    const BA = p[B] + Z;
    const BB = p[B + 1] + Z;

    // Gradient contribution of each corner (suffix = x/y/z offset of the corner).
    const g000 = grad(p[AA], fx, fy, fz);
    const g100 = grad(p[BA], fx1, fy, fz);
    const g010 = grad(p[AB], fx, fy1, fz);
    const g110 = grad(p[BB], fx1, fy1, fz);
    const g001 = grad(p[AA + 1], fx, fy, fz1);
    const g101 = grad(p[BA + 1], fx1, fy, fz1);
    const g011 = grad(p[AB + 1], fx, fy1, fz1);
    const g111 = grad(p[BB + 1], fx1, fy1, fz1);

    // Blend along x, then y, then z.
    const nearFace = lerp(v, lerp(u, g000, g100), lerp(u, g010, g110));
    const farFace = lerp(v, lerp(u, g001, g101), lerp(u, g011, g111));
    return scale(lerp(w, nearFace, farFace));
}
// main code
let hues = ["180", "300", "60", "120"]
// Canvas edge length in pixels (backing store and CSS size alike).
let size = 1000
// sc1 scales the end-point offset of each line, sc2 the start-point offset,
// and sc3 divides the coordinates at which the noise field is sampled.
let sc1 = 16
let sc2 = 3
let sc3 = 2.2
// Spacing in pixels between neighbouring lines.
let grid = 8
let canvas = document.querySelector("canvas")
canvas.height = size
canvas.width = size
const cssSize = size + "px"
canvas.style.height = cssSize
canvas.style.width = cssSize
let ctx = canvas.getContext("2d")
// Animation loop: repaints the background, then draws one short line per grid point.
// Four noise samples taken at the same (x, y) but different time offsets give the
// line's x/y displacement, its lightness and its hue.
let rec = () => {
    const phase = Date.now() / 5000
    ctx.fillRect(0, 0, size, size)
    for (let px = grid; px < size; px += grid) {
        const sampleX = px / size / sc3
        for (let py = grid; py < size; py += grid) {
            const sampleY = py / size / sc3
            const dx = noise3d(sampleX, sampleY, phase) * 2 - 1
            const dy = noise3d(sampleX, sampleY, phase + 3882) * 2 - 1
            const lightness = noise3d(sampleX, sampleY, phase + 51235)
            const hue = noise3d(sampleX, sampleY, phase + 1114)
            ctx.strokeStyle = `oklch(${lightness * 50 + 50}% 80% ${hue * 360}deg)`
            ctx.lineWidth = 2
            ctx.beginPath()
            ctx.moveTo(px + dx * grid * sc2, py + dy * grid * sc2)
            ctx.lineTo(px + (dx ** 2) * grid * sc1, py + (dy ** 2) * grid * sc1)
            ctx.stroke()
        }
    }
    requestAnimationFrame(rec)
}
rec()
</script>

View File

@ -250,7 +250,7 @@ const processExperiments = async () => {
title: page.data.title,
description: page.data.description,
html: page.content,
timestamp: dayjs(await fsp.stat(indexPath).then(x => x.mtimeMs))
timestamp: dayjs(await fsp.stat(path.join(subdirectory, "index.html")).then(x => x.mtimeMs))
})
return indexPath
},
@ -388,7 +388,6 @@ const fetchMicroblog = async () => {
content: post.object.content,
i
})))
}
const runOpenring = async () => {

View File

@ -24,6 +24,11 @@ html(lang="en")
meta(name="viewport", content="width=device-width, initial-scale=1.0")
if description
meta(name="description", content=description)
meta(property="og:description", content=description)
meta(property="og:title", content=title)
meta(property="og:type", content="website")
meta(property="og:url", content=`https://${domain}`)
meta(property="og:site_name", content=name)
link(rel="manifest", href="/assets/manifest.webmanifest")
link(rel="shortcut icon", href="/assets/images/logo256.png", type="image/png")
meta(content=`https://${domain}/assets/images/logo256.png`, property="og:image")