From 91b992428eaa2f9a5ea2e1ab958c3cac1482ab26 Mon Sep 17 00:00:00 2001 From: osmarks Date: Sun, 14 Apr 2024 14:52:36 +0100 Subject: [PATCH] article tweak & highlight sidenotes --- blog/ml-workstation.md | 8 +++++--- src/page.js | 20 ++++++++++++++++++++ src/style.sass | 6 ++++++ 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/blog/ml-workstation.md b/blog/ml-workstation.md index 2e5ceeb..365d949 100644 --- a/blog/ml-workstation.md +++ b/blog/ml-workstation.md @@ -2,7 +2,7 @@ title: So You Want A Cheap ML Workstation description: How to run local AI slightly more cheaply than with a prebuilt system. Somewhat opinionated. created: 25/02/2024 -updated: 26/03/2024 +updated: 14/04/2024 slug: mlrig --- @@ -61,7 +61,7 @@ Native BF16 support is important too, but Ampere and Ada Lovelace both have this ### Multi-GPU -You can run two graphics cards in a consumer system without any particularly special requirements - just make sure your power supply [can handle it](#power-consumption) and that you get a mainboard with PCIe slots with enough spacing between them. Each GPU will run with 8 PCIe lanes, via PCIe bifurcation. Any parallelizable workload which fits onto a single card should work at almost double speed with data parallelism, and larger models can be loaded across both via pipeline or tensor parallelism. Note that the latter requires fast interconnect between the GPUs. To spite users[^9], only the RTX 3090 has NVLink, which provides about 50GB/s (each direction) between GPUs[^8], and only workstation GPUs have PCIe P2P enabled, which reduces latency and increases bandwidth when using standard PCIe between two GPUs. However, you can get away without either of these if you don't need more than about 12GB/s (each direction) between GPUs, which I am told you usually don't. +You can run two graphics cards in a consumer system without any particularly special requirements - just make sure your power supply [can handle it](#power-consumption) and that you get a mainboard with PCIe slots with enough spacing between them. Each GPU will run with 8 PCIe lanes, via PCIe bifurcation. Any parallelizable workload which fits onto a single card should work at almost double speed with data parallelism, and larger models can be loaded across both via pipeline or tensor parallelism. Note that the latter requires fast interconnect between the GPUs. To spite users[^9], only the RTX 3090 has NVLink, which provides about 50GB/s (each direction) between GPUs[^8], and only workstation GPUs have PCIe P2P enabled[^15], which reduces latency and increases bandwidth when using standard PCIe between two GPUs. However, you can get away without either of these if you don't need more than about 12GB/s (each direction) between GPUs, which I am told you usually don't. Technically, you *can* plug in more GPUs than this (up to 4), but they'll have less bandwidth and messing around with riser cables is usually necessary. @@ -144,4 +144,6 @@ They describe somewhat horrifying electrical engineering problems due to using s [^13]: This is not hard to fix with aftermarket fans and a 3D printer and/or zip ties. -[^14]: You should be able to hold weights in FP16 and do the maths in FP32, giving you FP32 speeds instead of the horrible slowdown, though. \ No newline at end of file +[^14]: You should be able to hold weights in FP16 and do the maths in FP32, giving you FP32 speeds instead of the horrible slowdown, though. + +[^15]: Geohotz/Tinygrad now has a [patch](https://github.com/tinygrad/open-gpu-kernel-modules) to the open-source kernel module which makes it work, at least on 3090s and 4090s, by hacking it into using native PCIe capabilities which are retained. \ No newline at end of file diff --git a/src/page.js b/src/page.js index 82da967..4f27a0a 100644 --- a/src/page.js +++ b/src/page.js @@ -500,6 +500,16 @@ if (sidenotes && footnotes) { } rendered = false } + + for (const item of sidenotes.querySelectorAll(".footnote-item")) { + const link = article.querySelector(`#${item.id.replace(/^fn/, "fnref")}`) + link.addEventListener("mouseover", () => { + item.classList.add("hl2") + }) + link.addEventListener("mouseleave", () => { + item.classList.remove("hl2") + }) + } } window.onresize = relayout @@ -517,10 +527,20 @@ if (sidenotes && footnotes) { }) } +let previousHighlight const fixDetailsSummary = () => { const el = document.getElementById(window.location.hash.slice(1)) var parent = el if (!el) return + console.log("got", el) + if (el.classList.contains("footnote-item")) { + if (previousHighlight) { + previousHighlight.classList.remove("hl1") + } + console.log("is footnote item", el) + el.classList.add("hl1") + previousHighlight = el + } while (parent.parentElement) { if (parent.nodeName === "DETAILS") { parent.setAttribute("open", true) diff --git a/src/style.sass b/src/style.sass index 81852d7..d5348c1 100644 --- a/src/style.sass +++ b/src/style.sass @@ -233,6 +233,12 @@ blockquote .sidenotes-container display: block +$hl-border: 3px +.footnote-item.hl1, .footnote-item.hl2 + border-top: solid $hl-border orange + border-bottom: solid $hl-border orange + margin-top: -$hl-border + @media (min-width: calc(2 * $content-margin + $content-width + $sidenotes-width + $navbar-width)) // fullwidth 3-pane layout body